date:20161006

Re: [Qemu-devel] [RFC 1/4] spapr_pci: Delegate placement of PCI host bridges to machine type

2016-10-06 Thread Alexey Kardashevskiy

On 07/10/16 16:10, David Gibson wrote:
> On Fri, Oct 07, 2016 at 02:57:43PM +1100, Alexey Kardashevskiy wrote:
>> On 06/10/16 14:03, David Gibson wrote:
>>> The 'spapr-pci-host-bridge' represents the virtual PCI host bridge (PHB)
>>> for a PAPR guest.  Unlike on x86, it's routine on Power (both bare metal
>>> and PAPR guests) to have numerous independent PHBs, each controlling a
>>> separate PCI domain.
>>>
>>> There are two ways of configuring the spapr-pci-host-bridge device: first
>>> it can be done fully manually, specifying the locations and sizes of all
>>> the IO windows.  This gives the most control, but is very awkward with 6
>>> mandatory parameters.  Alternatively just an "index" can be specified
>>> which essentially selects from an array of predefined PHB locations.
>>> The PHB at index 0 is automatically created as the default PHB.
>>>
>>> The current set of default locations causes some problems for guests with
>>> large RAM (> 1 TiB) or PCI devices with very large BARs (e.g. big nVidia
>>> GPGPU cards via VFIO).  Obviously, for migration we can only change the
>>> locations on a new machine type, however.
>>>
>>> This is awkward, because the placement is currently decided within the
>>> spapr-pci-host-bridge code, so it breaks abstraction to look inside the
>>> machine type version.
>>>
>>> So, this patch delegates the "default mode" PHB placement from the
>>> spapr-pci-host-bridge device back to the machine type via a public method
>>> in sPAPRMachineClass.  It's still a bit ugly, but it's about the best we
>>> can do.
>>>
>>> For now, this just changes where the calculation is done.  It doesn't
>>> change the actual location of the host bridges, or any other behaviour.
>>>
>>> Signed-off-by: David Gibson 
>>> ---
>>>  hw/ppc/spapr.c  | 34 ++
>>>  hw/ppc/spapr_pci.c  | 22 --
>>>  include/hw/pci-host/spapr.h | 11 +--
>>>  include/hw/ppc/spapr.h  |  4 
>>>  4 files changed, 47 insertions(+), 24 deletions(-)
>>>
>>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
>>> index 03e3803..f6e9c2a 100644
>>> --- a/hw/ppc/spapr.c
>>> +++ b/hw/ppc/spapr.c
>>> @@ -2370,6 +2370,39 @@ static HotpluggableCPUList 
>>> *spapr_query_hotpluggable_cpus(MachineState *machine)
>>>  return head;
>>>  }
>>>  
>>> +static void spapr_phb_placement(sPAPRMachineState *spapr, uint32_t index,
>>> +uint64_t *buid, hwaddr *pio, hwaddr 
>>> *pio_size,
>>> +hwaddr *mmio, hwaddr *mmio_size,
>>> +unsigned n_dma, uint32_t *liobns, Error 
>>> **errp)
>>> +{
>>> +const uint64_t base_buid = 0x800000020000000ULL;
>>> +const hwaddr phb0_base = 0x10000000000ULL; /* 1 TiB */
>>> +const hwaddr phb_spacing = 0x1000000000ULL; /* 64 GiB */
>>> +const hwaddr mmio_offset = 0xa0000000; /* 2 GiB + 512 MiB */
>>> +const hwaddr pio_offset = 0x80000000; /* 2 GiB */
>>> +const uint32_t max_index = 255;
>>> +
>>> +hwaddr phb_base;
>>> +int i;
>>> +
>>> +if (index > max_index) {
>>> +error_setg(errp, "\"index\" for PAPR PHB is too large (max %u)",
>>> +   max_index);
>>> +return;
>>> +}
>>> +
>>> +*buid = base_buid + index;
>>> +for (i = 0; i < n_dma; ++i) {
>>> +liobns[i] = SPAPR_PCI_LIOBN(index, i);
>>> +}
>>> +
>>> +phb_base = phb0_base + index * phb_spacing;
>>> +*pio = phb_base + pio_offset;
>>> +*pio_size = SPAPR_PCI_IO_WIN_SIZE;
>>> +*mmio = phb_base + mmio_offset;
>>> +*mmio_size = SPAPR_PCI_MMIO_WIN_SIZE;
>>> +}
>>> +
>>>  static void spapr_machine_class_init(ObjectClass *oc, void *data)
>>>  {
>>>  MachineClass *mc = MACHINE_CLASS(oc);
>>> @@ -2406,6 +2439,7 @@ static void spapr_machine_class_init(ObjectClass *oc, 
>>> void *data)
>>>  mc->query_hotpluggable_cpus = spapr_query_hotpluggable_cpus;
>>>  fwc->get_dev_path = spapr_get_fw_dev_path;
>>>  nc->nmi_monitor_handler = spapr_nmi;
>>> +smc->phb_placement = spapr_phb_placement;
>>>  }
>>>  
>>>  static const TypeInfo spapr_machine_info = {
>>> diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
>>> index 4f00865..c0fc964 100644
>>> --- a/hw/ppc/spapr_pci.c
>>> +++ b/hw/ppc/spapr_pci.c
>>> @@ -1311,7 +1311,8 @@ static void spapr_phb_realize(DeviceState *dev, Error 
>>> **errp)
>>>  sphb->ddw_enabled ? SPAPR_PCI_DMA_MAX_WINDOWS : 1;
>>>  
>>>  if (sphb->index != (uint32_t)-1) {
>>> -hwaddr windows_base;
>>> +sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
>>> +Error *local_err = NULL;
>>>  
>>>  if ((sphb->buid != (uint64_t)-1) || (sphb->dma_liobn[0] != 
>>> (uint32_t)-1)
>>>  || (sphb->dma_liobn[1] != (uint32_t)-1 && windows_supported == 
>>> 2)
>>> @@ -1322,21 +1323,14 @@ static void spapr_phb_realize(DeviceState *dev, 
>>> Error **errp)
>>>  return;
>>>  }
>>>  
>>> -if (sphb->index

Re: [Qemu-devel] [RFC v2] libvirt vGPU QEMU integration

2016-10-06 Thread Kirti Wankhede

Ping..

Pulling the questions at the top.

>> Will libvirt report 'description' RO attribute, its output would be
>> string, so that user could be able to see the configuration of that
>> profile?
>>
>
> Daniel,
> Waiting for your input on this.
>
>> We can have 'class' as optional attribute. So Intel don't have to
>> provide 'class' attribute and they don't have to specify mandatory
>> attributes of that class. We would provide 'class' attribute and provide
>> mandatory attributes.
>

Thanks,
Kirti


On 10/3/2016 1:50 PM, Kirti Wankhede wrote:
> 
> 
> On 9/30/2016 10:49 AM, Kirti Wankhede wrote:
>>
> ...
> 
>>> Hi Daniel,
>>>
>>> Here you are proposing to add a class named "gpu", which will make all 
>>> those gpu
>>> related attributes mandatory, which libvirt can allow user to better
>>> parse/present a particular mdev configuration?
>>>
>>> I am just wondering if there is another option that we just make all 
>>> those
>>> attributes that a mdev device can have as optional but still meaningful 
>>> to
>>> libvirt, so libvirt can still parse / recognize them as an class "mdev".
>>
>> 'mdev' isn't a class - mdev is the name of the kernel module. The class
>> refers to the broad capability of the device. class would be things
>> like "gpu", "nic", "fpga" or other such things. The point of the class
>> is to identify which other attributes will be considered mandatory.
>>
>>
>
> Thanks Daniel. This class definition makes sense to me.
>
> However I'm not sure whether we should define such common mandatory 
> attributes
> of a 'gpu' class now. Intel will go with a 2's power sharing of type 
> definition... actual
> type name to be finalized, but an example looks like below:
>
> [GVTG-SKL-x2]: available instances (2)
> [GVTG-SKL-x4]: available instances (4)
> [GVTG-SKL-x8]: available instances (8)
> ...
>
> User can create different types of vGPUs simultaneously. A GVTG-SKL-x2 
> type
> vGPU will get half of the physical GPU resource, while a GVTG-SKL-x4 type 
> will
> get a quarter. However it's unclear to me how we want to enumerate those
> resources into resolution or heads. I feel it'd be more reasonable for us 
> to push
> initial libvirt mdev support w/o vgpu specific class definition, until we 
> see
> a clear value of doing so (at that time we then follow Daniel's guideline 
> to define
> mandatory attributes common to all GPU vendors).

 Libvirt won't report arbitrary vendor define attributes. So if we are not
 going to define a gpu class & associated attributes, then there will be
 no reporting of the 'heads', 'resolution', 'fb_length' data described
 above.

>>>
>>> yes, that's my point. I think nvidia may put them into the 'description' 
>>> attribute
>>> just for descriptive purpose for now.
>>
>>
>> Will libvirt report 'description' RO attribute, its output would be
>> string, so that user could be able to see the configuration of that
>> profile?
>>
> 
> Daniel,
> Waiting for your input on this.
> 
>> We can have 'class' as optional attribute. So Intel don't have to
>> provide 'class' attribute and they don't have to specify mandatory
>> attributes of that class. We would provide 'class' attribute and provide
>> mandatory attributes.
> 
> 
> Thanks,
> Kirti
>

Re: [Qemu-devel] [RFC 1/4] spapr_pci: Delegate placement of PCI host bridges to machine type

2016-10-06 Thread David Gibson

On Fri, Oct 07, 2016 at 02:57:43PM +1100, Alexey Kardashevskiy wrote:
> On 06/10/16 14:03, David Gibson wrote:
> > The 'spapr-pci-host-bridge' represents the virtual PCI host bridge (PHB)
> > for a PAPR guest.  Unlike on x86, it's routine on Power (both bare metal
> > and PAPR guests) to have numerous independent PHBs, each controlling a
> > separate PCI domain.
> > 
> > There are two ways of configuring the spapr-pci-host-bridge device: first
> > it can be done fully manually, specifying the locations and sizes of all
> > the IO windows.  This gives the most control, but is very awkward with 6
> > mandatory parameters.  Alternatively just an "index" can be specified
> > which essentially selects from an array of predefined PHB locations.
> > The PHB at index 0 is automatically created as the default PHB.
> > 
> > The current set of default locations causes some problems for guests with
> > large RAM (> 1 TiB) or PCI devices with very large BARs (e.g. big nVidia
> > GPGPU cards via VFIO).  Obviously, for migration we can only change the
> > locations on a new machine type, however.
> > 
> > This is awkward, because the placement is currently decided within the
> > spapr-pci-host-bridge code, so it breaks abstraction to look inside the
> > machine type version.
> > 
> > So, this patch delegates the "default mode" PHB placement from the
> > spapr-pci-host-bridge device back to the machine type via a public method
> > in sPAPRMachineClass.  It's still a bit ugly, but it's about the best we
> > can do.
> > 
> > For now, this just changes where the calculation is done.  It doesn't
> > change the actual location of the host bridges, or any other behaviour.
> > 
> > Signed-off-by: David Gibson 
> > ---
> >  hw/ppc/spapr.c  | 34 ++
> >  hw/ppc/spapr_pci.c  | 22 --
> >  include/hw/pci-host/spapr.h | 11 +--
> >  include/hw/ppc/spapr.h  |  4 
> >  4 files changed, 47 insertions(+), 24 deletions(-)
> > 
> > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> > index 03e3803..f6e9c2a 100644
> > --- a/hw/ppc/spapr.c
> > +++ b/hw/ppc/spapr.c
> > @@ -2370,6 +2370,39 @@ static HotpluggableCPUList 
> > *spapr_query_hotpluggable_cpus(MachineState *machine)
> >  return head;
> >  }
> >  
> > +static void spapr_phb_placement(sPAPRMachineState *spapr, uint32_t index,
> > +uint64_t *buid, hwaddr *pio, hwaddr 
> > *pio_size,
> > +hwaddr *mmio, hwaddr *mmio_size,
> > +unsigned n_dma, uint32_t *liobns, Error 
> > **errp)
> > +{
> > +const uint64_t base_buid = 0x800000020000000ULL;
> > +const hwaddr phb0_base = 0x10000000000ULL; /* 1 TiB */
> > +const hwaddr phb_spacing = 0x1000000000ULL; /* 64 GiB */
> > +const hwaddr mmio_offset = 0xa0000000; /* 2 GiB + 512 MiB */
> > +const hwaddr pio_offset = 0x80000000; /* 2 GiB */
> > +const uint32_t max_index = 255;
> > +
> > +hwaddr phb_base;
> > +int i;
> > +
> > +if (index > max_index) {
> > +error_setg(errp, "\"index\" for PAPR PHB is too large (max %u)",
> > +   max_index);
> > +return;
> > +}
> > +
> > +*buid = base_buid + index;
> > +for (i = 0; i < n_dma; ++i) {
> > +liobns[i] = SPAPR_PCI_LIOBN(index, i);
> > +}
> > +
> > +phb_base = phb0_base + index * phb_spacing;
> > +*pio = phb_base + pio_offset;
> > +*pio_size = SPAPR_PCI_IO_WIN_SIZE;
> > +*mmio = phb_base + mmio_offset;
> > +*mmio_size = SPAPR_PCI_MMIO_WIN_SIZE;
> > +}
> > +
> >  static void spapr_machine_class_init(ObjectClass *oc, void *data)
> >  {
> >  MachineClass *mc = MACHINE_CLASS(oc);
> > @@ -2406,6 +2439,7 @@ static void spapr_machine_class_init(ObjectClass *oc, 
> > void *data)
> >  mc->query_hotpluggable_cpus = spapr_query_hotpluggable_cpus;
> >  fwc->get_dev_path = spapr_get_fw_dev_path;
> >  nc->nmi_monitor_handler = spapr_nmi;
> > +smc->phb_placement = spapr_phb_placement;
> >  }
> >  
> >  static const TypeInfo spapr_machine_info = {
> > diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
> > index 4f00865..c0fc964 100644
> > --- a/hw/ppc/spapr_pci.c
> > +++ b/hw/ppc/spapr_pci.c
> > @@ -1311,7 +1311,8 @@ static void spapr_phb_realize(DeviceState *dev, Error 
> > **errp)
> >  sphb->ddw_enabled ? SPAPR_PCI_DMA_MAX_WINDOWS : 1;
> >  
> >  if (sphb->index != (uint32_t)-1) {
> > -hwaddr windows_base;
> > +sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
> > +Error *local_err = NULL;
> >  
> >  if ((sphb->buid != (uint64_t)-1) || (sphb->dma_liobn[0] != 
> > (uint32_t)-1)
> >  || (sphb->dma_liobn[1] != (uint32_t)-1 && windows_supported == 
> > 2)
> > @@ -1322,21 +1323,14 @@ static void spapr_phb_realize(DeviceState *dev, 
> > Error **errp)
> >  return;
> >  }
> >  
> > -if (sphb->index > SPAPR_PCI_MAX_INDEX) {
> > -

Re: [Qemu-devel] [PATCH v4 05/20] ppc/pnv: add a PnvCore object

2016-10-06 Thread David Gibson

On Mon, Oct 03, 2016 at 09:24:41AM +0200, Cédric Le Goater wrote:
> This is largely inspired by sPAPRCPUCore with some simplification, no
> hotplug for instance. But the differences are small and the objects
> could possibly be merged.
> 
> A set of PnvCore objects is added to the PnvChip and the device
> tree is populated looping on these cores.
> 
> Real HW cpu ids are now generated depending on the chip cpu model, the
> chip id and a core mask.
> 
> Signed-off-by: Cédric Le Goater 
> ---
> 
>  I did not introduce a single table to construct both the chip types
>  and the corresponding core types yet. Keeping the idea for later as
>  there might be other types to construct with P9 support.
> 
>  Changes since v3:
> 
>  - removed the usage of cpu_index
>  - removed the setting of the msr_mask
>  
>  Changes since v2:
> 
>  - added P9 support
>  - used error_fatal instead of error_abort when setting the chip
>properties
>  - replaced num_cores by nr_cores
>  - removed gservers properties that were unused on powernv. 
>  - used a 'void *' instead of a 'PnvCore *' to hold core Objects of
>potentially different size.
>  - qom: linked the core Objects to the chip 
>  - moved device tree creation under powernv_populate_chip()
>  - added a 'pir' property' for ease of use
> 
>  Changes since v1:
> 
>  - changed name to PnvCore
>  - changed PnvChip core array type to a 'PnvCore *cores'
>  - introduced real cpu hw ids using a core mask from the chip
>  - reworked powernv_create_core_node() which populates the device tree
>  - added missing "ibm,pa-features" property 
>  - smp_cpus representing threads, used smp_cores instead to create the
>cores in the chip.
>  - removed the use of ppc_get_vcpu_dt_id() 
>  - added "POWER8E" and "POWER8NVL" cpu models to exercise the
>PnvChipClass
> 
>  hw/ppc/Makefile.objs  |   2 +-
>  hw/ppc/pnv.c  | 187 
> ++
>  hw/ppc/pnv_core.c | 186 +
>  include/hw/ppc/pnv.h  |   3 +
>  include/hw/ppc/pnv_core.h |  48 
>  5 files changed, 425 insertions(+), 1 deletion(-)
>  create mode 100644 hw/ppc/pnv_core.c
>  create mode 100644 include/hw/ppc/pnv_core.h
> 
> diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs
> index 8105db7d5600..f8c7d1db9ade 100644
> --- a/hw/ppc/Makefile.objs
> +++ b/hw/ppc/Makefile.objs
> @@ -6,7 +6,7 @@ obj-$(CONFIG_PSERIES) += spapr_hcall.o spapr_iommu.o 
> spapr_rtas.o
>  obj-$(CONFIG_PSERIES) += spapr_pci.o spapr_rtc.o spapr_drc.o spapr_rng.o
>  obj-$(CONFIG_PSERIES) += spapr_cpu_core.o
>  # IBM PowerNV
> -obj-$(CONFIG_POWERNV) += pnv.o
> +obj-$(CONFIG_POWERNV) += pnv.o pnv_core.o
>  ifeq ($(CONFIG_PCI)$(CONFIG_PSERIES)$(CONFIG_LINUX), yyy)
>  obj-y += spapr_pci_vfio.o
>  endif
> diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
> index 758c849702a0..2376bb222918 100644
> --- a/hw/ppc/pnv.c
> +++ b/hw/ppc/pnv.c
> @@ -27,6 +27,7 @@
>  #include "hw/ppc/fdt.h"
>  #include "hw/ppc/ppc.h"
>  #include "hw/ppc/pnv.h"
> +#include "hw/ppc/pnv_core.h"
>  #include "hw/loader.h"
>  #include "exec/address-spaces.h"
>  #include "qemu/cutils.h"
> @@ -74,14 +75,162 @@ static void powernv_populate_memory_node(void *fdt, int 
> chip_id, hwaddr start,
>  _FDT((fdt_setprop_cell(fdt, off, "ibm,chip-id", chip_id)));
>  }
>  
> +static int get_cpus_node(void *fdt)
> +{
> +int cpus_offset = fdt_path_offset(fdt, "/cpus");
> +
> +if (cpus_offset < 0) {
> +cpus_offset = fdt_add_subnode(fdt, fdt_path_offset(fdt, "/"),
> +  "cpus");
> +if (cpus_offset) {
> +_FDT((fdt_setprop_cell(fdt, cpus_offset, "#address-cells", 
> 0x1)));
> +_FDT((fdt_setprop_cell(fdt, cpus_offset, "#size-cells", 0x0)));
> +}
> +}
> +_FDT(cpus_offset);
> +return cpus_offset;
> +}
> +
> +/*
> + * The PowerNV cores (and threads) need to use real HW ids and not an
> + * incremental index like it has been done on other platforms. This HW
> + * id is stored in the CPU PIR, it is used to create cpu nodes in the
> + * device tree, used in XSCOM to address cores and in interrupt
> + * servers.
> + */
> +static void powernv_create_core_node(PnvChip *chip, PnvCore *pc, void *fdt)
> +{
> +CPUState *cs = CPU(DEVICE(pc->threads));
> +DeviceClass *dc = DEVICE_GET_CLASS(cs);
> +PowerPCCPU *cpu = POWERPC_CPU(cs);
> +int smt_threads = ppc_get_compat_smt_threads(cpu);
> +CPUPPCState *env = &cpu->env;
> +PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cs);
> +uint32_t servers_prop[smt_threads];
> +int i;
> +uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40),
> +   0xffffffff, 0xffffffff};
> +uint32_t tbfreq = PNV_TIMEBASE_FREQ;
> +uint32_t cpufreq = 1000000000;
> +uint32_t page_sizes_prop[64];
> +size_t page_sizes_prop_size;
> +const uint8_t pa_features[] = { 24, 0,
> +0xf6, 0x3f, 0xc7, 0xc0,

Re: [Qemu-devel] [PATCH v4 04/20] ppc/pnv: add a PIR handler to PnvChip

2016-10-06 Thread David Gibson

On Mon, Oct 03, 2016 at 09:24:40AM +0200, Cédric Le Goater wrote:
> The Processor Identification Register (PIR) is a register that holds a
> processor identifier which is used for bus transactions (XSCOM) and
> for processor differentiation in multiprocessor systems. It is also used
> in the interrupt vector entries (IVE) to identify the thread serving
> the interrupts.
> 
> P9 and P8 have some differences in the CPU PIR encoding.
> 
> Signed-off-by: Cédric Le Goater 

Reviewed-by: David Gibson 

Looks fine, although it's a bit hard to be sure since I haven't read
the patches which actually use this yet.

> ---
> 
>  Changes since v3 :
> 
>  - added a couple more comments on the bits definition
>   
>  hw/ppc/pnv.c | 30 ++
>  include/hw/ppc/pnv.h |  2 ++
>  2 files changed, 32 insertions(+)
> 
> diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
> index fc930be94f53..758c849702a0 100644
> --- a/hw/ppc/pnv.c
> +++ b/hw/ppc/pnv.c
> @@ -239,6 +239,32 @@ static void ppc_powernv_init(MachineState *machine)
>  g_free(chip_typename);
>  }
>  
> +/*
> + *0:21  Reserved - Read as zeros
> + *   22:24  Chip ID
> + *   25:28  Core number
> + *   29:31  Thread ID
> + */
> +static uint32_t pnv_chip_core_pir_p8(PnvChip *chip, uint32_t core_id)
> +{
> +return (chip->chip_id << 7) | (core_id << 3);
> +}
> +
> +/*
> + *0:48  Reserved - Read as zeroes
> + *   49:52  Node ID
> + *   53:55  Chip ID
> + *   56 Reserved - Read as zero
> + *   57:61  Core number
> + *   62:63  Thread ID
> + *
> + * We only care about the lower bits. uint32_t is fine for the moment.
> + */
> +static uint32_t pnv_chip_core_pir_p9(PnvChip *chip, uint32_t core_id)
> +{
> +return (chip->chip_id << 8) | (core_id << 2);
> +}
> +
>  /* Allowed core identifiers on a POWER8 Processor Chip :
>   *
>   * 
> @@ -274,6 +300,7 @@ static void pnv_chip_power8e_class_init(ObjectClass 
> *klass, void *data)
>  k->chip_type = PNV_CHIP_POWER8E;
>  k->chip_cfam_id = 0x221ef0498000ull;  /* P8 Murano DD2.1 */
>  k->cores_mask = POWER8E_CORE_MASK;
> +k->core_pir = pnv_chip_core_pir_p8;
>  dc->desc = "PowerNV Chip POWER8E";
>  }
>  
> @@ -293,6 +320,7 @@ static void pnv_chip_power8_class_init(ObjectClass 
> *klass, void *data)
>  k->chip_type = PNV_CHIP_POWER8;
>  k->chip_cfam_id = 0x220ea0498000ull; /* P8 Venice DD2.0 */
>  k->cores_mask = POWER8_CORE_MASK;
> +k->core_pir = pnv_chip_core_pir_p8;
>  dc->desc = "PowerNV Chip POWER8";
>  }
>  
> @@ -312,6 +340,7 @@ static void pnv_chip_power8nvl_class_init(ObjectClass 
> *klass, void *data)
>  k->chip_type = PNV_CHIP_POWER8NVL;
>  k->chip_cfam_id = 0x120d30498000ull;  /* P8 Naples DD1.0 */
>  k->cores_mask = POWER8_CORE_MASK;
> +k->core_pir = pnv_chip_core_pir_p8;
>  dc->desc = "PowerNV Chip POWER8NVL";
>  }
>  
> @@ -331,6 +360,7 @@ static void pnv_chip_power9_class_init(ObjectClass 
> *klass, void *data)
>  k->chip_type = PNV_CHIP_POWER9;
>  k->chip_cfam_id = 0x100d10498000ull; /* P9 Nimbus DD1.0 */
>  k->cores_mask = POWER9_CORE_MASK;
> +k->core_pir = pnv_chip_core_pir_p9;
>  dc->desc = "PowerNV Chip POWER9";
>  }
>  
> diff --git a/include/hw/ppc/pnv.h b/include/hw/ppc/pnv.h
> index 2c225c928974..c676f800e28e 100644
> --- a/include/hw/ppc/pnv.h
> +++ b/include/hw/ppc/pnv.h
> @@ -56,6 +56,8 @@ typedef struct PnvChipClass {
>  PnvChipType  chip_type;
>  uint64_t chip_cfam_id;
>  uint64_t cores_mask;
> +
> +uint32_t (*core_pir)(PnvChip *chip, uint32_t core_id);
>  } PnvChipClass;
>  
>  #define TYPE_PNV_CHIP_POWER8E TYPE_PNV_CHIP "-POWER8E"

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature

Re: [Qemu-devel] [PATCH v4 03/20] ppc/pnv: add a core mask to PnvChip

2016-10-06 Thread Benjamin Herrenschmidt

On Fri, 2016-10-07 at 15:32 +1100, David Gibson wrote:
> On Mon, Oct 03, 2016 at 09:24:39AM +0200, Cédric Le Goater wrote:
> > This will be used to build real HW ids for the cores and enforce
> some
> > limits on the available cores per chip.
> 
> Is there actually a practical reason to allow the user (or machine
> type) to override the default core mask?

None other than mimicking real HW ... some cores can be disabled
on some chips and we *might* want to mimic that for some test
scenarios.

Cheers,
Ben.

Re: [Qemu-devel] [PATCH v4 01/20] ppc/pnv: add skeleton PowerNV platform

2016-10-06 Thread David Gibson

On Mon, Oct 03, 2016 at 09:24:37AM +0200, Cédric Le Goater wrote:
> From: Benjamin Herrenschmidt 
> 
> The goal is to emulate a PowerNV system at the level of the skiboot
> firmware, which loads the OS and provides some runtime services. Power
> Systems have a lower firmware (HostBoot) that does low level system
> initialization, like DRAM training. This is beyond the scope of what
> qemu will address in a PowerNV guest.
> 
> No devices yet, not even an interrupt controller. Just to get started,
> some RAM to load the skiboot firmware, the kernel and initrd. The
> device tree is fully created in the machine reset op.
> 
> Signed-off-by: Benjamin Herrenschmidt 
> [clg: - updated for qemu-2.7
>   - replaced fprintf by error_report
>   - used a common definition of _FDT macro
>   - removed VMStateDescription as migration is not yet supported
>   - added IBM Copyright statements
>   - reworked kernel_filename handling
>   - merged PnvSystem and sPowerNVMachineState
>   - removed PHANDLE_XICP
>   - added ppc_create_page_sizes_prop helper
>   - removed nmi support
>   - removed kvm support
>   - updated powernv machine to version 2.8
>   - removed chips and cpus; they will be provided in other patches
>   - added a machine reset routine to initialize the device tree (also)
>   - french has a squelette and english a skeleton.
>   - improved commit log.
>   - reworked prototypes parameters
>   - added a check on the ram size (thanks to Michael Ellerman)
>   - fixed chip-id cell
>   - changed MAX_CPUS to 2048
>   - simplified memory node creation to one node only
>   - removed machine version
>   - rewrote the device tree creation with the fdt "rw" routines
>   - s/sPowerNVMachineState/PnvMachineState/
>   - etc.]
> Signed-off-by: Cédric Le Goater 
> ---
>  Changes since v3:
> 
>  - fixed printf format for hwaddr
>  - used fdt_pack() before writing the tree in memory 
>  - removed the requirement on having a kernel loaded as running with
>just a firmware is fine. We will need to discuss the inclusion of
>the file skiboot.lid under qemu.

Yes, this isn't terribly useful without it.  The normal procedure for
new roms is this:

   1. Get the upstream git tree for the ROM mirrored to qemu.org
   2. Add a git submodule under roms/ referencing the git mirror on
  qemu.org
   3. Add a pre-built ROM binary to pc-bios/
   4. Add a brief description of the ROM, including upstream git URL
  to pc-bios/README

Steps 2, 3 & 4 can (and usually should) be a single commit.

This code is looking close enough, that having a usable rom image is
probably the last thing stopping merge, at least of these initial
patches.

Probably best to get underway with the rom inclusion ASAP.

>  Changes since v2:
> 
>  - some more copyright header cleanups
>  - remove fdt_addr field from PnvMachineState
> 
>  Changes since v1:
> 
>  - changed MAX_CPUS to 2048
>  - simplified memory node creation to one node only
>  - removed machine version 
>  - rewrote the device tree creation with the fdt "rw" routines
>  - s/sPowerNVMachineState/PnvMachineState/
>  - block_default_type is back to IF_IDE because of the AHCI device
> 
>  default-configs/ppc64-softmmu.mak |   1 +
>  hw/ppc/Makefile.objs  |   2 +
>  hw/ppc/pnv.c  | 223 
> ++
>  include/hw/ppc/pnv.h  |  38 +++
>  4 files changed, 264 insertions(+)
>  create mode 100644 hw/ppc/pnv.c
>  create mode 100644 include/hw/ppc/pnv.h
> 
> diff --git a/default-configs/ppc64-softmmu.mak 
> b/default-configs/ppc64-softmmu.mak
> index db5a4d6f5eea..67a9bcaa67fa 100644
> --- a/default-configs/ppc64-softmmu.mak
> +++ b/default-configs/ppc64-softmmu.mak
> @@ -39,6 +39,7 @@ CONFIG_I8259=y
>  CONFIG_XILINX=y
>  CONFIG_XILINX_ETHLITE=y
>  CONFIG_PSERIES=y
> +CONFIG_POWERNV=y
>  CONFIG_PREP=y
>  CONFIG_MAC=y
>  CONFIG_E500=y
> diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs
> index 99a0d4e581bf..8105db7d5600 100644
> --- a/hw/ppc/Makefile.objs
> +++ b/hw/ppc/Makefile.objs
> @@ -5,6 +5,8 @@ obj-$(CONFIG_PSERIES) += spapr.o spapr_vio.o spapr_events.o
>  obj-$(CONFIG_PSERIES) += spapr_hcall.o spapr_iommu.o spapr_rtas.o
>  obj-$(CONFIG_PSERIES) += spapr_pci.o spapr_rtc.o spapr_drc.o spapr_rng.o
>  obj-$(CONFIG_PSERIES) += spapr_cpu_core.o
> +# IBM PowerNV
> +obj-$(CONFIG_POWERNV) += pnv.o
>  ifeq ($(CONFIG_PCI)$(CONFIG_PSERIES)$(CONFIG_LINUX), yyy)
>  obj-y += spapr_pci_vfio.o
>  endif
> diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
> new file mode 100644
> index ..02fc4826baa4
> --- /dev/null
> +++ b/hw/ppc/pnv.c
> @@ -0,0 +1,223 @@
> +/*
> + * QEMU PowerPC PowerNV machine model
> + *
> + * Copyright (c) 2016, IBM Corporation.
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Fo

Re: [Qemu-devel] [PATCH v4 01/20] ppc/pnv: add skeleton PowerNV platform

2016-10-06 Thread David Gibson

On Fri, Oct 07, 2016 at 03:14:48PM +1100, David Gibson wrote:
> On Mon, Oct 03, 2016 at 09:24:37AM +0200, Cédric Le Goater wrote:
> > From: Benjamin Herrenschmidt 
> > 
> > The goal is to emulate a PowerNV system at the level of the skiboot
> > firmware, which loads the OS and provides some runtime services. Power
> > Systems have a lower firmware (HostBoot) that does low level system
> > initialization, like DRAM training. This is beyond the scope of what
> > qemu will address in a PowerNV guest.
> > 
> > No devices yet, not even an interrupt controller. Just to get started,
> > some RAM to load the skiboot firmware, the kernel and initrd. The
> > device tree is fully created in the machine reset op.
> > 
> > Signed-off-by: Benjamin Herrenschmidt 
> > [clg: - updated for qemu-2.7
> >   - replaced fprintf by error_report
> >   - used a common definition of _FDT macro
> >   - removed VMStateDescription as migration is not yet supported
> >   - added IBM Copyright statements
> >   - reworked kernel_filename handling
> >   - merged PnvSystem and sPowerNVMachineState
> >   - removed PHANDLE_XICP
> >   - added ppc_create_page_sizes_prop helper
> >   - removed nmi support
> >   - removed kvm support
> >   - updated powernv machine to version 2.8
> >   - removed chips and cpus; they will be provided in other patches
> >   - added a machine reset routine to initialize the device tree (also)
> >   - french has a squelette and english a skeleton.
> >   - improved commit log.
> >   - reworked prototypes parameters
> >   - added a check on the ram size (thanks to Michael Ellerman)
> >   - fixed chip-id cell
> >   - changed MAX_CPUS to 2048
> >   - simplified memory node creation to one node only
> >   - removed machine version
> >   - rewrote the device tree creation with the fdt "rw" routines
> >   - s/sPowerNVMachineState/PnvMachineState/
> >   - etc.]
> > Signed-off-by: Cédric Le Goater 
> > ---
> >  Changes since v3:
> > 
> >  - fixed printf format for hwaddr
> >  - used fdt_pack() before writing the tree in memory 
> >  - removed the requirement on having a kernel loaded as running with
> >just a firmware is fine. We will need to discuss the inclusion of
> >the file skiboot.lid under qemu.
> 
> Yes, this isn't terribly useful without it.  The normal procedure for
> new roms is this:
> 
>1. Get the upstream git tree for the ROM mirrored to qemu.org
>2. Add a git submodule under roms/ referencing the git mirror on
>   qemu.org
>3. Add a pre-built ROM binary to pc-bios/
>4. Add a brief description of the ROM, including upstream git URL
>   to pc-bios/README
> 
> Steps 2, 3 & 4 can (and usually should) be a single commit.
> 
> This code is looking close enough, that having a usable rom image is
> probably the last thing stopping merge, at least of these initial
> patches.
> 
> Probably best to get underway with the rom inclusion ASAP.

Sorry, forgot to add, with the exception of the ROM question, and the
one tiny code nit:

Reviewed-by: David Gibson 

> 
> >  Changes since v2:
> > 
> >  - some more copyright header cleanups
> >  - remove fdt_addr field from PnvMachineState
> > 
> >  Changes since v1:
> > 
> >  - changed MAX_CPUS to 2048
> >  - simplified memory node creation to one node only
> >  - removed machine version 
> >  - rewrote the device tree creation with the fdt "rw" routines
> >  - s/sPowerNVMachineState/PnvMachineState/
> >  - block_default_type is back to IF_IDE because of the AHCI device
> > 
> >  default-configs/ppc64-softmmu.mak |   1 +
> >  hw/ppc/Makefile.objs  |   2 +
> >  hw/ppc/pnv.c  | 223 
> > ++
> >  include/hw/ppc/pnv.h  |  38 +++
> >  4 files changed, 264 insertions(+)
> >  create mode 100644 hw/ppc/pnv.c
> >  create mode 100644 include/hw/ppc/pnv.h
> > 
> > diff --git a/default-configs/ppc64-softmmu.mak 
> > b/default-configs/ppc64-softmmu.mak
> > index db5a4d6f5eea..67a9bcaa67fa 100644
> > --- a/default-configs/ppc64-softmmu.mak
> > +++ b/default-configs/ppc64-softmmu.mak
> > @@ -39,6 +39,7 @@ CONFIG_I8259=y
> >  CONFIG_XILINX=y
> >  CONFIG_XILINX_ETHLITE=y
> >  CONFIG_PSERIES=y
> > +CONFIG_POWERNV=y
> >  CONFIG_PREP=y
> >  CONFIG_MAC=y
> >  CONFIG_E500=y
> > diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs
> > index 99a0d4e581bf..8105db7d5600 100644
> > --- a/hw/ppc/Makefile.objs
> > +++ b/hw/ppc/Makefile.objs
> > @@ -5,6 +5,8 @@ obj-$(CONFIG_PSERIES) += spapr.o spapr_vio.o spapr_events.o
> >  obj-$(CONFIG_PSERIES) += spapr_hcall.o spapr_iommu.o spapr_rtas.o
> >  obj-$(CONFIG_PSERIES) += spapr_pci.o spapr_rtc.o spapr_drc.o spapr_rng.o
> >  obj-$(CONFIG_PSERIES) += spapr_cpu_core.o
> > +# IBM PowerNV
> > +obj-$(CONFIG_POWERNV) += pnv.o
> >  ifeq ($(CONFIG_PCI)$(CONFIG_PSERIES)$(CONFIG_LINUX), yyy)
> >  obj-y += spapr_pci_vfio.o
> >  endif
> > diff --git a/hw/ppc/pnv.c

Re: [Qemu-devel] [PATCH v4 03/20] ppc/pnv: add a core mask to PnvChip

2016-10-06 Thread David Gibson

On Mon, Oct 03, 2016 at 09:24:39AM +0200, Cédric Le Goater wrote:
> This will be used to build real HW ids for the cores and enforce some
> limits on the available cores per chip.

Is there actually a practical reason to allow the user (or machine
type) to override the default core mask?

> 
> Signed-off-by: Cédric Le Goater 

Apart from the above and one comment below,

Reviewed-by: David Gibson 

> ---
> 
>  Changes since v3 :
> 
>  - reworked pnv_chip_core_sanitize() to return errors and to check the
>maximum of cores against the instance cores_mask
>   
>  Changes since v2 :
> 
>  - added POWER9 support
>  - removed cores_max 
>  - introduces a pnv_chip_core_sanitize() helper to check the core
>ids_mask and the maximum number of cores
> 
>  hw/ppc/pnv.c | 78 
> +++-
>  include/hw/ppc/pnv.h |  4 +++
>  2 files changed, 81 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
> index 08f72dbdca97..fc930be94f53 100644
> --- a/hw/ppc/pnv.c
> +++ b/hw/ppc/pnv.c
> @@ -227,11 +227,44 @@ static void ppc_powernv_init(MachineState *machine)
>  snprintf(chip_name, sizeof(chip_name), "chip[%d]", CHIP_HWID(i));
>  object_property_add_child(OBJECT(pnv), chip_name, chip, 
> &error_fatal);
>  object_property_set_int(chip, CHIP_HWID(i), "chip-id", &error_fatal);
> +object_property_set_int(chip, smp_cores, "nr-cores", &error_fatal);
> +/*
> + * We could customize cores_mask for the chip here. May be
> + * using a powernv machine property, like 'num-chips'. Let the
> + * chip choose the default for now.

I don't think you need any special mechanism for this.  If you just
remove this explicit assignment the chip default will apply, but the
user can alter it using -global.

> + */
> +object_property_set_int(chip, 0x0, "cores-mask", &error_fatal);
>  object_property_set_bool(chip, true, "realized", &error_fatal);
>  }
>  g_free(chip_typename);
>  }
>  
> +/* Allowed core identifiers on a POWER8 Processor Chip :
> + *
> + * 
> + *  EX1  - Venice only
> + *  EX2  - Venice only
> + *  EX3  - Venice only
> + *  EX4
> + *  EX5
> + *  EX6
> + *  
> + *  EX9  - Venice only
> + *  EX10 - Venice only
> + *  EX11 - Venice only
> + *  EX12
> + *  EX13
> + *  EX14
> + * 
> + */
> +#define POWER8E_CORE_MASK  (0x7070ull)
> +#define POWER8_CORE_MASK   (0x7e7eull)
> +
> +/*
> + * POWER9 has 24 cores, ids starting at 0x20
> + */
> +#define POWER9_CORE_MASK   (0xffffff00000000ull)
> +
>  static void pnv_chip_power8e_class_init(ObjectClass *klass, void *data)
>  {
>  DeviceClass *dc = DEVICE_CLASS(klass);
> @@ -240,6 +273,7 @@ static void pnv_chip_power8e_class_init(ObjectClass 
> *klass, void *data)
>  k->cpu_model = "POWER8E";
>  k->chip_type = PNV_CHIP_POWER8E;
>  k->chip_cfam_id = 0x221ef0498000ull;  /* P8 Murano DD2.1 */
> +k->cores_mask = POWER8E_CORE_MASK;
>  dc->desc = "PowerNV Chip POWER8E";
>  }
>  
> @@ -258,6 +292,7 @@ static void pnv_chip_power8_class_init(ObjectClass 
> *klass, void *data)
>  k->cpu_model = "POWER8";
>  k->chip_type = PNV_CHIP_POWER8;
>  k->chip_cfam_id = 0x220ea0498000ull; /* P8 Venice DD2.0 */
> +k->cores_mask = POWER8_CORE_MASK;
>  dc->desc = "PowerNV Chip POWER8";
>  }
>  
> @@ -276,6 +311,7 @@ static void pnv_chip_power8nvl_class_init(ObjectClass 
> *klass, void *data)
>  k->cpu_model = "POWER8NVL";
>  k->chip_type = PNV_CHIP_POWER8NVL;
>  k->chip_cfam_id = 0x120d30498000ull;  /* P8 Naples DD1.0 */
> +k->cores_mask = POWER8_CORE_MASK;
>  dc->desc = "PowerNV Chip POWER8NVL";
>  }
>  
> @@ -294,6 +330,7 @@ static void pnv_chip_power9_class_init(ObjectClass 
> *klass, void *data)
>  k->cpu_model = "POWER9";
>  k->chip_type = PNV_CHIP_POWER9;
>  k->chip_cfam_id = 0x100d10498000ull; /* P9 Nimbus DD1.0 */
> +k->cores_mask = POWER9_CORE_MASK;
>  dc->desc = "PowerNV Chip POWER9";
>  }
>  
> @@ -304,13 +341,52 @@ static const TypeInfo pnv_chip_power9_info = {
>  .class_init= pnv_chip_power9_class_init,
>  };
>  
> +static void pnv_chip_core_sanitize(PnvChip *chip, Error **errp)
> +{
> +PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip);
> +int cores_max;
> +
> +/*
> + * No custom mask for this chip, let's use the default one from *
> + * the chip class
> + */
> +if (!chip->cores_mask) {
> +chip->cores_mask = pcc->cores_mask;
> +}
> +
> +/* filter alien core ids ! some are reserved */
> +if ((chip->cores_mask & pcc->cores_mask) != chip->cores_mask) {
> +error_setg(errp, "warning: invalid core mask for chip !");
> +return;
> +}
> +chip->cores_mask &= pcc->cores_mask;
> +
> +/* now that we have a sane layout, let check the number of cores */
> +cores_max = hweight_long(chip->cores_mask);
> +if (chip->nr_cores > cores_max) {
> +error_setg(errp, "warnin

Re: [Qemu-devel] [PATCH v4 02/20] ppc/pnv: add a PnvChip object

2016-10-06 Thread David Gibson

On Mon, Oct 03, 2016 at 09:24:38AM +0200, Cédric Le Goater wrote:
> This is an abstraction of a POWER8 chip which is a set of cores
> plus other 'units', like the pervasive unit, the interrupt controller,
> the memory controller, the on-chip microcontroller, etc. The whole can
> be seen as a socket. It depends on a cpu model and its characteristics:
> max cores, specific inits are defined in a PnvChipClass.
> 
> We start with a near-empty PnvChip with only a few cpu constants
> which we will grow in the subsequent patches with the controllers
> required to run the system.
> 
> The Chip CFAM (Common FRU Access Module) ID gives the model of the
> chip and its version number. It is generally the first thing firmwares
> fetch, available at XSCOM PCB address 0xf000f, to start initialization.
> 
> Signed-off-by: Cédric Le Goater 

Reviewed-by: David Gibson 

I do have one small suggested change below, but it's not that important.

> ---
> 
>  chip_type could possibly be removed or calculated from the attribute
>  chip_cfam_id. Let's keep it for now and see how the patchset evolves.
>  This is getting big, maybe should move the code to hw/ppc/pnv_chip.c ?
> 
>  Changes since v3:
> 
>  - removed PnvChipPower* types
>  - removed realize ops of PnvChip
>  - replaced scanf by qemu_strtoul
> 
>  Changes since v2:
> 
>  - forced a POWER8 cpu model if none is specified and check that a
>PnvChip type exist for it
>  - did some renaming to be consistent with the cpu model names
>  - added POWER9 chip
>  - removed empty realize op
>  - renamed attribute chip_f000f to chip_cfam_id
>  - used error_fatal instead of error_abort when setting the chip
>properties
>  - introduced a powernv_populate_chip() routine
> 
>  Changes since v1:
>  
>  - introduced a PnvChipClass depending on the cpu model. It also
>provides some chip constants used by devices, like the cpu model hw
>id (f000f), a enum type (not sure this is useful yet), a custom
>realize ops for customization.
>  - the num-chips property can be configured on the command line.
>  
>  hw/ppc/pnv.c | 194 
> +--
>  include/hw/ppc/pnv.h |  61 
>  2 files changed, 250 insertions(+), 5 deletions(-)
> 
> diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
> index 02fc4826baa4..08f72dbdca97 100644
> --- a/hw/ppc/pnv.c
> +++ b/hw/ppc/pnv.c
> @@ -74,6 +74,16 @@ static void powernv_populate_memory_node(void *fdt, int 
> chip_id, hwaddr start,
>  _FDT((fdt_setprop_cell(fdt, off, "ibm,chip-id", chip_id)));
>  }
>  
> +static void powernv_populate_chip(PnvChip *chip, void *fdt)
> +{
> +/* Put all the memory in one node on chip 0 until we find a way to
> + * specify different ranges for each chip
> + */
> +if (chip->chip_id == 0) {
> +powernv_populate_memory_node(fdt, chip->chip_id, 0, ram_size);
> +}
> +}
> +
>  static void *powernv_create_fdt(PnvMachineState *pnv,
>  const char *kernel_cmdline)
>  {
> @@ -81,6 +91,7 @@ static void *powernv_create_fdt(PnvMachineState *pnv,
>  char *buf;
>  const char plat_compat[] = "qemu,powernv\0ibm,powernv";
>  int off;
> +int i;
>  
>  fdt = g_malloc0(FDT_MAX_SIZE);
>  _FDT((fdt_create_empty_tree(fdt, FDT_MAX_SIZE)));
> @@ -115,11 +126,10 @@ static void *powernv_create_fdt(PnvMachineState *pnv,
> &end_prop, sizeof(end_prop;
>  }
>  
> -/* Put all the memory in one node on chip 0 until we find a way to
> - * specify different ranges for each chip
> - */
> -powernv_populate_memory_node(fdt, 0, 0, ram_size);
> -
> +/* Populate device tree for each chip */
> +for (i = 0; i < pnv->num_chips; i++) {
> +powernv_populate_chip(pnv->chips[i], fdt);
> +}
>  return fdt;
>  }
>  
> @@ -147,6 +157,8 @@ static void ppc_powernv_init(MachineState *machine)
>  char *fw_filename;
>  long fw_size;
>  long kernel_size;
> +int i;
> +char *chip_typename;
>  
>  /* allocate RAM */
>  if (ram_size < (1 * G_BYTE)) {
> @@ -191,6 +203,172 @@ static void ppc_powernv_init(MachineState *machine)
>  exit(1);
>  }
>  }
> +
> +/* We need some cpu model to instantiate the PnvChip class */
> +if (machine->cpu_model == NULL) {
> +machine->cpu_model = "POWER8";
> +}
> +
> +/* Create the processor chips */
> +chip_typename = g_strdup_printf(TYPE_PNV_CHIP "-%s", machine->cpu_model);
> +if (!object_class_by_name(chip_typename)) {
> +error_report("qemu: invalid CPU model '%s' for %s machine",
> + machine->cpu_model, MACHINE_GET_CLASS(machine)->name);
> +exit(1);
> +}
> +
> +pnv->chips = g_new0(PnvChip *, pnv->num_chips);
> +for (i = 0; i < pnv->num_chips; i++) {
> +char chip_name[32];
> +Object *chip = object_new(chip_typename);
> +
> +pnv->chips[i] = PNV_CHIP(chip);
> +
> +snprint

Re: [Qemu-devel] [PATCH v5] MC146818 RTC: coordinate guest clock base to destination host after migration

2016-10-06 Thread Michael S. Tsirkin

On Mon, Sep 26, 2016 at 08:41:01PM +0800, Junlian Bell wrote:
> qemu tracks guest time based on vector [base_rtc, last_update], in which
> last_update stands for a monotonic tick which is actually uptime of the
> host.
> according to rtc implementation codes of recent releases and upstream,
> after
> migration, the time base vector [base_rtc, last_update] isn't updated to
> coordinate with the destination host, i.e. qemu doesn't update last_update
> to
> uptime of the destination host.
> what problem have we got because of this bug? after migration, guest time
> may
> jump back to several days ago, that will make some critical business
> applications,
> such as lotus notes, malfunction.
> this patch is trying to fix the problem. first, when vmsave in progress,
> we
> rtc_update_time to refresh time stamp in cmos array, then during
> vmrestore,
> we rtc_set_time to update qemu base_rtc and last_update variable according
> to time
> stamp in cmos array.
> 
> Signed-off-by: Junlian Bell 

Seems to make sense superficially but I'm not a good judge
of the timer code.

Paolo, any comment on this?

Acked-by: Michael S. Tsirkin 

> ---
>  hw/timer/mc146818rtc.c | 11 ++-
>  1 file changed, 10 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/timer/mc146818rtc.c b/hw/timer/mc146818rtc.c
> index ea625f2..4e4af43 100644
> --- a/hw/timer/mc146818rtc.c
> +++ b/hw/timer/mc146818rtc.c
> @@ -717,11 +717,19 @@ static void rtc_set_date_from_host(ISADevice *dev)
>  rtc_set_cmos(s, &tm);
>  }
>  
> +static void rtc_pre_save(void *opaque)
> +{
> +RTCState *s = opaque;
> +
> +rtc_update_time(s);
> +}
> +
>  static int rtc_post_load(void *opaque, int version_id)
>  {
>  RTCState *s = opaque;
>  
> -if (version_id <= 2) {
> +if (version_id <= 2 ||
> +rtc_clock == QEMU_CLOCK_REALTIME){
>  rtc_set_time(s);
>  s->offset = 0;
>  check_update_timer(s);
> @@ -764,6 +772,7 @@ static const VMStateDescription vmstate_rtc = {
>  .name = "mc146818rtc",
>  .version_id = 3,
>  .minimum_version_id = 1,
> +.pre_save = rtc_pre_save,
>  .post_load = rtc_post_load,
>  .fields = (VMStateField[]) {
>  VMSTATE_BUFFER(cmos_data, RTCState),
> -- 
> 2.9.0.windows.1

Re: [Qemu-devel] [RFC 1/4] spapr_pci: Delegate placement of PCI host bridges to machine type

2016-10-06 Thread Alexey Kardashevskiy

On 06/10/16 14:03, David Gibson wrote:
> The 'spapr-pci-host-bridge' represents the virtual PCI host bridge (PHB)
> for a PAPR guest.  Unlike on x86, it's routine on Power (both bare metal
> and PAPR guests) to have numerous independent PHBs, each controlling a
> separate PCI domain.
> 
> There are two ways of configuring the spapr-pci-host-bridge device: first
> it can be done fully manually, specifying the locations and sizes of all
> the IO windows.  This gives the most control, but is very awkward with 6
> mandatory parameters.  Alternatively just an "index" can be specified
> which essentially selects from an array of predefined PHB locations.
> The PHB at index 0 is automatically created as the default PHB.
> 
> The current set of default locations causes some problems for guests with
> large RAM (> 1 TiB) or PCI devices with very large BARs (e.g. big nVidia
> GPGPU cards via VFIO).  Obviously, for migration we can only change the
> locations on a new machine type, however.
> 
> This is awkward, because the placement is currently decided within the
> spapr-pci-host-bridge code, so it breaks abstraction to look inside the
> machine type version.
> 
> So, this patch delegates the "default mode" PHB placement from the
> spapr-pci-host-bridge device back to the machine type via a public method
> in sPAPRMachineClass.  It's still a bit ugly, but it's about the best we
> can do.
> 
> For now, this just changes where the calculation is done.  It doesn't
> change the actual location of the host bridges, or any other behaviour.
> 
> Signed-off-by: David Gibson 
> ---
>  hw/ppc/spapr.c  | 34 ++
>  hw/ppc/spapr_pci.c  | 22 --
>  include/hw/pci-host/spapr.h | 11 +--
>  include/hw/ppc/spapr.h  |  4 
>  4 files changed, 47 insertions(+), 24 deletions(-)
> 
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 03e3803..f6e9c2a 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -2370,6 +2370,39 @@ static HotpluggableCPUList 
> *spapr_query_hotpluggable_cpus(MachineState *machine)
>  return head;
>  }
>  
> +static void spapr_phb_placement(sPAPRMachineState *spapr, uint32_t index,
> +uint64_t *buid, hwaddr *pio, hwaddr 
> *pio_size,
> +hwaddr *mmio, hwaddr *mmio_size,
> +unsigned n_dma, uint32_t *liobns, Error 
> **errp)
> +{
> +const uint64_t base_buid = 0x8002000ULL;
> +const hwaddr phb0_base = 0x10000000000ULL; /* 1 TiB */
> +const hwaddr phb_spacing = 0x1000000000ULL; /* 64 GiB */
> +const hwaddr mmio_offset = 0xa0000000; /* 2 GiB + 512 MiB */
> +const hwaddr pio_offset = 0x80000000; /* 2 GiB */
> +const uint32_t max_index = 255;
> +
> +hwaddr phb_base;
> +int i;
> +
> +if (index > max_index) {
> +error_setg(errp, "\"index\" for PAPR PHB is too large (max %u)",
> +   max_index);
> +return;
> +}
> +
> +*buid = base_buid + index;
> +for (i = 0; i < n_dma; ++i) {
> +liobns[i] = SPAPR_PCI_LIOBN(index, i);
> +}
> +
> +phb_base = phb0_base + index * phb_spacing;
> +*pio = phb_base + pio_offset;
> +*pio_size = SPAPR_PCI_IO_WIN_SIZE;
> +*mmio = phb_base + mmio_offset;
> +*mmio_size = SPAPR_PCI_MMIO_WIN_SIZE;
> +}
> +
>  static void spapr_machine_class_init(ObjectClass *oc, void *data)
>  {
>  MachineClass *mc = MACHINE_CLASS(oc);
> @@ -2406,6 +2439,7 @@ static void spapr_machine_class_init(ObjectClass *oc, 
> void *data)
>  mc->query_hotpluggable_cpus = spapr_query_hotpluggable_cpus;
>  fwc->get_dev_path = spapr_get_fw_dev_path;
>  nc->nmi_monitor_handler = spapr_nmi;
> +smc->phb_placement = spapr_phb_placement;
>  }
>  
>  static const TypeInfo spapr_machine_info = {
> diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
> index 4f00865..c0fc964 100644
> --- a/hw/ppc/spapr_pci.c
> +++ b/hw/ppc/spapr_pci.c
> @@ -1311,7 +1311,8 @@ static void spapr_phb_realize(DeviceState *dev, Error 
> **errp)
>  sphb->ddw_enabled ? SPAPR_PCI_DMA_MAX_WINDOWS : 1;
>  
>  if (sphb->index != (uint32_t)-1) {
> -hwaddr windows_base;
> +sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
> +Error *local_err = NULL;
>  
>  if ((sphb->buid != (uint64_t)-1) || (sphb->dma_liobn[0] != 
> (uint32_t)-1)
>  || (sphb->dma_liobn[1] != (uint32_t)-1 && windows_supported == 2)
> @@ -1322,21 +1323,14 @@ static void spapr_phb_realize(DeviceState *dev, Error 
> **errp)
>  return;
>  }
>  
> -if (sphb->index > SPAPR_PCI_MAX_INDEX) {
> -error_setg(errp, "\"index\" for PAPR PHB is too large (max %u)",
> -   SPAPR_PCI_MAX_INDEX);
> +smc->phb_placement(spapr, sphb->index,
> +   &sphb->buid, &sphb->io_win_addr, 
> &sphb->io_win_size,
> +   &sphb->mem_win_addr, &

Re: [Qemu-devel] [Qemu-block] [PATCH] block/gluster: add support for SEEK_DATA/SEEK_HOLE

2016-10-06 Thread Jeff Cody

On Thu, Oct 06, 2016 at 05:09:59PM -0500, Eric Blake wrote:
> On 03/07/2016 01:14 PM, Eric Blake wrote:
> > [adding qemu-devel; ALL patches must cc qemu-devel even when sent to
> > another list]
> > 
> > On 03/07/2016 11:04 AM, Niels de Vos wrote:
> >> GlusterFS 3.8 contains support for SEEK_DATA and SEEK_HOLE. This makes
> >> it possible to detect sparse areas in files.
> >>
> >> Signed-off-by: Niels de Vos 
> >>
> >> --
> >> Tested by compiling and running "qemu-img map gluster://..." with a
> >> build of the current master branch of glusterfs. Using a Fedora
> >> cloud image (in raw format) shows many SEEK procedure calls going back
> >> and forth over the network. The output of "qemu map" matches the output
> >> when run against the image on the local filesystem.
> >> ---
> 
> I hit a weird failure when trying to compile this on an older RHEL 6
> box, where /usr/include/unistd.h is too old to include SEEK_DATA and
> SEEK_HOLE:
> 
> block/gluster.c: In function ‘qemu_gluster_test_seek’:
> block/gluster.c:684: error: ‘SEEK_DATA’ undeclared (first use in this
> function)
> block/gluster.c:684: error: (Each undeclared identifier is reported only
> once
> block/gluster.c:684: error: for each function it appears in.)
> block/gluster.c: In function ‘find_allocation’:
> block/gluster.c:1202: error: ‘SEEK_DATA’ undeclared (first use in this
> function)
> block/gluster.c:1234: error: ‘SEEK_HOLE’ undeclared (first use in this
> function)
> 
> The patch has been in place for several months (which shows how seldom I
> compile on that particular box), but it makes me wonder why none of the
> autobuilders have hit this failure.  But since the code mentions that it
> shamelessly copies from raw-posix.c, and that file in turn has #ifdef
> guards to only do SEEK_HOLE optimizations if the system headers defined
> SEEK_HOLE in the first place, it sounds like you need to do a followup
> patch along those lines.
> 
> 

Ooof.

I just sent a patch for this, but I haven't been able to test it yet.  I'm
in the process of doing that, but since you have a build env already set up
to do it, would you mind trying the patch (just for compilation on RHEL6)?

Thanks,
Jeff

[Qemu-devel] [PATCH 1/1] block: add gluster ifdef guard checks for SEEK_DATA/SEEK_HOLE support

2016-10-06 Thread Jeff Cody

Add checks to see if the system compiling QEMU has support for
SEEK_HOLE/SEEK_DATA.  If the system does not, we will flag that seek
data is unsupported in gluster.

Note: this is not a check on whether the gluster server itself supports
SEEK_DATA (that is already done during runtime), but rather if the
compilation environment supports SEEK_DATA.

Signed-off-by: Jeff Cody 
---

Note: this patch is untested on older systems that do not support SEEK_DATA
(e.g. RHEL6).  This won't be pulled into my tree until it is verified.


 block/gluster.c | 11 +--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/block/gluster.c b/block/gluster.c
index e7bd13c..acb1934 100644
--- a/block/gluster.c
+++ b/block/gluster.c
@@ -672,8 +672,10 @@ static void qemu_gluster_parse_flags(int bdrv_flags, int 
*open_flags)
  */
 static bool qemu_gluster_test_seek(struct glfs_fd *fd)
 {
-off_t ret, eof;
+off_t ret = 0;
+off_t eof;
 
+#if defined SEEK_HOLE && defined SEEK_DATA
 eof = glfs_lseek(fd, 0, SEEK_END);
 if (eof < 0) {
 /* this should never occur */
@@ -682,6 +684,7 @@ static bool qemu_gluster_test_seek(struct glfs_fd *fd)
 
 /* this should always fail with ENXIO if SEEK_DATA is supported */
 ret = glfs_lseek(fd, eof, SEEK_DATA);
+#endif
 return (ret < 0) && (errno == ENXIO);
 }
 
@@ -1185,9 +1188,10 @@ static int find_allocation(BlockDriverState *bs, off_t 
start,
 off_t offs;
 
 if (!s->supports_seek_data) {
-return -ENOTSUP;
+goto exit;
 }
 
+#if defined SEEK_HOLE && defined SEEK_DATA
 /*
  * SEEK_DATA cases:
  * D1. offs == start: start is in data
@@ -1251,6 +1255,9 @@ static int find_allocation(BlockDriverState *bs, off_t 
start,
 
 /* D1 and H1 */
 return -EBUSY;
+#endif
+exit:
+return -ENOTSUP;
 }
 
 /*
-- 
2.7.4

Re: [Qemu-devel] [QEMU PATCH v5 2/6] migration: spapr_drc: defined VMStateDescription struct

2016-10-06 Thread David Gibson

On Wed, Oct 05, 2016 at 12:38:53PM +0100, Dr. David Alan Gilbert wrote:
> * Jianjun Duan (du...@linux.vnet.ibm.com) wrote:
> > To manage hotplug/unplug of dynamic resources such as PCI cards,
> > memory, and CPU on sPAPR guests, a firmware abstraction known as
> > a Dynamic Resource Connector (DRC) is used to assign a particular
> > dynamic resource to the guest, and provide an interface for the
> > guest to manage configuration/removal of the resource associated
> > with it.
> > 
> > To migrate the hotplugged resources in migration, the
> > associated DRC state need be migrated. To migrate the DRC state,
> > we defined the VMStateDescription struct for spapr_drc to enable
> > the transmission of spapr_drc state in migration.
> > 
> > Not all the elements in the DRC state are migrated. Only those
> > ones modifiable or needed by guest actions or device add/remove
> > operation are migrated. From the perspective of device
> > hotplugging, if we hotplug a device on the source, we need to
> > "coldplug" it on the target. The states across two hosts for the
> > same device are not the same. Ideally we want the states be same
> > after migration so that the device would function as hotplugged
> > on the target. For example we can unplug it. The minimum DRC
> > state we need to transfer should cover all the pieces changed by
> > hotplugging. Out of the elements of the DRC state, isolation_state,
> > allocation_state, and configured are involved in the DR state
> > transition diagram from PAPR+ 2.7, 13.4. configured and signalled
> > are needed in attaching and detaching devices. indicator_state
> > provides users with hardware state information. These 6 elements
> > are migrated.
> > 
> > detach_cb in the DRC state is a function pointer that cannot be
> > migrated. We set it right after DRC state is migrated so that
> > a migrated hot-unplug event could finish its work.
> 
> Be careful with that; it'll get tricky if you have a bunch of different
> possible callbacks.   If you want to explicitly migrate it then you'd
> have to have an enum of different functions that could be called rather
> than storing the pointer explicitly.

Quite.  Looking at the code, I'm quite baffled as to how detach_cb and
detach_cb_opaque get populated in the first place anyway.  All the
assignments I can see seem to be assigning the same variable to itself
(loaded in the caller, passed to another function, stored back to the
same place in the callee).

> 
> > The instance_id is used to identify objects in migration. We set
> > instance_id of DRC using the unique index so that it is the same
> > across migration.
> > 
> > Signed-off-by: Jianjun Duan 
> 
> I think this is OK from a migration point of view; I'll leave
> it to someone else to check the Power side of things.
> 
> > ---
> >  hw/ppc/spapr_drc.c | 69 
> > ++
> >  hw/ppc/spapr_pci.c | 22 +++
> >  include/hw/ppc/spapr_drc.h |  9 ++
> >  3 files changed, 100 insertions(+)
> > 
> > diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c
> > index 6e54fd4..369ec02 100644
> > --- a/hw/ppc/spapr_drc.c
> > +++ b/hw/ppc/spapr_drc.c
> > @@ -615,6 +615,71 @@ static void spapr_dr_connector_instance_init(Object 
> > *obj)
> >  NULL, NULL, NULL, NULL);
> >  }
> >  
> > +static bool spapr_drc_needed(void *opaque)
> > +{
> > +sPAPRDRConnector *drc = (sPAPRDRConnector *)opaque;
> > +sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
> > +bool rc = false;
> > +sPAPRDREntitySense value;
> > +
> > +drck->entity_sense(drc, &value);
> > +/* If no dev is plugged in there is no need to migrate the DRC state */
> > +if (value != SPAPR_DR_ENTITY_SENSE_PRESENT) {
> > +return false;
> > +}
> > +/*
> > + * If there is dev plugged in, we need to migrate the DRC state when
> > + * it is different from cold-plugged state
> > + */
> > +switch(drc->type) {
> > +/* for PCI type */
> > +case SPAPR_DR_CONNECTOR_TYPE_PCI:
> > +rc = !((drc->isolation_state == 
> > SPAPR_DR_ISOLATION_STATE_UNISOLATED) &&
> > +   (drc->allocation_state == SPAPR_DR_ALLOCATION_STATE_USABLE) 
> > &&
> > +   drc->configured && drc->signalled && 
> > !drc->awaiting_release);
> > +break;
> > +/* for LMB type */
> > +case SPAPR_DR_CONNECTOR_TYPE_LMB:
> > +rc = !((drc->isolation_state == SPAPR_DR_ISOLATION_STATE_ISOLATED) 
> > &&
> > +   (drc->allocation_state == 
> > SPAPR_DR_ALLOCATION_STATE_UNUSABLE) &&
> > +   drc->configured && drc->signalled && 
> > !drc->awaiting_release);
> > +break;
> > +default:
> > +;
> > +}
> > +
> > +return rc;
> > +}
> > +
> > +/* detach_cb needs be set since it is not migrated */
> > +static void postmigrate_set_detach_cb(sPAPRDRConnector *drc,
> > +  spapr_drc_detach_cb *detach_cb)
> > +{
> >

Re: [Qemu-devel] [QEMU PATCH v5 2/6] migration: spapr_drc: defined VMStateDescription struct

2016-10-06 Thread David Gibson

On Mon, Oct 03, 2016 at 11:24:53AM -0700, Jianjun Duan wrote:
> To manage hotplug/unplug of dynamic resources such as PCI cards,
> memory, and CPU on sPAPR guests, a firmware abstraction known as
> a Dynamic Resource Connector (DRC) is used to assign a particular
> dynamic resource to the guest, and provide an interface for the
> guest to manage configuration/removal of the resource associated
> with it.
> 
> To migrate the hotplugged resources in migration, the
> associated DRC state need be migrated. To migrate the DRC state,
> we defined the VMStateDescription struct for spapr_drc to enable
> the transmission of spapr_drc state in migration.
> 
> Not all the elements in the DRC state are migrated. Only those
> ones modifiable or needed by guest actions or device add/remove
> operation are migrated. From the perspective of device
> hotplugging, if we hotplug a device on the source, we need to
> "coldplug" it on the target. The states across two hosts for the
> same device are not the same. Ideally we want the states be same
> after migration so that the device would function as hotplugged
> on the target. For example we can unplug it. The minimum DRC
> state we need to transfer should cover all the pieces changed by
> hotplugging. Out of the elements of the DRC state, isolation_state,
> allocation_state, and configured are involved in the DR state
> transition diagram from PAPR+ 2.7, 13.4. configured and signalled
> are needed in attaching and detaching devices. indicator_state
> provides users with hardware state information. These 6 elements
> are migrated.

Hmm.. are you saying that the DRC state of a coldplugged device (after
we've fully booted) is different from the DRC state of a hotplugged
device (after all the hotplug operations have fully completed)?

If that's correct that sounds like a general bug in the DRC state
management, not something only related to migration.

Looking at the code, though, that doesn't really seem to be what it's
doing.

> detach_cb in the DRC state is a function pointer that cannot be
> migrated. We set it right after DRC state is migrated so that
> a migrated hot-unplug event could finish its work.
> 
> The instance_id is used to identify objects in migration. We set
> instance_id of DRC using the unique index so that it is the same
> across migration.
> 
> Signed-off-by: Jianjun Duan 
> ---
>  hw/ppc/spapr_drc.c | 69 
> ++
>  hw/ppc/spapr_pci.c | 22 +++
>  include/hw/ppc/spapr_drc.h |  9 ++
>  3 files changed, 100 insertions(+)
> 
> diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c
> index 6e54fd4..369ec02 100644
> --- a/hw/ppc/spapr_drc.c
> +++ b/hw/ppc/spapr_drc.c
> @@ -615,6 +615,71 @@ static void spapr_dr_connector_instance_init(Object *obj)
>  NULL, NULL, NULL, NULL);
>  }
>  
> +static bool spapr_drc_needed(void *opaque)
> +{
> +sPAPRDRConnector *drc = (sPAPRDRConnector *)opaque;
> +sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
> +bool rc = false;
> +sPAPRDREntitySense value;
> +
> +drck->entity_sense(drc, &value);
> +/* If no dev is plugged in there is no need to migrate the DRC state */
> +if (value != SPAPR_DR_ENTITY_SENSE_PRESENT) {
> +return false;
> +}
> +/*
> + * If there is dev plugged in, we need to migrate the DRC state when
> + * it is different from cold-plugged state
> + */
> +switch(drc->type) {
> +/* for PCI type */
> +case SPAPR_DR_CONNECTOR_TYPE_PCI:
> +rc = !((drc->isolation_state == SPAPR_DR_ISOLATION_STATE_UNISOLATED) 
> &&
> +   (drc->allocation_state == SPAPR_DR_ALLOCATION_STATE_USABLE) &&
> +   drc->configured && drc->signalled && !drc->awaiting_release);
> +break;
> +/* for LMB type */
> +case SPAPR_DR_CONNECTOR_TYPE_LMB:
> +rc = !((drc->isolation_state == SPAPR_DR_ISOLATION_STATE_ISOLATED) &&
> +   (drc->allocation_state == SPAPR_DR_ALLOCATION_STATE_UNUSABLE) 
> &&
> +   drc->configured && drc->signalled && !drc->awaiting_release);
> +break;

What about CPU type?

> +default:
> +;
> +}
> +
> +return rc;
> +}
> +
> +/* detach_cb needs be set since it is not migrated */
> +static void postmigrate_set_detach_cb(sPAPRDRConnector *drc,
> +  spapr_drc_detach_cb *detach_cb)
> +{
> +drc->detach_cb = detach_cb;
> +}
> +
> +/* return the unique drc index as instance_id for qom interfaces*/
> +static int get_instance_id(DeviceState *dev)
> +{
> +return (int)get_index(SPAPR_DR_CONNECTOR(OBJECT(dev)));
> +}
> +
> +static const VMStateDescription vmstate_spapr_drc = {
> +.name = "spapr_drc",
> +.version_id = 1,
> +.minimum_version_id = 1,
> +.needed = spapr_drc_needed,
> +.fields  = (VMStateField []) {
> +VMSTATE_UINT32(isolation_state, sPAPRDRConnector),
> +VMSTATE_UINT32(allocat

Re: [Qemu-devel] [QEMU PATCH v5 1/6] migration: alternative way to set instance_id in SaveStateEntry

2016-10-06 Thread David Gibson

On Wed, Oct 05, 2016 at 09:44:57AM -0700, Jianjun Duan wrote:
> Please see comments below:
> 
> On 10/05/2016 03:12 AM, Dr. David Alan Gilbert wrote:
> > * Jianjun Duan (du...@linux.vnet.ibm.com) wrote:
> >> In QOM(QEMU Object Model) migrated objects are identified with instance_id
> >> which is calculated automatically using their path in the QOM composition
> >> tree. For some objects, this path could change from source to target in
> >> migration. To migrate such objects, we need to make sure the instance_id 
> >> does
> >> not change from source to target. We add a hook in DeviceClass to do 
> >> customized
> >> instance_id calculation in such cases.
> > 
> > Can you explain a bit about why the path changes from source to destination;
> > the path here should be a feature of the guest state not the host, and so I
> > don't understand why it changes.
> Please see the discussion with David in the previous versions:
> http://lists.nongnu.org/archive/html/qemu-ppc/2016-06/msg00062.html

Um.. your description above really isn't an accurate summary of that
discussion.

The point is not that the qom path will vary from source to
destination for some arbitrary reason, but rather that we anticipate
future changes in the QOM structure.  Specifically we're considering
eliminating the DRC objects, and folding their (limited) state into an
array in the parent object (either the machine or a PCI host bridge).

That would change the qom paths, and hence the auto-generated instance
ids, which would break migration between qemu versions before and
after the restructure.

I'm not sure that changing the instance ids is enough though, anyway,
since we're talking about eliminating the object entirely, the
class/type information in the migration stream also wouldn't match.

Dave, if you have ideas on how to deal with that, I'd love to hear
them.

> 
> >> As a result, in these cases compat will not be set in the concerned
> >> SaveStateEntry. This will prevent the inconsistent idstr to be sent over in
> >> migration. We could have set alias_id in a similar way. But that will be
> >> overloading the purpose of alias_id.
> >>
> >> The first application will be setting instance_id for DRC using its unique
> >> index. Doing this makes the instance_id of DRC to be consistent across 
> >> migration
> >> and supports flexible management of DRC objects in migration.
> > 
> > Is there a reason to use a custom instance_id rather than a custom idstr
> 
> It can be done either way. But it is easier to deal with a integer than
> a string.

A bit, but I don't think that's a good enough reason to introduce a
second mechanism for overriding instance id allocations.

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson

signature.asc
Description: PGP signature

Re: [Qemu-devel] [QEMU PATCH v5 5/6] migration: spapr: migrate ccs_list in spapr state

2016-10-06 Thread David Gibson

On Mon, Oct 03, 2016 at 11:24:56AM -0700, Jianjun Duan wrote:
> ccs_list in spapr state maintains the device tree related
> information on the rtas side for hotplugged devices. In racing
> situations between hotplug events and migration operation, a rtas
> hotplug event could be migrated from the source guest to target
> guest, or the source guest could have not yet finished fetching
> the device tree when migration is started, the target will try
> to finish fetching the device tree. By migrating ccs_list, the
> target can fetch the device tree properly.
> 
> ccs_list is put in a subsection in the spapr state VMSD to make
> sure migration across different versions is not broken.
> 
> Signed-off-by: Jianjun Duan 

I'm still not entirely convinced we need to migrate the ccs_list.
What would happen if we did this:

   * Keep a flag which indicates whether the guest is in the middle of
 the configure_connector process.
   - I'm not sure if that would need to be a new bit of state, or
 if we could deduce it from the value of the isolation and
 allocation states
   - If it's new state, we'd need to migrate it, obviously not if
 we can derive it from other state flags

   * On the destination during post_load, if there was an in-progress
 configure_connector on the source, we set another "stale
 configure" flag

   * When a configure_connector call is attempted on the destination
 with the stale configure flag set, return an error

The question is, if we choose the right error, can we get the guest to
either restart the configure from scratch, or fail gracefully, so the
operator can restart the hotplug?

> ---
>  hw/ppc/spapr.c | 34 ++
>  1 file changed, 34 insertions(+)
> 
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 63b6a0d..1847d35 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -1255,6 +1255,36 @@ static bool version_before_3(void *opaque, int 
> version_id)
>  return version_id < 3;
>  }
>  
> +static bool spapr_ccs_list_needed(void *opaque)
> +{
> +sPAPRMachineState *spapr = (sPAPRMachineState *)opaque;
> +return !QTAILQ_EMPTY(&spapr->ccs_list);
> +}
> +
> +static const VMStateDescription vmstate_spapr_ccs = {
> +.name = "spaprconfigureconnectorstate",
> +.version_id = 1,
> +.minimum_version_id = 1,
> +.fields = (VMStateField[]) {
> +VMSTATE_UINT32(drc_index, sPAPRConfigureConnectorState),
> +VMSTATE_INT32(fdt_offset, sPAPRConfigureConnectorState),
> +VMSTATE_INT32(fdt_depth, sPAPRConfigureConnectorState),
> +VMSTATE_END_OF_LIST()
> +},
> +};
> +
> +static const VMStateDescription vmstate_spapr_ccs_list = {
> +.name = "spaprccslist",
> +.version_id = 1,
> +.minimum_version_id = 1,
> +.needed = spapr_ccs_list_needed,
> +.fields = (VMStateField[]) {
> +VMSTATE_QTAILQ_V(ccs_list, sPAPRMachineState, 1,
> + vmstate_spapr_ccs, sPAPRConfigureConnectorState, 
> next),
> +VMSTATE_END_OF_LIST()
> +},
> +};
> +
>  static const VMStateDescription vmstate_spapr = {
>  .name = "spapr",
>  .version_id = 3,
> @@ -1270,6 +1300,10 @@ static const VMStateDescription vmstate_spapr = {
>  VMSTATE_PPC_TIMEBASE_V(tb, sPAPRMachineState, 2),
>  VMSTATE_END_OF_LIST()
>  },
> +.subsections = (const VMStateDescription*[]) {
> +&vmstate_spapr_ccs_list,
> +NULL
> +}
>  };
>  
>  static int htab_save_setup(QEMUFile *f, void *opaque)

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature

Re: [Qemu-devel] [QEMU PATCH v5 4/6] migration: migrate QTAILQ

2016-10-06 Thread David Gibson

On Thu, Oct 06, 2016 at 08:01:56PM +0100, Dr. David Alan Gilbert wrote:
> * Jianjun Duan (du...@linux.vnet.ibm.com) wrote:
> > 
> > 
> > On 10/05/2016 09:56 AM, Dr. David Alan Gilbert wrote:
> > > * Jianjun Duan (du...@linux.vnet.ibm.com) wrote:
> > >> Currently we cannot directly transfer a QTAILQ instance because of the
> > >> limitation in the migration code. Here we introduce an approach to
> > >> transfer such structures. In our approach such a structure is tagged
> > >> with VMS_LINKED. We then modified vmstate_save_state and 
> > >> vmstate_load_state
> > >> so that when VMS_LINKED is encountered, put and get from VMStateInfo are
> > >> called respectively. We created VMStateInfo vmstate_info_qtailq for 
> > >> QTAILQ.
> > >> Similar VMStateInfo can be created for other data structures such as 
> > >> list.
> > >> This approach will be used to transfer pending_events and ccs_list in 
> > >> spapr
> > >> state.
> > >>
> > >> We also create some macros in qemu/queue.h to access a QTAILQ using 
> > >> pointer
> > >> arithmetic. This ensures that we do not depend on the implementation
> > >> details about QTAILQ in the migration code.
> > > 
> > > I think we're going to need a way to have a more flexible
> > > loops; and thus my choice here wouldn't be to use the .get/.put together
> > > with the VMSD; but I think we'll end up needing a new
> > > data structure, maybe a VMStateLoop *loop in VMStateField.
> > > 
> > > So would it be easier if you added that new member, then you wouldn't 
> > > have to
> > > modify every get() and put() function that already exists in the previous 
> > > patch.
> > > 
> > > Specifically, your format of QTAILQ is perfectly reasonable - a
> > > byte before each entry which is 1 to indicate there's an entry or 0
> > > to indicate termination, but there are lots of other variants, e.g.
> > > 
> > >a) put_scsi_requests uses that byte to hold a flag, so it's 0,1,2
> > >   0 still means terminate but 1 or 2 set a flag in the structure.
> > 
> > I quickly take a look of put_scsi_requests. It is transferring a QTAILQ of
> > SCSIRequest. However it goes into the structure inside to dump the
> > elements out.
> > If using my approach, I would have a VMSD for SCSIRequest. The
> > additional byte used to indicate the end of the queue would lie outside
> > > the SCSIRequest data block, so there would be no confusion.
> 
> Hmm OK; I don't think it's that easy but we'll see.
> 
> However, can I make one much simpler request; please split this patch
> so that the VMSTATE_LINKED and 
> vmstate_save_state/vmstate_load_state/vmfield_get_type_name
> are in one patch, while the QTAILQ patches are in a separate patch.
> (I'd be OK if you moved the VMSTATE_LINKED into the previous patch).
> 
> I've just been thinking about a different use for the same mechanism;
> I want to do a:
>   VMSTATE_WITH_TMP(t1*, type1, type2, vmsd)
> 
> which also sets the LINKED, where the .get/.put allocate a temporary
> structure (of type/size type2), set up *tmp = t1 and then do the 
> vmstate_load/save
> using the vmsd on the temporary; something like (untested):
> 
> static int get_tmp(QEMUFile *f, void *pv, size_t unused_size, VMStateField 
> *field)
> {
> const VMStateDescription *vmsd = field->vmsd;
> size_t size = field->size;
> int version_id = field->version_id;
> void *tmp = gmalloc(size);
> int ret;
> 
> *(void **)tmp = pv;
> ret = vmstate_load_state(f, vmsd, tmp, version_id);
> gfree(tmp);
> return ret;
> }
> 
> This can be in a generic macro; and we would impose that type2 must be a 
> struct
> with the first element is 'type1* parent' (compile checked).
> This would work nicely for where we have to do some maths to generate some
> temporary results prior to migration; the .pre_save of the vmsd can read the 
> data
> from pv->parent and write it to the other fields but not have to use
> qemu_get_*/qemu_put_* at all.
> 
> Dave

Oh, I like this idea.  I know there are a number of places where
should-be-obsolete fields are still present in structures purely to
catch incoming migration info which is then converted to the modern
representation in post_load.  This would allow cleaning a bunch of
those up.

It would also mean we don't necessarily need explicit handling of
queues/lists.  I objected to early versions of this series which
dumped the qtailq into an array and used the existing array vmstate
types, because it meant not just an only-for-migration field in the
structure, but a substantial slab of only-for-migration data.

If we added the concept of temporary "catching" structures to the
vmsd, that objection would go away.  I'd be happy enough to
temporarily dump the queue into an array, transfer that over the
stream into another temporary array, then load it into the destination
queue.

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_

[Qemu-devel] [PATCH v2] tap-bsd: OpenBSD uses tap(4) now

2016-10-06 Thread Brad Smith

Update the tap-bsd code now that OpenBSD uses tap(4).

Signed-off-by: Brad Smith 
---
v2: Allow the code to deal with newer vs older OpenBSD releases

diff --git a/net/tap-bsd.c b/net/tap-bsd.c
index c506ac3..6c96922 100644
--- a/net/tap-bsd.c
+++ b/net/tap-bsd.c
@@ -35,6 +35,10 @@
 #include 
 #endif
 
+#if defined(__OpenBSD__)
+#include <sys/param.h>
+#endif
+
 #ifndef __FreeBSD__
 int tap_open(char *ifname, int ifname_size, int *vnet_hdr,
  int vnet_hdr_required, int mq_required, Error **errp)
@@ -55,7 +59,7 @@ int tap_open(char *ifname, int ifname_size, int *vnet_hdr,
 if (*ifname) {
 snprintf(dname, sizeof dname, "/dev/%s", ifname);
 } else {
-#if defined(__OpenBSD__)
+#if defined(__OpenBSD__) && OpenBSD < 201605
 snprintf(dname, sizeof dname, "/dev/tun%d", i);
 #else
 snprintf(dname, sizeof dname, "/dev/tap%d", i);

Re: [Qemu-devel] [PATCH] qtest: add read/write accessors with a specific endianness

2016-10-06 Thread David Gibson

On Thu, Oct 06, 2016 at 12:03:34PM +0100, Peter Maydell wrote:
> On 6 October 2016 at 04:38, David Gibson  wrote:
> > On Wed, Oct 05, 2016 at 05:31:07AM -0700, Peter Maydell wrote:
> >> On 4 October 2016 at 16:43, David Gibson  
> >> wrote:
> >> > On Tue, Oct 04, 2016 at 01:36:09PM +0100, Peter Maydell wrote:
> >> >> The difficulty with this patch is that it's hard to tell whether
> >> >> it's really required, or if this is just adding an extra layer
> >> >> of byteswapping that should really be done in some other location
> >> >
> >> > Actually, it's neither.  It's not essential for anything, but it
> >> > *removes* an extra layer of byteswapping that really never should have
> >> > been done in the first place.
> >>
> >> The patch is very clearly adding calls to swapping functions.
> >> It looks like it's mostly convenience functions for not doing
> >> those swaps explicitly in the test cases.
> >
> > It's adding 1 swap on top of the memread/memwrite path - that's the
> > path which had no existing swaps (intended primarily for bag-o'-bytes
> > block access AFAICT).
> 
> Yeah, I hadn't noticed it was using the memread/memwrite path.
> I disagree with using that code path for what ought to be
> register read/writes (among other things, it's not clear to me
> that it guarantees that a 4-byte access by the test code is
> always a 4-byte access on the device, etc).

I was concerned about that when I first saw it as well.

But, I traced the path and both the memread() path and the readw/readl
path backend onto cpu_physical_memory_read() with the appropriate
width.  So if the proposed ops are broken in this way, so are
readw/readl.

If we're concerned that that won't always be the case we can still
implement the same operations, but instead of having them be just
wrappers on memread/memwrite, they'd be new primitives passed over the
pipe to the qtest accelerator.

> 
> >> >> in the stack. What's the actual test case here?
> >> >
> >> > The current readw, readl, etc. all work in "guest endianness".  But
> >> > guest endianness is not well defined - there are a number of targets
> >> > which can support either.
> >>
> >> It's guest bus endianness, and it's pretty well defined I think.
> >> (ARM for instance is LE bus even if the CPU is doing BE writes.)
> >
> > I don't see that guest bus endianness is any better defined, or any
> > more useful than "guest endianness".  It might have a vague meaning
> > for ARM (or embedded Power) in the sense that the on-SoC devices will
> > use that endianness.  But since the SoC devices are generally unique
> > to the architecture anyway, you still know their endianness
> > independent of any notion of guest endianness.
> 
> It means "the endianness that you would see if you could snoop
> the data bus at the output of the CPU".

Hrm.  I see two cases here, neither of them makes this clear:

1) The bus spec defines which data lines are MSB, and which are LSB.
In this case, the endianness depends on how those are mapped to byte
addresses when you get out to RAM - that could involve several
intermediate bridges.

2) The bus spec defines which data lines are byte0, 1, 2, etc (in byte
address order).  In this case it really is the CPU which determines
the endianness of its accesses - and that in turn could depend on
modes, TLB entries or instruction forms within the CPU.  So the
question is: the endianness you see if you snoop the bus when the CPU
does... what.. exactly?

> It is definitely well
> defined for every QEMU target because that's what TARGET_WORDS_BIGENDIAN
> tells you. It's not the same as whatever the device thinks it
> might interpret values as (though obviously people don't
> often design systems where they differ).

Honestly I think TARGET_WORDS_BIGENDIAN is just a hangover from when
nearly all CPUs worked exclusively in one endianness.  It's never
terribly well defined.

> >> >  And it's doubly meaningless since it's a
> >> > property of the guest cpu, which we're essentially replacing with the
> >> > qtest stub anyway.
> >>
> >> The stub sits on the same bus the guest cpu would.
> >>
> >> > Furthermore "guest endianness" isn't useful.  With a tiny handful of
> >> > exceptions, all peripherals have their own endianness which is known
> >> > independent of the target.  It makes more sense for test cases to
> >> > explicitly do their accesses in the correct endianness for the device,
> >> > without having to compensate for the fact that it'll be swapped into
> >> > the essentially arbitrary "guest endianness" along the way.
> >>
> >> Here I definitely disagree. I think it makes much more sense
> >> for writes to be "what the guest CPU would write", because that's
> >> where we're injecting them. If we had a test framework where the
> >> test was talking directly to the device, then maybe, but we don't.
> >
> > When I say that guest endianness is not well defined, what I mean is
> > precisely that "what the guest CPU would write" is not well defined.
> > For example

Re: [Qemu-devel] [PATCH] tests: minor cleanups in usb-hcd-uhci-test

2016-10-06 Thread David Gibson

On Thu, Oct 06, 2016 at 04:50:48PM +0200, Laurent Vivier wrote:
> Two minor cleanups:
> - exit gracefully in case on unsupported target,
> - put machine command line in a constant to avoid
>   to duplicate it.
> 
> Signed-off-by: Laurent Vivier 

Reviewed-by: David Gibson 

I'm not sure if I should take this through my tree or not.

> ---
>  tests/usb-hcd-uhci-test.c | 15 +--
>  1 file changed, 9 insertions(+), 6 deletions(-)
> 
> diff --git a/tests/usb-hcd-uhci-test.c b/tests/usb-hcd-uhci-test.c
> index 4b951ce..e956b9c 100644
> --- a/tests/usb-hcd-uhci-test.c
> +++ b/tests/usb-hcd-uhci-test.c
> @@ -77,6 +77,9 @@ static void test_usb_storage_hotplug(void)
>  int main(int argc, char **argv)
>  {
>  const char *arch = qtest_get_arch();
> +const char *cmd = "-device piix3-usb-uhci,id=uhci,addr=1d.0"
> +  " -drive id=drive0,if=none,file=/dev/null,format=raw"
> +  " -device usb-tablet,bus=uhci.0,port=1";
>  int ret;
>  
>  g_test_init(&argc, &argv, NULL);
> @@ -87,13 +90,13 @@ int main(int argc, char **argv)
>  qtest_add_func("/uhci/pci/hotplug/usb-storage", 
> test_usb_storage_hotplug);
>  
>  if (strcmp(arch, "i386") == 0 || strcmp(arch, "x86_64") == 0) {
> -qs = qtest_pc_boot("-device piix3-usb-uhci,id=uhci,addr=1d.0"
> -   " -drive 
> id=drive0,if=none,file=/dev/null,format=raw"
> -   " -device usb-tablet,bus=uhci.0,port=1");
> +qs = qtest_pc_boot(cmd);
>  } else if (strcmp(arch, "ppc64") == 0) {
> -qs = qtest_spapr_boot("-device piix3-usb-uhci,id=uhci,addr=1d.0"
> -   " -drive 
> id=drive0,if=none,file=/dev/null,format=raw"
> -   " -device usb-tablet,bus=uhci.0,port=1");
> +qs = qtest_spapr_boot(cmd);
> +} else {
> +g_printerr("usb-hcd-uhci-test tests are only "
> +   "available on x86 or ppc64\n");
> +exit(EXIT_FAILURE);
>  }
>  ret = g_test_run();
>  qtest_shutdown(qs);

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature

Re: [Qemu-devel] [Bug 1630723] [NEW] UART writes to netduino2/stm32f205-soc disappear

2016-10-06 Thread Alistair Francis

QEMU only supports the Netduino (not Netduino 2); it is possible that
the base addresses are different and that is why you aren't seeing the
serial output.

Thanks,

Alistair


On Wed, Oct 5, 2016 at 11:56 AM, Seth  wrote:
> Public bug reported:
>
> Writes to UART 2 and 3 disappear. As a sanity check I put printf
> statements in the function stm32f2xx_usart_write in
> qemu/hw/char/stm32f2xx_usart.c and recompiled qemu. The result confirmed
> text sent to UART1 and UART4 are sent to that function while text sent
> to UART 2 and 3 are not. I assume writes to all 4 need to make it to
> that function for emulations to operate correctly.
>
> Example code that writes to all 4 UARTs/USARTs (does not contain the printf 
> statements mention above):
> https://github.com/skintigh/baremetal_netduino2
>
> ** Affects: qemu
>  Importance: Undecided
>  Status: New
>
> --
> You received this bug notification because you are a member of qemu-
> devel-ml, which is subscribed to QEMU.
> https://bugs.launchpad.net/bugs/1630723
>
> Title:
>   UART writes to netduino2/stm32f205-soc disappear
>
> Status in QEMU:
>   New
>
> Bug description:
>   Writes to UART 2 and 3 disappear. As a sanity check I put printf
>   statements in the function stm32f2xx_usart_write in
>   qemu/hw/char/stm32f2xx_usart.c and recompiled qemu. The result
>   confirmed text sent to UART1 and UART4 are sent to that function while
>   text sent to UART 2 and 3 are not. I assume writes to all 4 need to
>   make it to that function for emulations to operate correctly.
>
>   Example code that writes to all 4 UARTs/USARTs (does not contain the printf 
> statements mention above):
>   https://github.com/skintigh/baremetal_netduino2
>
> To manage notifications about this bug go to:
> https://bugs.launchpad.net/qemu/+bug/1630723/+subscriptions
>

Re: [Qemu-devel] [RFC 1/4] spapr_pci: Delegate placement of PCI host bridges to machine type

2016-10-06 Thread David Gibson

On Thu, Oct 06, 2016 at 11:36:12AM +0200, Laurent Vivier wrote:
> 
> 
> On 06/10/2016 05:03, David Gibson wrote:
> > The 'spapr-pci-host-bridge' represents the virtual PCI host bridge (PHB)
> > for a PAPR guest.  Unlike on x86, it's routine on Power (both bare metal
> > and PAPR guests) to have numerous independent PHBs, each controlling a
> > separate PCI domain.
> > 
> > There are two ways of configuring the spapr-pci-host-bridge device: first
> > it can be done fully manually, specifying the locations and sizes of all
> > the IO windows.  This gives the most control, but is very awkward with 6
> > mandatory parameters.  Alternatively just an "index" can be specified
> > which essentially selects from an array of predefined PHB locations.
> > The PHB at index 0 is automatically created as the default PHB.
> > 
> > The current set of default locations causes some problems for guests with
> > large RAM (> 1 TiB) or PCI devices with very large BARs (e.g. big nVidia
> > GPGPU cards via VFIO).  Obviously, for migration we can only change the
> > locations on a new machine type, however.
> > 
> > This is awkward, because the placement is currently decided within the
> > spapr-pci-host-bridge code, so it breaks abstraction to look inside the
> > machine type version.
> > 
> > So, this patch delegates the "default mode" PHB placement from the
> > spapr-pci-host-bridge device back to the machine type via a public method
> > in sPAPRMachineClass.  It's still a bit ugly, but it's about the best we
> > can do.
> > 
> > For now, this just changes where the calculation is done.  It doesn't
> > change the actual location of the host bridges, or any other behaviour.
> > 
> > Signed-off-by: David Gibson 
> > ---
> >  hw/ppc/spapr.c  | 34 ++
> >  hw/ppc/spapr_pci.c  | 22 --
> >  include/hw/pci-host/spapr.h | 11 +--
> >  include/hw/ppc/spapr.h  |  4 
> >  4 files changed, 47 insertions(+), 24 deletions(-)
> > 
> > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> > index 03e3803..f6e9c2a 100644
> > --- a/hw/ppc/spapr.c
> > +++ b/hw/ppc/spapr.c
> > @@ -2370,6 +2370,39 @@ static HotpluggableCPUList 
> > *spapr_query_hotpluggable_cpus(MachineState *machine)
> >  return head;
> >  }
> >  
> > +static void spapr_phb_placement(sPAPRMachineState *spapr, uint32_t index,
> > +uint64_t *buid, hwaddr *pio, hwaddr 
> > *pio_size,
> > +hwaddr *mmio, hwaddr *mmio_size,
> > +unsigned n_dma, uint32_t *liobns, Error 
> > **errp)
> > +{
> > +const uint64_t base_buid = 0x8002000ULL;
> > > +const hwaddr phb0_base = 0x10000000000ULL; /* 1 TiB */
> > > +const hwaddr phb_spacing = 0x1000000000ULL; /* 64 GiB */
> > > +const hwaddr mmio_offset = 0xa0000000; /* 2 GiB + 512 MiB */
> > > +const hwaddr pio_offset = 0x80000000; /* 2 GiB */
> > +const uint32_t max_index = 255;
> > +
> > +hwaddr phb_base;
> > +int i;
> > +
> > +if (index > max_index) {
> > +error_setg(errp, "\"index\" for PAPR PHB is too large (max %u)",
> > +   max_index);
> > +return;
> > +}
> > +
> > +*buid = base_buid + index;
> > +for (i = 0; i < n_dma; ++i) {
> > +liobns[i] = SPAPR_PCI_LIOBN(index, i);
> > +}
> > +
> > +phb_base = phb0_base + index * phb_spacing;
> > +*pio = phb_base + pio_offset;
> > +*pio_size = SPAPR_PCI_IO_WIN_SIZE;
> > +*mmio = phb_base + mmio_offset;
> > +*mmio_size = SPAPR_PCI_MMIO_WIN_SIZE;
> > +}
> > +
> >  static void spapr_machine_class_init(ObjectClass *oc, void *data)
> >  {
> >  MachineClass *mc = MACHINE_CLASS(oc);
> > @@ -2406,6 +2439,7 @@ static void spapr_machine_class_init(ObjectClass *oc, 
> > void *data)
> >  mc->query_hotpluggable_cpus = spapr_query_hotpluggable_cpus;
> >  fwc->get_dev_path = spapr_get_fw_dev_path;
> >  nc->nmi_monitor_handler = spapr_nmi;
> > +smc->phb_placement = spapr_phb_placement;
> >  }
> >  
> >  static const TypeInfo spapr_machine_info = {
> > diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
> > index 4f00865..c0fc964 100644
> > --- a/hw/ppc/spapr_pci.c
> > +++ b/hw/ppc/spapr_pci.c
> > @@ -1311,7 +1311,8 @@ static void spapr_phb_realize(DeviceState *dev, Error 
> > **errp)
> >  sphb->ddw_enabled ? SPAPR_PCI_DMA_MAX_WINDOWS : 1;
> >  
> >  if (sphb->index != (uint32_t)-1) {
> > -hwaddr windows_base;
> > +sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
> > +Error *local_err = NULL;
> >  
> >  if ((sphb->buid != (uint64_t)-1) || (sphb->dma_liobn[0] != 
> > (uint32_t)-1)
> >  || (sphb->dma_liobn[1] != (uint32_t)-1 && windows_supported == 
> > 2)
> > @@ -1322,21 +1323,14 @@ static void spapr_phb_realize(DeviceState *dev, 
> > Error **errp)
> >  return;
> >  }
> >  
> > -if (sphb->index > SPAPR_PCI_MAX_INDEX) {
> > -

Re: [Qemu-devel] Simulating 3 chips on one board

2016-10-06 Thread Alistair Francis

On Wed, Sep 28, 2016 at 4:36 PM, Alex Bennée  wrote:
>
> Seth K  writes:
>
>> I need to simulate 3 chips that are on one board and that talk to each
>> other through UART, SPI and GPIO. The chips verify each other's work, and I
>> need to be able to observe this communication for debugging. Can something
>> like this be done in QEMU?
>
> As Peter has mentioned this isn't supported by upstream. There are a
> number of out-of-tree QEMU's that have attempted to solve this problem
> which you might want to look at. Typically this involves some sort of
> IPC interface to share machine details. The one I'm aware of is the
> Xilinx Zynq:
>
> 
> http://www.wiki.xilinx.com/x--Running%20a%20Zynq%20UltraScale+%20Linux%20Kernel%20Image%20On%20Xilinx's%20ARM/PMU%20QEMU
>
> You might want to look at how they do things for some ideas. I'm afraid
> I've never run it myself so you'll have to investigate.

The Xilinx solution might be a good place to start. We use remote port
(which is a communication framework on top of TCP/unix sockets) to
communicate between different instances of QEMU in different
processes. It's pretty cool!

That wiki Alex mentioned is a good place to start. Everything we do is
public on GitHub and available to use.

Thanks,

Alistair

>
>>
>> My first thought was to create the chip then create a board/machine with 1
>> chip, and run 3 instances of QEMU on the host and have them talk to each
>> other via the host (/dev/uart7 for example) but that doesn't seem to be
>> possible. It seems QEMU cannot output 8 UARTS (I can't get more than 1) or
>> any GPIOs. Is that correct? Not sure about SPIs either.
>>
>> My next thought was to make 1 board with all three chips, but have some way
>> to sniff the UARTs/SPIs/GPIOs between chips. Is that possible in QEMU?
>
> I suspect the first approach is going to be the easier one to do. You
> could use sockets to link multiple QEMUs together but its probably a
> fair bit of work to get to that point.
>
> Some precursor work for building a heterogeneous QEMU (with multiple CPU
> types) was merged but I think the effort has stalled somewhat and needed
> a lot more work doing.
>
> --
> Alex Bennée
>

Re: [Qemu-devel] [PATCH] qtest: ask endianness of the target in qtest_init()

2016-10-06 Thread David Gibson

On Thu, Oct 06, 2016 at 10:46:22PM +0200, Greg Kurz wrote:
> On Thu,  6 Oct 2016 20:56:58 +0200
> Laurent Vivier  wrote:
> 
> > The target endianness is not deduced anymore from
> > the architecture name but asked directly to the guest,
> > using a new qtest command: "endianness". As it can't
> > change (this is the value of TARGET_WORDS_BIGENDIAN),
> > we store it to not have to ask every time we want to
> > know if we have to byte-swap a value.
> > 
> > Signed-off-by: Laurent Vivier 
> > CC: Greg Kurz 
> > CC: David Gibson 
> > CC: Peter Maydell 
> > ---
> > Note: this patch can be seen as a v2 of
> > "qtest: evaluate endianness of the target in qtest_init()"
> > from the patch series "tests: enable virtio tests on SPAPR"
> > in which I have added the idea from Peter to ask the endianness
> > directly to the guest using a new qtest command.
> > 
> 
> This is definitely an improvement indeed.
> 
> Reviewed-by: Greg Kurz 

It is an improvement.  But I still think if we're relying on the
ill-defined "target endianness" we're already doing something wrong.

> 
> >  qtest.c   |   7 ++
> >  tests/libqos/virtio-pci.c |   2 +-
> >  tests/libqtest.c  | 224 
> > --
> >  tests/libqtest.h  |  16 +++-
> >  tests/virtio-blk-test.c   |   2 +-
> >  5 files changed, 118 insertions(+), 133 deletions(-)
> > 
> > diff --git a/qtest.c b/qtest.c
> > index 22482cc..b53b39c 100644
> > --- a/qtest.c
> > +++ b/qtest.c
> > @@ -537,6 +537,13 @@ static void qtest_process_command(CharDriverState 
> > *chr, gchar **words)
> >  
> >  qtest_send_prefix(chr);
> >  qtest_send(chr, "OK\n");
> > +} else if (strcmp(words[0], "endianness") == 0) {
> > +qtest_send_prefix(chr);
> > +#if defined(TARGET_WORDS_BIGENDIAN)
> > +qtest_sendf(chr, "OK big\n");
> > +#else
> > +qtest_sendf(chr, "OK little\n");
> > +#endif
> >  #ifdef TARGET_PPC64
> >  } else if (strcmp(words[0], "rtas") == 0) {
> >  uint64_t res, args, ret;
> > diff --git a/tests/libqos/virtio-pci.c b/tests/libqos/virtio-pci.c
> > index 18b92b9..6e005c1 100644
> > --- a/tests/libqos/virtio-pci.c
> > +++ b/tests/libqos/virtio-pci.c
> > @@ -86,7 +86,7 @@ static uint64_t qvirtio_pci_config_readq(QVirtioDevice 
> > *d, uint64_t addr)
> >  int i;
> >  uint64_t u64 = 0;
> >  
> > -if (qtest_big_endian()) {
> > +if (target_big_endian()) {
> >  for (i = 0; i < 8; ++i) {
> >  u64 |= (uint64_t)qpci_io_readb(dev->pdev,
> >  (void *)(uintptr_t)addr + i) << (7 - i) * 
> > 8;
> > diff --git a/tests/libqtest.c b/tests/libqtest.c
> > index 6f6bdf1..27cf6b1 100644
> > --- a/tests/libqtest.c
> > +++ b/tests/libqtest.c
> > @@ -37,6 +37,7 @@ struct QTestState
> >  bool irq_level[MAX_IRQ];
> >  GString *rx;
> >  pid_t qemu_pid;  /* our child QEMU process */
> > +bool big_endian;
> >  };
> >  
> >  static GHookList abrt_hooks;
> > @@ -146,89 +147,6 @@ void qtest_add_abrt_handler(GHookFunc fn, const void 
> > *data)
> >  g_hook_prepend(&abrt_hooks, hook);
> >  }
> >  
> > -QTestState *qtest_init(const char *extra_args)
> > -{
> > -QTestState *s;
> > -int sock, qmpsock, i;
> > -gchar *socket_path;
> > -gchar *qmp_socket_path;
> > -gchar *command;
> > -const char *qemu_binary;
> > -
> > -qemu_binary = getenv("QTEST_QEMU_BINARY");
> > -g_assert(qemu_binary != NULL);
> > -
> > -s = g_malloc(sizeof(*s));
> > -
> > -socket_path = g_strdup_printf("/tmp/qtest-%d.sock", getpid());
> > -qmp_socket_path = g_strdup_printf("/tmp/qtest-%d.qmp", getpid());
> > -
> > -sock = init_socket(socket_path);
> > -qmpsock = init_socket(qmp_socket_path);
> > -
> > -qtest_add_abrt_handler(kill_qemu_hook_func, s);
> > -
> > -s->qemu_pid = fork();
> > -if (s->qemu_pid == 0) {
> > -setenv("QEMU_AUDIO_DRV", "none", true);
> > -command = g_strdup_printf("exec %s "
> > -  "-qtest unix:%s,nowait "
> > -  "-qtest-log %s "
> > -  "-qmp unix:%s,nowait "
> > -  "-machine accel=qtest "
> > -  "-display none "
> > -  "%s", qemu_binary, socket_path,
> > -  getenv("QTEST_LOG") ? "/dev/fd/2" : 
> > "/dev/null",
> > -  qmp_socket_path,
> > -  extra_args ?: "");
> > -execlp("/bin/sh", "sh", "-c", command, NULL);
> > -exit(1);
> > -}
> > -
> > -s->fd = socket_accept(sock);
> > -if (s->fd >= 0) {
> > -s->qmp_fd = socket_accept(qmpsock);
> > -}
> > -unlink(socket_path);
> > -unlink(qmp_socket_path);
> > -g_free(socket_path);
> > -g_free(qmp_socket_path);
> > -
> > -g_assert(s->fd >= 0 && s->qmp_fd >= 0);
> > -
> > -s-

Re: [Qemu-devel] [PATCH] qtest: add read/write accessors with a specific endianness

2016-10-06 Thread David Gibson

On Thu, Oct 06, 2016 at 05:59:00PM +0200, Laurent Vivier wrote:
> 
> 
> On 06/10/2016 17:41, Peter Maydell wrote:
> > On 6 October 2016 at 16:36, Paolo Bonzini  wrote:
> >>
> >>
> >> On 06/10/2016 16:11, Greg Kurz wrote:
> >>> FWIW, Cedric had another proposal which apparently went unnoticed:
> >>>
> >>> 
> >>>
> >>> The idea is to add an optional endianness argument to the read*/write*
> >>> commands in the qtest protocol:
> >>> - libqtest then provides explicit _le and _be APIs
> >>> - no extra byteswap is performed on the test program side: qtest
> >>>   actually handles that and does exactly 1 or 0 byteswap.
> >>> - it does not use memread/memwrite
> >>> - the current 'guest native' API where qtest tswaps is preserved
> >>>
> >>
> >> No, this is a worse idea, because the right place to do the swap is in
> >> the "program" (libqtest) not in the "CPU" (QEMU).
> > 
> > Speaking of the right place to do things, perhaps we should
> > reimplement qtest_big_endian() in libqtest.c to send a query
> > to the QEMU-under-test to ask it what TARGET_BIG_ENDIAN says,
> > rather than hardcoding a big list of architectures...
> 
> Yes, it's a good idea.

I disagree.

TARGET_BIG_ENDIAN is simply not well defined - we should avoid using
it at all.

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature

Re: [Qemu-devel] [PATCH] qtest: add read/write accessors with a specific endianness

2016-10-06 Thread David Gibson

On Thu, Oct 06, 2016 at 05:36:32PM +0200, Paolo Bonzini wrote:
> 
> 
> On 06/10/2016 16:11, Greg Kurz wrote:
> > FWIW, Cedric had another proposal which apparently went unnoticed:
> > 
> > 
> > 
> > The idea is to add an optional endianness argument to the read*/write*
> > commands in the qtest protocol:
> > - libqtest then provides explicit _le and _be APIs
> > - no extra byteswap is performed on the test program side: qtest
> >   actually handles that and does exactly 1 or 0 byteswap.
> > - it does not use memread/memwrite
> > - the current 'guest native' API where qtest tswaps is preserved
> > 
> 
> No, this is a worse idea, because the right place to do the swap is in
> the "program" (libqtest) not in the "CPU" (QEMU).

Hrm.. I guess that makes sense from an x86 perspective when
load/stores always operate in LE.  Not so much for something like
Power where the CPU can perform both LE and BE load/stores trivially.
You can select with CPU mode combined with which instruction form you
use.  e.g. the always-LE writel() on a BE Power kernel is a single
byte-reversed store instruction[0].  There's no "swap" as such, and the
swapped value never appears in a register.  I'm not certain if gcc is
smart enough to translate foo->bar = cpu_to_le32(val) into a
byte-reversed store, but it might be.

The value passed across the pipe to readw etc. is text, so it has no
endianness, just as a value in a cpu register has no endianness.  To
me it makes perfect sense to tell the qtest "cpu" which endianness of
load/store you want it to do with that.

[0] Well, ok, there's a memory barrier too, so it's not quite 1
instruction.

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson

signature.asc
Description: PGP signature

Re: [Qemu-devel] [PATCH] qtest: add read/write accessors with a specific endianness

2016-10-06 Thread David Gibson

On Thu, Oct 06, 2016 at 04:11:42PM +0200, Greg Kurz wrote:
> On Thu, 6 Oct 2016 12:03:34 +0100
> Peter Maydell  wrote:
> 
> > On 6 October 2016 at 04:38, David Gibson  
> > wrote:
> > > On Wed, Oct 05, 2016 at 05:31:07AM -0700, Peter Maydell wrote:  
> > >> On 4 October 2016 at 16:43, David Gibson  
> > >> wrote:  
> > >> > On Tue, Oct 04, 2016 at 01:36:09PM +0100, Peter Maydell wrote:  
> > >> >> The difficulty with this patch is that it's hard to tell whether
> > >> >> it's really required, or if this is just adding an extra layer
> > >> >> of byteswapping that should really be done in some other location  
> > >> >
> > >> > Actually, it's neither.  It's not essential for anything, but it
> > >> > *removes* an extra layer of byteswapping that really never should have
> > >> > been done in the first place.  
> > >>
> > >> The patch is very clearly adding calls to swapping functions.
> > >> It looks like it's mostly convenience functions for not doing
> > >> those swaps explicitly in the test cases.  
> > >
> > > It's adding 1 swap on top of the memread/memwrite path - that's the
> > > path which had no existing swaps (intended primarily for bag-o'-bytes
> > > block access AFAICT).  
> > 
> > Yeah, I hadn't noticed it was using the memread/memwrite path.
> > I disagree with using that code path for what ought to be
> > register read/writes (among other things, it's not clear to me
> > that it guarantees that a 4-byte access by the test code is
> > always a 4-byte access on the device, etc).
> > 
> 
> FWIW, Cedric had another proposal which apparently went unnoticed:
> 
> 
> 
> The idea is to add an optional endianness argument to the read*/write*
> commands in the qtest protocol:
> - libqtest then provides explicit _le and _be APIs
> - no extra byteswap is performed on the test program side: qtest
>   actually handles that and does exactly 1 or 0 byteswap.
> - it does not use memread/memwrite
> - the current 'guest native' API where qtest tswaps is preserved

That would also be fine by me.

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature

Re: [Qemu-devel] [PATCH] qtest: add read/write accessors with a specific endianness

2016-10-06 Thread David Gibson

On Thu, Oct 06, 2016 at 11:47:36AM +0100, Peter Maydell wrote:
> On 6 October 2016 at 04:45, David Gibson  wrote:
> > On Wed, Oct 05, 2016 at 07:20:52AM -0700, Peter Maydell wrote:
> >> On 5 October 2016 at 07:00, Cédric Le Goater  wrote:
> >> > On 10/05/2016 03:53 PM, Peter Maydell wrote:
> >> >> Which tswap? Last time I worked through the stack of
> >> >> what happens I thought that we had the right set of
> >> >> swaps in the right places.
> >> >
> >> > The one I am talking about are under qtest_process_command(),
> >> > see below.
> >>
> >> Those are correct and required, and they do not change
> >> the overall behaviour of the system depending on the host
> >> endianness. (They convert 32-bit values to "bag of
> >> bytes in guest order" which is what the cpu_physical_memory_*
> >> functions want.)
> >
> > These functions are correct for the defined semantics of the
> > readw/readl operations, but those semantics are not useful.
> 
> ?? They're the most obvious and required semantics: "act
> like the CPU just did a word/halfword/byte read/write".

The CPU with what mode and options?

> There's a reason we've got this far without needing anything
> else, and it's that this is the most straightforward use case.

No, I'm pretty sure we've got this far because most of the tests
haven't yet been enabled for a traditionally BE target.

> > This proposal is introducing alternate functions with the more useful
> > semantics which are "convert a 32-bit value to a bag of bytes in LE
> > order" or "convert a 32-bit value to a bag of bytes in BE order"
> > depending on which variant you choose.
> 
> It's adding functions whose semantics are "act like the
> CPU wrote this value to some RAM and then memcpy()ed it to
> the device". I think devices whose usage model is "memcpy bytes
> to me" are rare to nonexistent.

Uh, that's not the intention.  I have some comments on this elsewhere.

The intended semantics are that we do a single atomic write to an
address, but with a specific endianness.

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature

Re: [Qemu-devel] [PATCHv3] Reducing stack frame size in stream_process_mem2s()

2016-10-06 Thread Alistair Francis

On Thu, Oct 6, 2016 at 11:16 AM,   wrote:
> From: Rutuja Shah 
>
> This patch allocates memory for txbuf in struct Stream rather than the stack.
> As a result, the stack frame size of stream_process_mem2s() is reduced.
>
> Signed-off-by: Rutuja Shah 
> Reviewed-by: Edgar E. Iglesias 
> Reviewed-by: Stefan Hajnoczi 
> Reviewed-by: Alistair Francis 

Hey Peter,

Can this go through target-arm?

Thanks,

Alistair

> ---
>  hw/dma/xilinx_axidma.c | 8 
>  1 file changed, 4 insertions(+), 4 deletions(-)
>
> diff --git a/hw/dma/xilinx_axidma.c b/hw/dma/xilinx_axidma.c
> index b135a5f..6065689 100644
> --- a/hw/dma/xilinx_axidma.c
> +++ b/hw/dma/xilinx_axidma.c
> @@ -111,6 +111,7 @@ struct Stream {
>  unsigned int complete_cnt;
>  uint32_t regs[R_MAX];
>  uint8_t app[20];
> +unsigned char txbuf[16 * 1024];
>  };
>
>  struct XilinxAXIDMAStreamSlave {
> @@ -256,7 +257,6 @@ static void stream_process_mem2s(struct Stream *s, 
> StreamSlave *tx_data_dev,
>   StreamSlave *tx_control_dev)
>  {
>  uint32_t prev_d;
> -unsigned char txbuf[16 * 1024];
>  unsigned int txlen;
>
>  if (!stream_running(s) || stream_idle(s)) {
> @@ -277,17 +277,17 @@ static void stream_process_mem2s(struct Stream *s, 
> StreamSlave *tx_data_dev,
>  }
>
>  txlen = s->desc.control & SDESC_CTRL_LEN_MASK;
> -if ((txlen + s->pos) > sizeof txbuf) {
> +if ((txlen + s->pos) > sizeof s->txbuf) {
>  hw_error("%s: too small internal txbuf! %d\n", __func__,
>   txlen + s->pos);
>  }
>
>  cpu_physical_memory_read(s->desc.buffer_address,
> - txbuf + s->pos, txlen);
> + s->txbuf + s->pos, txlen);
>  s->pos += txlen;
>
>  if (stream_desc_eof(&s->desc)) {
> -stream_push(tx_data_dev, txbuf, s->pos);
> +stream_push(tx_data_dev, s->txbuf, s->pos);
>  s->pos = 0;
>  stream_complete(s);
>  }
> --
> 1.9.1
>
>

Re: [Qemu-devel] Baremetal Netduino2 -- cannot output on UARTs 2-4

2016-10-06 Thread Alistair Francis

On Thu, Oct 6, 2016 at 2:52 PM, Seth K  wrote:
> You're right, qemu was not happy with that command line, but your pointer
> really helped me out, thank you!! I think a combination of my
> misunderstanding what the arguments meant, and a weird bug with this chip,
> resulted in my complete confusion.
>
> Using the command line:
> ../qemu/arm-softmmu/qemu-system-arm -M netduino2 -nographic -kernel
> output.bin -serial unix:///tmp/uart1,server -serial unix:///tmp/uart2,server
> -serial unix:///tmp/uart3,server -serial unix:///tmp/uart4,server
>
> and opening 4 sockets:
> socat - UNIX-CONNECT:/tmp/uart1 ...
>
> sends data written to UART1 to /tmp/uart1 and UART4 to /tmp/uart4. 2 and 3
> still disappear but that seems to be a bug and I have reported it. Now to
> test this on a chip with 8 UARTS...

What is the bug? Can you CC me on it?

Thanks,

Alistair

>
> Thanks again!
>
> On Wed, Oct 5, 2016 at 5:21 PM, Alistair Francis 
> wrote:
>>
>> On Wed, Oct 5, 2016 at 10:45 AM, Seth K  wrote:
>> > Thanks for that link.
>> >
>> > I tried that command line and it output UART4 but UART 1 disappeared and
>> > UART2-3 are still missing. That page doesn't seem to have an explanation
>> > of
>> > what that command line is doing nor why /dev/null is used twice, so I'm
>> > a
>> > little lost. Removing the first /dev/null made UART4 disappear but UART1
>> > came back. In the past I've looked for documentation that explained the
>> > command line but everything I found was very vague.
>>
>> Hey Seth,
>>
>> Each -serial option is used to specify where to send the serial
>> output. These are parsed in order when passed into QEMU. So the first
>> -serial option controls where to send UART0 data and so on.
>>
>> That example I sent you is for a Netduino 2 so you will need to
>> change the -serial options to match what you want to print, but it is
>> a good example you can use. Especially for muxing multiple serial
>> devices.
>>
>> It sounds like you want something like:
>> -chardev stdio,mux=on,id=terminal -serial chardev:terminal -serial
>> chardev:terminal -serial chardev:terminal -serial chardev:terminal
>> -monitor chardev:terminal
>>
>> Which will output everything to the terminal. I can imagine that will
>> cause some problems though, so you might want to output some to
>> telnet/sockets instead to stop everything being mixed together.
>>
>> Remember that -chardev creates the output device but doesn't connect
>> it to a UART. You need the -serial option to do that.
>>
>> Thanks,
>>
>> Alistair
>>
>> >
>> > build.sh has a bunch of command lines I've found online and tried:
>> >
>> > $QEMU/arm-softmmu/qemu-system-arm -M netduino2 -nographic -kernel
>> > output.bin
>> >
>> >
>> > #this one sends UART1 to a socket but not UART2
>> > #../qemu/arm-softmmu/qemu-system-arm -M netduino2 -m 128M -nographic
>> > -kernel
>> > output.bin -serial unix:///tmp/uart,server -serial
>> > unix:///tmp/uart2,server
>> > #socat - UNIX-CONNECT:/tmp/uart
>> >
>> > #../../qemu/arm-softmmu/qemu-system-arm -M netduino2 -m 128M -nographic
>> > -kernel output.bin -serial unix:///tmp/uart1,server,id=uart2 -serial
>> > unix:///tmp/uart2,server,id=uart1
>> > #didn't redirect
>> >
>> > #other desperation
>> > #../qemu/arm-softmmu/qemu-system-arm -M netduino2 -m 128M -nographic -s
>> > -d
>> > cpu,in_asm -kernel output.bin
>> > #../qemu/arm-softmmu/qemu-system-arm -M netduino2 -m 128M -nographic
>> > -serial
>> > unix:///tmp/uart,server -kernel output.bin
>> > #../qemu/arm-softmmu/qemu-system-arm -M netduino2 -m 128M -nographic
>> > -chardev socket,id=usar0,host=localhost,port=31337,server -kernel
>> > output.bin
>> > #../qemu/arm-softmmu/qemu-system-arm -M netduino2 -m 128M -nographic
>> > -chardev socket,id=chardev,host=localhost,port=31337,server -kernel
>> > output.bin
>> > #../../qemu/arm-softmmu/qemu-system-arm -M netduino2 -m 128M -nographic
>> > -kernel output.bin -s -S
>> >
>> > On Tue, Oct 4, 2016 at 12:59 PM, Alistair Francis 
>> > wrote:
>> >>
>> >> On Mon, Oct 3, 2016 at 1:25 PM, Seth K  wrote:
>> >> > I have made a bare metal "Hello World" program for the Netduino2. I
>> >> > have
>> >> > pushed it here:
>> >> >
>> >> > https://github.com/skintigh/baremetal_netduino2
>> >> >
>> >> > It should output "Test 1/4" to USART 1, "Test 2/4" to USART 2, "Test
>> >> > 3/4"
>> >> > to USART 3 and "Test 4/4" to UART 4.
>> >> >
>> >> > What actually happens in QEMU is only the first string is output.
>> >> > That
>> >> > may
>> >> > be a command line argument error on my part, so for a sanity check I
>> >> > put
>> >> > printf statements in the function stm32f2xx_usart_write in
>> >> > qemu/hw/char/stm32f2xx_usart.c and recompiled qemu. The result is
>> >> > text
>> >> > sent
>> >> > to UART1 and UART4 make it to the function (though only 1 is output),
>> >> > while
>> >> > writes to 2 and 3 simply disappear and never make it to that
>> >> > function. I
>> >> > assumed all writes to UARTs would go to that function.
>> >> >
>> >> > Am I doing something dumb?

Re: [Qemu-devel] [PATCH v2 09/11] blockjob: add block_job_start

2016-10-06 Thread John Snow




On 10/05/2016 11:17 AM, Kevin Wolf wrote:

Am 01.10.2016 um 00:00 hat John Snow geschrieben:

Instead of automatically starting jobs at creation time via backup_start
et al, we'd like to return a job object pointer that can be started
manually at a later point in time.

For now, add the block_job_start mechanism and start the jobs
automatically as we have been doing, with conversions job-by-job coming
in later patches.

Of note: cancellation of unstarted jobs will perform all the normal
cleanup as if the job had started, particularly abort and clean. The
only difference is that we will not emit any events, because the job
never actually started.

Signed-off-by: John Snow 


Should we make sure that jobs are only added to the block_jobs list once
they are started? It doesn't sound like a good idea to make a job
without a coroutine user-accessible.

Kevin



That would certainly help limit exposure to a potentially dangerous 
object, but some operations on these un-started jobs are still perfectly 
reasonable, like cancellation. Even things like "set speed" are 
perfectly reasonable on an unstarted job.


I'd rather just verify that having an accessible job cannot be operated 
on unsafely via the public interface, even though that's more work.


So here's the list:

-block_job_next: Harmless.

-block_job_get: Harmless.

-block_job_set_speed: Depends on the set_speed implementation, but 
backup_set_speed makes no assumptions and that's the only job I am 
attempting to convert in this series.


-block_job_cancel: Edited to specifically support pre-started jobs in 
this patch.


-block_job_complete: Edited to check for a coroutine specifically, but 
even without, a job will not be able to become ready without running first.


-block_job_query: Harmless* (*I could probably expose a 'started' 
variable so that libvirt didn't get confused as to why there are jobs 
that exist but are not busy nor paused.)


-block_job_pause: Harmless**

-block_job_user_pause: Harmless**

-block_job_user_paused: Harmless, if meaningless.

-block_job_resume: **We will attempt to call block_job_enter, but 
conditional on job->co, we do nothing, so it's harmless if not 
misleading that you can pause/resume to your heart's content.


-block_job_user_resume: ^ http://i.imgur.com/2zYxrIe.png ^

-block_job_cancel_sync: Safe, due to edits to block_job_cancel. Polling 
loop WILL complete as normal, because all jobs will finish through 
block_job_completed one way or another.


-block_job_cancel_sync_all: As safe as the above.

-block_job_complete_sync: Safe, complete will return error for unstarted 
jobs.


-block_job_iostatus_reset: Harmless, I think -- backup does not 
implement this method. (Huh, *no* jobs implement iostatus_reset at the 
moment...)


-block_job_txn_new: Doesn't operate on jobs.

-block_job_txn_unref: Also doesn't operate on jobs.

-block_job_get_aio_context: Harmless.

-block_job_txn_add_job: Safe and intended! There is trickery here, 
though, as once a job is introduced into transactions it opens it up to 
the private interface. This adds the following functions to considerations:


-block_job_completed: Safe, does not assume a coroutine anywhere.

-block_job_completed_single: Specifically updated to be context-aware of 
if we are pre-started or not. This is the "real" completion mechanism 
for BlockJobs that gets run for transactional OR individual jobs.


-block_job_completed_txn_abort: ***BUG***! The problem with the patch as 
I've sent it is that cancel calls completed (under the premise that 
nobody else would ever get to be able to), but we call both cancel AND 
completed from this function, which will cause us to call completed 
twice on pre-started jobs. I will fix this for the next version.


-block_job_completed_txn_success: Should never be called; if it IS, the 
presence of an unstarted job in the transaction will cause an early 
return. And even if I am utterly wrong and every job in the transaction 
completes successfully (somehow?) calling block_job_completed_single is 
perfectly safe by design.



Everything else is either internal to block job instances or the 
blockjob core.



There may be:
(A) Bugs in my code/thinking, or
(B) Improvements we can make to the readability,

but I believe that this is (Apart from the mentioned bug) not dangerous.

--js

Re: [Qemu-devel] [Qemu-block] [PATCH] block/gluster: add support for SEEK_DATA/SEEK_HOLE

2016-10-06 Thread Eric Blake

On 03/07/2016 01:14 PM, Eric Blake wrote:
> [adding qemu-devel; ALL patches must cc qemu-devel even when sent to
> another list]
> 
> On 03/07/2016 11:04 AM, Niels de Vos wrote:
>> GlusterFS 3.8 contains support for SEEK_DATA and SEEK_HOLE. This makes
>> it possible to detect sparse areas in files.
>>
>> Signed-off-by: Niels de Vos 
>>
>> --
>> Tested by compiling and running "qemu-img map gluster://..." with a
>> build of the current master branch of glusterfs. Using a Fedora
>> cloud image (in raw format) shows many SEEK procedure calls going back
>> and forth over the network. The output of "qemu map" matches the output
>> when run against the image on the local filesystem.
>> ---

I hit a weird failure when trying to compile this on an older RHEL 6
box, where /usr/include/unistd.h is too old to include SEEK_DATA and
SEEK_HOLE:

block/gluster.c: In function ‘qemu_gluster_test_seek’:
block/gluster.c:684: error: ‘SEEK_DATA’ undeclared (first use in this
function)
block/gluster.c:684: error: (Each undeclared identifier is reported only
once
block/gluster.c:684: error: for each function it appears in.)
block/gluster.c: In function ‘find_allocation’:
block/gluster.c:1202: error: ‘SEEK_DATA’ undeclared (first use in this
function)
block/gluster.c:1234: error: ‘SEEK_HOLE’ undeclared (first use in this
function)

The patch has been in place for several months (which shows how seldom I
compile on that particular box), but it makes me wonder why none of the
autobuilders have hit this failure.  But since the code mentions that it
shamelessly copies from raw-posix.c, and that file in turn has #ifdef
guards to only do SEEK_HOLE optimizations if the system headers defined
SEEK_HOLE in the first place, it sounds like you need to do a followup
patch along those lines.


>> + *
>> + * (Shamefully copied from raw-posix.c, only miniscule adaptions.)
>> + */
>> +static int find_allocation(BlockDriverState *bs, off_t start,
>> +   off_t *data, off_t *hole)
>> +{
>> +BDRVGlusterState *s = bs->opaque;
>> +off_t offs;
>> +
>> +/*
>> + * SEEK_DATA cases:
>> + * D1. offs == start: start is in data
>> + * D2. offs > start: start is in a hole, next data at offs
>> + * D3. offs < 0, errno = ENXIO: either start is in a trailing hole
>> + *  or start is beyond EOF
>> + * If the latter happens, the file has been truncated behind
>> + * our back since we opened it.  All bets are off then.
>> + * Treating like a trailing hole is simplest.
>> + * D4. offs < 0, errno != ENXIO: we learned nothing
>> + */
>> +offs = glfs_lseek(s->fd, start, SEEK_DATA);


-- 
Eric Blake   eblake redhat com+1-919-301-3266
Libvirt virtualization library http://libvirt.org



signature.asc
Description: OpenPGP digital signature

Re: [Qemu-devel] Baremetal Netduino2 -- cannot output on UARTs 2-4

2016-10-06 Thread Seth K

You're right, qemu was not happy with that command line, but your pointer
really helped me out, thank you!! I think a combination of my
misunderstanding what the arguments meant, and a weird bug with this chip,
resulted in my complete confusion.

Using the command line:
../qemu/arm-softmmu/qemu-system-arm -M netduino2 -nographic -kernel
output.bin -serial unix:///tmp/uart1,server -serial
unix:///tmp/uart2,server -serial unix:///tmp/uart3,server -serial
unix:///tmp/uart4,server

and opening 4 sockets:
socat - UNIX-CONNECT:/tmp/uart1 ...

sends data written to UART1 to /tmp/uart1 and UART4 to /tmp/uart4. 2 and 3
still disappear but that seems to be a bug and I have reported it. Now to
test this on a chip with 8 UARTS...

Thanks again!

On Wed, Oct 5, 2016 at 5:21 PM, Alistair Francis 
wrote:

> On Wed, Oct 5, 2016 at 10:45 AM, Seth K  wrote:
> > Thanks for that link.
> >
> > I tried that command line and it output UART4 but UART 1 disappeared and
> > UART2-3 are still missing. That page doesn't seem to have an explanation
> of
> > what that command line is doing nor why /dev/null is used twice, so I'm a
> > little lost. Removing the first /dev/null made UART4 disappear but UART1
> > came back. In the past I've looked for documentation that explained the
> > command line but everything I found was very vague.
>
> Hey Seth,
>
> Each -serial option is used to specify where to send the serial
> output. These are parsed in order when passed into QEMU. So the first
> -serial option controls where to send UART0 data and so on.
>
> That example I sent you is for a Netduino 2 so you will need to
> change the -serial options to match what you want to print, but it is
> a good example you can use. Especially for muxing multiple serial
> devices.
>
> It sounds like you want something like:
> -chardev stdio,mux=on,id=terminal -serial chardev:terminal -serial
> chardev:terminal -serial chardev:terminal -serial chardev:terminal
> -monitor chardev:terminal
>
> Which will output everything to the terminal. I can imagine that will
> cause some problems though, so you might want to output some to
> telnet/sockets instead to stop everything being mixed together.
>
> Remember that -chardev creates the output device but doesn't connect
> it to a UART. You need the -serial option to do that.
>
> Thanks,
>
> Alistair
>
> >
> > build.sh has a bunch of command lines I've found online and tried:
> >
> > $QEMU/arm-softmmu/qemu-system-arm -M netduino2 -nographic -kernel
> output.bin
> >
> >
> > #this one sends UART1 to a socket but not UART2
> > #../qemu/arm-softmmu/qemu-system-arm -M netduino2 -m 128M -nographic
> -kernel
> > output.bin -serial unix:///tmp/uart,server -serial
> unix:///tmp/uart2,server
> > #socat - UNIX-CONNECT:/tmp/uart
> >
> > #../../qemu/arm-softmmu/qemu-system-arm -M netduino2 -m 128M -nographic
> > -kernel output.bin -serial unix:///tmp/uart1,server,id=uart2 -serial
> > unix:///tmp/uart2,server,id=uart1
> > #didn't redirect
> >
> > #other desperation
> > #../qemu/arm-softmmu/qemu-system-arm -M netduino2 -m 128M -nographic -s
> -d
> > cpu,in_asm -kernel output.bin
> > #../qemu/arm-softmmu/qemu-system-arm -M netduino2 -m 128M -nographic
> -serial
> > unix:///tmp/uart,server -kernel output.bin
> > #../qemu/arm-softmmu/qemu-system-arm -M netduino2 -m 128M -nographic
> > -chardev socket,id=usar0,host=localhost,port=31337,server -kernel
> output.bin
> > #../qemu/arm-softmmu/qemu-system-arm -M netduino2 -m 128M -nographic
> > -chardev socket,id=chardev,host=localhost,port=31337,server -kernel
> > output.bin
> > #../../qemu/arm-softmmu/qemu-system-arm -M netduino2 -m 128M -nographic
> > -kernel output.bin -s -S
> >
> > On Tue, Oct 4, 2016 at 12:59 PM, Alistair Francis 
> > wrote:
> >>
> >> On Mon, Oct 3, 2016 at 1:25 PM, Seth K  wrote:
> >> > I have made a bare metal "Hello World" program for the Netduino2. I
> have
> >> > pushed it here:
> >> >
> >> > https://github.com/skintigh/baremetal_netduino2
> >> >
> >> > It should output "Test 1/4" to USART 1, "Test 2/4" to USART 2, "Test
> >> > 3/4"
> >> > to USART 3 and "Test 4/4" to UART 4.
> >> >
> >> > What actually happens in QEMU is only the first string is output. That
> >> > may
> >> > be a command line argument error on my part, so for a sanity check I
> put
> >> > printf statements in the function stm32f2xx_usart_write in
> >> > qemu/hw/char/stm32f2xx_usart.c and recompiled qemu. The result is text
> >> > sent
> >> > to UART1 and UART4 make it to the function (though only 1 is output),
> >> > while
> >> > writes to 2 and 3 simply disappear and never make it to that
> function. I
> >> > assumed all writes to UARTs would go to that function.
> >> >
> >> > Am I doing something dumb? Is this a bug? Any help would be greatly
> >> > appreciated.
> >>
> >> Hello Seth,
> >>
> >> I haven't looked at the multiple UART problem in a while. It sounds
> >> like your command line arguments are incorrect.
> >>
> >> Have a look at this wiki page for details on what the serial options
>

[Qemu-devel] [Bug 1174654] Re: qemu-system-x86_64 takes 100% CPU after host machine resumed from suspend to ram

2016-10-06 Thread Jürgen Veidt

I observed a similar behavior with a different application on a Windows host. 
The application is using the multimedia timer. In my case it seems that the 
timer is catching up the ticks missed during suspend to ram after resume. The 
timer thread performing the callbacks has high-priority on Windows and makes 
the host machine almost unusable for a certain time depending on the suspend 
duration. 
Maybe it is a similar situation here?

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1174654

Title:
  qemu-system-x86_64 takes 100% CPU after host machine resumed from
  suspend to ram

Status in QEMU:
  Confirmed
Status in qemu package in Ubuntu:
  Invalid

Bug description:
  I have Windows XP SP3  inside qemu VM. All works fine in 12.10. But
  after upgrading to 13.04 I have to restart the VM each time I
  resume my host machine, because the qemu process starts to take CPU
  cycles and OS inside VM is very slow and sluggish. However it's still
  controllable and could be shutdown by itself.

  According to the taskmgr any active process takes 99% CPU. It's not
  stuck on some single process.

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1174654/+subscriptions

Re: [Qemu-devel] Adding Save States menu items

2016-10-06 Thread Programmingkid


On Oct 6, 2016, at 4:47 PM, Peter Maydell wrote:

> On 6 October 2016 at 12:59, Eric Blake  wrote:
>> Libvirt also learned that the qemu 'migrate-to-disk' format (used by
>> 'savevm' or 'migrate') is NOT self-descriptive - in order to fully and
>> safely revert to an earlier state, you HAVE to store the command line
>> (or a way to regenerate the command line) that was associated with the
>> qemu whose state you saved, along with tracking all hotplugs.  Since a
>> mere 'savevm' REQUIRES external information to safely be restored, you
>> would have to figure out a way to store this additional information
>> alongside whatever save files you plan on creating (and please don't
>> change the qcow2 file format to become a dumping grounds for this
>> additional information).
> 
> Good point. I think this is a fairly strong argument for
> keeping the "user friendly" interface to snapshots in the
> VM management layer, not QEMU itself.

Another great feature dies... just kidding. Thank you everyone for giving me
your feedback.

[Qemu-devel] [PATCH v5] tests: add a m25p80 test

2016-10-06 Thread Cédric Le Goater

This test uses the palmetto platform and the Aspeed SPI controller to
test the m25p80 flash module device model. The flash model is defined
by the platform (n25q256a) and it would be nice to find a way to control
it, using a property probably.

Signed-off-by: Cédric Le Goater 
Reviewed-by: Peter Maydell 
Brainstormed-with: Greg Kurz 
---

 Changes since v5:

 - use an explicit bswap for the values read/written to the flash
   region.

 Changes since v4:

 - fixed Makefile targets
 - replaced -M with -m in qtest command line
 
 Changes since v2:

 - changed mkstemp() path prefix
 
 Changes since v1:

 - fixed guest args to use -drive and not -mtdblock

 tests/Makefile.include |2 
 tests/m25p80-test.c|  252 +
 2 files changed, 254 insertions(+)
 create mode 100644 tests/m25p80-test.c

Index: qemu-aspeed.git/tests/Makefile.include
===
--- qemu-aspeed.git.orig/tests/Makefile.include
+++ qemu-aspeed.git/tests/Makefile.include
@@ -288,6 +288,7 @@ check-qtest-sparc64-y = tests/endianness
 
 check-qtest-arm-y = tests/tmp105-test$(EXESUF)
 check-qtest-arm-y += tests/ds1338-test$(EXESUF)
+check-qtest-arm-y += tests/m25p80-test$(EXESUF)
 gcov-files-arm-y += hw/misc/tmp105.c
 check-qtest-arm-y += tests/virtio-blk-test$(EXESUF)
 gcov-files-arm-y += arm-softmmu/hw/block/virtio-blk.c
@@ -618,6 +619,7 @@ tests/bios-tables-test$(EXESUF): tests/b
 tests/pxe-test$(EXESUF): tests/pxe-test.o tests/boot-sector.o $(libqos-obj-y)
 tests/tmp105-test$(EXESUF): tests/tmp105-test.o $(libqos-omap-obj-y)
 tests/ds1338-test$(EXESUF): tests/ds1338-test.o $(libqos-imx-obj-y)
+tests/m25p80-test$(EXESUF): tests/m25p80-test.o
 tests/i440fx-test$(EXESUF): tests/i440fx-test.o $(libqos-pc-obj-y)
 tests/q35-test$(EXESUF): tests/q35-test.o $(libqos-pc-obj-y)
 tests/fw_cfg-test$(EXESUF): tests/fw_cfg-test.o $(libqos-pc-obj-y)
Index: qemu-aspeed.git/tests/m25p80-test.c
===
--- /dev/null
+++ qemu-aspeed.git/tests/m25p80-test.c
@@ -0,0 +1,252 @@
+/*
+ * QTest testcase for the M25P80 Flash (Using the Aspeed SPI
+ * Controller)
+ *
+ * Copyright (C) 2016 IBM Corp.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/bswap.h"
+#include "libqtest.h"
+
+/*
+ * ASPEED SPI Controller registers
+ */
+#define R_CONF  0x00
+#define   CONF_ENABLE_W0   (1 << 16)
+#define R_CE_CTRL   0x04
+#define   CRTL_EXTENDED0   0  /* 32 bit addressing for SPI */
+#define R_CTRL0 0x10
+#define   CTRL_CE_STOP_ACTIVE  (1 << 2)
+#define   CTRL_USERMODE0x3
+
+#define ASPEED_FMC_BASE0x1E62
+#define ASPEED_FLASH_BASE  0x2000
+
+/*
+ * Flash commands
+ */
+enum {
+JEDEC_READ = 0x9f,
+BULK_ERASE = 0xc7,
+READ = 0x03,
+PP = 0x02,
+WREN = 0x6,
+EN_4BYTE_ADDR = 0xB7,
+ERASE_SECTOR = 0xd8,
+};
+
+#define FLASH_JEDEC 0x20ba19  /* n25q256a */
+#define FLASH_SIZE  (32 * 1024 * 1024)
+
+#define PAGE_SIZE   256
+
+/*
+ * Use an explicit bswap for the values read/written to the flash region
+ * as they are BE and the Aspeed CPU is LE.
+ */
+static inline uint32_t make_be32(uint32_t data)
+{
+return bswap32(data);
+}
+
+static void spi_conf(uint32_t value)
+{
+uint32_t conf = readl(ASPEED_FMC_BASE + R_CONF);
+
+conf |= value;
+writel(ASPEED_FMC_BASE + R_CONF, conf);
+}
+
+static void spi_ctrl_start_user(void)
+{
+uint32_t ctrl = readl(ASPEED_FMC_BASE + R_CTRL0);
+
+ctrl |= CTRL_USERMODE | CTRL_CE_STOP_ACTIVE;
+writel(ASPEED_FMC_BASE + R_CTRL0, ctrl);
+
+ctrl &= ~CTRL_CE_STOP_ACTIVE;
+writel(ASPEED_FMC_BASE + R_CTRL0, ctrl);
+}
+
+static void spi_ctrl_stop_user(void)
+{
+uint32_t ctrl = readl(ASPEED_FMC_BASE + R_CTRL0);
+
+ctrl |= CTRL_USERMODE | CTRL_CE_STOP_ACTIVE;
+writel(ASPEED_FMC_BASE +

Re: [Qemu-devel] [PATCH] qtest: ask endianness of the target in qtest_init()

2016-10-06 Thread Greg Kurz

On Thu,  6 Oct 2016 20:56:58 +0200
Laurent Vivier  wrote:

> The target endianness is not deduced anymore from
> the architecture name but asked directly to the guest,
> using a new qtest command: "endianness". As it can't
> change (this is the value of TARGET_WORDS_BIGENDIAN),
> we store it to not have to ask every time we want to
> know if we have to byte-swap a value.
> 
> Signed-off-by: Laurent Vivier 
> CC: Greg Kurz 
> CC: David Gibson 
> CC: Peter Maydell 
> ---
> Note: this patch can be seen as a v2 of
> "qtest: evaluate endianness of the target in qtest_init()"
> from the patch series "tests: enable virtio tests on SPAPR"
> in which I have added the idea from Peter to ask the endianness
> directly to the guest using a new qtest command.
> 

This is definitely an improvement indeed.

Reviewed-by: Greg Kurz 

>  qtest.c   |   7 ++
>  tests/libqos/virtio-pci.c |   2 +-
>  tests/libqtest.c  | 224 
> --
>  tests/libqtest.h  |  16 +++-
>  tests/virtio-blk-test.c   |   2 +-
>  5 files changed, 118 insertions(+), 133 deletions(-)
> 
> diff --git a/qtest.c b/qtest.c
> index 22482cc..b53b39c 100644
> --- a/qtest.c
> +++ b/qtest.c
> @@ -537,6 +537,13 @@ static void qtest_process_command(CharDriverState *chr, 
> gchar **words)
>  
>  qtest_send_prefix(chr);
>  qtest_send(chr, "OK\n");
> +} else if (strcmp(words[0], "endianness") == 0) {
> +qtest_send_prefix(chr);
> +#if defined(TARGET_WORDS_BIGENDIAN)
> +qtest_sendf(chr, "OK big\n");
> +#else
> +qtest_sendf(chr, "OK little\n");
> +#endif
>  #ifdef TARGET_PPC64
>  } else if (strcmp(words[0], "rtas") == 0) {
>  uint64_t res, args, ret;
> diff --git a/tests/libqos/virtio-pci.c b/tests/libqos/virtio-pci.c
> index 18b92b9..6e005c1 100644
> --- a/tests/libqos/virtio-pci.c
> +++ b/tests/libqos/virtio-pci.c
> @@ -86,7 +86,7 @@ static uint64_t qvirtio_pci_config_readq(QVirtioDevice *d, 
> uint64_t addr)
>  int i;
>  uint64_t u64 = 0;
>  
> -if (qtest_big_endian()) {
> +if (target_big_endian()) {
>  for (i = 0; i < 8; ++i) {
>  u64 |= (uint64_t)qpci_io_readb(dev->pdev,
>  (void *)(uintptr_t)addr + i) << (7 - i) * 8;
> diff --git a/tests/libqtest.c b/tests/libqtest.c
> index 6f6bdf1..27cf6b1 100644
> --- a/tests/libqtest.c
> +++ b/tests/libqtest.c
> @@ -37,6 +37,7 @@ struct QTestState
>  bool irq_level[MAX_IRQ];
>  GString *rx;
>  pid_t qemu_pid;  /* our child QEMU process */
> +bool big_endian;
>  };
>  
>  static GHookList abrt_hooks;
> @@ -146,89 +147,6 @@ void qtest_add_abrt_handler(GHookFunc fn, const void 
> *data)
>  g_hook_prepend(&abrt_hooks, hook);
>  }
>  
> -QTestState *qtest_init(const char *extra_args)
> -{
> -QTestState *s;
> -int sock, qmpsock, i;
> -gchar *socket_path;
> -gchar *qmp_socket_path;
> -gchar *command;
> -const char *qemu_binary;
> -
> -qemu_binary = getenv("QTEST_QEMU_BINARY");
> -g_assert(qemu_binary != NULL);
> -
> -s = g_malloc(sizeof(*s));
> -
> -socket_path = g_strdup_printf("/tmp/qtest-%d.sock", getpid());
> -qmp_socket_path = g_strdup_printf("/tmp/qtest-%d.qmp", getpid());
> -
> -sock = init_socket(socket_path);
> -qmpsock = init_socket(qmp_socket_path);
> -
> -qtest_add_abrt_handler(kill_qemu_hook_func, s);
> -
> -s->qemu_pid = fork();
> -if (s->qemu_pid == 0) {
> -setenv("QEMU_AUDIO_DRV", "none", true);
> -command = g_strdup_printf("exec %s "
> -  "-qtest unix:%s,nowait "
> -  "-qtest-log %s "
> -  "-qmp unix:%s,nowait "
> -  "-machine accel=qtest "
> -  "-display none "
> -  "%s", qemu_binary, socket_path,
> -  getenv("QTEST_LOG") ? "/dev/fd/2" : 
> "/dev/null",
> -  qmp_socket_path,
> -  extra_args ?: "");
> -execlp("/bin/sh", "sh", "-c", command, NULL);
> -exit(1);
> -}
> -
> -s->fd = socket_accept(sock);
> -if (s->fd >= 0) {
> -s->qmp_fd = socket_accept(qmpsock);
> -}
> -unlink(socket_path);
> -unlink(qmp_socket_path);
> -g_free(socket_path);
> -g_free(qmp_socket_path);
> -
> -g_assert(s->fd >= 0 && s->qmp_fd >= 0);
> -
> -s->rx = g_string_new("");
> -for (i = 0; i < MAX_IRQ; i++) {
> -s->irq_level[i] = false;
> -}
> -
> -/* Read the QMP greeting and then do the handshake */
> -qtest_qmp_discard_response(s, "");
> -qtest_qmp_discard_response(s, "{ 'execute': 'qmp_capabilities' }");
> -
> -if (getenv("QTEST_STOP")) {
> -kill(s->qemu_pid, SIGSTOP);
> -}
> -
> -return s;
> -}
> -
> -void qtest_quit(QTestState *s)
> -{
> -qtest_instances = g

Re: [Qemu-devel] Adding Save States menu items

2016-10-06 Thread Peter Maydell

On 6 October 2016 at 12:59, Eric Blake  wrote:
> Libvirt also learned that the qemu 'migrate-to-disk' format (used by
> 'savevm' or 'migrate') is NOT self-descriptive - in order to fully and
> safely revert to an earlier state, you HAVE to store the command line
> (or a way to regenerate the command line) that was associated with the
> qemu whose state you saved, along with tracking all hotplugs.  Since a
> mere 'savevm' REQUIRES external information to safely be restored, you
> would have to figure out a way to store this additional information
> alongside whatever save files you plan on creating (and please don't
> change the qcow2 file format to become a dumping grounds for this
> additional information).

Good point. I think this is a fairly strong argument for
keeping the "user friendly" interface to snapshots in the
VM management layer, not QEMU itself.

thanks
-- PMM

Re: [Qemu-devel] [PATCH] qtest: ask endianness of the target in qtest_init()

2016-10-06 Thread Peter Maydell

On 6 October 2016 at 11:56, Laurent Vivier  wrote:
> The target endianness is not deduced anymore from
> the architecture name but asked directly to the guest,
> using a new qtest command: "endianness". As it can't
> change (this is the value of TARGET_WORDS_BIGENDIAN),
> we store it to not have to ask every time we want to
> know if we have to byte-swap a value.
>
> Signed-off-by: Laurent Vivier 
> CC: Greg Kurz 
> CC: David Gibson 
> CC: Peter Maydell 
> ---
> Note: this patch can be seen as a v2 of
> "qtest: evaluate endianness of the target in qtest_init()"
> from the patch series "tests: enable virtio tests on SPAPR"
> in which I have added the idea from Peter to ask the endianness
> directly to the guest using a new qtest command.
>
>  qtest.c   |   7 ++
>  tests/libqos/virtio-pci.c |   2 +-
>  tests/libqtest.c  | 224 
> --
>  tests/libqtest.h  |  16 +++-
>  tests/virtio-blk-test.c   |   2 +-
>  5 files changed, 118 insertions(+), 133 deletions(-)
>
> diff --git a/qtest.c b/qtest.c
> index 22482cc..b53b39c 100644
> --- a/qtest.c
> +++ b/qtest.c
> @@ -537,6 +537,13 @@ static void qtest_process_command(CharDriverState *chr, 
> gchar **words)
>
>  qtest_send_prefix(chr);
>  qtest_send(chr, "OK\n");
> +} else if (strcmp(words[0], "endianness") == 0) {
> +qtest_send_prefix(chr);
> +#if defined(TARGET_WORDS_BIGENDIAN)
> +qtest_sendf(chr, "OK big\n");
> +#else
> +qtest_sendf(chr, "OK little\n");
> +#endif
>  #ifdef TARGET_PPC64
>  } else if (strcmp(words[0], "rtas") == 0) {
>  uint64_t res, args, ret;
> diff --git a/tests/libqos/virtio-pci.c b/tests/libqos/virtio-pci.c
> index 18b92b9..6e005c1 100644
> --- a/tests/libqos/virtio-pci.c
> +++ b/tests/libqos/virtio-pci.c
> @@ -86,7 +86,7 @@ static uint64_t qvirtio_pci_config_readq(QVirtioDevice *d, 
> uint64_t addr)
>  int i;
>  uint64_t u64 = 0;
>
> -if (qtest_big_endian()) {
> +if (target_big_endian()) {
>  for (i = 0; i < 8; ++i) {
>  u64 |= (uint64_t)qpci_io_readb(dev->pdev,
>  (void *)(uintptr_t)addr + i) << (7 - i) * 8;

Why rename the function? We're only changing its
implementation.

> diff --git a/tests/libqtest.c b/tests/libqtest.c
> index 6f6bdf1..27cf6b1 100644
> --- a/tests/libqtest.c
> +++ b/tests/libqtest.c
> @@ -37,6 +37,7 @@ struct QTestState
>  bool irq_level[MAX_IRQ];
>  GString *rx;
>  pid_t qemu_pid;  /* our child QEMU process */
> +bool big_endian;
>  };
>
>  static GHookList abrt_hooks;
> @@ -146,89 +147,6 @@ void qtest_add_abrt_handler(GHookFunc fn, const void 
> *data)
>  g_hook_prepend(&abrt_hooks, hook);
>  }
>
> -QTestState *qtest_init(const char *extra_args)
> -{
> -QTestState *s;
> -int sock, qmpsock, i;
> -gchar *socket_path;
> -gchar *qmp_socket_path;
> -gchar *command;
> -const char *qemu_binary;
> -
> -qemu_binary = getenv("QTEST_QEMU_BINARY");
> -g_assert(qemu_binary != NULL);

This diff arrangement makes the patch a bit hard to read;
what meant that the functions had to be moved around?

> +/* ask endianness of the target */
> +
> +qtest_sendf(s, "endianness\n");
> +args = qtest_rsp(s, 1);
> +g_assert(strcmp(args[1], "big") == 0 || strcmp(args[1], "little") == 0);
> +s->big_endian = strcmp(args[1], "big") == 0;
> +g_strfreev(args);

This would be better in its own utility function, I think.

thanks
-- PMM

Re: [Qemu-devel] Adding Save States menu items

2016-10-06 Thread Eric Blake

On 10/06/2016 03:10 PM, Programmingkid wrote:
> 
> On Oct 6, 2016, at 3:59 PM, Eric Blake wrote:
> 
>> On 10/06/2016 09:22 AM, Programmingkid wrote:
>>> Would you accept a patch that added "Save State" and "Restore State" menu 
>>> items to the cocoa interface? They would allow the user to save the running 
>>> state of the emulator.
>>
>> Doesn't virt-manager already do this?  What do we gain by duplicating
>> GUI functionality at this level that is already implemented at higher
>> levels?  Not that I'm opposed to the idea, but having a solid reason why
>> it is useful is important.
> 
> Virt-manager is a Linux exclusive. This program doesn't run on Windows or
> Mac OS.

Not true. I've seen it ported to Windows, and I'm sure Cole would
welcome a port to Mac.

> The savevm feature is anything but perfect, but that doesn't mean we
> shouldn't provide easy access to it. This feature is already there in
> QEMU, so why not help the user be able to use it?

If you want to HELP the user, then tell them to use a management app
that has already dealt with the problems of consistent snapshots, rather
than making the user reinvent it themselves by clicking a gui button
that does incomplete work.

Yes, qemu has a gui.  But I maintain that it is mostly a mistake, and
that time spent improving the gui in this project, rather than porting
and improving better upper-layer guis, is probably wasted.  The Unix
philosophy is "do one thing and do it well" - we don't always live up to
it, but on THIS mailing list, the thing we do well is emulation, not
guis.  You are going to be hard-pressed to find reviewers, even if you
spend time solving the technical issues.

-- 
Eric Blake   eblake redhat com+1-919-301-3266
Libvirt virtualization library http://libvirt.org

signature.asc
Description: OpenPGP digital signature

Re: [Qemu-devel] [PATCH v2 04/11] blockjobs: Always use block_job_get_aio_context

2016-10-06 Thread John Snow




On 10/05/2016 10:02 AM, Kevin Wolf wrote:

Am 01.10.2016 um 00:00 hat John Snow geschrieben:

There are a few places where we're fishing it out for ourselves.
Let's not do that and instead use the helper.

Signed-off-by: John Snow 


That change makes a difference when the block job is running its
completion part after block_job_defer_to_main_loop(). The commit message
could be more explicit about whether this is intentional or whether this
case is expected to happen at all.

I suspect that if it can happen, this is a bug fix. Please check and
update the commit message accordingly.



Because I'm bad with being concise, I wrote a TLDR at the bottom. 
Otherwise, enjoy this wall of text.



Kevin



Intentional under the premise of:

(1) Acquiring a context that the job is not actually running under 
is likely incorrect (or at the very least misleading), and


(2) If using the main thread context for any would-be callers is 
incorrect, this is a problem with the job lifetime that needs to be 
corrected anyway.



In general, if we are acquiring the context to secure exclusive access 
to the BlockJob state itself, using the getter here is perfectly safe. 
If we are acquiring context for other reasons, we need to consider more 
carefully.



The callers are:

(A) bdrv_drain_all (block/io)

	Obtains context for the sake of pause/resume. Pauses all jobs before 
draining all BDSes. For starters, pausing a job that has deferred to 
main has no effect (and neither does resuming). This usage appears 
slightly erroneous, though, in that if we are not running from the main 
thread, we are definitely not securing exclusive rights to the block 
job. We could, in theory, race on reads/writes to the pause count field. 
This would be a bugfix.


(B) find_block_job (all monitor context)

Acquires context as a courtesy for its callers:
- qmp_block_job_set_speed
- qmp_block_job_cancel
- qmp_block_job_pause
- qmp_block_job_resume
- qmp_block_job_complete

In an "already deferred to main" sense...  in general, if the job has 
already deferred to main we don't need to acquire the block's context to 
get safe access to the job, because we're already running in the main 
context. Further, none of these functions actually have any meaning for 
a job in such a state.


- set_speed: Sets speed parameters, harmless either way.
	- cancel: Will set the cancelled boolean, reset iostatus, then attempt 
to enter the job. Since a job that enters the main loop remains busy, 
the enter is a NOP. The BlockBackend AIO context here is therefore 
extraneous, and the getter is safe.

- pause: Only increments a counter, and will have no effect.
	- resume: Decrements a counter. Attempts to enter(), but as stated 
above this is a NOP.
	- complete: Calls .complete(), for which the only implementation is 
mirror_complete. Uh, this actually seems messy. Looks like there's 
nothing to prevent us from calling this after we've already told it to 
complete once. This could be a legitimate bug that this patch does 
nothing in particular to address. If complete() is shored up such that 
it can be called precisely once, this becomes safe.


(C) qmp_query_block_jobs (monitor context)

Just a getter. Using get_context is safe in either state.

(D) run_block_job (qemu-img)

	Never called when the context is in the main loop anyway. Effectively 
no change here.




So, with the exception of .complete, I think this is a safe change as it 
stands... However... Paolo wants to complicate my life and get rid of 
this getter for his own fiendish purposes. He suggests pushing down 
context acquisition into blockjob.c directly for any QMP callers:


- qmp_block_job_set_speed -> block_job_set_speed
- qmp_block_job_cancel -> block_job_cancel
- qmp_block_job_pause -> block_job_user_pause
- qmp_block_job_resume -> block_job_user_resume
- qmp_block_job_complete -> block_job_complete
- qmp_query_block_jobs -> block_job_query

Most of these have only one caller in the QMP layer:

block_job_set_speed
block_job_user_pause
block_job_user_resume
block_job_query

These can easily just take the context they need, removing external uses 
of job->blk for purposes of acquiring the context.


block_job_cancel and block_job_complete are different.

block_job_cancel is called in many places, but we can just add a similar 
block_job_user_cancel if we wanted a version which takes care to acquire 
context and one that does not. (Or we could just acquire the context 
regardless, but Paolo warned me ominously that recursive locks are EVIL. 
He sounded serious.)


block_job_complete has no direct callers outside of QMP, but it is also 
used as a callback by block_job_complete_sync, used in qemu-img for 
run_block_job. I can probably rewrite qemu_img to avoid this usage.




TLDR:
- This change should be perfectly safe, but Paolo wants to get rid of 
this usage anyway.

- At least 5/6 uses of external context grabbing

Re: [Qemu-devel] Adding Save States menu items

2016-10-06 Thread Programmingkid


On Oct 6, 2016, at 3:59 PM, Eric Blake wrote:

> On 10/06/2016 09:22 AM, Programmingkid wrote:
>> Would you accept a patch that added "Save State" and "Restore State" menu 
>> items to the cocoa interface? They would allow the user to save the running 
>> state of the emulator.
> 
> Doesn't virt-manager already do this?  What do we gain by duplicating
> GUI functionality at this level that is already implemented at higher
> levels?  Not that I'm opposed to the idea, but having a solid reason why
> it is useful is important.

Virt-manager is a Linux exclusive. This program doesn't run on Windows or
Mac OS.

> Speaking with libvirt experience: saving a guest is somewhat easy.  But
> once you have a save-state file, then what?  Remember, the qemu GUI is
> associated with a SINGLE qemu process.  When libvirt manages save files,
> it is managing MULTIPLE qemu processes.  The sequence of 'create a save
> file, hot-plug a device, then reverting to the save file' currently
> REQUIRES that you destroy one qemu process and create another one, where
> the new process is back to the pre-hotplug configuration that was in use
> when the save file was created.  Otherwise the qemu 'loadvm' command
> will likely fail (and worse, if it does not fail, you are likely to
> trigger even-harder-to-diagnose guest corruptions that only strike down
> the road, rather than at the time of the loadvm).

There are multiple variables to consider. Hardware might be added or taken
away in between save states. Ensuring consistency between save states
is what the user should do.

> If your gui (whether cocoa or GTK) is associated with a single qemu
> process, then you will have a VERY tough time figuring out how to start
> a new qemu process to replace the current one while still keeping the
> gui unchanged.  And the work to convert qemu over to managing multiple
> VMs itself is rather pointless, when you already have libvirt and
> virt-manager and other wrappers that are already good at that.

The save states would be stored in the qcow2 image file of the guest
operating system. Multiple operating systems could be managed this way by one
gui.

> Libvirt also learned that the qemu 'migrate-to-disk' format (used by
> 'savevm' or 'migrate') is NOT self-descriptive - in order to fully and
> safely revert to an earlier state, you HAVE to store the command line
> (or a way to regenerate the command line) that was associated with the
> qemu whose state you saved, along with tracking all hotplugs.  Since a
> mere 'savevm' REQUIRES external information to safely be restored, you
> would have to figure out a way to store this additional information
> alongside whatever save files you plan on creating (and please don't
> change the qcow2 file format to become a dumping grounds for this
> additional information).

The savevm feature is anything but perfect, but that doesn't mean we
shouldn't provide easy access to it. This feature is already there in
QEMU, so why not help the user be able to use it?

Re: [Qemu-devel] Adding Save States menu items

2016-10-06 Thread Eric Blake

On 10/06/2016 11:17 AM, Paolo Bonzini wrote:
> 
> 
> On 06/10/2016 18:04, Programmingkid wrote:
>> 'Save State...' would call something equivalent to the monitor command 
>> savevm.
>> It would display a dialog box asking to give a name to this save. The default
>> name would probably be "save state mm-dd-yyyy - n". The m,d,y stand
>> for month, day, and year respectively. The n would be the save state
>> count for that day.
>>  
>> 'Restore state...' would probably display a dialog box with all the save
>> states displayed in a list. The user could select one of them and
>> QEMU would restore that state.
> 
> These are of course savevm and loadvm.

Which are somewhat limited in their power - they don't always work if
the user does hotplug in between.

>> Code in the gui would detect if the hard drive image was in a format that
>> supported save states. The menu items would probably be grayed out if
>> save states could not work with the current HD image file. 
> 
> Ok, this makes sense.  Thanks for explaining it!  I think this would be
> nice to have in the GTK+ interface.

I don't know - libvirt already learned the hard way that qcow2 images
alone are not ideal for saving machine state; and libvirt ended up
creating its own save state format that consists of a concatenation of
libvirt XML and the qemu migration format.  And the problem of how to
revert to a state with different devices than the guest currently has
seems rather difficult to squeeze into a GTK app that only manages a
single VM.

-- 
Eric Blake   eblake redhat com+1-919-301-3266
Libvirt virtualization library http://libvirt.org

signature.asc
Description: OpenPGP digital signature

Re: [Qemu-devel] Adding Save States menu items

2016-10-06 Thread Eric Blake

On 10/06/2016 09:22 AM, Programmingkid wrote:
> Would you accept a patch that added "Save State" and "Restore State" menu 
> items to the cocoa interface? They would allow the user to save the running 
> state of the emulator.

Doesn't virt-manager already do this?  What do we gain by duplicating
GUI functionality at this level that is already implemented at higher
levels?  Not that I'm opposed to the idea, but having a solid reason why
it is useful is important.

Speaking with libvirt experience: saving a guest is somewhat easy.  But
once you have a save-state file, then what?  Remember, the qemu GUI is
associated with a SINGLE qemu process.  When libvirt manages save files,
it is managing MULTIPLE qemu processes.  The sequence of 'create a save
file, hot-plug a device, then reverting to the save file' currently
REQUIRES that you destroy one qemu process and create another one, where
the new process is back to the pre-hotplug configuration that was in use
when the save file was created.  Otherwise the qemu 'loadvm' command
will likely fail (and worse, if it does not fail, you are likely to
trigger even-harder-to-diagnose guest corruptions that only strike down
the road, rather than at the time of the loadvm).

If your gui (whether cocoa or GTK) is associated with a single qemu
process, then you will have a VERY tough time figuring out how to start
a new qemu process to replace the current one while still keeping the
gui unchanged.  And the work to convert qemu over to managing multiple
VMs itself is rather pointless, when you already have libvirt and
virt-manager and other wrappers that are already good at that.

Libvirt also learned that the qemu 'migrate-to-disk' format (used by
'savevm' or 'migrate') is NOT self-descriptive - in order to fully and
safely revert to an earlier state, you HAVE to store the command line
(or a way to regenerate the command line) that was associated with the
qemu whose state you saved, along with tracking all hotplugs.  Since a
mere 'savevm' REQUIRES external information to safely be restored, you
would have to figure out a way to store this additional information
alongside whatever save files you plan on creating (and please don't
change the qcow2 file format to become a dumping grounds for this
additional information).

Your idea may be noble, but I think you are going down a rabbit hole
of unimplementable misery, and advise that you probably won't succeed.

-- 
Eric Blake   eblake redhat com+1-919-301-3266
Libvirt virtualization library http://libvirt.org

signature.asc
Description: OpenPGP digital signature

Re: [Qemu-devel] [QEMU PATCH v5 4/6] migration: migrate QTAILQ

2016-10-06 Thread Jianjun Duan



On 10/06/2016 12:01 PM, Dr. David Alan Gilbert wrote:
> * Jianjun Duan (du...@linux.vnet.ibm.com) wrote:
>>
>>
>> On 10/05/2016 09:56 AM, Dr. David Alan Gilbert wrote:
>>> * Jianjun Duan (du...@linux.vnet.ibm.com) wrote:
 Currently we cannot directly transfer a QTAILQ instance because of the
 limitation in the migration code. Here we introduce an approach to
 transfer such structures. In our approach such a structure is tagged
 with VMS_LINKED. We then modified vmstate_save_state and vmstate_load_state
 so that when VMS_LINKED is encountered, put and get from VMStateInfo are
 called respectively. We created VMStateInfo vmstate_info_qtailq for QTAILQ.
 Similar VMStateInfo can be created for other data structures such as list.
 This approach will be used to transfer pending_events and ccs_list in spapr
 state.

 We also create some macros in qemu/queue.h to access a QTAILQ using pointer
 arithmetic. This ensures that we do not depend on the implementation
 details about QTAILQ in the migration code.
>>>
>>> I think we're going to need a way to have more flexible
>>> loops; and thus my choice here wouldn't be to use the .get/.put together
>>> with the VMSD; but I think we'll end up needing a new
>>> data structure, maybe a VMStateLoop *loop in VMStateField.
>>>
>>> So would it be easier if you added that new member, then you wouldn't have 
>>> to
>>> modify every get() and put() function that already exists in the previous 
>>> patch.
>>>
>>> Specifically, your format of QTAILQ is perfectly reasonable - a
>>> byte before each entry which is 1 to indicate there's an entry or 0
>>> to indicate termination, but there are lots of other variants, e.g.
>>>
>>>a) put_scsi_requests uses that byte to hold a flag, so it's 0,1,2
>>>   0 still means terminate but 1 or 2 set a flag in the structure.
>>
>> I quickly took a look at put_scsi_requests. It is transferring a QTAILQ of
>> SCSIRequest. However it goes into the structure inside to dump the
>> elements out.
>> If using my approach, I would have a VMSD for SCSIRequest. The
>> additional byte used to indicate the end of the queue would lie outside
>> the SCSIRequest data block, so there would be no confusion.
> 
> Hmm OK; I don't think it's that easy but we'll see.
> 
It is more complicated if we want to use the exact stream as is now.
IMO VMStateInfo provides enough flexibility and is used for migrating
scsi_requests. I would stick with it if the same stream layout is to be
used.
> However, can I make one much simpler request; please split this patch
> so that the VMSTATE_LINKED and 
> vmstate_save_state/vmstate_load_state/vmfield_get_type_name
> are in one patch, while the QTAILQ patches are in a separate patch.
> (I'd be OK if you moved the VMSTATE_LINKED into the previous patch).
>

OK.

> I've just been thinking about a different use for the same mechanism;
> I want to do a:
>   VMSTATE_WITH_TMP(t1*, type1, type2, vmsd)
> 
> which also sets the LINKED, where the .get/.put allocate a temporary
> structure (of type/size type2), set up *tmp = t1 and then do the 
> vmstate_load/save
> using the vmsd on the temporary; something like (untested):
> 
> static int get_tmp(QEMUFile *f, void *pv, size_t unused_size, VMStateField 
> *field)
> {
> const VMStateDescription *vmsd = field->vmsd;
> size_t size = field->size;
> int version_id = field->version_id;
> void *tmp = gmalloc(size);
> int ret;
> 
> *(void **)tmp = pv;
> ret = vmstate_load_state(f, vmsd, tmp, version_id);
> gfree(tmp);
> return ret;
> }
> 
> This can be in a generic macro; and we would impose that type2 must be a 
> struct
> whose first element is 'type1* parent' (compile checked).
> This would work nicely for where we have to do some maths to generate some
> temporary results prior to migration; the .pre_save of the vmsd can read the 
> data
> from pv->parent and write it to the other fields but not have to use
> qemu_get_*/qemu_put_* at all.
>

This could be a special instance of VMStateInfo.

> Dave
> 
>>
>>>b) slirp_state_load also uses a null byte termination but not off a 
>>> QTAILQ
>>>   (although I think it could be flipped for one) (it uses '42' for the
>>>   non-0 value, but looks like it could become 1)
>>
>>>c) virtio_blk also rolls its own linked list but again with the 0/1 byte
>>>
>>>   Now how would I modify your QTAILQ load/store to do (a) without copying 
>>> the whole
>>> thing?
>>>
>>> Dave
>>>

 Signed-off-by: Jianjun Duan 
 ---
  include/migration/vmstate.h | 26 ++
  include/qemu/queue.h| 32 ++
  migration/trace-events  |  4 +++
  migration/vmstate.c | 66 
 +
  4 files changed, 128 insertions(+)

 diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
 index 459dd4a..e60c994 100644
 --- a/includ

Re: [Qemu-devel] [PATCH v5 14/14] cryptodev: rename cryptodev stuff

2016-10-06 Thread Eric Blake

On 10/06/2016 06:36 AM, Gonglei wrote:
> Remove qcrypto and/or QCRYPTO prefix in order to
> make the name shorter because it doesn't repeat
> any information.
> 
> Signed-off-by: Gonglei 
> ---
>  backends/cryptodev-builtin.c  |  84 

This file is new as part of your series.  Please rebase this to avoid
the churn, by making it use the correct naming from the get-go rather
than an after-the-fact correction.

-- 
Eric Blake   eblake redhat com+1-919-301-3266
Libvirt virtualization library http://libvirt.org

signature.asc
Description: OpenPGP digital signature

Re: [Qemu-devel] [PATCH v3 8/8] vmxnet3: remove unnecessary internal msix state flag

2016-10-06 Thread Dmitry Fleytman


> On 6 Oct 2016, at 17:43, Dr. David Alan Gilbert  wrote:
> 
> * Dmitry Fleytman (dmi...@daynix.com) wrote:
>> 
>>> On 30 Sep 2016, at 15:08 PM, Markus Armbruster  wrote:
>>> 
>>> Cao jin  writes:
>>> 
> On 09/29/2016 10:42 PM, Markus Armbruster wrote:
> Cao jin  writes:
> 
 
>> static int vmxnet3_post_load(void *opaque, int version_id)
>> {
>> VMXNET3State *s = opaque;
>> -PCIDevice *d = PCI_DEVICE(s);
>> 
>> net_tx_pkt_init(&s->tx_pkt, PCI_DEVICE(s),
>> s->max_tx_frags, s->peer_has_vhdr);
>> net_rx_pkt_init(&s->rx_pkt, s->peer_has_vhdr);
>> 
>> -if (s->msix_used) {
>> -if  (!vmxnet3_use_msix_vectors(s, VMXNET3_MAX_INTRS)) {
>> -VMW_WRPRN("Failed to re-use MSI-X vectors");
>> -msix_uninit(d, &s->msix_bar, &s->msix_bar);
>> -s->msix_used = false;
>> -return -1;
>> -}
>> -}
>> -
>> vmxnet3_validate_queues(s);
>> vmxnet3_validate_interrupts(s);
> 
> This hunk isn't obvious.  Can you explain the change?
> 
 
 flag msix_used is used in VMStateDescription.post_load().
 
 1st, I think msix's code here is not necessary, because in
 destination, device has been realized before incoming migration, So I
 don't know why re-use MSI-X vectors here. Dmitry, could help to
 explain?
 
 2nd, this patch is going to remove this flag, so I removed the hunk.
>>> 
>>> We need to find out whether the call of vmxnet3_use_msix_vectors() is
>>> necessary.  I suspect it's not only not necessary, but actively wrong.
>>> 
>>> If that's true, removing becomes a bug fix that should be a separate
>>> patch.
>>> 
>>> If it's only unnecessary, the removal may stay in this patch, but it
>>> needs to be explained.  Separate patch might be easier to explain.  Your
>>> choice.
>>> 
>>> If it is correct and necessary, then this patch needs to be changed not to
>>> drop it.  Instead, replace s->msix_used by msix_enabled(d) like you do
>>> elsewhere.
>>> 
>>> Dmitry, can you help us find out?
>> 
>> Hello,
>> 
>> Yes, from what I see, this call is wrong and leads to
>> reference leaks on device unload at migration target.
>> It should be removed.
> 
> Talking of oddities in vmxnet3's msix load/save,
> vmxnet3 has the honour of being the only device that
> has both a register_savevm (which registers vmxnet3-msix)
> and also has a ->vmsd entry (dc->vmsd = &vmstate_vmxnet3)
> 
> What's the history behind that? Is there some ordering requirement
> etc about the order the two get loaded/saved?
> 
> Dave

Hi Dave,

There is no specific history behind that. The vmxnet3 code was written a long
time ago and this part is just legacy code that was not cleaned up as the QEMU
code base evolved.

~Dmitry

> 
>> Best regards,
>> Dmitry
>> 
>> 
>> 
> --
> Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK

Re: [Qemu-devel] [QEMU PATCH v5 4/6] migration: migrate QTAILQ

2016-10-06 Thread Dr. David Alan Gilbert

* Jianjun Duan (du...@linux.vnet.ibm.com) wrote:
> 
> 
> On 10/05/2016 09:56 AM, Dr. David Alan Gilbert wrote:
> > * Jianjun Duan (du...@linux.vnet.ibm.com) wrote:
> >> Currently we cannot directly transfer a QTAILQ instance because of the
> >> limitation in the migration code. Here we introduce an approach to
> >> transfer such structures. In our approach such a structure is tagged
> >> with VMS_LINKED. We then modified vmstate_save_state and vmstate_load_state
> >> so that when VMS_LINKED is encountered, put and get from VMStateInfo are
> >> called respectively. We created VMStateInfo vmstate_info_qtailq for QTAILQ.
> >> Similar VMStateInfo can be created for other data structures such as list.
> >> This approach will be used to transfer pending_events and ccs_list in spapr
> >> state.
> >>
> >> We also create some macros in qemu/queue.h to access a QTAILQ using pointer
> >> arithmetic. This ensures that we do not depend on the implementation
> >> details about QTAILQ in the migration code.
> > 
> > I think we're going to need a way to have more flexible
> > loops; and thus my choice here wouldn't be to use the .get/.put together
> > with the VMSD; but I think we'll end up needing a new
> > data structure, maybe a VMStateLoop *loop in VMStateField.
> > 
> > So would it be easier if you added that new member, then you wouldn't have 
> > to
> > modify every get() and put() function that already exists in the previous 
> > patch.
> > 
> > Specifically, your format of QTAILQ is perfectly reasonable - a
> > byte before each entry which is 1 to indicate there's an entry or 0
> > to indicate termination, but there are lots of other variants, e.g.
> > 
> >a) put_scsi_requests uses that byte to hold a flag, so it's 0,1,2
> >   0 still means terminate but 1 or 2 set a flag in the structure.
> 
> I took a quick look at put_scsi_requests. It is transferring a QTAILQ of
> SCSIRequest. However it goes into the structure inside to dump the
> elements out.
> If using my approach, I would have a VMSD for SCSIRequest. The
> additional byte used to indicate the end of the queue would lie outside
> the SCSIRequest data block, so there would be no confusion.

Hmm OK; I don't think it's that easy but we'll see.

However, can I make one much simpler request; please split this patch
so that the VMSTATE_LINKED and 
vmstate_save_state/vmstate_load_state/vmfield_get_type_name
are in one patch, while the QTAILQ patches are in a separate patch.
(I'd be OK if you moved the VMSTATE_LINKED into the previous patch).

I've just been thinking about a different use for the same mechanism;
I want to do a:
  VMSTATE_WITH_TMP(t1*, type1, type2, vmsd)

which also sets the LINKED, where the .get/.put allocate a temporary
structure (of type/size type2), set up *tmp = t1 and then do the 
vmstate_load/save
using the vmsd on the temporary; something like (untested):

static int get_tmp(QEMUFile *f, void *pv, size_t unused_size, VMStateField 
*field)
{
const VMStateDescription *vmsd = field->vmsd;
size_t size = field->size;
int version_id = field->version_id;
void *tmp = gmalloc(size);
int ret;

*(void **)tmp = pv;
ret = vmstate_load_state(f, vmsd, tmp, version_id);
gfree(tmp);
return ret;
}

This can be in a generic macro; and we would impose that type2 must be a struct
with the first element is 'type1* parent' (compile checked).
This would work nicely for where we have to do some maths to generate some
temporary results prior to migration; the .pre_save of the vmsd can read the 
data
from pv->parent and write it to the other fields but not have to use
qemu_get_*/qemu_put_* at all.

Dave

> 
> >b) slirp_state_load also uses a null byte termination but not off a 
> > QTAILQ
> >   (although I think it could be flipped for one) (it uses '42' for the
> >   non-0 value, but looks like it could become 1)
> 
> >c) virtio_blk also rolls its own linked list but again with the 0/1 byte
> > 
> >   Now how would I modify your QTAILQ load/store to do (a) without copying 
> > the whole
> > thing?
> > 
> > Dave
> > 
> >>
> >> Signed-off-by: Jianjun Duan 
> >> ---
> >>  include/migration/vmstate.h | 26 ++
> >>  include/qemu/queue.h| 32 ++
> >>  migration/trace-events  |  4 +++
> >>  migration/vmstate.c | 66 
> >> +
> >>  4 files changed, 128 insertions(+)
> >>
> >> diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
> >> index 459dd4a..e60c994 100644
> >> --- a/include/migration/vmstate.h
> >> +++ b/include/migration/vmstate.h
> >> @@ -186,6 +186,12 @@ enum VMStateFlags {
> >>   * to determine the number of entries in the array. Only valid in
> >>   * combination with one of VMS_VARRAY*. */
> >>  VMS_MULTIPLY_ELEMENTS = 0x4000,
> >> +/* For fields which need customized handling, such as QTAILQ in 
> >> queue.h.
> >> + * When this flag is set in VMStateField,

[Qemu-devel] [PATCH] qtest: ask endianness of the target in qtest_init()

2016-10-06 Thread Laurent Vivier

The target endianness is not deduced anymore from
the architecture name but asked directly to the guest,
using a new qtest command: "endianness". As it can't
change (this is the value of TARGET_WORDS_BIGENDIAN),
we store it to not have to ask every time we want to
know if we have to byte-swap a value.

Signed-off-by: Laurent Vivier 
CC: Greg Kurz 
CC: David Gibson 
CC: Peter Maydell 
---
Note: this patch can be seen as a v2 of
"qtest: evaluate endianness of the target in qtest_init()"
from the patch series "tests: enable virtio tests on SPAPR"
in which I have added the idea from Peter to ask the endianness
directly to the guest using a new qtest command.

 qtest.c   |   7 ++
 tests/libqos/virtio-pci.c |   2 +-
 tests/libqtest.c  | 224 --
 tests/libqtest.h  |  16 +++-
 tests/virtio-blk-test.c   |   2 +-
 5 files changed, 118 insertions(+), 133 deletions(-)

diff --git a/qtest.c b/qtest.c
index 22482cc..b53b39c 100644
--- a/qtest.c
+++ b/qtest.c
@@ -537,6 +537,13 @@ static void qtest_process_command(CharDriverState *chr, 
gchar **words)
 
 qtest_send_prefix(chr);
 qtest_send(chr, "OK\n");
+} else if (strcmp(words[0], "endianness") == 0) {
+qtest_send_prefix(chr);
+#if defined(TARGET_WORDS_BIGENDIAN)
+qtest_sendf(chr, "OK big\n");
+#else
+qtest_sendf(chr, "OK little\n");
+#endif
 #ifdef TARGET_PPC64
 } else if (strcmp(words[0], "rtas") == 0) {
 uint64_t res, args, ret;
diff --git a/tests/libqos/virtio-pci.c b/tests/libqos/virtio-pci.c
index 18b92b9..6e005c1 100644
--- a/tests/libqos/virtio-pci.c
+++ b/tests/libqos/virtio-pci.c
@@ -86,7 +86,7 @@ static uint64_t qvirtio_pci_config_readq(QVirtioDevice *d, 
uint64_t addr)
 int i;
 uint64_t u64 = 0;
 
-if (qtest_big_endian()) {
+if (target_big_endian()) {
 for (i = 0; i < 8; ++i) {
 u64 |= (uint64_t)qpci_io_readb(dev->pdev,
 (void *)(uintptr_t)addr + i) << (7 - i) * 8;
diff --git a/tests/libqtest.c b/tests/libqtest.c
index 6f6bdf1..27cf6b1 100644
--- a/tests/libqtest.c
+++ b/tests/libqtest.c
@@ -37,6 +37,7 @@ struct QTestState
 bool irq_level[MAX_IRQ];
 GString *rx;
 pid_t qemu_pid;  /* our child QEMU process */
+bool big_endian;
 };
 
 static GHookList abrt_hooks;
@@ -146,89 +147,6 @@ void qtest_add_abrt_handler(GHookFunc fn, const void *data)
 g_hook_prepend(&abrt_hooks, hook);
 }
 
-QTestState *qtest_init(const char *extra_args)
-{
-QTestState *s;
-int sock, qmpsock, i;
-gchar *socket_path;
-gchar *qmp_socket_path;
-gchar *command;
-const char *qemu_binary;
-
-qemu_binary = getenv("QTEST_QEMU_BINARY");
-g_assert(qemu_binary != NULL);
-
-s = g_malloc(sizeof(*s));
-
-socket_path = g_strdup_printf("/tmp/qtest-%d.sock", getpid());
-qmp_socket_path = g_strdup_printf("/tmp/qtest-%d.qmp", getpid());
-
-sock = init_socket(socket_path);
-qmpsock = init_socket(qmp_socket_path);
-
-qtest_add_abrt_handler(kill_qemu_hook_func, s);
-
-s->qemu_pid = fork();
-if (s->qemu_pid == 0) {
-setenv("QEMU_AUDIO_DRV", "none", true);
-command = g_strdup_printf("exec %s "
-  "-qtest unix:%s,nowait "
-  "-qtest-log %s "
-  "-qmp unix:%s,nowait "
-  "-machine accel=qtest "
-  "-display none "
-  "%s", qemu_binary, socket_path,
-  getenv("QTEST_LOG") ? "/dev/fd/2" : 
"/dev/null",
-  qmp_socket_path,
-  extra_args ?: "");
-execlp("/bin/sh", "sh", "-c", command, NULL);
-exit(1);
-}
-
-s->fd = socket_accept(sock);
-if (s->fd >= 0) {
-s->qmp_fd = socket_accept(qmpsock);
-}
-unlink(socket_path);
-unlink(qmp_socket_path);
-g_free(socket_path);
-g_free(qmp_socket_path);
-
-g_assert(s->fd >= 0 && s->qmp_fd >= 0);
-
-s->rx = g_string_new("");
-for (i = 0; i < MAX_IRQ; i++) {
-s->irq_level[i] = false;
-}
-
-/* Read the QMP greeting and then do the handshake */
-qtest_qmp_discard_response(s, "");
-qtest_qmp_discard_response(s, "{ 'execute': 'qmp_capabilities' }");
-
-if (getenv("QTEST_STOP")) {
-kill(s->qemu_pid, SIGSTOP);
-}
-
-return s;
-}
-
-void qtest_quit(QTestState *s)
-{
-qtest_instances = g_list_remove(qtest_instances, s);
-g_hook_destroy_link(&abrt_hooks, g_hook_find_data(&abrt_hooks, TRUE, s));
-
-/* Uninstall SIGABRT handler on last instance */
-if (!qtest_instances) {
-cleanup_sigabrt_handler();
-}
-
-kill_qemu(s);
-close(s->fd);
-close(s->qmp_fd);
-g_string_free(s->rx, true);
-g_free(s);
-}
-
 static void socket_send(int fd, const char *buf, size_t size)

Re: [Qemu-devel] [PATCH 05/10] qemu-tech: document lazy condition code evaluation in cpu.h

2016-10-06 Thread Peter Maydell

On 6 October 2016 at 16:24, Paolo Bonzini  wrote:
> Unlike the other sections, they are pretty specific to a particular CPU.
>
> Signed-off-by: Paolo Bonzini 
> ---
>  qemu-tech.texi | 25 -
>  target-cris/cpu.h  |  7 +++
>  target-i386/cpu.h  |  7 +++
>  target-m68k/cpu.h  |  8 
>  target-sparc/cpu.h |  5 +
>  5 files changed, 27 insertions(+), 25 deletions(-)

target-s390x/ also seems to have the CC optimization...

thanks
-- PMM

Re: [Qemu-devel] [PATCH 06/10] qemu-tech: move user mode emulation features from qemu-tech

2016-10-06 Thread Jonathan Neuschäfer

On Thu, Oct 06, 2016 at 05:24:19PM +0200, Paolo Bonzini wrote:
> These are interesting for users too, since nowadays most
> qemu-user users are going to be somewhat technical rather than
> just people that want to run Wine.  Some detail is lost, on
> the other hand some of the information I removed (e.g. basic
> block unchaining) was obsolete.
> 
> Signed-off-by: Paolo Bonzini 
> ---
[...]
> +@item Threading:
> +On Linux, QEMU can emulate the @code{clone} and create a real host

s/@code{clone}/@code{clone} syscall/ ?

Thanks for reworking the documentation!


Regards,
Jonathan Neuschäfer


signature.asc
Description: PGP signature

Re: [Qemu-devel] [PATCH v2 02/11] blockjob: centralize QMP event emissions

2016-10-06 Thread John Snow




On 10/06/2016 02:16 PM, Eric Blake wrote:

On 10/06/2016 11:57 AM, John Snow wrote:

Mirrors "internal bitmaps," too.

I can rig it such that if a job has no ID, it will cease to show up via
query and no longer emit events.

Downside: Whether or not a device is busy or can accept another job
becomes opaque to the management layer.


A bit, but isn't the point of this exercise to ultimately reach the
point where we can support multiple simultaneous jobs, so long as
op-blockers prove the jobs don't collide?



Yes, exactly. It's only a temporary drawback. If libvirt can cope for 
now, I am happy.

[Qemu-devel] [PATCHv3] Reducing stack frame size in stream_process_mem2s()

2016-10-06 Thread rutu . shah . 26

From: Rutuja Shah 

This patch allocates memory for txbuf in struct Stream rather than the stack.
As a result, the stack frame size of stream_process_mem2s() is reduced.

Signed-off-by: Rutuja Shah 
Reviewed-by: Edgar E. Iglesias 
Reviewed-by: Stefan Hajnoczi 
Reviewed-by: Alistair Francis 
---
 hw/dma/xilinx_axidma.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/hw/dma/xilinx_axidma.c b/hw/dma/xilinx_axidma.c
index b135a5f..6065689 100644
--- a/hw/dma/xilinx_axidma.c
+++ b/hw/dma/xilinx_axidma.c
@@ -111,6 +111,7 @@ struct Stream {
 unsigned int complete_cnt;
 uint32_t regs[R_MAX];
 uint8_t app[20];
+unsigned char txbuf[16 * 1024];
 };
 
 struct XilinxAXIDMAStreamSlave {
@@ -256,7 +257,6 @@ static void stream_process_mem2s(struct Stream *s, 
StreamSlave *tx_data_dev,
  StreamSlave *tx_control_dev)
 {
 uint32_t prev_d;
-unsigned char txbuf[16 * 1024];
 unsigned int txlen;
 
 if (!stream_running(s) || stream_idle(s)) {
@@ -277,17 +277,17 @@ static void stream_process_mem2s(struct Stream *s, 
StreamSlave *tx_data_dev,
 }
 
 txlen = s->desc.control & SDESC_CTRL_LEN_MASK;
-if ((txlen + s->pos) > sizeof txbuf) {
+if ((txlen + s->pos) > sizeof s->txbuf) {
 hw_error("%s: too small internal txbuf! %d\n", __func__,
  txlen + s->pos);
 }
 
 cpu_physical_memory_read(s->desc.buffer_address,
- txbuf + s->pos, txlen);
+ s->txbuf + s->pos, txlen);
 s->pos += txlen;
 
 if (stream_desc_eof(&s->desc)) {
-stream_push(tx_data_dev, txbuf, s->pos);
+stream_push(tx_data_dev, s->txbuf, s->pos);
 s->pos = 0;
 stream_complete(s);
 }
-- 
1.9.1

Re: [Qemu-devel] [PATCH v2 02/11] blockjob: centralize QMP event emissions

2016-10-06 Thread Eric Blake

On 10/06/2016 11:57 AM, John Snow wrote:
> Mirrors "internal bitmaps," too.
> 
> I can rig it such that if a job has no ID, it will cease to show up via
> query and no longer emit events.
> 
> Downside: Whether or not a device is busy or can accept another job
> becomes opaque to the management layer.

A bit, but isn't the point of this exercise to ultimately reach the
point where we can support multiple simultaneous jobs, so long as
op-blockers prove the jobs don't collide?

-- 
Eric Blake   eblake redhat com+1-919-301-3266
Libvirt virtualization library http://libvirt.org



signature.asc
Description: OpenPGP digital signature

Re: [Qemu-devel] [PATCH 3/4] sockets: add AF_VSOCK support

2016-10-06 Thread Eric Blake

On 10/06/2016 11:40 AM, Stefan Hajnoczi wrote:
> Add the AF_VSOCK address family so that qemu-ga will be able to use
> virtio-vsock.
> 
> The AF_VSOCK address family uses <cid, port> address tuples.  The cid is
> the unique identifier comparable to an IP address.  AF_VSOCK does not
> use name resolution so it's seasy to convert between struct sockaddr_vm

s/seasy/easy/

> and strings.
> 
> This patch defines a VsockSocketAddress instead of trying to piggy-back
> on InetSocketAddress.  This is cleaner in the long run since it avoids
> lots of IPv4 vs IPv6 vs vsock special casing.

At any rate, it seems like SocketAddress would be a better fit for a
tri-state union between InetSocketAddress, UnixSocketAddress, and
VsockSocketAddress.

> 
> Signed-off-by: Stefan Hajnoczi 
> ---
>  qapi-schema.json|  23 +-
>  util/qemu-sockets.c | 222 
> 
>  2 files changed, 244 insertions(+), 1 deletion(-)
> 

> +##
>  # @SocketAddress
>  #
>  # Captures the address of a socket, which could also be a named file 
> descriptor
> @@ -3027,6 +3047,7 @@
>'data': {
>  'inet': 'InetSocketAddress',
>  'unix': 'UnixSocketAddress',
> +'vsock': 'VsockSocketAddress',
>  'fd': 'String' } }

Which is in fact what you did.


> +static int vsock_connect_addr(const struct sockaddr_vm *svm, bool 
> *in_progress,
> +  ConnectState *connect_state, Error **errp)
> +{
> +int sock, rc;
> +
> +*in_progress = false;
> +
> +sock = qemu_socket(AF_VSOCK, SOCK_STREAM, 0);
> +if (sock < 0) {
> +error_setg_errno(errp, errno, "Failed to create socket");
> +return -1;
> +}
> +if (connect_state != NULL) {
> +qemu_set_nonblock(sock);

Isn't the presence of vsock support sufficient to prove that we have
SOCK_NONBLOCK support as part of our socket() call?  In which case,
wouldn't it be better to pass that option up front to atomically get a
non-blocking socket, rather than having to change its state after the fact?


> +static VsockSocketAddress *vsock_parse(const char *str, Error **errp)
> +{
> +VsockSocketAddress *addr = NULL;
> +char cid[33];
> +char port[33];
> +
> +if (sscanf(str, "%32[^:]:%32[^,]", cid, port) != 2) {

Would it be a wise idea to also use %n to ensure that you aren't
ignoring trailing garbage?

-- 
Eric Blake   eblake redhat com+1-919-301-3266
Libvirt virtualization library http://libvirt.org



signature.asc
Description: OpenPGP digital signature

Re: [Qemu-devel] [PATCH 0/3] preliminaries for GICv3 virt support

2016-10-06 Thread Peter Maydell

On 6 October 2016 at 18:48, Peter Maydell  wrote:
> Trying the kvm-unit-tests, the simple 'setup' test
> works OK, but the one which tries to PSCI boot other SMP cores
> fails.

This is probably because the outer QEMU is intercepting those
PSCI calls and executing them itself rather than allowing the
EL2 guest to do the work...

-- PMM

Re: [Qemu-devel] [Qemu-block] [PATCH v14 10/21] qapi: permit auto-creating nested structs

2016-10-06 Thread Eric Blake

On 10/06/2016 10:30 AM, Kevin Wolf wrote:

>>> So, considering that it is a purely internally used type not visible in
>>> QMP, would it make sense to change NetLegacy to be a flat union instead,
>>> with NetLegacyOptions as the common base? Then you get the same flat
>>> namespace that we always had and that makes much more sense as an API.
>>
>> Changing that will impact on the QMP data structure, so I don't think
>> we can do that.
> 
> I don't see this type used in QMP at all. It's only used for command
> line parsing, and only with the OptsVisitor, so I think we're fine if we
> flatten it now.

In fact, in all my work to move netdev_add towards QAPI, I intentionally
special-cased NetLegacy to be unchanged, because it was not being used
by QMP at the time, and I didn't want any QMP changes to netdev to break
command line usage of NetLegacy.

We still have the annoying problem that my last patch for converting
netdev_add to QAPI didn't make 2.7, because we hadn't sorted out whether
we wanted to be able to handle back-compat of a user that requested "1"
vs. 1 (the QemuOpts code accepted either spelling, by virtue of the fact
that QDict to opts conversion rewrote the parsed QMP object into all
strings); and maybe this series solves that issue.  But the issue for
netdev_add (which IS visible to QMP) is slightly different than the
issue for NetLegacy (which does not have QMP ties other than using QAPI
to define a struct and glue code in net.c to map it back to normal
netdev code).

-- 
Eric Blake   eblake redhat com+1-919-301-3266
Libvirt virtualization library http://libvirt.org

signature.asc
Description: OpenPGP digital signature

Re: [Qemu-devel] Adding Save States menu items

2016-10-06 Thread Programmingkid


On Oct 6, 2016, at 1:49 PM, Peter Maydell wrote:

> On 6 October 2016 at 18:43, Programmingkid  wrote:
>> @ Peter I don't know how to implement this in GTK. The GTK GUI maintainer 
>> doesn't even
>> reply to my emails. Could we start this in Cocoa and then port it to GTK? I 
>> think
>> the time is right. QEMU has progressed to the point that the save and restore
>> feature works. A lot of users would appreciate this feature if it were more
>> accessible.
> 
> I'm afraid I don't have time to design-review and code-review significant
> changes to the cocoa UI. So you would need to find somebody else to
> review it anyway, in which case you might as well start with GTK
> or whatever that reviewer's preference for UI frontend is.

Is there another person you could suggest? Gerd seems too unavailable for this.

Re: [Qemu-devel] Adding Save States menu items

2016-10-06 Thread Peter Maydell

On 6 October 2016 at 18:43, Programmingkid  wrote:
> @ Peter I don't know how to implement this in GTK. The GTK GUI maintainer 
> doesn't even
> reply to my emails. Could we start this in Cocoa and then port it to GTK? I 
> think
> the time is right. QEMU has progressed to the point that the save and restore
> feature works. A lot of users would appreciate this feature if it were more
> accessible.

I'm afraid I don't have time to design-review and code-review significant
changes to the cocoa UI. So you would need to find somebody else to
review it anyway, in which case you might as well start with GTK
or whatever that reviewer's preference for UI frontend is.

thanks
-- PMM

Re: [Qemu-devel] [PATCH 0/3] preliminaries for GICv3 virt support

2016-10-06 Thread Peter Maydell

On 6 October 2016 at 17:59, Edgar E. Iglesias  wrote:
> On Thu, Oct 06, 2016 at 02:21:04PM +0100, Peter Maydell wrote:
>> This set of three straightforward patches is a preliminary
>> for adding virtualization support to the GICv3 emulation:
>>  * add a (nop implementation of) MDCCINT_EL1, since KVM
>>will read/write it on worldswitch
>>  * fix some bugs in the GICv3 trace events
>>  * add trace events for the generic timers
>>(which I have been using for debugging)
>>
>> I actually have almost all of the GICv3 virt code written,
>> but it currently has bugs which mean that a guest kernel
>> under KVM won't boot. Debugging in progress...
>
>
> That is very cool, we could soon enable EL2 :-)
>
> What kind of issues are you seeing?

The guest kernel just sits there like a lemon without printing
anything. Trying the kvm-unit-tests, the simple 'setup' test
works OK, but the one which tries to PSCI boot other SMP cores
fails.

> FWIW with our out of tree GICv2 virt models we've got issues
> with SMP Xen where things go nuts sometimes with virtual
> timer interrupts. Sometimes they take forever to hit,
> as if we lose events.

Have you tried undoing the broken bit of virt.c that
marks the timer interrupts as edge triggered ?
(see 'hw/arm/virt: Don't incorrectly claim architectural timer
to be edge-triggered' in the branch below).

> Last time I looked at it, I noticed that our GICv2 virt
> implementation of the APR regs and EOIR stuff seems totally
> bogus (my bad).

Those were dodgy in original GICv2 for a long time...

https://git.linaro.org/people/peter.maydell/qemu-arm.git gicv3-virt
if you're interested in looking at my work-in-progress, though
as I say it is clearly badly broken right now.

thanks
-- PMM

Re: [Qemu-devel] [PATCH V6 2/2] arm: virt: add PMU property to mach-virt machine type

2016-10-06 Thread Andrea Bolognani

On Tue, 2016-10-04 at 17:38 -0400, Wei Huang wrote:
> CPU vPMU is now turned off by default, but it was ON in virt-2.7
> machine type. To solve this problem, this patch adds a PMU option
> in machine state, which is used to control CPU's vPMU status. This
> PMU option is not exposed to command line and is turned on in
> virt-2.7 machine type to make sure it is backward compatible.
> 
> Signed-off-by: Wei Huang 
> ---
>  hw/arm/virt.c | 17 +
>  1 file changed, 17 insertions(+)

On both Moonshot and ThunderX,

Tested-by: Andrea Bolognani 

-- 
Andrea Bolognani / Red Hat / Virtualization

Re: [Qemu-devel] [PATCH V6 1/2] arm64: Add an option to turn on/off vPMU support

2016-10-06 Thread Andrea Bolognani

On Tue, 2016-10-04 at 17:38 -0400, Wei Huang wrote:
> This patch adds a pmu=[on/off] option to enable/disable vPMU support
> in guest vCPU. This option is only available for cortex-a57/cortex-53/
> host under both TCG and KVM modes, but unavailable on ARMv7 and other
> processors. It allows virt tools, such as libvirt, to determine the
> existence of vPMU and configure it. Note that, if nothing specified,
> the pmu option is set to AUTO as default, allowing machine-level PMU
> property to override it. Also when pmu is turned on under non-KVM mode,
> a warning message will be printed.
> 
> Signed-off-by: Wei Huang 
> ---
>  hw/arm/virt-acpi-build.c |  2 +-
>  hw/arm/virt.c|  2 +-
>  target-arm/cpu.c | 23 +++
>  target-arm/cpu.h |  3 ++-
>  target-arm/cpu64.c   |  2 ++
>  target-arm/kvm64.c   | 17 ++---
>  6 files changed, 43 insertions(+), 6 deletions(-)

On both Moonshot and ThunderX,

Tested-by: Andrea Bolognani 

-- 
Andrea Bolognani / Red Hat / Virtualization

Re: [Qemu-devel] Adding Save States menu items

2016-10-06 Thread Programmingkid


On Oct 6, 2016, at 12:17 PM, Paolo Bonzini wrote:

> 
> 
> On 06/10/2016 18:04, Programmingkid wrote:
>> 'Save State...' would call something equivalent to the monitor command 
>> savevm.
>> It would display a dialog box asking to give a name to this save. The default
>> name would probably be "save state mm-dd-yyyy - n". The m,d,y stand
>> for month, day, and year respectively. The n would be the save state
>> count for that day.
>> 
>> 'Restore state...' would probably display a dialog box with all the save
>> states displayed in a list. The user could select one of them and
>> QEMU would restore that state.
> 
> These are of course savevm and loadvm.
> 
>> 'Manage Save States...' would display a dialog box with all the save
>> states displayed in a list. There would be buttons on the bottom called 
>> "Delete", "Rename",
>> and "Duplicate". They would give the user the options that can
>> be done to each save state. The Delete button would correspond to
>> the monitor command delvm "name". Rename and Duplicate would have to
>> be researched a little more before I can say how they would be implemented.
> 
> I think Delete is enough.
> 
>> Code in the gui would detect if the hard drive image was in a format that
>> supported save states. The menu items would probably be grayed out if
>> save states could not work with the current HD image file. 
> 
> Ok, this makes sense.  Thanks for explaining it!  I think this would be
> nice to have in the GTK+ interface.
> 
> Paolo

@ Paolo Thanks for the help.

@ Peter I don't know how to implement this in GTK. The GTK GUI maintainer 
doesn't even
reply to my emails. Could we start this in Cocoa and then port it to GTK? I 
think
the time is right. QEMU has progressed to the point that the save and restore
feature works. A lot of users would appreciate this feature if it were more
accessible.

Re: [Qemu-devel] vfio/pci: IGD assignment

2016-10-06 Thread nicolas prochazka

Hello,
I just added
{ PCI_VENDOR_ID_INTEL, 0x9d70, pci_quirk_mf_endpoint_acs },
in drivers/pci/quirks.c   for the sound card on intel nuc skylake.
This issue is present on Broadwell, but it is very strange behaviour
because it seems to depend on how the Windows IGD drivers are installed.
I'm trying to investigate, now that I know there is no vfio modification in
the kernel code.
Regards,
Nicolas

2016-10-06 17:56 GMT+02:00 Alex Williamson :

> On Thu, 6 Oct 2016 08:06:24 +0200
> nicolas prochazka  wrote:
>
> > Hello,
> > Since I upgrade my linux kernel from linux 4.8rc1 to 4.8.0, I can observe
> > regression with VFIO/PCI IGD Assignment .
> > All works very well with linux 4.8rc1 ( broadwell/skylake cpu + win10
> guest
> > / Linux Guest ) .
> > Since linux 4.8.0, Windows guest screen is a " blink pixel , multi color
> > blink , a matrix revisited.
> >
> > Only linux kernel change between two test,
> > no log in qemu or dmesg.
> >
> > Is there modification to vfio in kernel between 4.8rc1 and 4.8.0 ? , can
> i
> > test something.
>
> Hmm, I was running 4.8-rc5 on my system, I upgraded to v4.8 and it
> still works.  There's only one change in drivers/vfio from rc1 to 4.8:
>
> $ git log --oneline v4.8-rc1..v4.8 drivers/vfio
> c8952a7 vfio/pci: Fix NULL pointer oops in error interrupt setup handling
>
> I notice you're also assigning device 1f.3, which means you're using
> the acs override patch, which means you're not running a stock v4.8.
> Thanks,
>
> Alex
>

Re: [Qemu-devel] [PATCH v2 02/11] blockjob: centralize QMP event emissions

2016-10-06 Thread John Snow




On 10/06/2016 03:44 AM, Kevin Wolf wrote:

Am 05.10.2016 um 20:49 hat John Snow geschrieben:

On 10/05/2016 09:43 AM, Kevin Wolf wrote:

Am 01.10.2016 um 00:00 hat John Snow geschrieben:

@@ -3136,10 +3111,10 @@ void qmp_block_commit(bool has_job_id, const char 
*job_id, const char *device,
goto out;
}
commit_active_start(has_job_id ? job_id : NULL, bs, base_bs, speed,
-on_error, block_job_cb, bs, &local_err, false);
+on_error, NULL, bs, &local_err, false);


Here we have an additional caller in block/replication.c and qemu-img,
so the parameters must stay. For qemu-img, nothing changes. For
replication, the block job events are added as a side effect.

Not sure if we want to emit such events for an internal block job, but
if we do want the change, it should be explicit.



Hmm, do we want to make it so some jobs are invisible and others are
not? Because as it stands right now, neither case is strictly true.
We only emit cancelled/completed events if it was started via QMP,
however we do emit events for error and ready regardless of who
started the job.

That didn't seem particularly consistent to me; either all events
should be controlled by the job layer itself or none of them should
be.


Yes, I agree. The use of block jobs in replication is rather broken and
we should change it one way or another. But I'd prefer to do so
explicitly instead of doing it as a side-effect of a patch like this
one.



I can always split this patch out and CC Wen, Eric, Markus et al and 
adjust the commit message to be explicit.



I opted for "all."

For "internal" jobs that did not previously emit any events, is it
not true that these jobs still appear in the block job list and are
effectively public regardless? I'd argue that these messages may be
of value for management utilities who are still blocked by these
jobs whether or not they are 'internal' or not.

I'll push for keeping it mandatory and explicit. If it becomes a
problem, we can always add a 'silent' job property that silences ALL
qmp events, including all completion, error, and ready notices.


Actually, there is at least one other reason why the block jobs in
replication are a bad idea as they are today: Job naming. Currently
they use a fixed string, conflicting with the user-controlled job
namespace and with itself (i.e. restricting replication to a single
disk).

And are we really prepared to handle cases where the user decides to
pause, complete or cancel an internal job?

I think we should really hide them from the user. And maybe the way to
do so isn't a bool job->user flag, but actually job->id = NULL. Then it
would work the same way as named/internal BlockBackends do and we would
get rid of the naming problem, too.

Kevin



Mirrors "internal bitmaps," too.

I can rig it such that if a job has no ID, it will cease to show up via 
query and no longer emit events.


Downside: Whether or not a device is busy or can accept another job 
becomes opaque to the management layer.


--js

Re: [Qemu-devel] [PATCH v2 00/12] virtio migration: simplify vmstate helper

2016-10-06 Thread Halil Pasic



On 10/06/2016 05:30 PM, Paolo Bonzini wrote:
> 
> 
> On 06/10/2016 14:55, Halil Pasic wrote:
>>
>> Let us simplify a couple of things and get rid of some code duplication.
>>
>> NOTE: This series is exploring the suggestions of Paolo (I did my best
>> to do everything as requested). I still think that we are better off with
>> a macro than with spelling out the VMStateDescription for each device
>> separately and redundantly. The LOC balance of the previous version was
>> -41, this version is at +14 because of the expanded macros.  IMHO the
>> readability benefit of spelling out the vmsd definitions is questionable
>> (but it is beneficial if using ctags or grep). I hope for a good
>> discussion, but I can live with this version too.
>>
>> v1 --> v2:
>> * export VMStateInfo instead of helpers
>> * change semantic of VMSTATE_VIRTIO_DEVICE
>> * drop VIRTIO_DEF_DEVICE_VMSD macro, use its expansion instead
> 
> Yes, this is what I meant...  I personally like that everything is
> spelled out in
> 
> +static const VMStateDescription vmstate_virtio_blk = {
> +.name = "virtio-blk",
> +.minimum_version_id = 2,
> +.version_id = 2,
> +.fields = (VMStateField[]) {
> +VMSTATE_VIRTIO_DEVICE,
> +VMSTATE_END_OF_LIST()
> +},
> +};
> 

Is a valid point :). There is one important thing that is not spelled
out here, and that's the state specific to virtio_blk. In my opinion
this definition suggests that virtio_blk has no state beyond its
base's (in terms of inheritance, that is virtio core+transport) state,
and this is wrong. That is probably the main reason why I prefer
the 'magic macro' (others are my preference for the concise, and for
making similar stuff look similar, and dissimilar things look different).

This however can be remedied if we do:
in virtio.h:
extern const VMStateField virtio_vmstate_fields[];
in virtio.c:
const VMStateField virtio_vmstate_fields[] = {
VMSTATE_VIRTIO_DEVICE,
VMSTATE_END_OF_LIST()
};

in virtio-blk.c:
static const VMStateDescription vmstate_virtio_blk = {
.name = "virtio-blk",
.minimum_version_id = 2,
.version_id = 2,
.fields = virtio_vmstate_fields,
};

And would need to fix const correctness for VMStateField in
vmstate.c and vmstate.h.

I have never thought of this because of the const but right
now I feel like I like this option the best:
* it works with grep and ctags
* it's absolutely flexible
* the oddity of virtio_vmstate_fields can be documented at
  the declaration/definition site
* we could later use VMSTATE_VIRTIO_DEVICE with the usual VMSTATE_PCI_DEVICE
  like semantic for the new devices once we establish a new migration schema
  in which the derived specifies its state using the fields member of
  its vmsd
* redundancy is minimal:
** having separate control over minimum_version_id and version_id
   seems appropriate to me and having it written out improves readability
** establishing a naming convention for the vmsd is not so important and
   the one line we can spare there with the macro is not worth it

What do you think, guys: should I make a v3 along this path?

Cheers,
Halil

Re: [Qemu-devel] [PATCH 1/3] target-arm: Implement dummy MDCCINT_EL1

2016-10-06 Thread Edgar E. Iglesias

On Thu, Oct 06, 2016 at 02:21:05PM +0100, Peter Maydell wrote:
> MDCCINT_EL1 is part of the DCC debugger communication
> channel between the CPU and an attached external debugger.
> QEMU doesn't implement this, but since Linux may try
> to access this register we need to provide at least
> a dummy implementation.
> 
> Signed-off-by: Peter Maydell 

Reviewed-by: Edgar E. Iglesias 


> ---
>  target-arm/helper.c | 8 
>  1 file changed, 8 insertions(+)
> 
> diff --git a/target-arm/helper.c b/target-arm/helper.c
> index 25f612d..23792ab 100644
> --- a/target-arm/helper.c
> +++ b/target-arm/helper.c
> @@ -4060,6 +4060,14 @@ static const ARMCPRegInfo debug_cp_reginfo[] = {
>.cp = 14, .opc1 = 0, .crn = 0, .crm = 7, .opc2 = 0,
>.access = PL1_RW, .accessfn = access_tda,
>.type = ARM_CP_NOP },
> +/* Dummy MDCCINT_EL1, since we don't implement the Debug Communications
> + * Channel but Linux may try to access this register. The 32-bit
> + * alias is DBGDCCINT.
> + */
> +{ .name = "MDCCINT_EL1", .state = ARM_CP_STATE_BOTH,
> +  .cp = 14, .opc0 = 2, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 0,
> +  .access = PL1_RW, .accessfn = access_tda,
> +  .type = ARM_CP_NOP },
>  REGINFO_SENTINEL
>  };
>  
> -- 
> 2.7.4
>

Re: [Qemu-devel] [PATCH 2/3] target-arm: Add trace events for the generic timers

2016-10-06 Thread Edgar E. Iglesias

On Thu, Oct 06, 2016 at 02:21:06PM +0100, Peter Maydell wrote:
> Add some useful trace events for the ARM generic timers (notably
> the various register writes and the resulting IRQ line state).

Reviewed-by: Edgar E. Iglesias 


> 
> Signed-off-by: Peter Maydell 
> ---
>  Makefile.objs   |  1 +
>  target-arm/helper.c | 20 
>  2 files changed, 17 insertions(+), 4 deletions(-)
> 
> diff --git a/Makefile.objs b/Makefile.objs
> index 02fb8e7..69fdd48 100644
> --- a/Makefile.objs
> +++ b/Makefile.objs
> @@ -155,6 +155,7 @@ trace-events-y += hw/alpha/trace-events
>  trace-events-y += ui/trace-events
>  trace-events-y += audio/trace-events
>  trace-events-y += net/trace-events
> +trace-events-y += target-arm/trace-events
>  trace-events-y += target-i386/trace-events
>  trace-events-y += target-sparc/trace-events
>  trace-events-y += target-s390x/trace-events
> diff --git a/target-arm/helper.c b/target-arm/helper.c
> index 23792ab..5fcdc2b 100644
> --- a/target-arm/helper.c
> +++ b/target-arm/helper.c
> @@ -1,4 +1,5 @@
>  #include "qemu/osdep.h"
> +#include "trace.h"
>  #include "cpu.h"
>  #include "internals.h"
>  #include "exec/gdbstub.h"
> @@ -1560,10 +1561,13 @@ static void gt_recalc_timer(ARMCPU *cpu, int timeridx)
>  /* Note that this must be unsigned 64 bit arithmetic: */
>  int istatus = count - offset >= gt->cval;
>  uint64_t nexttick;
> +int irqstate;
>  
>  gt->ctl = deposit32(gt->ctl, 2, 1, istatus);
> -qemu_set_irq(cpu->gt_timer_outputs[timeridx],
> - (istatus && !(gt->ctl & 2)));
> +
> +irqstate = (istatus && !(gt->ctl & 2));
> +qemu_set_irq(cpu->gt_timer_outputs[timeridx], irqstate);
> +
>  if (istatus) {
>  /* Next transition is when count rolls back over to zero */
>  nexttick = UINT64_MAX;
> @@ -1580,11 +1584,13 @@ static void gt_recalc_timer(ARMCPU *cpu, int timeridx)
>  nexttick = INT64_MAX / GTIMER_SCALE;
>  }
>  timer_mod(cpu->gt_timer[timeridx], nexttick);
> +trace_arm_gt_recalc(timeridx, irqstate, nexttick);
>  } else {
>  /* Timer disabled: ISTATUS and timer output always clear */
>  gt->ctl &= ~4;
>  qemu_set_irq(cpu->gt_timer_outputs[timeridx], 0);
>  timer_del(cpu->gt_timer[timeridx]);
> +trace_arm_gt_recalc_disabled(timeridx);
>  }
>  }
>  
> @@ -1610,6 +1616,7 @@ static void gt_cval_write(CPUARMState *env, const 
> ARMCPRegInfo *ri,
>int timeridx,
>uint64_t value)
>  {
> +trace_arm_gt_cval_write(timeridx, value);
>  env->cp15.c14_timer[timeridx].cval = value;
>  gt_recalc_timer(arm_env_get_cpu(env), timeridx);
>  }
> @@ -1629,6 +1636,7 @@ static void gt_tval_write(CPUARMState *env, const 
> ARMCPRegInfo *ri,
>  {
>  uint64_t offset = timeridx == GTIMER_VIRT ? env->cp15.cntvoff_el2 : 0;
>  
> +trace_arm_gt_tval_write(timeridx, value);
>  env->cp15.c14_timer[timeridx].cval = gt_get_countervalue(env) - offset +
>   sextract64(value, 0, 32);
>  gt_recalc_timer(arm_env_get_cpu(env), timeridx);
> @@ -1641,6 +1649,7 @@ static void gt_ctl_write(CPUARMState *env, const 
> ARMCPRegInfo *ri,
>  ARMCPU *cpu = arm_env_get_cpu(env);
>  uint32_t oldval = env->cp15.c14_timer[timeridx].ctl;
>  
> +trace_arm_gt_ctl_write(timeridx, value);
>  env->cp15.c14_timer[timeridx].ctl = deposit64(oldval, 0, 2, value);
>  if ((oldval ^ value) & 1) {
>  /* Enable toggled */
> @@ -1649,8 +1658,10 @@ static void gt_ctl_write(CPUARMState *env, const 
> ARMCPRegInfo *ri,
>  /* IMASK toggled: don't need to recalculate,
>   * just set the interrupt line based on ISTATUS
>   */
> -qemu_set_irq(cpu->gt_timer_outputs[timeridx],
> - (oldval & 4) && !(value & 2));
> +int irqstate = (oldval & 4) && !(value & 2);
> +
> +trace_arm_gt_imask_toggle(timeridx, irqstate);
> +qemu_set_irq(cpu->gt_timer_outputs[timeridx], irqstate);
>  }
>  }
>  
> @@ -1715,6 +1726,7 @@ static void gt_cntvoff_write(CPUARMState *env, const 
> ARMCPRegInfo *ri,
>  {
>  ARMCPU *cpu = arm_env_get_cpu(env);
>  
> +trace_arm_gt_cntvoff_write(value);
>  raw_write(env, ri, value);
>  gt_recalc_timer(cpu, GTIMER_VIRT);
>  }
> -- 
> 2.7.4
>

[Qemu-devel] [PATCH 0/4] qga: add vsock-listen

2016-10-06 Thread Stefan Hajnoczi

This patch series adds virtio-vsock support to the QEMU guest agent.

  $ qemu-system-x86_64 -device vhost-vsock-pci,guest-cid=3 ...
  (guest)# qemu-ga -m vsock-listen -p 3:1234

You can interact with the qga monitor using the nc-vsock utility:
https://raw.githubusercontent.com/stefanha/linux/dd0d6a2aa62c0fd6cdc9dbd4b3dc4bfd0828c329/nc-vsock.c

  $ nc-vsock 3 1234
  {'execute': 'guest-info'}
  ...

For more information about virtio-vsock, see
http://qemu-project.org/Features/VirtioVsock.

Stefan Hajnoczi (4):
  qga: drop unused sockaddr in accept(2) call
  qga: drop unnecessary GA_CHANNEL_UNIX_LISTEN checks
  sockets: add AF_VSOCK support
  qga: add vsock-listen method

 qapi-schema.json|  23 +-
 qga/channel-posix.c |  36 +++--
 qga/channel.h   |   1 +
 qga/main.c  |   6 +-
 util/qemu-sockets.c | 222 
 5 files changed, 277 insertions(+), 11 deletions(-)

-- 
2.7.4

Re: [Qemu-devel] [PATCH] MAINTAINERS: qemu-trivial information

2016-10-06 Thread Eric Blake

On 10/06/2016 09:43 AM, Laurent Vivier wrote:
> Information about "qemu-trivial" ML can be found in the wiki:
> 
> http://wiki.qemu.org/Contribute/TrivialPatches
> 
> But the first place where a developer looks is the file MAINTAINERS.
> 
> This also allows the get_maintainer.pl script to display
> the qemu-trivial ML address when the mail subject contains "trivial".
> 

Does order of sections in the file matter? Should the trivial hunk be
placed near the beginning of the file, rather than the (current) end
(which will likely devolve to some random place in the middle)?

> Signed-off-by: Laurent Vivier 
> ---
>  MAINTAINERS | 9 +
>  1 file changed, 9 insertions(+)
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 9bca506..f58cda9 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -1720,3 +1720,12 @@ Docker based testing framework and cases
>  M: Fam Zheng 
>  S: Maintained
>  F: tests/docker/
> +
> +TRIVIAL PATCHES
> +---
> +Trivial patches
> +M: Michael Tokarev 
> +S: Maintained
> +L: qemu-triv...@nongnu.org
> +K: ^Subject:.*(?i)trivial
> +T: git git://git.corpit.ru/qemu.git trivial-patches
> 

The idea is cool, even if the placement can be improved.

-- 
Eric Blake   eblake redhat com+1-919-301-3266
Libvirt virtualization library http://libvirt.org



signature.asc
Description: OpenPGP digital signature

[Qemu-devel] [PATCH 3/4] sockets: add AF_VSOCK support

2016-10-06 Thread Stefan Hajnoczi

Add the AF_VSOCK address family so that qemu-ga will be able to use
virtio-vsock.

The AF_VSOCK address family uses <cid, port> address tuples.  The cid is
the unique identifier comparable to an IP address.  AF_VSOCK does not
use name resolution so it's easy to convert between struct sockaddr_vm
and strings.

This patch defines a VsockSocketAddress instead of trying to piggy-back
on InetSocketAddress.  This is cleaner in the long run since it avoids
lots of IPv4 vs IPv6 vs vsock special casing.

Signed-off-by: Stefan Hajnoczi 
---
 qapi-schema.json|  23 +-
 util/qemu-sockets.c | 222 
 2 files changed, 244 insertions(+), 1 deletion(-)

diff --git a/qapi-schema.json b/qapi-schema.json
index c3dcf11..8864a96 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -987,12 +987,14 @@
 #
 # @unix: unix socket
 #
+# @vsock: vsock family (since 2.8)
+#
 # @unknown: otherwise
 #
 # Since: 2.1
 ##
 { 'enum': 'NetworkAddressFamily',
-  'data': [ 'ipv4', 'ipv6', 'unix', 'unknown' ] }
+  'data': [ 'ipv4', 'ipv6', 'unix', 'vsock', 'unknown' ] }
 
 ##
 # @VncBasicInfo
@@ -3017,6 +3019,24 @@
 'path': 'str' } }
 
 ##
+# @VsockSocketAddress
+#
+# Captures a socket address in the vsock namespace.
+#
+# @cid: unique host identifier
+# @port: port
+#
+# Note that string types are used to allow for possible future hostname or
+# service resolution support.
+#
+# Since 2.8
+##
+{ 'struct': 'VsockSocketAddress',
+  'data': {
+'cid': 'str',
+'port': 'str' } }
+
+##
 # @SocketAddress
 #
 # Captures the address of a socket, which could also be a named file descriptor
@@ -3027,6 +3047,7 @@
   'data': {
 'inet': 'InetSocketAddress',
 'unix': 'UnixSocketAddress',
+'vsock': 'VsockSocketAddress',
 'fd': 'String' } }
 
 ##
diff --git a/util/qemu-sockets.c b/util/qemu-sockets.c
index 6db48b3..46cd1ba 100644
--- a/util/qemu-sockets.c
+++ b/util/qemu-sockets.c
@@ -17,6 +17,10 @@
  */
 #include "qemu/osdep.h"
 
+#ifdef AF_VSOCK
+#include <linux/vm_sockets.h>
+#endif /* AF_VSOCK */
+
 #include "monitor/monitor.h"
 #include "qapi/error.h"
 #include "qemu/sockets.h"
@@ -75,6 +79,9 @@ NetworkAddressFamily inet_netfamily(int family)
 case PF_INET6: return NETWORK_ADDRESS_FAMILY_IPV6;
 case PF_INET:  return NETWORK_ADDRESS_FAMILY_IPV4;
 case PF_UNIX:  return NETWORK_ADDRESS_FAMILY_UNIX;
+#ifdef AF_VSOCK
+case PF_VSOCK: return NETWORK_ADDRESS_FAMILY_VSOCK;
+#endif /* AF_VSOCK */
 }
 return NETWORK_ADDRESS_FAMILY_UNKNOWN;
 }
@@ -650,6 +657,176 @@ int inet_connect(const char *str, Error **errp)
 return sock;
 }
 
+#ifdef AF_VSOCK
+static bool vsock_parse_vaddr_to_sockaddr(const VsockSocketAddress *vaddr,
+  struct sockaddr_vm *svm,
+  Error **errp)
+{
+unsigned long long val;
+
+memset(svm, 0, sizeof(*svm));
+svm->svm_family = AF_VSOCK;
+
+if (parse_uint_full(vaddr->cid, &val, 10) < 0 ||
+val > UINT32_MAX) {
+error_setg(errp, "Failed to parse cid '%s'", vaddr->cid);
+return false;
+}
+svm->svm_cid = val;
+
+if (parse_uint_full(vaddr->port, &val, 10) < 0 ||
+val > UINT32_MAX) {
+error_setg(errp, "Failed to parse port '%s'", vaddr->port);
+return false;
+}
+svm->svm_port = val;
+
+return true;
+}
+
+static int vsock_connect_addr(const struct sockaddr_vm *svm, bool *in_progress,
+  ConnectState *connect_state, Error **errp)
+{
+int sock, rc;
+
+*in_progress = false;
+
+sock = qemu_socket(AF_VSOCK, SOCK_STREAM, 0);
+if (sock < 0) {
+error_setg_errno(errp, errno, "Failed to create socket");
+return -1;
+}
+if (connect_state != NULL) {
+qemu_set_nonblock(sock);
+}
+/* connect to peer */
+do {
+rc = 0;
+if (connect(sock, (const struct sockaddr *)svm, sizeof(*svm)) < 0) {
+rc = -errno;
+}
+} while (rc == -EINTR);
+
+if (connect_state != NULL && QEMU_SOCKET_RC_INPROGRESS(rc)) {
+connect_state->fd = sock;
+qemu_set_fd_handler(sock, NULL, wait_for_connect, connect_state);
+*in_progress = true;
+} else if (rc < 0) {
+error_setg_errno(errp, errno, "Failed to connect socket");
+closesocket(sock);
+return -1;
+}
+return sock;
+}
+
+static int vsock_connect_saddr(VsockSocketAddress *vaddr, Error **errp,
+   NonBlockingConnectHandler *callback,
+   void *opaque)
+{
+struct sockaddr_vm svm;
+int sock = -1;
+bool in_progress;
+ConnectState *connect_state = NULL;
+
+if (!vsock_parse_vaddr_to_sockaddr(vaddr, &svm, errp)) {
+return -1;
+}
+
+if (callback != NULL) {
+connect_state = g_malloc0(sizeof(*connect_state));
+connect_state->callback = callback;
+connect_state->opaque = opaque;
+}
+
+sock = vsock_connect_ad

[Qemu-devel] [PATCH 1/5] target-tricore: Added FTOUZ instruction

2016-10-06 Thread Bastian Koppelmann

Converts a 32-bit floating point number to an unsigned int. The
result is rounded towards zero.

Signed-off-by: Bastian Koppelmann 
---
 target-tricore/fpu_helper.c | 42 ++
 target-tricore/helper.h |  1 +
 target-tricore/translate.c  |  3 +++
 3 files changed, 46 insertions(+)

diff --git a/target-tricore/fpu_helper.c b/target-tricore/fpu_helper.c
index 98fe947..56a26eb 100644
--- a/target-tricore/fpu_helper.c
+++ b/target-tricore/fpu_helper.c
@@ -215,3 +215,45 @@ uint32_t helper_itof(CPUTriCoreState *env, uint32_t arg)
 }
 return (uint32_t)f_result;
 }
+
+uint32_t helper_ftouz(CPUTriCoreState *env, uint32_t arg)
+{
+float32 f_arg = make_float32(arg);
+uint32_t result;
+int32_t flags;
+result = float32_to_uint32_round_to_zero(f_arg, &env->fp_status);
+
+flags = f_get_excp_flags(env);
+if (flags) {
+if (float32_is_any_nan(f_arg)) {
+flags |= float_flag_invalid;
+result = 0;
+/* f_real(D[a]) < 0.0 */
+} else if (float32_lt_quiet(f_arg, 0.0, &env->fp_status)) {
+flags |= float_flag_invalid;
+result = 0;
+/* f_real(D[a]) > 2^32 -1  */
+} else if (float32_lt_quiet(0x4f800000, f_arg, &env->fp_status)) {
+flags |= float_flag_invalid;
+result = 0xffffffff;
+} else {
+flags &= ~float_flag_invalid;
+}
+/* once invalid flag has been set, we cannot set inexact anymore
+   since each FPU operation can only assert ONE flag. (see
+   TriCore ISA Manual Vol. 1 (11-9)) */
+if (!(flags & float_flag_invalid)) {
+if (!float32_eq(f_arg, make_float32(result), &env->fp_status)) {
+flags |= float_flag_inexact;
+} else {
+flags &= ~float_flag_inexact;
+}
+} else {
+flags &= ~float_flag_inexact;
+}
+f_update_psw_flags(env, flags);
+} else {
+env->FPU_FS = 0;
+}
+return result;
+}
diff --git a/target-tricore/helper.h b/target-tricore/helper.h
index 9333e16..467c880 100644
--- a/target-tricore/helper.h
+++ b/target-tricore/helper.h
@@ -112,6 +112,7 @@ DEF_HELPER_3(fdiv, i32, env, i32, i32)
 DEF_HELPER_3(fcmp, i32, env, i32, i32)
 DEF_HELPER_2(ftoi, i32, env, i32)
 DEF_HELPER_2(itof, i32, env, i32)
+DEF_HELPER_2(ftouz, i32, env, i32)
 /* dvinit */
 DEF_HELPER_3(dvinit_b_13, i64, env, i32, i32)
 DEF_HELPER_3(dvinit_b_131, i64, env, i32, i32)
diff --git a/target-tricore/translate.c b/target-tricore/translate.c
index 9a50df9..27c6d31 100644
--- a/target-tricore/translate.c
+++ b/target-tricore/translate.c
@@ -6698,6 +6698,9 @@ static void decode_rr_divide(CPUTriCoreState *env, 
DisasContext *ctx)
 case OPC2_32_RR_ITOF:
 gen_helper_itof(cpu_gpr_d[r3], cpu_env, cpu_gpr_d[r1]);
 break;
+case OPC2_32_RR_FTOUZ:
+gen_helper_ftouz(cpu_gpr_d[r3], cpu_env, cpu_gpr_d[r1]);
+break;
 default:
 generate_trap(ctx, TRAPC_INSN_ERR, TIN2_IOPC);
 }
-- 
2.10.0

Re: [Qemu-devel] [PATCH] virtio-9p: add reset handler

2016-10-06 Thread Greg Kurz

On Thu, 6 Oct 2016 19:11:03 +0300
"Michael S. Tsirkin"  wrote:

> On Thu, Oct 06, 2016 at 03:12:10PM +0200, Greg Kurz wrote:
> > Virtio devices should implement the VirtIODevice->reset() function to
> > perform necessary cleanup actions and to bring the device to a quiescent
> > state.
> > 
> > In the case of the virtio-9p device, this means:
> > - emptying the list of active PDUs (i.e. draining all in-flight I/O)
> > - freeing all fids (i.e. close open file descriptors and free memory)
> > 
> > That's what this patch does.
> > 
> > The reset handler first waits for all active PDUs to complete. Since
> > completion happens in the QEMU global aio context, we just have to
> > loop around aio_poll() until the active list is empty.
> > 
> > The freeing part involves some actions to be performed on the backend,
> > like closing file descriptors or flushing extended attributes to the
> > underlying filesystem. The virtfs_reset() function already does the
> > job: it calls free_fid() for all open fids not involved in an ongoing
> > I/O operation. We are sure this is the case since we have drained
> > the PDU active list.
> > 
> > The current code implements all backend accesses with coroutines, but we
> > want to stay synchronous on the reset path. We can either change the
> > current code to be able to run when not in coroutine context, or create
> > a coroutine context and wait for virtfs_reset() to complete. This patch
> > goes for the latter because it results in simpler code.
> > 
> > Note that we also need to create a dummy PDU because it is also an API
> > to pass the FsContext pointer to all backend callbacks.
> > 
> > Signed-off-by: Greg Kurz   
> 
> Reviewed-by: Michael S. Tsirkin 
> 

Unless someone objects, this will go through my tree.

> > ---
> >  hw/9pfs/9p.c   |   31 +++
> >  hw/9pfs/9p.h   |1 +
> >  hw/9pfs/virtio-9p-device.c |8 
> >  3 files changed, 40 insertions(+)
> > 
> > diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c
> > index 119ee584969b..42137395037e 100644
> > --- a/hw/9pfs/9p.c
> > +++ b/hw/9pfs/9p.c
> > @@ -3522,6 +3522,37 @@ void v9fs_device_unrealize_common(V9fsState *s, 
> > Error **errp)
> >  g_free(s->tag);
> >  }
> >  
> > +

And I'll remove the empty line :)

> > +typedef struct VirtfsCoResetData {
> > +V9fsPDU pdu;
> > +bool done;
> > +} VirtfsCoResetData;
> > +
> > +static void coroutine_fn virtfs_co_reset(void *opaque)
> > +{
> > +VirtfsCoResetData *data = opaque;
> > +
> > +virtfs_reset(&data->pdu);
> > +data->done = true;
> > +}
> > +
> > +void v9fs_reset(V9fsState *s)
> > +{
> > +VirtfsCoResetData data = { .pdu = { .s = s }, .done = false };
> > +Coroutine *co;
> > +
> > +while (!QLIST_EMPTY(&s->active_list)) {
> > +aio_poll(qemu_get_aio_context(), true);
> > +}
> > +
> > +co = qemu_coroutine_create(virtfs_co_reset, &data);
> > +qemu_coroutine_enter(co);
> > +
> > +while (!data.done) {
> > +aio_poll(qemu_get_aio_context(), true);
> > +}
> > +}
> > +
> >  static void __attribute__((__constructor__)) v9fs_set_fd_limit(void)
> >  {
> >  struct rlimit rlim;
> > diff --git a/hw/9pfs/9p.h b/hw/9pfs/9p.h
> > index d539d2ebe9c0..6b69eaf24614 100644
> > --- a/hw/9pfs/9p.h
> > +++ b/hw/9pfs/9p.h
> > @@ -339,5 +339,6 @@ ssize_t pdu_unmarshal(V9fsPDU *pdu, size_t offset, 
> > const char *fmt, ...);
> >  V9fsPDU *pdu_alloc(V9fsState *s);
> >  void pdu_free(V9fsPDU *pdu);
> >  void pdu_submit(V9fsPDU *pdu);
> > +void v9fs_reset(V9fsState *s);
> >  
> >  #endif
> > diff --git a/hw/9pfs/virtio-9p-device.c b/hw/9pfs/virtio-9p-device.c
> > index 009b43f6d045..b73d72aceb64 100644
> > --- a/hw/9pfs/virtio-9p-device.c
> > +++ b/hw/9pfs/virtio-9p-device.c
> > @@ -130,6 +130,13 @@ static void virtio_9p_device_unrealize(DeviceState 
> > *dev, Error **errp)
> >  v9fs_device_unrealize_common(s, errp);
> >  }
> >  
> > +static void virtio_9p_reset(VirtIODevice *vdev)
> > +{
> > +V9fsVirtioState *v = (V9fsVirtioState *)vdev;
> > +
> > +v9fs_reset(&v->state);
> > +}
> > +
> >  ssize_t virtio_pdu_vmarshal(V9fsPDU *pdu, size_t offset,
> >  const char *fmt, va_list ap)
> >  {
> > @@ -188,6 +195,7 @@ static void virtio_9p_class_init(ObjectClass *klass, 
> > void *data)
> >  vdc->unrealize = virtio_9p_device_unrealize;
> >  vdc->get_features = virtio_9p_get_features;
> >  vdc->get_config = virtio_9p_get_config;
> > +vdc->reset = virtio_9p_reset;
> >  }
> >  
> >  static const TypeInfo virtio_device_info = {

[Qemu-devel] [PATCH 1/4] qga: drop unused sockaddr in accept(2) call

2016-10-06 Thread Stefan Hajnoczi

ga_channel_listen_accept() is currently hard-coded to support only
AF_UNIX because the struct sockaddr_un type is used.  This function
should work with any address family.

Drop the sockaddr since the client address is unused and is an optional
argument to accept(2).

Signed-off-by: Stefan Hajnoczi 
---
 qga/channel-posix.c | 5 +
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/qga/channel-posix.c b/qga/channel-posix.c
index bb65d8b..bf32158 100644
--- a/qga/channel-posix.c
+++ b/qga/channel-posix.c
@@ -26,13 +26,10 @@ static gboolean ga_channel_listen_accept(GIOChannel 
*channel,
 GAChannel *c = data;
 int ret, client_fd;
 bool accepted = false;
-struct sockaddr_un addr;
-socklen_t addrlen = sizeof(addr);
 
 g_assert(channel != NULL);
 
-client_fd = qemu_accept(g_io_channel_unix_get_fd(channel),
-(struct sockaddr *)&addr, &addrlen);
+client_fd = qemu_accept(g_io_channel_unix_get_fd(channel), NULL, NULL);
 if (client_fd == -1) {
 g_warning("error converting fd to gsocket: %s", strerror(errno));
 goto out;
-- 
2.7.4

Re: [Qemu-devel] [PULL 00/15] Migration

2016-10-06 Thread Juan Quintela

Peter Maydell  wrote:
> On 6 October 2016 at 17:09, Juan Quintela  wrote:
>> Peter Maydell  wrote:
>>> On 5 October 2016 at 14:44, Juan Quintela  wrote:
 From: Juan Quintela 

 The following changes since commit 
 bbc4c3f4f3c624e2de64fdcb79f4dd8c1a508e9d:

   Merge remote-tracking branch 'remotes/kevin/tags/for-upstream'
 into staging (2016-10-04 14:25:08 +0100)

 are available in the git repository at:

   git://github.com/juanquintela/qemu.git tags/migration/20161005-1

 for you to fetch changes up to c62da143b6792cc32dbd5db15b936d7f58cc36ee:

   docs/xbzrle: correction (2016-10-05 14:28:41 +0200)

 
 migration/next for 20161005

 Hi

 This get:
 - documentation fix: Cao
 - improve max-bandwidth and downtime-limit (Ashijeet)
 - move migration commands to "boxed" (Eric)
 - rdma fixes (David)
 - postcopy fixes (David)
 - better errors (David)

 Please apply
>>>
>>> This runs into the "OSX %zu vs SIZE_MAX" issue again:
>>
>> G.
>> I already have trouble with %PR vs %zu for i386 vs anything else.
>>
>> Anyone has a linux crosscompiler for osx that I can use?  Or there is a
>> way to get darwin or whatever virtual machine to test compile this?
>
> Travis builds include OSX, if you're prepared to wait around
> for them to complete before submitting (they're usually about 10
> hours wall-clock time to complete a build).

And I was complaining that each of my full builds after a rebase can
take around 45mins . O:-)  They are all local.

>
>> It gets frustrating, before submitting I do:
>> - make check
>> - avocado virt test
>> - full compile on linux x86_64 (64 bits)
>> - full compile on linux i386 (32 bits)
>> - full cross-compile for windows 64bit
>> - full cross-compile for windows 32bit
>>
>> (ok, I only test linux 64 bit), and I still don't get all the errors.
>
> This is why I do OSX builds in my merge tests, they do pick
> up some stuff other builds don't.
>
> thanks
> -- PMM

Later, Juan.

[Qemu-devel] [PATCH 0/5] Added 7 instructions to the tricore target

2016-10-06 Thread Bastian Koppelmann

Hi,

this series was originally posted by Peer Adelt some time ago[1], but still had 
some problems 
which I tried to fix with this patch set.

The tricky bits here were the FTOUZ and MADD/MSUB.F instructions. The latter 
had the 
problem of not giving back the correct NAN when the result of the add/sub of 
muladd/sub
was invalid. I addressed that by fixing up the value later, which feels hacky. 
I feel
the better solution would be extending softfloat to recognize ADD_NAN's and 
emitting
the correct NAN there. On the other hand it's a change in softfloat for a small 
edge case.

Additionally this patch set adds the UPDFL instructions.

Cheers,
Bastian

[1] http://lists.nongnu.org/archive/html/qemu-devel/2016-06/msg01936.html

Bastian Koppelmann (3):
  target-tricore: Added FTOUZ instruction
  target-tricore: Added MADD.F and MSUB.F instructions
  target-tricore: Add updfl instruction

Peer Adelt (2):
  target-tricore: Added new MOV instruction variant
  target-tricore: Added new JNE instruction variant

 target-tricore/fpu_helper.c  | 147 ++-
 target-tricore/helper.h  |   4 ++
 target-tricore/translate.c   |  47 +
 target-tricore/tricore-opcodes.h |   3 +
 4 files changed, 200 insertions(+), 1 deletion(-)

-- 
2.10.0

Re: [Qemu-devel] [PULL 00/15] Migration

2016-10-06 Thread Peter Maydell

On 6 October 2016 at 17:23, Juan Quintela  wrote:
> Peter Maydell  wrote:
>> Travis builds include OSX, if you're prepared to wait around
>> for them to complete before submitting (they're usually about 10
>> hours wall-clock time to complete a build).
>
> And I was complaining that each of my full builds after a rebase can
> take around 45mins . O:-)  They are all local.

None of the individual travis jobs take very long, I think it
just throttles our access to infrastructure and refuses to
parallelize us very much.

thanks
-- PMM

[Qemu-devel] [PATCH 4/5] target-tricore: Added new JNE instruction variant

2016-10-06 Thread Bastian Koppelmann

From: Peer Adelt 

If D[15] is != sign_ext(const4) then PC will be set to (PC +
zero_ext(disp4 + 16)).

[BK: fixed style errors]
Signed-off-by: Peer Adelt 
Message-Id: <1465314555-11501-5-git-send-email-peer.ad...@c-lab.de>
---
 target-tricore/translate.c   | 18 ++
 target-tricore/tricore-opcodes.h |  2 ++
 2 files changed, 20 insertions(+)

diff --git a/target-tricore/translate.c b/target-tricore/translate.c
index 4fe8a5f..be9b6b4 100644
--- a/target-tricore/translate.c
+++ b/target-tricore/translate.c
@@ -3362,9 +3362,17 @@ static void gen_compute_branch(DisasContext *ctx, 
uint32_t opc, int r1,
 case OPC1_16_SBC_JEQ:
 gen_branch_condi(ctx, TCG_COND_EQ, cpu_gpr_d[15], constant, offset);
 break;
+case OPC1_16_SBC_JEQ2:
+gen_branch_condi(ctx, TCG_COND_EQ, cpu_gpr_d[15], constant,
+ offset + 16);
+break;
 case OPC1_16_SBC_JNE:
 gen_branch_condi(ctx, TCG_COND_NE, cpu_gpr_d[15], constant, offset);
 break;
+case OPC1_16_SBC_JNE2:
+gen_branch_condi(ctx, TCG_COND_NE, cpu_gpr_d[15],
+ constant, offset + 16);
+break;
 /* SBRN-format jumps */
 case OPC1_16_SBRN_JZ_T:
 temp = tcg_temp_new();
@@ -4097,6 +4105,16 @@ static void decode_16Bit_opc(CPUTriCoreState *env, 
DisasContext *ctx)
 const16 = MASK_OP_SBC_CONST4_SEXT(ctx->opcode);
 gen_compute_branch(ctx, op1, 0, 0, const16, address);
 break;
+case OPC1_16_SBC_JEQ2:
+case OPC1_16_SBC_JNE2:
+if (tricore_feature(env, TRICORE_FEATURE_16)) {
+address = MASK_OP_SBC_DISP4(ctx->opcode);
+const16 = MASK_OP_SBC_CONST4_SEXT(ctx->opcode);
+gen_compute_branch(ctx, op1, 0, 0, const16, address);
+} else {
+generate_trap(ctx, TRAPC_INSN_ERR, TIN2_IOPC);
+}
+break;
 /* SBRN-format */
 case OPC1_16_SBRN_JNZ_T:
 case OPC1_16_SBRN_JZ_T:
diff --git a/target-tricore/tricore-opcodes.h b/target-tricore/tricore-opcodes.h
index 78ba338..08394b8 100644
--- a/target-tricore/tricore-opcodes.h
+++ b/target-tricore/tricore-opcodes.h
@@ -311,6 +311,7 @@ enum {
 OPC1_16_SRR_EQ   = 0x3a,
 OPC1_16_SB_J = 0x3c,
 OPC1_16_SBC_JEQ  = 0x1e,
+OPC1_16_SBC_JEQ2 = 0x9e,
 OPC1_16_SBR_JEQ  = 0x3e,
 OPC1_16_SBR_JGEZ = 0xce,
 OPC1_16_SBR_JGTZ = 0x4e,
@@ -318,6 +319,7 @@ enum {
 OPC1_16_SBR_JLEZ = 0x8e,
 OPC1_16_SBR_JLTZ = 0x0e,
 OPC1_16_SBC_JNE  = 0x5e,
+OPC1_16_SBC_JNE2 = 0xde,
 OPC1_16_SBR_JNE  = 0x7e,
 OPC1_16_SB_JNZ   = 0xee,
 OPC1_16_SBR_JNZ  = 0xf6,
-- 
2.10.0

Re: [Qemu-devel] [PATCH 05/10] qemu-tech: document lazy condition code evaluation in cpu.h

2016-10-06 Thread Paolo Bonzini



On 06/10/2016 18:18, Emilio G. Cota wrote:
> On Thu, Oct 06, 2016 at 17:24:18 +0200, Paolo Bonzini wrote:
>> Unlike the other sections, they are pretty specific to a particular CPU.
>>
>> Signed-off-by: Paolo Bonzini 
>> ---
>>  qemu-tech.texi | 25 -
>>  target-cris/cpu.h  |  7 +++
>>  target-i386/cpu.h  |  7 +++
>>  target-m68k/cpu.h  |  8 
>>  target-sparc/cpu.h |  5 +
>>  5 files changed, 27 insertions(+), 25 deletions(-)
> (snip)
>> diff --git a/target-cris/cpu.h b/target-cris/cpu.h
>> index 7d7fe6e..6d3de56 100644
>> --- a/target-cris/cpu.h
>> +++ b/target-cris/cpu.h
>> @@ -223,6 +223,13 @@ int cpu_cris_signal_handler(int host_signum, void 
>> *pinfo,
>>  void cris_initialize_tcg(void);
>>  void cris_initialize_crisv10_tcg(void);
>>  
>> +/* Instead of computing the condition codes after each x86 instruction,
>> + * QEMU just stores one operand (called CC_SRC), the result
>> + * (called CC_DST) and the type of operation (called CC_OP). When the
>> + * condition codes are needed, the condition codes can be calculated
>> + * using this information. Condition codes are not generated if they
>> + * are only needed for conditional branches.
>> + */
> 
> This text doesn't seem to be cris-specific, e.g.:
> - "each x86 instruction"
> - CC_SRC (git grep CC_SRC here doesn't return anything)
> - CC_DST (ditto)

Yeah, it's cc_src and cc_dest.  The uppercase is a relic of dyngen
(pre-TCG).  Same for m68k.

Paolo

> 
>>  enum {
>>  CC_OP_DYNAMIC, /* Use env->cc_op  */
>>  CC_OP_FLAGS,
>> diff --git a/target-i386/cpu.h b/target-i386/cpu.h
>> index 6d028aa..f606f15 100644
>> --- a/target-i386/cpu.h
>> +++ b/target-i386/cpu.h
>> @@ -698,6 +698,13 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS];
>>  /* Use a clearer name for this.  */
>>  #define CPU_INTERRUPT_INIT  CPU_INTERRUPT_RESET
>>  
>> +/* Instead of computing the condition codes after each x86 instruction,
>> + * QEMU just stores one operand (called CC_SRC), the result
>> + * (called CC_DST) and the type of operation (called CC_OP). When the
>> + * condition codes are needed, the condition codes can be calculated
>> + * using this information. Condition codes are not generated if they
>> + * are only needed for conditional branches.
>> + */
>>  typedef enum {
>>  CC_OP_DYNAMIC, /* must use dynamic code to get cc_op */
>>  CC_OP_EFLAGS,  /* all cc are explicitly computed, CC_SRC = flags */
>> diff --git a/target-m68k/cpu.h b/target-m68k/cpu.h
>> index c2d40cb..ccc7157 100644
>> --- a/target-m68k/cpu.h
>> +++ b/target-m68k/cpu.h
>> @@ -154,6 +154,14 @@ int cpu_m68k_signal_handler(int host_signum, void 
>> *pinfo,
>> void *puc);
>>  void cpu_m68k_flush_flags(CPUM68KState *, int);
>>  
>> +
>> +/* Instead of computing the condition codes after each x86 instruction,
>> + * QEMU just stores one operand (called CC_SRC), the result
>> + * (called CC_DST) and the type of operation (called CC_OP). When the
>> + * condition codes are needed, the condition codes can be calculated
>> + * using this information. Condition codes are not generated if they
>> + * are only needed for conditional branches.
>> + */
> 
> Same as above:
> - "each x86 instruction"
> - no CC_DST
> 
>   Emilio
>

Re: [Qemu-devel] [PATCH 0/3] preliminaries for GICv3 virt support

2016-10-06 Thread Edgar E. Iglesias

On Thu, Oct 06, 2016 at 02:21:04PM +0100, Peter Maydell wrote:
> This set of three straightforward patches is a preliminary
> for adding virtualization support to the GICv3 emulation:
>  * add a (nop implementation of) MDCCINT_EL1, since KVM
>will read/write it on worldswitch
>  * fix some bugs in the GICv3 trace events
>  * add trace events for the generic timers
>(which I have been using for debugging)
> 
> I actually have almost all of the GICv3 virt code written,
> but it currently has bugs which mean that a guest kernel
> under KVM won't boot. Debugging in progress...


That is very cool, we could soon enable EL2 :-)

What kind of issues are you seeing?

FWIW with our out of tree GICv2 virt models we've got issues
with SMP Xen where things go nuts sometimes with virtual
timer interrupts. Sometimes they take forever to hit,
as if we lose events.

Last time I looked at it, I noticed that our GICv2 virt
implementation of the APR regs and EOIR stuff seems totally
bogus (my bad).

Cheers,
Edgar


> 
> thanks
> -- PMM
> 
> Peter Maydell (3):
>   target-arm: Implement dummy MDCCINT_EL1
>   target-arm: Add trace events for the generic timers
>   hw/intc/arm_gicv3: Fix ICC register tracepoints
> 
>  Makefile.objs |  1 +
>  hw/intc/arm_gicv3_cpuif.c | 23 +++
>  hw/intc/trace-events  | 14 +++---
>  target-arm/helper.c   | 28 
>  4 files changed, 47 insertions(+), 19 deletions(-)
> 
> -- 
> 2.7.4
>

[Qemu-devel] [PATCH 3/5] target-tricore: Added new MOV instruction variant

2016-10-06 Thread Bastian Koppelmann

From: Peer Adelt 

Puts the content of data register D[a] into E[c][63:32] and the
content of data register D[b] into E[c][31:0].

[BK: fix style error]
Signed-off-by: Peer Adelt 
Message-Id: <1465314555-11501-4-git-send-email-peer.ad...@c-lab.de>
---
 target-tricore/translate.c   | 15 +++
 target-tricore/tricore-opcodes.h |  1 +
 2 files changed, 16 insertions(+)

diff --git a/target-tricore/translate.c b/target-tricore/translate.c
index 3fec353..4fe8a5f 100644
--- a/target-tricore/translate.c
+++ b/target-tricore/translate.c
@@ -6034,11 +6034,15 @@ static void decode_rr_accumulator(CPUTriCoreState *env, 
DisasContext *ctx)
 uint32_t op2;
 int r3, r2, r1;
 
+TCGv temp;
+
 r3 = MASK_OP_RR_D(ctx->opcode);
 r2 = MASK_OP_RR_S2(ctx->opcode);
 r1 = MASK_OP_RR_S1(ctx->opcode);
 op2 = MASK_OP_RR_OP2(ctx->opcode);
 
+temp = tcg_temp_new();
+
 switch (op2) {
 case OPC2_32_RR_ABS:
 gen_abs(cpu_gpr_d[r3], cpu_gpr_d[r2]);
@@ -6224,6 +6228,16 @@ static void decode_rr_accumulator(CPUTriCoreState *env, 
DisasContext *ctx)
 case OPC2_32_RR_MOV:
 tcg_gen_mov_tl(cpu_gpr_d[r3], cpu_gpr_d[r2]);
 break;
+case OPC2_32_RR_MOV_64:
+if (tricore_feature(env, TRICORE_FEATURE_16)) {
+CHECK_REG_PAIR(r3);
+tcg_gen_mov_tl(temp, cpu_gpr_d[r1]);
+tcg_gen_mov_tl(cpu_gpr_d[r3], cpu_gpr_d[r2]);
+tcg_gen_mov_tl(cpu_gpr_d[r3 + 1], temp);
+} else {
+generate_trap(ctx, TRAPC_INSN_ERR, TIN2_IOPC);
+}
+break;
 case OPC2_32_RR_NE:
 tcg_gen_setcond_tl(TCG_COND_NE, cpu_gpr_d[r3], cpu_gpr_d[r1],
cpu_gpr_d[r2]);
@@ -6344,6 +6358,7 @@ static void decode_rr_accumulator(CPUTriCoreState *env, 
DisasContext *ctx)
 default:
 generate_trap(ctx, TRAPC_INSN_ERR, TIN2_IOPC);
 }
+tcg_temp_free(temp);
 }
 
 static void decode_rr_logical_shift(CPUTriCoreState *env, DisasContext *ctx)
diff --git a/target-tricore/tricore-opcodes.h b/target-tricore/tricore-opcodes.h
index df666b0..78ba338 100644
--- a/target-tricore/tricore-opcodes.h
+++ b/target-tricore/tricore-opcodes.h
@@ -1062,6 +1062,7 @@ enum {
 OPC2_32_RR_MIN_H = 0x78,
 OPC2_32_RR_MIN_HU= 0x79,
 OPC2_32_RR_MOV   = 0x1f,
+OPC2_32_RR_MOV_64= 0x81,
 OPC2_32_RR_NE= 0x11,
 OPC2_32_RR_OR_EQ = 0x27,
 OPC2_32_RR_OR_GE = 0x2b,
-- 
2.10.0

Re: [Qemu-devel] [PULL 00/15] Migration

2016-10-06 Thread Peter Maydell

On 6 October 2016 at 17:09, Juan Quintela  wrote:
> Peter Maydell  wrote:
>> On 5 October 2016 at 14:44, Juan Quintela  wrote:
>>> From: Juan Quintela 
>>>
>>> The following changes since commit bbc4c3f4f3c624e2de64fdcb79f4dd8c1a508e9d:
>>>
>>>   Merge remote-tracking branch 'remotes/kevin/tags/for-upstream'
>>> into staging (2016-10-04 14:25:08 +0100)
>>>
>>> are available in the git repository at:
>>>
>>>   git://github.com/juanquintela/qemu.git tags/migration/20161005-1
>>>
>>> for you to fetch changes up to c62da143b6792cc32dbd5db15b936d7f58cc36ee:
>>>
>>>   docs/xbzrle: correction (2016-10-05 14:28:41 +0200)
>>>
>>> 
>>> migration/next for 20161005
>>>
>>> Hi
>>>
>>> This get:
>>> - documentation fix: Cao
>>> - improve max-bandwidth and downtime-limit (Ashijeet)
>>> - move migration commands to "boxed" (Eric)
>>> - rdma fixes (David)
>>> - postcopy fixes (David)
>>> - better errors (David)
>>>
>>> Please apply
>>
>> This runs into the "OSX %zu vs SIZE_MAX" issue again:
>
> G.
> I already have trouble with %PR vs %zu for i386 vs anything else.
>
> Anyone has a linux crosscompiler for osx that I can use?  Or there is a
> way to get darwin or whatever virtual machine to test compile this?

Travis builds include OSX, if you're prepared to wait around
for them to complete before submitting (they're usually about 10
hours wall-clock time to complete a build).

> It gets frustrating, before submitting I do:
> - make check
> - avocado virt test
> - full compile on linux x86_64 (64 bits)
> - full compile on linux i386 (32 bits)
> - full cross-compile for windows 64bit
> - full cross-compile for windows 32bit
>
> (ok, I only test linux 64 bit), and I still don't get all the errors.

This is why I do OSX builds in my merge tests, they do pick
up some stuff other builds don't.

thanks
-- PMM

Re: [Qemu-devel] [PATCH v4 12/17] vfio/platform: fix a wrong returned value in vfio_populate_device

2016-10-06 Thread Auger Eric

Hi,

On 04/10/2016 14:48, Markus Armbruster wrote:
> Eric Auger  writes:
> 
>> In case the vfio_init_intp fails we currently do not return an
>> error value. This patch fixes the bug. The returned value is not
>> explicit but in practice the error object is the one used to
>> report the error to the end-user and the actual returned error
>> value is not used.
> 
> The function's contract permits this by neglecting to say anything about
> the return value %-)
Yes the error is severe enough to tear things down. I dared to keep it
as is since the error now is reported in the Error object.

Thanks

Eric
> 
> The callers don't actually care about the value.
> 
>> Signed-off-by: Eric Auger 
>> ---
>>  hw/vfio/platform.c | 1 +
>>  1 file changed, 1 insertion(+)
>>
>> diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c
>> index 1a35da0..484e31f 100644
>> --- a/hw/vfio/platform.c
>> +++ b/hw/vfio/platform.c
>> @@ -508,6 +508,7 @@ static int vfio_populate_device(VFIODevice *vbasedev, 
>> Error **errp)
>>  irq.flags);
>>  intp = vfio_init_intp(vbasedev, irq, errp);
>>  if (!intp) {
>> +ret = -1;
>>  goto irq_err;
>>  }
>>  }
>

Re: [Qemu-devel] [PATCH 05/10] qemu-tech: document lazy condition code evaluation in cpu.h

2016-10-06 Thread Emilio G. Cota

On Thu, Oct 06, 2016 at 17:24:18 +0200, Paolo Bonzini wrote:
> Unlike the other sections, they are pretty specific to a particular CPU.
> 
> Signed-off-by: Paolo Bonzini 
> ---
>  qemu-tech.texi | 25 -
>  target-cris/cpu.h  |  7 +++
>  target-i386/cpu.h  |  7 +++
>  target-m68k/cpu.h  |  8 
>  target-sparc/cpu.h |  5 +
>  5 files changed, 27 insertions(+), 25 deletions(-)
(snip)
> diff --git a/target-cris/cpu.h b/target-cris/cpu.h
> index 7d7fe6e..6d3de56 100644
> --- a/target-cris/cpu.h
> +++ b/target-cris/cpu.h
> @@ -223,6 +223,13 @@ int cpu_cris_signal_handler(int host_signum, void *pinfo,
>  void cris_initialize_tcg(void);
>  void cris_initialize_crisv10_tcg(void);
>  
> +/* Instead of computing the condition codes after each x86 instruction,
> + * QEMU just stores one operand (called CC_SRC), the result
> + * (called CC_DST) and the type of operation (called CC_OP). When the
> + * condition codes are needed, the condition codes can be calculated
> + * using this information. Condition codes are not generated if they
> + * are only needed for conditional branches.
> + */

This text doesn't seem to be cris-specific, e.g.:
- "each x86 instruction"
- CC_SRC (git grep CC_SRC here doesn't return anything)
- CC_DST (ditto)

>  enum {
>  CC_OP_DYNAMIC, /* Use env->cc_op  */
>  CC_OP_FLAGS,
> diff --git a/target-i386/cpu.h b/target-i386/cpu.h
> index 6d028aa..f606f15 100644
> --- a/target-i386/cpu.h
> +++ b/target-i386/cpu.h
> @@ -698,6 +698,13 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS];
>  /* Use a clearer name for this.  */
>  #define CPU_INTERRUPT_INIT  CPU_INTERRUPT_RESET
>  
> +/* Instead of computing the condition codes after each x86 instruction,
> + * QEMU just stores one operand (called CC_SRC), the result
> + * (called CC_DST) and the type of operation (called CC_OP). When the
> + * condition codes are needed, the condition codes can be calculated
> + * using this information. Condition codes are not generated if they
> + * are only needed for conditional branches.
> + */
>  typedef enum {
>  CC_OP_DYNAMIC, /* must use dynamic code to get cc_op */
>  CC_OP_EFLAGS,  /* all cc are explicitly computed, CC_SRC = flags */
> diff --git a/target-m68k/cpu.h b/target-m68k/cpu.h
> index c2d40cb..ccc7157 100644
> --- a/target-m68k/cpu.h
> +++ b/target-m68k/cpu.h
> @@ -154,6 +154,14 @@ int cpu_m68k_signal_handler(int host_signum, void *pinfo,
> void *puc);
>  void cpu_m68k_flush_flags(CPUM68KState *, int);
>  
> +
> +/* Instead of computing the condition codes after each x86 instruction,
> + * QEMU just stores one operand (called CC_SRC), the result
> + * (called CC_DST) and the type of operation (called CC_OP). When the
> + * condition codes are needed, the condition codes can be calculated
> + * using this information. Condition codes are not generated if they
> + * are only needed for conditional branches.
> + */

Same as above:
- "each x86 instruction"
- no CC_DST

Emilio

[Qemu-devel] [PATCH 4/4] qga: add vsock-listen method

2016-10-06 Thread Stefan Hajnoczi

Add AF_VSOCK (virtio-vsock) support as an alternative to virtio-serial.

  $ qemu-system-x86_64 -device vhost-vsock-pci,guest-cid=3 ...
  (guest)# qemu-ga -m vsock-listen -p 3:1234

Signed-off-by: Stefan Hajnoczi 
---
 qga/channel-posix.c | 25 +
 qga/channel.h   |  1 +
 qga/main.c  |  6 --
 3 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/qga/channel-posix.c b/qga/channel-posix.c
index 579891d..71582e0 100644
--- a/qga/channel-posix.c
+++ b/qga/channel-posix.c
@@ -193,6 +193,31 @@ static gboolean ga_channel_open(GAChannel *c, const gchar 
*path, GAChannelMethod
 ga_channel_listen_add(c, fd, true);
 break;
 }
+case GA_CHANNEL_VSOCK_LISTEN: {
+Error *local_err = NULL;
+SocketAddress *addr;
+char *addr_str;
+int fd;
+
+addr_str = g_strdup_printf("vsock:%s", path);
+addr = socket_parse(addr_str, &local_err);
+g_free(addr_str);
+if (local_err != NULL) {
+g_critical("%s", error_get_pretty(local_err));
+error_free(local_err);
+return false;
+}
+
+fd = socket_listen(addr, &local_err);
+qapi_free_SocketAddress(addr);
+if (local_err != NULL) {
+g_critical("%s", error_get_pretty(local_err));
+error_free(local_err);
+return false;
+}
+ga_channel_listen_add(c, fd, true);
+break;
+}
 default:
 g_critical("error binding/listening to specified socket");
 return false;
diff --git a/qga/channel.h b/qga/channel.h
index ae8cf0f..8fd0c8f 100644
--- a/qga/channel.h
+++ b/qga/channel.h
@@ -19,6 +19,7 @@ typedef enum {
 GA_CHANNEL_VIRTIO_SERIAL,
 GA_CHANNEL_ISA_SERIAL,
 GA_CHANNEL_UNIX_LISTEN,
+GA_CHANNEL_VSOCK_LISTEN,
 } GAChannelMethod;
 
 typedef gboolean (*GAChannelCallback)(GIOCondition condition, gpointer opaque);
diff --git a/qga/main.c b/qga/main.c
index 0b9d04e..6caf215 100644
--- a/qga/main.c
+++ b/qga/main.c
@@ -190,8 +190,8 @@ static void usage(const char *cmd)
 "Usage: %s [-m  -p ] []\n"
 "QEMU Guest Agent %s\n"
 "\n"
-"  -m, --method  transport method: one of unix-listen, virtio-serial, or\n"
-"isa-serial (virtio-serial is the default)\n"
+"  -m, --method  transport method: one of unix-listen, virtio-serial,\n"
+"isa-serial, or vsock-listen (virtio-serial is the 
default)\n"
 "  -p, --pathdevice/socket path (the default for virtio-serial is:\n"
 "%s,\n"
 "the default for isa-serial is:\n"
@@ -659,6 +659,8 @@ static gboolean channel_init(GAState *s, const gchar 
*method, const gchar *path)
 channel_method = GA_CHANNEL_ISA_SERIAL;
 } else if (strcmp(method, "unix-listen") == 0) {
 channel_method = GA_CHANNEL_UNIX_LISTEN;
+} else if (strcmp(method, "vsock-listen") == 0) {
+channel_method = GA_CHANNEL_VSOCK_LISTEN;
 } else {
 g_critical("unsupported channel method/type: %s", method);
 return false;
-- 
2.7.4

Re: [Qemu-devel] [PATCH v4 14/17] vfio/pci: Conversion to realize

2016-10-06 Thread Auger Eric

Hi Markus

On 04/10/2016 14:58, Markus Armbruster wrote:
> Eric Auger  writes:
> 
>> This patch converts VFIO PCI to realize function.
>>
>> Also original initfn errors now are propagated using QEMU
>> error objects. All errors are formatted with the same pattern:
>> "vfio: %s: the error description"
>>
>> Signed-off-by: Eric Auger 
>>
>> ---
>> v2 -> v3:
>> - use errp directly in all cases
>>
>> v1 -> v2:
>> - correct error_setg_errno with positive error values
>> ---
>>  hw/vfio/pci.c| 68 
>> ++--
>>  hw/vfio/trace-events |  2 +-
>>  2 files changed, 29 insertions(+), 41 deletions(-)
>>
>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>> index 40ff4a7..b316e13 100644
>> --- a/hw/vfio/pci.c
>> +++ b/hw/vfio/pci.c
>> @@ -2513,13 +2513,12 @@ static void 
>> vfio_unregister_req_notifier(VFIOPCIDevice *vdev)
>>  vdev->req_enabled = false;
>>  }
>>  
>> -static int vfio_initfn(PCIDevice *pdev)
>> +static void vfio_realize(PCIDevice *pdev, Error **errp)
>>  {
>>  VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
>>  VFIODevice *vbasedev_iter;
>>  VFIOGroup *group;
>>  char *tmp, group_path[PATH_MAX], *group_name;
>> -Error *err = NULL;
>>  ssize_t len;
>>  struct stat st;
>>  int groupid;
>> @@ -2533,9 +2532,9 @@ static int vfio_initfn(PCIDevice *pdev)
>>  }
>>  
>>  if (stat(vdev->vbasedev.sysfsdev, &st) < 0) {
>> -error_setg_errno(&err, errno, "no such host device");
>> -ret = -errno;
>> -goto error;
>> +error_setg_errno(errp, errno, "no such host device");
>> +error_prepend(errp, ERR_PREFIX, vdev->vbasedev.sysfsdev);
>> +return;
>>  }
>>  
>>  vdev->vbasedev.name = g_strdup(basename(vdev->vbasedev.sysfsdev));
>> @@ -2547,8 +2546,8 @@ static int vfio_initfn(PCIDevice *pdev)
>>  g_free(tmp);
>>  
>>  if (len <= 0 || len >= sizeof(group_path)) {
>> -ret = len < 0 ? -errno : -ENAMETOOLONG;
>> -error_setg_errno(&err, -ret, "no iommu_group found");
>> +error_setg_errno(errp, len < 0 ? errno : ENAMETOOLONG,
>> + "no iommu_group found");
>>  goto error;
>>  }
>>  
>> @@ -2556,36 +2555,33 @@ static int vfio_initfn(PCIDevice *pdev)
>>  
>>  group_name = basename(group_path);
>>  if (sscanf(group_name, "%d", &groupid) != 1) {
>> -error_setg_errno(&err, errno, "failed to read %s", group_path);
>> -ret = -errno;
>> +error_setg_errno(errp, errno, "failed to read %s", group_path);
>>  goto error;
>>  }
>>  
>> -trace_vfio_initfn(vdev->vbasedev.name, groupid);
>> +trace_vfio_realize(vdev->vbasedev.name, groupid);
>>  
>> -group = vfio_get_group(groupid, pci_device_iommu_address_space(pdev), 
>> &err);
>> +group = vfio_get_group(groupid, pci_device_iommu_address_space(pdev), 
>> errp);
>>  if (!group) {
>> -ret = -ENOENT;
>>  goto error;
>>  }
>>  
>>  QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
>>  if (strcmp(vbasedev_iter->name, vdev->vbasedev.name) == 0) {
>> -error_setg(&err, "device is already attached");
>> +error_setg(errp, "device is already attached");
>>  vfio_put_group(group);
>> -ret = -EBUSY;
>>  goto error;
>>  }
>>  }
>>  
>> -ret = vfio_get_device(group, vdev->vbasedev.name, &vdev->vbasedev, 
>> &err);
>> +ret = vfio_get_device(group, vdev->vbasedev.name, &vdev->vbasedev, 
>> errp);
>>  if (ret) {
>>  vfio_put_group(group);
>>  goto error;
>>  }
>>  
>> -ret = vfio_populate_device(vdev, &err);
>> -if (err) {
>> +ret = vfio_populate_device(vdev, errp);
>> +if (ret) {
> 
> The if (err) comes from PATCH 03.  You could reduce churn by checking
> ret from the start.  More of the same below.  Your choice.

I removed this spurious flip for vfio_populate_device and
vfio_msix_early_setup

Thanks

Eric
> 
>>  goto error;
>>  }
>>  
> [...]
>

Re: [Qemu-devel] [PATCH v2 1/2] linux-user: added support for preadv() system call.

2016-10-06 Thread Peter Maydell

On 6 October 2016 at 16:49, Dejan Jovicevic  wrote:
> v1 -> v2:
> - Using safe_preadv() instead of calling preadv() directly.
>
> This system call performs the same task as the readv system call,
> with the exception of having the fourth argument, offset, which
> specifies the file offset at which the input operation is to be performed.
>
> This implementation is based on the existing readv implementation.
>
> Signed-off-by: Dejan Jovicevic 
> ---
>  linux-user/syscall.c | 15 +++
>  1 file changed, 15 insertions(+)
>
> diff --git a/linux-user/syscall.c b/linux-user/syscall.c
> index 0815f30..c7619f6 100644
> --- a/linux-user/syscall.c
> +++ b/linux-user/syscall.c
> @@ -908,6 +908,8 @@ safe_syscall2(int, tkill, int, tid, int, sig)
>  safe_syscall3(int, tgkill, int, tgid, int, pid, int, sig)
>  safe_syscall3(ssize_t, readv, int, fd, const struct iovec *, iov, int, 
> iovcnt)
>  safe_syscall3(ssize_t, writev, int, fd, const struct iovec *, iov, int, 
> iovcnt)
> +safe_syscall4(ssize_t, preadv, int, fd, const struct iovec *, iov, int, 
> iovcnt,
> +  off_t, offset)
>  safe_syscall3(int, connect, int, fd, const struct sockaddr *, addr,
>socklen_t, addrlen)
>  safe_syscall6(ssize_t, sendto, int, fd, const void *, buf, size_t, len,
> @@ -9894,6 +9896,19 @@ abi_long do_syscall(void *cpu_env, int num, abi_long 
> arg1,
>  }
>  }
>  break;
> +#if defined(TARGET_NR_preadv)
> +case TARGET_NR_preadv:
> +{
> +struct iovec *vec = lock_iovec(VERIFY_WRITE, arg2, arg3, 0);
> +if (vec != NULL) {
> +ret = get_errno(safe_preadv(arg1, vec, arg3, arg4));
> +unlock_iovec(vec, arg2, arg3, 1);
> +} else {
> +ret = -host_to_target_errno(errno);
> +   }
> +}
> +break;
> +#endif

Looking at the kernel implementation I think this is not quite right
(sorry for not checking the first time around).
preadv is a 5-argument syscall:
SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec,
unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)

and the 64-bit offset is obtained by combining the pos_l and
pos_h arguments via pos_from_hilo():
http://lxr.free-electrons.com/source/fs/read_write.c#L927

So we need to handle 5 arguments in the inputs from the guest,
and we need to pass 5 arguments to the host syscall. (Watch out
for the case where guest and host have different ideas of
the size of long, when converting the input pos_l/pos_h
to the host pos_l/pos_h.)

Similarly for pwritev.

thanks
-- PMM

Re: [Qemu-devel] [PATCH 09/10] qemu-tech: rewrite some parts

2016-10-06 Thread Emilio G. Cota

On Thu, Oct 06, 2016 at 17:24:22 +0200, Paolo Bonzini wrote:
> Drop most of the device emulation part and merge the rest into the description
> of the MMU.  Make some bits more up-to-date.
> 
> Signed-off-by: Paolo Bonzini 
(snip)
>  The host SIGSEGV and SIGBUS signal handlers are used to get invalid
> -memory accesses. The simulated program counter is found by
> -retranslating the corresponding basic block and by looking where the
> -host program counter was at the exception point.
> -
> -The virtual CPU cannot retrieve the exact @code{EFLAGS} register because
> -in some cases it is not computed because of condition code
> -optimisations. It is not a big concern because the emulated code can
> -still be restarted in any cases.
> -
> -@node MMU emulation
> -@section MMU emulation
> -
> -For system emulation QEMU supports a soft MMU. In that mode, the MMU
> +memory accesses. QEMU keeps a map that host program counter to
> +target program counter, and looks up where the exception happened
> +based on the host program counter at the exception point.

I had to read "keeps a map that host program to target program counter"
several times; that "that" confused me.
Perhaps "keeps a map of host-to-target program counters" would
be clearer?

> +On some targets, some bits of the virtual CPU's state are not flushed to the
> +memory until the end of the translation block.  This is done for internal

"flushed to memory" sounds better to me than "flushed to the memory".

Emilio

Re: [Qemu-devel] [PATCH] virtio-9p: add reset handler

2016-10-06 Thread Michael S. Tsirkin

On Thu, Oct 06, 2016 at 03:12:10PM +0200, Greg Kurz wrote:
> Virtio devices should implement the VirtIODevice->reset() function to
> perform necessary cleanup actions and to bring the device to a quiescent
> state.
> 
> In the case of the virtio-9p device, this means:
> - emptying the list of active PDUs (i.e. draining all in-flight I/O)
> - freeing all fids (i.e. close open file descriptors and free memory)
> 
> That's what this patch does.
> 
> The reset handler first waits for all active PDUs to complete. Since
> completion happens in the QEMU global aio context, we just have to
> loop around aio_poll() until the active list is empty.
> 
> The freeing part involves some actions to be performed on the backend,
> like closing file descriptors or flushing extended attributes to the
> underlying filesystem. The virtfs_reset() function already does the
> job: it calls free_fid() for all open fids not involved in an ongoing
> I/O operation. We are sure this is the case since we have drained
> the PDU active list.
> 
> The current code implements all backend accesses with coroutines, but we
> want to stay synchronous on the reset path. We can either change the
> current code to be able to run when not in coroutine context, or create
> a coroutine context and wait for virtfs_reset() to complete. This patch
> goes for the latter because it results in simpler code.
> 
> Note that we also need to create a dummy PDU because it is also an API
> to pass the FsContext pointer to all backend callbacks.
> 
> Signed-off-by: Greg Kurz 

Reviewed-by: Michael S. Tsirkin 

> ---
>  hw/9pfs/9p.c   |   31 +++
>  hw/9pfs/9p.h   |1 +
>  hw/9pfs/virtio-9p-device.c |8 
>  3 files changed, 40 insertions(+)
> 
> diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c
> index 119ee584969b..42137395037e 100644
> --- a/hw/9pfs/9p.c
> +++ b/hw/9pfs/9p.c
> @@ -3522,6 +3522,37 @@ void v9fs_device_unrealize_common(V9fsState *s, Error 
> **errp)
>  g_free(s->tag);
>  }
>  
> +
> +typedef struct VirtfsCoResetData {
> +V9fsPDU pdu;
> +bool done;
> +} VirtfsCoResetData;
> +
> +static void coroutine_fn virtfs_co_reset(void *opaque)
> +{
> +VirtfsCoResetData *data = opaque;
> +
> +virtfs_reset(&data->pdu);
> +data->done = true;
> +}
> +
> +void v9fs_reset(V9fsState *s)
> +{
> +VirtfsCoResetData data = { .pdu = { .s = s }, .done = false };
> +Coroutine *co;
> +
> +while (!QLIST_EMPTY(&s->active_list)) {
> +aio_poll(qemu_get_aio_context(), true);
> +}
> +
> +co = qemu_coroutine_create(virtfs_co_reset, &data);
> +qemu_coroutine_enter(co);
> +
> +while (!data.done) {
> +aio_poll(qemu_get_aio_context(), true);
> +}
> +}
> +
>  static void __attribute__((__constructor__)) v9fs_set_fd_limit(void)
>  {
>  struct rlimit rlim;
> diff --git a/hw/9pfs/9p.h b/hw/9pfs/9p.h
> index d539d2ebe9c0..6b69eaf24614 100644
> --- a/hw/9pfs/9p.h
> +++ b/hw/9pfs/9p.h
> @@ -339,5 +339,6 @@ ssize_t pdu_unmarshal(V9fsPDU *pdu, size_t offset, const 
> char *fmt, ...);
>  V9fsPDU *pdu_alloc(V9fsState *s);
>  void pdu_free(V9fsPDU *pdu);
>  void pdu_submit(V9fsPDU *pdu);
> +void v9fs_reset(V9fsState *s);
>  
>  #endif
> diff --git a/hw/9pfs/virtio-9p-device.c b/hw/9pfs/virtio-9p-device.c
> index 009b43f6d045..b73d72aceb64 100644
> --- a/hw/9pfs/virtio-9p-device.c
> +++ b/hw/9pfs/virtio-9p-device.c
> @@ -130,6 +130,13 @@ static void virtio_9p_device_unrealize(DeviceState *dev, 
> Error **errp)
>  v9fs_device_unrealize_common(s, errp);
>  }
>  
> +static void virtio_9p_reset(VirtIODevice *vdev)
> +{
> +V9fsVirtioState *v = (V9fsVirtioState *)vdev;
> +
> +v9fs_reset(&v->state);
> +}
> +
>  ssize_t virtio_pdu_vmarshal(V9fsPDU *pdu, size_t offset,
>  const char *fmt, va_list ap)
>  {
> @@ -188,6 +195,7 @@ static void virtio_9p_class_init(ObjectClass *klass, void 
> *data)
>  vdc->unrealize = virtio_9p_device_unrealize;
>  vdc->get_features = virtio_9p_get_features;
>  vdc->get_config = virtio_9p_get_config;
> +vdc->reset = virtio_9p_reset;
>  }
>  
>  static const TypeInfo virtio_device_info = {

[Qemu-devel] [PATCH 2/4] qga: drop unnecessary GA_CHANNEL_UNIX_LISTEN checks

2016-10-06 Thread Stefan Hajnoczi

Throughout the code there are c->listen_channel checks which manage the
listen socket file descriptor (waiting for accept(2), closing the file
descriptor, etc).  These checks are currently preceded by explicit
c->method == GA_CHANNEL_UNIX_LISTEN checks.

Explicit GA_CHANNEL_UNIX_LISTEN checks are not necessary since serial
channel types do not create the listen channel (c->listen_channel).

As more listen channel types are added, explicitly checking all of them
becomes messy.  Rely on c->listen_channel to determine whether or not a
listen socket file descriptor is used.

Signed-off-by: Stefan Hajnoczi 
---
 qga/channel-posix.c | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/qga/channel-posix.c b/qga/channel-posix.c
index bf32158..579891d 100644
--- a/qga/channel-posix.c
+++ b/qga/channel-posix.c
@@ -61,7 +61,6 @@ static void ga_channel_listen_add(GAChannel *c, int 
listen_fd, bool create)
 
 static void ga_channel_listen_close(GAChannel *c)
 {
-g_assert(c->method == GA_CHANNEL_UNIX_LISTEN);
 g_assert(c->listen_channel);
 g_io_channel_shutdown(c->listen_channel, true, NULL);
 g_io_channel_unref(c->listen_channel);
@@ -77,7 +76,7 @@ static void ga_channel_client_close(GAChannel *c)
 g_io_channel_shutdown(c->client_channel, true, NULL);
 g_io_channel_unref(c->client_channel);
 c->client_channel = NULL;
-if (c->method == GA_CHANNEL_UNIX_LISTEN && c->listen_channel) {
+if (c->listen_channel) {
 ga_channel_listen_add(c, 0, false);
 }
 }
@@ -255,8 +254,7 @@ GAChannel *ga_channel_new(GAChannelMethod method, const 
gchar *path,
 
 void ga_channel_free(GAChannel *c)
 {
-if (c->method == GA_CHANNEL_UNIX_LISTEN
-&& c->listen_channel) {
+if (c->listen_channel) {
 ga_channel_listen_close(c);
 }
 if (c->client_channel) {
-- 
2.7.4

Re: [Qemu-devel] [PATCH v4 08/17] vfio: Pass an Error object to vfio_connect_container

2016-10-06 Thread Auger Eric

Hi Markus,

On 04/10/2016 14:03, Markus Armbruster wrote:
> Eric Auger  writes:
> 
>> The error is currently simply reported in vfio_get_group. Don't
>> bother too much with the prefix which will be handled at upper level,
>> later on.
>>
>> Signed-off-by: Eric Auger 
>> ---
>>  hw/vfio/common.c | 39 ---
>>  1 file changed, 24 insertions(+), 15 deletions(-)
>>
>> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
>> index 29188a1..3b18eb4 100644
>> --- a/hw/vfio/common.c
>> +++ b/hw/vfio/common.c
>> @@ -34,6 +34,7 @@
>>  #include "qemu/range.h"
>>  #include "sysemu/kvm.h"
>>  #include "trace.h"
>> +#include "qapi/error.h"
>>  
>>  struct vfio_group_head vfio_group_list =
>>  QLIST_HEAD_INITIALIZER(vfio_group_list);
>> @@ -900,7 +901,8 @@ static void vfio_put_address_space(VFIOAddressSpace 
>> *space)
>>  }
>>  }
>>  
>> -static int vfio_connect_container(VFIOGroup *group, AddressSpace *as)
>> +static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
>> +  Error **errp)
>>  {
>>  VFIOContainer *container;
>>  int ret, fd;
>> @@ -918,15 +920,15 @@ static int vfio_connect_container(VFIOGroup *group, 
>> AddressSpace *as)
>>  
>>  fd = qemu_open("/dev/vfio/vfio", O_RDWR);
>>  if (fd < 0) {
>> -error_report("vfio: failed to open /dev/vfio/vfio: %m");
>> +error_setg_errno(errp, errno, "failed to open /dev/vfio/vfio");
>>  ret = -errno;
>>  goto put_space_exit;
>>  }
>>  
>>  ret = ioctl(fd, VFIO_GET_API_VERSION);
>>  if (ret != VFIO_API_VERSION) {
>> -error_report("vfio: supported vfio version: %d, "
>> - "reported version: %d", VFIO_API_VERSION, ret);
>> +error_setg(errp, "supported vfio version: %d, "
>> +   "reported version: %d", VFIO_API_VERSION, ret);
>>  ret = -EINVAL;
>>  goto close_fd_exit;
>>  }
>> @@ -941,7 +943,7 @@ static int vfio_connect_container(VFIOGroup *group, 
>> AddressSpace *as)
>>  
>>  ret = ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &fd);
>>  if (ret) {
>> -error_report("vfio: failed to set group container: %m");
>> +error_setg_errno(errp, errno, "failed to set group container");
>>  ret = -errno;
>>  goto free_container_exit;
>>  }
>> @@ -949,7 +951,7 @@ static int vfio_connect_container(VFIOGroup *group, 
>> AddressSpace *as)
>>  container->iommu_type = v2 ? VFIO_TYPE1v2_IOMMU : VFIO_TYPE1_IOMMU;
>>  ret = ioctl(fd, VFIO_SET_IOMMU, container->iommu_type);
>>  if (ret) {
>> -error_report("vfio: failed to set iommu for container: %m");
>> +error_setg_errno(errp, errno, "failed to set iommu for 
>> container");
>>  ret = -errno;
>>  goto free_container_exit;
>>  }
>> @@ -976,7 +978,7 @@ static int vfio_connect_container(VFIOGroup *group, 
>> AddressSpace *as)
>>  
>>  ret = ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &fd);
>>  if (ret) {
>> -error_report("vfio: failed to set group container: %m");
>> +error_setg_errno(errp, errno, "failed to set group container");
>>  ret = -errno;
>>  goto free_container_exit;
>>  }
>> @@ -984,7 +986,7 @@ static int vfio_connect_container(VFIOGroup *group, 
>> AddressSpace *as)
>>  v2 ? VFIO_SPAPR_TCE_v2_IOMMU : VFIO_SPAPR_TCE_IOMMU;
>>  ret = ioctl(fd, VFIO_SET_IOMMU, container->iommu_type);
>>  if (ret) {
>> -error_report("vfio: failed to set iommu for container: %m");
>> +error_setg_errno(errp, errno, "failed to set iommu for 
>> container");
>>  ret = -errno;
>>  goto free_container_exit;
>>  }
>> @@ -997,7 +999,7 @@ static int vfio_connect_container(VFIOGroup *group, 
>> AddressSpace *as)
>>  if (!v2) {
>>  ret = ioctl(fd, VFIO_IOMMU_ENABLE);
>>  if (ret) {
>> -error_report("vfio: failed to enable container: %m");
>> +error_setg_errno(errp, errno, "failed to enable container");
>>  ret = -errno;
>>  goto free_container_exit;
>>  }
>> @@ -1008,7 +1010,8 @@ static int vfio_connect_container(VFIOGroup *group, 
>> AddressSpace *as)
>>   &address_space_memory);
>>  if (container->error) {
>>  memory_listener_unregister(&container->prereg_listener);
>> -error_report("vfio: RAM memory listener initialization 
>> failed for container");
>> +error_setg(errp,
>> +"RAM memory listener initialization failed for 
>> container");
>>  goto free_container_exit;
> 
> Preexisting: @ret not set here.  Intentional?
It was not before either. I suspect it is a bug since the container is
torn down.

I set ret to contai

Re: [Qemu-devel] [PATCH v4 15/17] vfio/pci: Remove vfio_msix_early_setup returned value

2016-10-06 Thread Auger Eric

Hi Markus,

On 04/10/2016 15:05, Markus Armbruster wrote:
> Eric Auger  writes:
> 
>> The returned value is not used anymore by the caller, vfio_realize,
>> since the error now is stored in the error object. So let's remove it.
>>
>> Signed-off-by: Eric Auger 
>>
>> ---
>>
>> Logically we could do that job for all the functions now getting an
>> Error object passed as a parameter to avoid duplicate information
>> between the error content and the returned value. This requires using
>> a local error object in vfio_realize. So I am not sure this is worth
>> the candle.
> 
> Matter of taste, yours is fine.
> 
> We used to recommend returning void instead of an error code when the
> function sets an error.  More parsimonious in theory, more boiler-plate
> in practice, so we accept either now.  Perhaps we should even recommend
> returning an error code, but such a recommendation needs to come with
> patches converting existing code to it.

The risk is that if a programmer returns an error value without setting
the errp he will get a SIGSEGV on subsequent error_prepend().
> 
>> ---
>>  hw/vfio/pci.c | 20 ++--
>>  1 file changed, 10 insertions(+), 10 deletions(-)
>>
>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>> index b316e13..cea4d48 100644
>> --- a/hw/vfio/pci.c
>> +++ b/hw/vfio/pci.c
>> @@ -1290,7 +1290,7 @@ static void vfio_pci_fixup_msix_region(VFIOPCIDevice 
>> *vdev)
>>   * need to first look for where the MSI-X table lives.  So we
>>   * unfortunately split MSI-X setup across two functions.
>>   */
>> -static int vfio_msix_early_setup(VFIOPCIDevice *vdev, Error **errp)
>> +static void vfio_msix_early_setup(VFIOPCIDevice *vdev, Error **errp)
>>  {
>>  uint8_t pos;
>>  uint16_t ctrl;
>> @@ -1300,25 +1300,25 @@ static int vfio_msix_early_setup(VFIOPCIDevice 
>> *vdev, Error **errp)
>>  
>>  pos = pci_find_capability(&vdev->pdev, PCI_CAP_ID_MSIX);
>>  if (!pos) {
>> -return 0;
>> +return;
>>  }
>>  
>>  if (pread(fd, &ctrl, sizeof(ctrl),
>>vdev->config_offset + pos + PCI_MSIX_FLAGS) != sizeof(ctrl)) {
>>  error_setg_errno(errp, errno, "failed to read PCI MSIX FLAGS");
>> -return -errno;
>> +return;
>>  }
>>  
>>  if (pread(fd, &table, sizeof(table),
>>vdev->config_offset + pos + PCI_MSIX_TABLE) != sizeof(table)) 
>> {
>>  error_setg_errno(errp, errno, "failed to read PCI MSIX TABLE");
>> -return -errno;
>> +return;
>>  }
>>  
>>  if (pread(fd, &pba, sizeof(pba),
>>vdev->config_offset + pos + PCI_MSIX_PBA) != sizeof(pba)) {
>>  error_setg_errno(errp, errno, "failed to read PCI MSIX PBA");
>> -return -errno;
>> +return;
>>  }
>>  
>>  ctrl = le16_to_cpu(ctrl);
>> @@ -1351,7 +1351,7 @@ static int vfio_msix_early_setup(VFIOPCIDevice *vdev, 
>> Error **errp)
>>  error_setg(errp, "hardware reports invalid configuration, "
>> "MSIX PBA outside of specified BAR");
>>  g_free(msix);
>> -return -EINVAL;
>> +return;
>>  }
>>  }
>>  
>> @@ -1360,8 +1360,6 @@ static int vfio_msix_early_setup(VFIOPCIDevice *vdev, 
>> Error **errp)
>>  vdev->msix = msix;
>>  
>>  vfio_pci_fixup_msix_region(vdev);
>> -
>> -return 0;
>>  }
>>  
>>  static int vfio_msix_setup(VFIOPCIDevice *vdev, int pos, Error **errp)
>> @@ -2519,6 +2517,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
>>  VFIODevice *vbasedev_iter;
>>  VFIOGroup *group;
>>  char *tmp, group_path[PATH_MAX], *group_name;
>> +Error *err = NULL;
>>  ssize_t len;
>>  struct stat st;
>>  int groupid;
>> @@ -2670,8 +2669,9 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
>>  
>>  vfio_pci_size_rom(vdev);
>>  
>> -ret = vfio_msix_early_setup(vdev, errp);
>> -if (ret) {
>> +vfio_msix_early_setup(vdev, &err);
>> +if (err) {
>> +error_propagate(errp, err);
>>  goto error;
>>  }
> 
> PATCH 04 checks err, PATCH 13 flips to ret, and this one flips back.
> Have you considered dropping both flips?  Your choice.
I removed one flip at least

Thanks

Eric
>

Re: [Qemu-devel] [PATCH] docs: Belatedly update for move of QMP/* to docs/

2016-10-06 Thread Eric Blake

On 10/06/2016 10:10 AM, Markus Armbruster wrote:
> Missed in commit 7537fe0 and commit 9b89b6a.
> 
> Signed-off-by: Markus Armbruster 
> ---
>  docs/qmp-commands.txt | 2 +-
>  docs/writing-qmp-commands.txt | 4 ++--
>  2 files changed, 3 insertions(+), 3 deletions(-)

Reviewed-by: Eric Blake 

-- 
Eric Blake   eblake redhat com+1-919-301-3266
Libvirt virtualization library http://libvirt.org



signature.asc
Description: OpenPGP digital signature

Re: [Qemu-devel] [PATCH 00/10] qemu-tech cleanup

2016-10-06 Thread Emilio G. Cota

On Thu, Oct 06, 2016 at 17:24:13 +0200, Paolo Bonzini wrote:
> qemu-tech is limited to TCG and large parts of it are obsolete or are
> just fine in qemu-doc.  Split it into other sources of documentation,
> placing what's left in an appendix of qemu-doc.
> 
> Ultimately we should have a new internals manual built from docs/, and
> then the "Translator Internals" parts of qemu-tech could move to docs/
> as well.  The bits on limitation and features of CPU emulation should
> remain in qemu-doc.  They are not entirely up-to-date, but I am not
> attempting to improve that yet---also because I could only really do
> that for x86.

Very happy with this update!

Reviewed-by: Emilio G. Cota 
for the whole patchset.

Thanks,

Emilio

[Qemu-devel] [PATCH v5 17/17] vfio/pci: Handle host oversight

2016-10-06 Thread Eric Auger

In case the end-user calls qemu with the -vfio-pci option without passing
either sysfsdev or host property value, the device is interpreted as
0000:00:00.0. Let's create a specific error message to guide the end-user.

Signed-off-by: Eric Auger 
Reviewed-by: Markus Armbruster 

---
---
 hw/vfio/pci.c | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 6d01324..fef436a 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -2520,6 +2520,13 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
 int i, ret;
 
 if (!vdev->vbasedev.sysfsdev) {
+if (!(~vdev->host.domain || ~vdev->host.bus ||
+  ~vdev->host.slot || ~vdev->host.function)) {
+error_setg(errp, "No provided host device");
+error_append_hint(errp, "Use -vfio-pci,host=:BB:DD.F "
+  "or -vfio-pci,sysfsdev=PATH_TO_DEVICE\n");
+return;
+}
 vdev->vbasedev.sysfsdev =
 g_strdup_printf("/sys/bus/pci/devices/%04x:%02x:%02x.%01x",
 vdev->host.domain, vdev->host.bus,
@@ -2828,6 +2835,10 @@ static void vfio_instance_init(Object *obj)
 device_add_bootindex_property(obj, &vdev->bootindex,
   "bootindex", NULL,
   &pci_dev->qdev, NULL);
+vdev->host.domain = ~0U;
+vdev->host.bus = ~0U;
+vdev->host.slot = ~0U;
+vdev->host.function = ~0U;
 }
 
 static Property vfio_pci_dev_properties[] = {
-- 
1.9.1

[Qemu-devel] [PATCH v5 13/17] vfio/platform: Pass an error object to vfio_base_device_init

2016-10-06 Thread Eric Auger

This patch propagates errors encountered during vfio_base_device_init
up to the realize function.

In case the host value is not set or badly formed we now report an
error.

Signed-off-by: Eric Auger 
Reviewed-by: Markus Armbruster 

---
v4 -> v5:
- mention error returned on badly formed host value
- rework error message on stat failure

v3: creation
---
 hw/vfio/platform.c | 50 +++---
 1 file changed, 27 insertions(+), 23 deletions(-)

diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c
index 484e31f..a4663c9 100644
--- a/hw/vfio/platform.c
+++ b/hw/vfio/platform.c
@@ -541,13 +541,14 @@ static VFIODeviceOps vfio_platform_ops = {
 /**
  * vfio_base_device_init - perform preliminary VFIO setup
  * @vbasedev: the VFIO device handle
+ * @errp: error object
  *
  * Implement the VFIO command sequence that allows to discover
  * assigned device resources: group extraction, device
  * fd retrieval, resource query.
  * Precondition: the device name must be initialized
  */
-static int vfio_base_device_init(VFIODevice *vbasedev)
+static int vfio_base_device_init(VFIODevice *vbasedev, Error **errp)
 {
 VFIOGroup *group;
 VFIODevice *vbasedev_iter;
@@ -555,7 +556,6 @@ static int vfio_base_device_init(VFIODevice *vbasedev)
 ssize_t len;
 struct stat st;
 int groupid;
-Error *err = NULL;
 int ret;
 
 /* @sysfsdev takes precedence over @host */
@@ -564,6 +564,7 @@ static int vfio_base_device_init(VFIODevice *vbasedev)
 vbasedev->name = g_strdup(basename(vbasedev->sysfsdev));
 } else {
 if (!vbasedev->name || strchr(vbasedev->name, '/')) {
+error_setg(errp, "wrong host device name");
 return -EINVAL;
 }
 
@@ -572,8 +573,8 @@ static int vfio_base_device_init(VFIODevice *vbasedev)
 }
 
 if (stat(vbasedev->sysfsdev, &st) < 0) {
-error_report("vfio: error: no such host device: %s",
- vbasedev->sysfsdev);
+error_setg_errno(errp, errno,
+ "failed to get the sysfs host device file status");
 return -errno;
 }
 
@@ -582,49 +583,44 @@ static int vfio_base_device_init(VFIODevice *vbasedev)
 g_free(tmp);
 
 if (len < 0 || len >= sizeof(group_path)) {
-error_report("vfio: error no iommu_group for device");
-return len < 0 ? -errno : -ENAMETOOLONG;
+ret = len < 0 ? -errno : -ENAMETOOLONG;
+error_setg_errno(errp, -ret, "no iommu_group found");
+return ret;
 }
 
 group_path[len] = 0;
 
 group_name = basename(group_path);
 if (sscanf(group_name, "%d", &groupid) != 1) {
-error_report("vfio: error reading %s: %m", group_path);
+error_setg_errno(errp, errno, "failed to read %s", group_path);
 return -errno;
 }
 
 trace_vfio_platform_base_device_init(vbasedev->name, groupid);
 
-group = vfio_get_group(groupid, &address_space_memory, &err);
+group = vfio_get_group(groupid, &address_space_memory, errp);
 if (!group) {
-ret = -ENOENT;
-goto error;
+return -ENOENT;
 }
 
 QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
 if (strcmp(vbasedev_iter->name, vbasedev->name) == 0) {
-error_report("vfio: error: device %s is already attached",
- vbasedev->name);
+error_setg(errp, "device is already attached");
 vfio_put_group(group);
 return -EBUSY;
 }
 }
-ret = vfio_get_device(group, vbasedev->name, vbasedev, &err);
+ret = vfio_get_device(group, vbasedev->name, vbasedev, errp);
 if (ret) {
 vfio_put_group(group);
-goto error;
+return ret;
 }
 
-ret = vfio_populate_device(vbasedev, &err);
+ret = vfio_populate_device(vbasedev, errp);
 if (ret) {
 vfio_put_group(group);
 }
 
-error:
-if (err) {
-error_reportf_err(err, ERR_PREFIX, vbasedev->name);
-}
 return ret;
 }
 
@@ -650,11 +646,9 @@ static void vfio_platform_realize(DeviceState *dev, Error 
**errp)
 vbasedev->sysfsdev : vbasedev->name,
 vdev->compat);
 
-ret = vfio_base_device_init(vbasedev);
+ret = vfio_base_device_init(vbasedev, errp);
 if (ret) {
-error_setg(errp, "vfio: vfio_base_device_init failed for %s",
-   vbasedev->name);
-return;
+goto out;
 }
 
 for (i = 0; i < vbasedev->num_regions; i++) {
@@ -664,6 +658,16 @@ static void vfio_platform_realize(DeviceState *dev, Error 
**errp)
 }
 sysbus_init_mmio(sbdev, vdev->regions[i]->mem);
 }
+out:
+if (!ret) {
+return;
+}
+
+if (vdev->vbasedev.name) {
+error_prepend(errp, ERR_PREFIX, vdev->vbasedev.name);
+} else {
+error_prepend(errp, "vfio error: ");
+}
 }
 
 static const VMStateDescription vfio_platform_vmstate = {
-- 
1.9.1

[Qemu-devel] [PATCH 2/5] target-tricore: Added MADD.F and MSUB.F instructions

2016-10-06 Thread Bastian Koppelmann

Multiplies D[a] and D[b] and adds/subtracts the result to/from D[d].
The result is put in D[c]. All operands are floating-point numbers.

Signed-off-by: Bastian Koppelmann 
---
 target-tricore/fpu_helper.c | 93 -
 target-tricore/helper.h |  2 +
 target-tricore/translate.c  |  8 
 3 files changed, 102 insertions(+), 1 deletion(-)

diff --git a/target-tricore/fpu_helper.c b/target-tricore/fpu_helper.c
index 56a26eb..32055f3 100644
--- a/target-tricore/fpu_helper.c
+++ b/target-tricore/fpu_helper.c
@@ -21,7 +21,8 @@
 #include "cpu.h"
 #include "exec/helper-proto.h"
 
-#define ADD_NAN   0x7cf1
+#define QUIET_NAN 0x7fc0
+#define ADD_NAN   0x7fc1
 #define DIV_NAN   0x7fc8
 #define MUL_NAN   0x7fc2
 #define FPU_FS PSW_USB_C
@@ -47,6 +48,42 @@ static inline bool f_is_denormal(float32 arg)
 return float32_is_zero_or_denormal(arg) && !float32_is_zero(arg);
 }
 
+static inline float32 f_maddsub_nan_result(float32 arg1, float32 arg2,
+   float32 arg3, float32 result,
+   uint32_t flags)
+{
+uint32_t aSign, bSign, cSign;
+uint32_t aExp, bExp, cExp;
+
+if (float32_is_any_nan(arg1) || float32_is_any_nan(arg2) ||
+float32_is_any_nan(arg3)) {
+return QUIET_NAN;
+} else if (float32_is_infinity(arg1) && float32_is_zero(arg2)) {
+return MUL_NAN;
+} else if (float32_is_zero(arg1) && float32_is_infinity(arg2)) {
+return MUL_NAN;
+} else {
+aSign = arg1 >> 31;
+bSign = arg2 >> 31;
+cSign = arg3 >> 31;
+
+aExp = (arg1 >> 23) & 0xff;
+bExp = (arg2 >> 23) & 0xff;
+cExp = (arg3 >> 23) & 0xff;
+
+if (flags & float_muladd_negate_c) {
+cSign ^= 1;
+}
+if (((aExp == 0xff) || (bExp == 0xff)) && (cExp == 0xff)) {
+if (aSign ^ bSign ^ cSign) {
+return ADD_NAN;
+}
+}
+}
+
+return result;
+}
+
 static void f_update_psw_flags(CPUTriCoreState *env, uint8_t flags)
 {
 uint8_t some_excp = 0;
@@ -159,6 +196,60 @@ uint32_t helper_fdiv(CPUTriCoreState *env, uint32_t r1, 
uint32_t r2)
 return (uint32_t)f_result;
 }
 
+uint32_t helper_fmadd(CPUTriCoreState *env, uint32_t r1,
+  uint32_t r2, uint32_t r3)
+{
+uint32_t flags;
+float32 arg1 = make_float32(r1);
+float32 arg2 = make_float32(r2);
+float32 arg3 = make_float32(r3);
+float32 f_result;
+
+f_result = float32_muladd(arg1, arg2, arg3, 0, &env->fp_status);
+
+flags = f_get_excp_flags(env);
+if (flags) {
+if (flags & float_flag_invalid) {
+arg1 = float32_squash_input_denormal(arg1, &env->fp_status);
+arg2 = float32_squash_input_denormal(arg2, &env->fp_status);
+arg3 = float32_squash_input_denormal(arg3, &env->fp_status);
+f_result = f_maddsub_nan_result(arg1, arg2, arg3, f_result, flags);
+}
+f_update_psw_flags(env, flags);
+} else {
+env->FPU_FS = 0;
+}
+return (uint32_t)f_result;
+}
+
+uint32_t helper_fmsub(CPUTriCoreState *env, uint32_t r1,
+  uint32_t r2, uint32_t r3)
+{
+uint32_t flags;
+float32 arg1 = make_float32(r1);
+float32 arg2 = make_float32(r2);
+float32 arg3 = make_float32(r3);
+float32 f_result;
+
+f_result = float32_muladd(arg1, arg2, arg3, float_muladd_negate_product,
+  &env->fp_status);
+
+flags = f_get_excp_flags(env);
+if (flags) {
+if (flags & float_flag_invalid) {
+arg1 = float32_squash_input_denormal(arg1, &env->fp_status);
+arg2 = float32_squash_input_denormal(arg2, &env->fp_status);
+arg3 = float32_squash_input_denormal(arg3, &env->fp_status);
+
+f_result = f_maddsub_nan_result(arg1, arg2, arg3, f_result, flags);
+}
+f_update_psw_flags(env, flags);
+} else {
+env->FPU_FS = 0;
+}
+return (uint32_t)f_result;
+}
+
 uint32_t helper_fcmp(CPUTriCoreState *env, uint32_t r1, uint32_t r2)
 {
 uint32_t result, flags;
diff --git a/target-tricore/helper.h b/target-tricore/helper.h
index 467c880..c897a44 100644
--- a/target-tricore/helper.h
+++ b/target-tricore/helper.h
@@ -109,6 +109,8 @@ DEF_HELPER_3(fadd, i32, env, i32, i32)
 DEF_HELPER_3(fsub, i32, env, i32, i32)
 DEF_HELPER_3(fmul, i32, env, i32, i32)
 DEF_HELPER_3(fdiv, i32, env, i32, i32)
+DEF_HELPER_4(fmadd, i32, env, i32, i32, i32)
+DEF_HELPER_4(fmsub, i32, env, i32, i32, i32)
 DEF_HELPER_3(fcmp, i32, env, i32, i32)
 DEF_HELPER_2(ftoi, i32, env, i32)
 DEF_HELPER_2(itof, i32, env, i32)
diff --git a/target-tricore/translate.c b/target-tricore/translate.c
index 27c6d31..3fec353 100644
--- a/target-tricore/translate.c
+++ b/target-tricore/translate.c
@@ -7096,6 +7096,14 @@ static void decode_rrr_divide(CPUTriCoreState *env, 
DisasContext *ctx)
 case OPC2_32_RRR

[Qemu-devel] [PATCH v5 15/17] vfio/pci: Remove vfio_msix_early_setup returned value

2016-10-06 Thread Eric Auger

The returned value is not used anymore by the caller, vfio_realize,
since the error now is stored in the error object. So let's remove it.

Signed-off-by: Eric Auger 
Reviewed-by: Markus Armbruster 

---

Logically we could do that job for all the functions now getting an
Error object passed as a parameter to avoid duplicate information
between the error content and the returned value. This requires using
a local error object in vfio_realize. So I am not sure this is worth
the candle.
---
 hw/vfio/pci.c | 20 ++--
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index d9652c2..f063c65 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -1290,7 +1290,7 @@ static void vfio_pci_fixup_msix_region(VFIOPCIDevice 
*vdev)
  * need to first look for where the MSI-X table lives.  So we
  * unfortunately split MSI-X setup across two functions.
  */
-static int vfio_msix_early_setup(VFIOPCIDevice *vdev, Error **errp)
+static void vfio_msix_early_setup(VFIOPCIDevice *vdev, Error **errp)
 {
 uint8_t pos;
 uint16_t ctrl;
@@ -1300,25 +1300,25 @@ static int vfio_msix_early_setup(VFIOPCIDevice *vdev, 
Error **errp)
 
 pos = pci_find_capability(&vdev->pdev, PCI_CAP_ID_MSIX);
 if (!pos) {
-return 0;
+return;
 }
 
 if (pread(fd, &ctrl, sizeof(ctrl),
   vdev->config_offset + pos + PCI_MSIX_FLAGS) != sizeof(ctrl)) {
 error_setg_errno(errp, errno, "failed to read PCI MSIX FLAGS");
-return -errno;
+return;
 }
 
 if (pread(fd, &table, sizeof(table),
   vdev->config_offset + pos + PCI_MSIX_TABLE) != sizeof(table)) {
 error_setg_errno(errp, errno, "failed to read PCI MSIX TABLE");
-return -errno;
+return;
 }
 
 if (pread(fd, &pba, sizeof(pba),
   vdev->config_offset + pos + PCI_MSIX_PBA) != sizeof(pba)) {
 error_setg_errno(errp, errno, "failed to read PCI MSIX PBA");
-return -errno;
+return;
 }
 
 ctrl = le16_to_cpu(ctrl);
@@ -1351,7 +1351,7 @@ static int vfio_msix_early_setup(VFIOPCIDevice *vdev, 
Error **errp)
 error_setg(errp, "hardware reports invalid configuration, "
"MSIX PBA outside of specified BAR");
 g_free(msix);
-return -EINVAL;
+return;
 }
 }
 
@@ -1360,8 +1360,6 @@ static int vfio_msix_early_setup(VFIOPCIDevice *vdev, 
Error **errp)
 vdev->msix = msix;
 
 vfio_pci_fixup_msix_region(vdev);
-
-return 0;
 }
 
 static int vfio_msix_setup(VFIOPCIDevice *vdev, int pos, Error **errp)
@@ -2519,6 +2517,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
 VFIODevice *vbasedev_iter;
 VFIOGroup *group;
 char *tmp, group_path[PATH_MAX], *group_name;
+Error *err = NULL;
 ssize_t len;
 struct stat st;
 int groupid;
@@ -2670,8 +2669,9 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
 
 vfio_pci_size_rom(vdev);
 
-ret = vfio_msix_early_setup(vdev, errp);
-if (ret) {
+vfio_msix_early_setup(vdev, &err);
+if (err) {
+error_propagate(errp, err);
 goto error;
 }
 
-- 
1.9.1

1 2 3 4 >

1 - 100 of 337 matches

Mail list logo