Re: [Qemu-devel] [PATCH qemu v3] RFC: vfio-pci: Allow mmap of MSIX BAR

2018-01-24 Thread Alex Williamson
On Wed, 24 Jan 2018 16:30:04 +0100
Auger Eric  wrote:

> Hi Alex,
> On 24/01/18 16:02, Alex Williamson wrote:
> > On Wed, 24 Jan 2018 15:13:06 +0100
> > Auger Eric  wrote:
> >   
> >> Hi Alex,
> >> On 23/01/18 20:55, Alex Williamson wrote:  
> >>> On Tue, 23 Jan 2018 16:34:34 +0100
> >>> Auger Eric  wrote:
> >>> 
>  Hi Alexey,
>  On 23/01/18 02:22, Alexey Kardashevskiy wrote:
> > This makes use of a new VFIO_REGION_INFO_CAP_MSIX_MAPPABLE capability
> > which tells that a region with MSIX data can be mapped entirely, i.e.
> > the VFIO PCI driver won't prevent MSIX vectors area from being mapped.
> >
> > With this change, all BARs are mapped in a single chunk and MSIX vectors
> > are emulated on top unless the machine requests not to by defining and
> > enabling a new "vfio-no-msix-emulation" property. At the moment only
> > sPAPR machine does so - it prohibits MSIX emulation and does not allow
> > enabling it as it does not define the "set" callback for the new 
> > property;
> > the new property also does not appear in "-machine pseries,help".
> >
> > If MSIX vectors section is not aligned to the page size, the KVM memory
> > listener does not register it with the KVM as a memory slot and MSIX is
> > emulated by QEMU as before. This may create MMIO RAM memory sections 
> > with
> > an address or/and a size not aligned which will make vfio_dma_map() 
> > fail;
> > > to address this, this patch treats such failures as non-fatal.
> >
> > This requires the kernel change - "vfio-pci: Allow mapping MSIX BAR" -
> > for the new capability: https://www.spinics.net/lists/kvm/msg160282.html
> >
> > Signed-off-by: Alexey Kardashevskiy 
> > ---
> > Changes:
> > v3:
> > * vfio_listener_region_add() won't make qemu exit if failed on MMIO MR
> >
> > ---
> >  include/hw/vfio/vfio-common.h |  1 +
> >  linux-headers/linux/vfio.h|  5 +
> >  hw/ppc/spapr.c|  7 +++
> >  hw/vfio/common.c  | 19 +++
> >  hw/vfio/pci.c | 10 ++
> >  5 files changed, 42 insertions(+)
> >
> > diff --git a/include/hw/vfio/vfio-common.h 
> > b/include/hw/vfio/vfio-common.h
> > index f3a2ac9..927d600 100644
> > --- a/include/hw/vfio/vfio-common.h
> > +++ b/include/hw/vfio/vfio-common.h
> > @@ -171,6 +171,7 @@ int vfio_get_region_info(VFIODevice *vbasedev, int 
> > index,
> >   struct vfio_region_info **info);
> >  int vfio_get_dev_region_info(VFIODevice *vbasedev, uint32_t type,
> >   uint32_t subtype, struct vfio_region_info 
> > **info);
> > +bool vfio_is_cap_present(VFIODevice *vbasedev, uint16_t cap_type, int 
> > region);
> >  #endif
> >  extern const MemoryListener vfio_prereg_listener;
> >  
> > diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h
> > index 4312e96..b45182e 100644
> > --- a/linux-headers/linux/vfio.h
> > +++ b/linux-headers/linux/vfio.h
> > @@ -301,6 +301,11 @@ struct vfio_region_info_cap_type {
> >  #define VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG (2)
> >  #define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG  (3)
> >  
> > +/*
> > + * The MSIX mappable capability informs that MSIX data of a BAR can be 
> > mmapped.
> > + */
> > +#define VFIO_REGION_INFO_CAP_MSIX_MAPPABLE 3
> > +
> >  /**
> >   * VFIO_DEVICE_GET_IRQ_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 9,
> >   * struct vfio_irq_info)
> > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> > index d1acfe8..5ff43ce 100644
> > --- a/hw/ppc/spapr.c
> > +++ b/hw/ppc/spapr.c
> > @@ -2789,6 +2789,11 @@ static void 
> > spapr_set_modern_hotplug_events(Object *obj, bool value,
> >  spapr->use_hotplug_event_source = value;
> >  }
> >  
> > +static bool spapr_get_msix_emulation(Object *obj, Error **errp)
> > +{
> > +return true;
> > +}
> > +
> >  static char *spapr_get_resize_hpt(Object *obj, Error **errp)
> >  {
> >  sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
> > @@ -2870,6 +2875,8 @@ static void spapr_instance_init(Object *obj)
> >  object_property_set_description(obj, "vsmt",
> >  "Virtual SMT: KVM behaves as if 
> > this were"
> >  " the host's SMT mode", 
> > &error_abort);
> > +object_property_add_bool(obj, "vfio-no-msix-emulation",
> > + spapr_get_msix_emulation, NULL, NULL);
> >  }
> >  
> >  static void spapr_machine_finalizefn(Object *obj)
> > diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> > index b77be3a..842c5b2 100644
> > --- a/hw/vfio/common.c
> > +++ b/hw/vfio/common.c
>

Re: [Qemu-devel] [PATCH qemu v3] RFC: vfio-pci: Allow mmap of MSIX BAR

2018-01-24 Thread Auger Eric
Hi Alex,
On 24/01/18 16:02, Alex Williamson wrote:
> On Wed, 24 Jan 2018 15:13:06 +0100
> Auger Eric  wrote:
> 
>> Hi Alex,
>> On 23/01/18 20:55, Alex Williamson wrote:
>>> On Tue, 23 Jan 2018 16:34:34 +0100
>>> Auger Eric  wrote:
>>>   
 Hi Alexey,
 On 23/01/18 02:22, Alexey Kardashevskiy wrote:  
> This makes use of a new VFIO_REGION_INFO_CAP_MSIX_MAPPABLE capability
> which tells that a region with MSIX data can be mapped entirely, i.e.
> the VFIO PCI driver won't prevent MSIX vectors area from being mapped.
>
> With this change, all BARs are mapped in a single chunk and MSIX vectors
> are emulated on top unless the machine requests not to by defining and
> enabling a new "vfio-no-msix-emulation" property. At the moment only
> sPAPR machine does so - it prohibits MSIX emulation and does not allow
> enabling it as it does not define the "set" callback for the new property;
> the new property also does not appear in "-machine pseries,help".
>
> If MSIX vectors section is not aligned to the page size, the KVM memory
> listener does not register it with the KVM as a memory slot and MSIX is
> emulated by QEMU as before. This may create MMIO RAM memory sections with
> an address or/and a size not aligned which will make vfio_dma_map() fail;
> to address this, this patch treats such failures as non-fatal.
>
> This requires the kernel change - "vfio-pci: Allow mapping MSIX BAR" -
> for the new capability: https://www.spinics.net/lists/kvm/msg160282.html
>
> Signed-off-by: Alexey Kardashevskiy 
> ---
> Changes:
> v3:
> * vfio_listener_region_add() won't make qemu exit if failed on MMIO MR
>
> ---
>  include/hw/vfio/vfio-common.h |  1 +
>  linux-headers/linux/vfio.h|  5 +
>  hw/ppc/spapr.c|  7 +++
>  hw/vfio/common.c  | 19 +++
>  hw/vfio/pci.c | 10 ++
>  5 files changed, 42 insertions(+)
>
> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
> index f3a2ac9..927d600 100644
> --- a/include/hw/vfio/vfio-common.h
> +++ b/include/hw/vfio/vfio-common.h
> @@ -171,6 +171,7 @@ int vfio_get_region_info(VFIODevice *vbasedev, int 
> index,
>   struct vfio_region_info **info);
>  int vfio_get_dev_region_info(VFIODevice *vbasedev, uint32_t type,
>   uint32_t subtype, struct vfio_region_info 
> **info);
> +bool vfio_is_cap_present(VFIODevice *vbasedev, uint16_t cap_type, int 
> region);
>  #endif
>  extern const MemoryListener vfio_prereg_listener;
>  
> diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h
> index 4312e96..b45182e 100644
> --- a/linux-headers/linux/vfio.h
> +++ b/linux-headers/linux/vfio.h
> @@ -301,6 +301,11 @@ struct vfio_region_info_cap_type {
>  #define VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG   (2)
>  #define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG(3)
>  
> +/*
> + * The MSIX mappable capability informs that MSIX data of a BAR can be 
> mmapped.
> + */
> +#define VFIO_REGION_INFO_CAP_MSIX_MAPPABLE   3
> +
>  /**
>   * VFIO_DEVICE_GET_IRQ_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 9,
>   *   struct vfio_irq_info)
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index d1acfe8..5ff43ce 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -2789,6 +2789,11 @@ static void spapr_set_modern_hotplug_events(Object 
> *obj, bool value,
>  spapr->use_hotplug_event_source = value;
>  }
>  
> +static bool spapr_get_msix_emulation(Object *obj, Error **errp)
> +{
> +return true;
> +}
> +
>  static char *spapr_get_resize_hpt(Object *obj, Error **errp)
>  {
>  sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
> @@ -2870,6 +2875,8 @@ static void spapr_instance_init(Object *obj)
>  object_property_set_description(obj, "vsmt",
>  "Virtual SMT: KVM behaves as if this 
> were"
>  " the host's SMT mode", 
> &error_abort);
> +object_property_add_bool(obj, "vfio-no-msix-emulation",
> + spapr_get_msix_emulation, NULL, NULL);
>  }
>  
>  static void spapr_machine_finalizefn(Object *obj)
> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> index b77be3a..842c5b2 100644
> --- a/hw/vfio/common.c
> +++ b/hw/vfio/common.c
> @@ -530,6 +530,10 @@ static void vfio_listener_region_add(MemoryListener 
> *listener,
>  return;
>  
>  fail:
> +if (memory_region_is_ram_device(section->mr)) {
> +error_report("failed to vfio_dma_map. pci p2p may not work");
> +return;
> +}

Re: [Qemu-devel] [PATCH qemu v3] RFC: vfio-pci: Allow mmap of MSIX BAR

2018-01-24 Thread Alex Williamson
On Wed, 24 Jan 2018 15:13:06 +0100
Auger Eric  wrote:

> Hi Alex,
> On 23/01/18 20:55, Alex Williamson wrote:
> > On Tue, 23 Jan 2018 16:34:34 +0100
> > Auger Eric  wrote:
> >   
> >> Hi Alexey,
> >> On 23/01/18 02:22, Alexey Kardashevskiy wrote:  
> >>> This makes use of a new VFIO_REGION_INFO_CAP_MSIX_MAPPABLE capability
> >>> which tells that a region with MSIX data can be mapped entirely, i.e.
> >>> the VFIO PCI driver won't prevent MSIX vectors area from being mapped.
> >>>
> >>> With this change, all BARs are mapped in a single chunk and MSIX vectors
> >>> are emulated on top unless the machine requests not to by defining and
> >>> enabling a new "vfio-no-msix-emulation" property. At the moment only
> >>> sPAPR machine does so - it prohibits MSIX emulation and does not allow
> >>> enabling it as it does not define the "set" callback for the new property;
> >>> the new property also does not appear in "-machine pseries,help".
> >>>
> >>> If MSIX vectors section is not aligned to the page size, the KVM memory
> >>> listener does not register it with the KVM as a memory slot and MSIX is
> >>> emulated by QEMU as before. This may create MMIO RAM memory sections with
> >>> an address or/and a size not aligned which will make vfio_dma_map() fail;
> >>> to address this, this patch treats such failures as non-fatal.
> >>>
> >>> This requires the kernel change - "vfio-pci: Allow mapping MSIX BAR" -
> >>> for the new capability: https://www.spinics.net/lists/kvm/msg160282.html
> >>>
> >>> Signed-off-by: Alexey Kardashevskiy 
> >>> ---
> >>> Changes:
> >>> v3:
> >>> * vfio_listener_region_add() won't make qemu exit if failed on MMIO MR
> >>>
> >>> ---
> >>>  include/hw/vfio/vfio-common.h |  1 +
> >>>  linux-headers/linux/vfio.h|  5 +
> >>>  hw/ppc/spapr.c|  7 +++
> >>>  hw/vfio/common.c  | 19 +++
> >>>  hw/vfio/pci.c | 10 ++
> >>>  5 files changed, 42 insertions(+)
> >>>
> >>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
> >>> index f3a2ac9..927d600 100644
> >>> --- a/include/hw/vfio/vfio-common.h
> >>> +++ b/include/hw/vfio/vfio-common.h
> >>> @@ -171,6 +171,7 @@ int vfio_get_region_info(VFIODevice *vbasedev, int 
> >>> index,
> >>>   struct vfio_region_info **info);
> >>>  int vfio_get_dev_region_info(VFIODevice *vbasedev, uint32_t type,
> >>>   uint32_t subtype, struct vfio_region_info 
> >>> **info);
> >>> +bool vfio_is_cap_present(VFIODevice *vbasedev, uint16_t cap_type, int 
> >>> region);
> >>>  #endif
> >>>  extern const MemoryListener vfio_prereg_listener;
> >>>  
> >>> diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h
> >>> index 4312e96..b45182e 100644
> >>> --- a/linux-headers/linux/vfio.h
> >>> +++ b/linux-headers/linux/vfio.h
> >>> @@ -301,6 +301,11 @@ struct vfio_region_info_cap_type {
> >>>  #define VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG   (2)
> >>>  #define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG(3)
> >>>  
> >>> +/*
> >>> + * The MSIX mappable capability informs that MSIX data of a BAR can be 
> >>> mmapped.
> >>> + */
> >>> +#define VFIO_REGION_INFO_CAP_MSIX_MAPPABLE   3
> >>> +
> >>>  /**
> >>>   * VFIO_DEVICE_GET_IRQ_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 9,
> >>>   *   struct vfio_irq_info)
> >>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> >>> index d1acfe8..5ff43ce 100644
> >>> --- a/hw/ppc/spapr.c
> >>> +++ b/hw/ppc/spapr.c
> >>> @@ -2789,6 +2789,11 @@ static void spapr_set_modern_hotplug_events(Object 
> >>> *obj, bool value,
> >>>  spapr->use_hotplug_event_source = value;
> >>>  }
> >>>  
> >>> +static bool spapr_get_msix_emulation(Object *obj, Error **errp)
> >>> +{
> >>> +return true;
> >>> +}
> >>> +
> >>>  static char *spapr_get_resize_hpt(Object *obj, Error **errp)
> >>>  {
> >>>  sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
> >>> @@ -2870,6 +2875,8 @@ static void spapr_instance_init(Object *obj)
> >>>  object_property_set_description(obj, "vsmt",
> >>>  "Virtual SMT: KVM behaves as if this 
> >>> were"
> >>>  " the host's SMT mode", 
> >>> &error_abort);
> >>> +object_property_add_bool(obj, "vfio-no-msix-emulation",
> >>> + spapr_get_msix_emulation, NULL, NULL);
> >>>  }
> >>>  
> >>>  static void spapr_machine_finalizefn(Object *obj)
> >>> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> >>> index b77be3a..842c5b2 100644
> >>> --- a/hw/vfio/common.c
> >>> +++ b/hw/vfio/common.c
> >>> @@ -530,6 +530,10 @@ static void vfio_listener_region_add(MemoryListener 
> >>> *listener,
> >>>  return;
> >>>  
> >>>  fail:
> >>> +if (memory_region_is_ram_device(section->mr)) {
> >>> +error_report("failed to vfio_dma_map. pci p2p may not work");
> >>> +return;
> >>> +}
> >>
> >> I don't think this is an acceptable solution as 

Re: [Qemu-devel] [PATCH qemu v3] RFC: vfio-pci: Allow mmap of MSIX BAR

2018-01-24 Thread Auger Eric
Hi Alex,
On 23/01/18 20:55, Alex Williamson wrote:
> On Tue, 23 Jan 2018 16:34:34 +0100
> Auger Eric  wrote:
> 
>> Hi Alexey,
>> On 23/01/18 02:22, Alexey Kardashevskiy wrote:
>>> This makes use of a new VFIO_REGION_INFO_CAP_MSIX_MAPPABLE capability
>>> which tells that a region with MSIX data can be mapped entirely, i.e.
>>> the VFIO PCI driver won't prevent MSIX vectors area from being mapped.
>>>
>>> With this change, all BARs are mapped in a single chunk and MSIX vectors
>>> are emulated on top unless the machine requests not to by defining and
>>> enabling a new "vfio-no-msix-emulation" property. At the moment only
>>> sPAPR machine does so - it prohibits MSIX emulation and does not allow
>>> enabling it as it does not define the "set" callback for the new property;
>>> the new property also does not appear in "-machine pseries,help".
>>>
>>> If MSIX vectors section is not aligned to the page size, the KVM memory
>>> listener does not register it with the KVM as a memory slot and MSIX is
>>> emulated by QEMU as before. This may create MMIO RAM memory sections with
>>> an address or/and a size not aligned which will make vfio_dma_map() fail;
>>> to address this, this patch treats such failures as non-fatal.
>>>
>>> This requires the kernel change - "vfio-pci: Allow mapping MSIX BAR" -
>>> for the new capability: https://www.spinics.net/lists/kvm/msg160282.html
>>>
>>> Signed-off-by: Alexey Kardashevskiy 
>>> ---
>>> Changes:
>>> v3:
>>> * vfio_listener_region_add() won't make qemu exit if failed on MMIO MR
>>>
>>> ---
>>>  include/hw/vfio/vfio-common.h |  1 +
>>>  linux-headers/linux/vfio.h|  5 +
>>>  hw/ppc/spapr.c|  7 +++
>>>  hw/vfio/common.c  | 19 +++
>>>  hw/vfio/pci.c | 10 ++
>>>  5 files changed, 42 insertions(+)
>>>
>>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
>>> index f3a2ac9..927d600 100644
>>> --- a/include/hw/vfio/vfio-common.h
>>> +++ b/include/hw/vfio/vfio-common.h
>>> @@ -171,6 +171,7 @@ int vfio_get_region_info(VFIODevice *vbasedev, int 
>>> index,
>>>   struct vfio_region_info **info);
>>>  int vfio_get_dev_region_info(VFIODevice *vbasedev, uint32_t type,
>>>   uint32_t subtype, struct vfio_region_info 
>>> **info);
>>> +bool vfio_is_cap_present(VFIODevice *vbasedev, uint16_t cap_type, int 
>>> region);
>>>  #endif
>>>  extern const MemoryListener vfio_prereg_listener;
>>>  
>>> diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h
>>> index 4312e96..b45182e 100644
>>> --- a/linux-headers/linux/vfio.h
>>> +++ b/linux-headers/linux/vfio.h
>>> @@ -301,6 +301,11 @@ struct vfio_region_info_cap_type {
>>>  #define VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG (2)
>>>  #define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG  (3)
>>>  
>>> +/*
>>> + * The MSIX mappable capability informs that MSIX data of a BAR can be 
>>> mmapped.
>>> + */
>>> +#define VFIO_REGION_INFO_CAP_MSIX_MAPPABLE 3
>>> +
>>>  /**
>>>   * VFIO_DEVICE_GET_IRQ_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 9,
>>>   * struct vfio_irq_info)
>>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
>>> index d1acfe8..5ff43ce 100644
>>> --- a/hw/ppc/spapr.c
>>> +++ b/hw/ppc/spapr.c
>>> @@ -2789,6 +2789,11 @@ static void spapr_set_modern_hotplug_events(Object 
>>> *obj, bool value,
>>>  spapr->use_hotplug_event_source = value;
>>>  }
>>>  
>>> +static bool spapr_get_msix_emulation(Object *obj, Error **errp)
>>> +{
>>> +return true;
>>> +}
>>> +
>>>  static char *spapr_get_resize_hpt(Object *obj, Error **errp)
>>>  {
>>>  sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
>>> @@ -2870,6 +2875,8 @@ static void spapr_instance_init(Object *obj)
>>>  object_property_set_description(obj, "vsmt",
>>>  "Virtual SMT: KVM behaves as if this 
>>> were"
>>>  " the host's SMT mode", &error_abort);
>>> +object_property_add_bool(obj, "vfio-no-msix-emulation",
>>> + spapr_get_msix_emulation, NULL, NULL);
>>>  }
>>>  
>>>  static void spapr_machine_finalizefn(Object *obj)
>>> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
>>> index b77be3a..842c5b2 100644
>>> --- a/hw/vfio/common.c
>>> +++ b/hw/vfio/common.c
>>> @@ -530,6 +530,10 @@ static void vfio_listener_region_add(MemoryListener 
>>> *listener,
>>>  return;
>>>  
>>>  fail:
>>> +if (memory_region_is_ram_device(section->mr)) {
>>> +error_report("failed to vfio_dma_map. pci p2p may not work");
>>> +return;
>>> +}  
>>
>> I don't think this is an acceptable solution as it produces plenty of
>> errors such as
>>
>> qemu-system-aarch64: VFIO_MAP_DMA: -22
>> qemu-system-aarch64: vfio_dma_map(0x30a7ab90, 0x80, 0x2000,
>> 0xfffd79a0) = -22 (Invalid argument)
> 
> This much of the error above could be avoided by looking at the type1
> page size bitmap before tryin

Re: [Qemu-devel] [PATCH qemu v3] RFC: vfio-pci: Allow mmap of MSIX BAR

2018-01-23 Thread Alex Williamson
On Tue, 23 Jan 2018 16:34:34 +0100
Auger Eric  wrote:

> Hi Alexey,
> On 23/01/18 02:22, Alexey Kardashevskiy wrote:
> > This makes use of a new VFIO_REGION_INFO_CAP_MSIX_MAPPABLE capability
> > which tells that a region with MSIX data can be mapped entirely, i.e.
> > the VFIO PCI driver won't prevent MSIX vectors area from being mapped.
> > 
> > With this change, all BARs are mapped in a single chunk and MSIX vectors
> > are emulated on top unless the machine requests not to by defining and
> > enabling a new "vfio-no-msix-emulation" property. At the moment only
> > sPAPR machine does so - it prohibits MSIX emulation and does not allow
> > enabling it as it does not define the "set" callback for the new property;
> > the new property also does not appear in "-machine pseries,help".
> > 
> > If MSIX vectors section is not aligned to the page size, the KVM memory
> > listener does not register it with the KVM as a memory slot and MSIX is
> > emulated by QEMU as before. This may create MMIO RAM memory sections with
> > an address or/and a size not aligned which will make vfio_dma_map() fail;
> > to address this, this patch treats such failures as non-fatal.
> > 
> > This requires the kernel change - "vfio-pci: Allow mapping MSIX BAR" -
> > for the new capability: https://www.spinics.net/lists/kvm/msg160282.html
> > 
> > Signed-off-by: Alexey Kardashevskiy 
> > ---
> > Changes:
> > v3:
> > * vfio_listener_region_add() won't make qemu exit if failed on MMIO MR
> > 
> > ---
> >  include/hw/vfio/vfio-common.h |  1 +
> >  linux-headers/linux/vfio.h|  5 +
> >  hw/ppc/spapr.c|  7 +++
> >  hw/vfio/common.c  | 19 +++
> >  hw/vfio/pci.c | 10 ++
> >  5 files changed, 42 insertions(+)
> > 
> > diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
> > index f3a2ac9..927d600 100644
> > --- a/include/hw/vfio/vfio-common.h
> > +++ b/include/hw/vfio/vfio-common.h
> > @@ -171,6 +171,7 @@ int vfio_get_region_info(VFIODevice *vbasedev, int 
> > index,
> >   struct vfio_region_info **info);
> >  int vfio_get_dev_region_info(VFIODevice *vbasedev, uint32_t type,
> >   uint32_t subtype, struct vfio_region_info 
> > **info);
> > +bool vfio_is_cap_present(VFIODevice *vbasedev, uint16_t cap_type, int 
> > region);
> >  #endif
> >  extern const MemoryListener vfio_prereg_listener;
> >  
> > diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h
> > index 4312e96..b45182e 100644
> > --- a/linux-headers/linux/vfio.h
> > +++ b/linux-headers/linux/vfio.h
> > @@ -301,6 +301,11 @@ struct vfio_region_info_cap_type {
> >  #define VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG (2)
> >  #define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG  (3)
> >  
> > +/*
> > + * The MSIX mappable capability informs that MSIX data of a BAR can be 
> > mmapped.
> > + */
> > +#define VFIO_REGION_INFO_CAP_MSIX_MAPPABLE 3
> > +
> >  /**
> >   * VFIO_DEVICE_GET_IRQ_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 9,
> >   * struct vfio_irq_info)
> > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> > index d1acfe8..5ff43ce 100644
> > --- a/hw/ppc/spapr.c
> > +++ b/hw/ppc/spapr.c
> > @@ -2789,6 +2789,11 @@ static void spapr_set_modern_hotplug_events(Object 
> > *obj, bool value,
> >  spapr->use_hotplug_event_source = value;
> >  }
> >  
> > +static bool spapr_get_msix_emulation(Object *obj, Error **errp)
> > +{
> > +return true;
> > +}
> > +
> >  static char *spapr_get_resize_hpt(Object *obj, Error **errp)
> >  {
> >  sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
> > @@ -2870,6 +2875,8 @@ static void spapr_instance_init(Object *obj)
> >  object_property_set_description(obj, "vsmt",
> >  "Virtual SMT: KVM behaves as if this 
> > were"
> >  " the host's SMT mode", &error_abort);
> > +object_property_add_bool(obj, "vfio-no-msix-emulation",
> > + spapr_get_msix_emulation, NULL, NULL);
> >  }
> >  
> >  static void spapr_machine_finalizefn(Object *obj)
> > diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> > index b77be3a..842c5b2 100644
> > --- a/hw/vfio/common.c
> > +++ b/hw/vfio/common.c
> > @@ -530,6 +530,10 @@ static void vfio_listener_region_add(MemoryListener 
> > *listener,
> >  return;
> >  
> >  fail:
> > +if (memory_region_is_ram_device(section->mr)) {
> > +error_report("failed to vfio_dma_map. pci p2p may not work");
> > +return;
> > +}  
> 
> I don't think this is an acceptable solution as it produces plenty of
> errors such as
> 
> qemu-system-aarch64: VFIO_MAP_DMA: -22
> qemu-system-aarch64: vfio_dma_map(0x30a7ab90, 0x80, 0x2000,
> 0xfffd79a0) = -22 (Invalid argument)

This much of the error above could be avoided by looking at the type1
page size bitmap before trying to perform the mapping.

> qemu-system-aarch64: failed to vfio

Re: [Qemu-devel] [PATCH qemu v3] RFC: vfio-pci: Allow mmap of MSIX BAR

2018-01-23 Thread Auger Eric
Hi Alexey,
On 23/01/18 02:22, Alexey Kardashevskiy wrote:
> This makes use of a new VFIO_REGION_INFO_CAP_MSIX_MAPPABLE capability
> which tells that a region with MSIX data can be mapped entirely, i.e.
> the VFIO PCI driver won't prevent MSIX vectors area from being mapped.
> 
> With this change, all BARs are mapped in a single chunk and MSIX vectors
> are emulated on top unless the machine requests not to by defining and
> enabling a new "vfio-no-msix-emulation" property. At the moment only
> sPAPR machine does so - it prohibits MSIX emulation and does not allow
> enabling it as it does not define the "set" callback for the new property;
> the new property also does not appear in "-machine pseries,help".
> 
> If MSIX vectors section is not aligned to the page size, the KVM memory
> listener does not register it with the KVM as a memory slot and MSIX is
> emulated by QEMU as before. This may create MMIO RAM memory sections with
> an address or/and a size not aligned which will make vfio_dma_map() fail;
> to address this, this patch treats such failures as non-fatal.
> 
> This requires the kernel change - "vfio-pci: Allow mapping MSIX BAR" -
> for the new capability: https://www.spinics.net/lists/kvm/msg160282.html
> 
> Signed-off-by: Alexey Kardashevskiy 
> ---
> Changes:
> v3:
> * vfio_listener_region_add() won't make qemu exit if failed on MMIO MR
> 
> ---
>  include/hw/vfio/vfio-common.h |  1 +
>  linux-headers/linux/vfio.h|  5 +
>  hw/ppc/spapr.c|  7 +++
>  hw/vfio/common.c  | 19 +++
>  hw/vfio/pci.c | 10 ++
>  5 files changed, 42 insertions(+)
> 
> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
> index f3a2ac9..927d600 100644
> --- a/include/hw/vfio/vfio-common.h
> +++ b/include/hw/vfio/vfio-common.h
> @@ -171,6 +171,7 @@ int vfio_get_region_info(VFIODevice *vbasedev, int index,
>   struct vfio_region_info **info);
>  int vfio_get_dev_region_info(VFIODevice *vbasedev, uint32_t type,
>   uint32_t subtype, struct vfio_region_info 
> **info);
> +bool vfio_is_cap_present(VFIODevice *vbasedev, uint16_t cap_type, int 
> region);
>  #endif
>  extern const MemoryListener vfio_prereg_listener;
>  
> diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h
> index 4312e96..b45182e 100644
> --- a/linux-headers/linux/vfio.h
> +++ b/linux-headers/linux/vfio.h
> @@ -301,6 +301,11 @@ struct vfio_region_info_cap_type {
>  #define VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG   (2)
>  #define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG(3)
>  
> +/*
> + * The MSIX mappable capability informs that MSIX data of a BAR can be 
> mmapped.
> + */
> +#define VFIO_REGION_INFO_CAP_MSIX_MAPPABLE   3
> +
>  /**
>   * VFIO_DEVICE_GET_IRQ_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 9,
>   *   struct vfio_irq_info)
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index d1acfe8..5ff43ce 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -2789,6 +2789,11 @@ static void spapr_set_modern_hotplug_events(Object 
> *obj, bool value,
>  spapr->use_hotplug_event_source = value;
>  }
>  
> +static bool spapr_get_msix_emulation(Object *obj, Error **errp)
> +{
> +return true;
> +}
> +
>  static char *spapr_get_resize_hpt(Object *obj, Error **errp)
>  {
>  sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
> @@ -2870,6 +2875,8 @@ static void spapr_instance_init(Object *obj)
>  object_property_set_description(obj, "vsmt",
>  "Virtual SMT: KVM behaves as if this 
> were"
>  " the host's SMT mode", &error_abort);
> +object_property_add_bool(obj, "vfio-no-msix-emulation",
> + spapr_get_msix_emulation, NULL, NULL);
>  }
>  
>  static void spapr_machine_finalizefn(Object *obj)
> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> index b77be3a..842c5b2 100644
> --- a/hw/vfio/common.c
> +++ b/hw/vfio/common.c
> @@ -530,6 +530,10 @@ static void vfio_listener_region_add(MemoryListener 
> *listener,
>  return;
>  
>  fail:
> +if (memory_region_is_ram_device(section->mr)) {
> +error_report("failed to vfio_dma_map. pci p2p may not work");
> +return;
> +}

I don't think this is an acceptable solution as it produces plenty of
errors such as

qemu-system-aarch64: VFIO_MAP_DMA: -22
qemu-system-aarch64: vfio_dma_map(0x30a7ab90, 0x80, 0x2000,
0xfffd79a0) = -22 (Invalid argument)
qemu-system-aarch64: failed to vfio_dma_map. pci p2p may not work


Testing context: aarch64 + Mellanox CX-4 PF, where we still try to map
the BAR0 area [0 - 0x2000] preceding the MSI-X table, which is not aligned
to the host 64kB page size.

Region 0: Memory at 804 (64-bit, prefetchable) [size=32M]
Capabilities: [9c] MSI-X: Enable+ Count=64 Masked-
Vector table: BAR=0 offset=2000
PBA: BAR=0 offset=3

[Qemu-devel] [PATCH qemu v3] RFC: vfio-pci: Allow mmap of MSIX BAR

2018-01-22 Thread Alexey Kardashevskiy
This makes use of a new VFIO_REGION_INFO_CAP_MSIX_MAPPABLE capability
which tells that a region with MSIX data can be mapped entirely, i.e.
the VFIO PCI driver won't prevent MSIX vectors area from being mapped.

With this change, all BARs are mapped in a single chunk and MSIX vectors
are emulated on top unless the machine requests not to by defining and
enabling a new "vfio-no-msix-emulation" property. At the moment only
sPAPR machine does so - it prohibits MSIX emulation and does not allow
enabling it as it does not define the "set" callback for the new property;
the new property also does not appear in "-machine pseries,help".

If MSIX vectors section is not aligned to the page size, the KVM memory
listener does not register it with the KVM as a memory slot and MSIX is
emulated by QEMU as before. This may create MMIO RAM memory sections with
an address or/and a size not aligned which will make vfio_dma_map() fail;
to address this, this patch treats such failures as non-fatal.

This requires the kernel change - "vfio-pci: Allow mapping MSIX BAR" -
for the new capability: https://www.spinics.net/lists/kvm/msg160282.html

Signed-off-by: Alexey Kardashevskiy 
---
Changes:
v3:
* vfio_listener_region_add() won't make qemu exit if failed on MMIO MR

---
 include/hw/vfio/vfio-common.h |  1 +
 linux-headers/linux/vfio.h|  5 +
 hw/ppc/spapr.c|  7 +++
 hw/vfio/common.c  | 19 +++
 hw/vfio/pci.c | 10 ++
 5 files changed, 42 insertions(+)

diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index f3a2ac9..927d600 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -171,6 +171,7 @@ int vfio_get_region_info(VFIODevice *vbasedev, int index,
  struct vfio_region_info **info);
 int vfio_get_dev_region_info(VFIODevice *vbasedev, uint32_t type,
  uint32_t subtype, struct vfio_region_info **info);
+bool vfio_is_cap_present(VFIODevice *vbasedev, uint16_t cap_type, int region);
 #endif
 extern const MemoryListener vfio_prereg_listener;
 
diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h
index 4312e96..b45182e 100644
--- a/linux-headers/linux/vfio.h
+++ b/linux-headers/linux/vfio.h
@@ -301,6 +301,11 @@ struct vfio_region_info_cap_type {
 #define VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG (2)
 #define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG  (3)
 
+/*
+ * The MSIX mappable capability informs that MSIX data of a BAR can be mmapped.
+ */
+#define VFIO_REGION_INFO_CAP_MSIX_MAPPABLE 3
+
 /**
  * VFIO_DEVICE_GET_IRQ_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 9,
  * struct vfio_irq_info)
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index d1acfe8..5ff43ce 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -2789,6 +2789,11 @@ static void spapr_set_modern_hotplug_events(Object *obj, 
bool value,
 spapr->use_hotplug_event_source = value;
 }
 
+static bool spapr_get_msix_emulation(Object *obj, Error **errp)
+{
+return true;
+}
+
 static char *spapr_get_resize_hpt(Object *obj, Error **errp)
 {
 sPAPRMachineState *spapr = SPAPR_MACHINE(obj);
@@ -2870,6 +2875,8 @@ static void spapr_instance_init(Object *obj)
 object_property_set_description(obj, "vsmt",
 "Virtual SMT: KVM behaves as if this were"
 " the host's SMT mode", &error_abort);
+object_property_add_bool(obj, "vfio-no-msix-emulation",
+ spapr_get_msix_emulation, NULL, NULL);
 }
 
 static void spapr_machine_finalizefn(Object *obj)
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index b77be3a..842c5b2 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -530,6 +530,10 @@ static void vfio_listener_region_add(MemoryListener 
*listener,
 return;
 
 fail:
+if (memory_region_is_ram_device(section->mr)) {
+error_report("failed to vfio_dma_map. pci p2p may not work");
+return;
+}
 /*
  * On the initfn path, store the first error in the container so we
  * can gracefully fail.  Runtime, there's not much we can do other
@@ -1387,6 +1391,21 @@ int vfio_get_dev_region_info(VFIODevice *vbasedev, 
uint32_t type,
 return -ENODEV;
 }
 
+bool vfio_is_cap_present(VFIODevice *vbasedev, uint16_t cap_type, int region)
+{
+struct vfio_region_info *info = NULL;
+bool ret = false;
+
+if (!vfio_get_region_info(vbasedev, region, &info)) {
+if (vfio_get_region_info_cap(info, cap_type)) {
+ret = true;
+}
+g_free(info);
+}
+
+return ret;
+}
+
 /*
  * Interfaces for IBM EEH (Enhanced Error Handling)
  */
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 359a8f1..a96ece6 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -1289,6 +1289,11 @@ static void vfio_pci_fixup_msix_region(VFIOPCIDevice 
*vdev)
 off_t start, end;
 VFIORegion *region = &vdev->bars[vdev->msix->tabl