Hi Shameer, Qubingbing
On 11/23/20 1:51 PM, Shameerali Kolothum Thodi wrote:
> Hi Eric,
> 
>> -----Original Message-----
>> From: Eric Auger [mailto:eric.au...@redhat.com]
>> Sent: 16 November 2020 11:00
>> To: eric.auger....@gmail.com; eric.au...@redhat.com;
>> iommu@lists.linux-foundation.org; linux-ker...@vger.kernel.org;
>> k...@vger.kernel.org; kvm...@lists.cs.columbia.edu; w...@kernel.org;
>> j...@8bytes.org; m...@kernel.org; robin.mur...@arm.com;
>> alex.william...@redhat.com
>> Cc: jean-phili...@linaro.org; zhangfei....@linaro.org;
>> zhangfei....@gmail.com; vivek.gau...@arm.com; Shameerali Kolothum
>> Thodi <shameerali.kolothum.th...@huawei.com>;
>> jacob.jun....@linux.intel.com; yi.l....@intel.com; t...@semihalf.com;
>> nicoleots...@gmail.com; yuzenghui <yuzeng...@huawei.com>
>> Subject: [PATCH v11 08/13] vfio/pci: Add framework for custom interrupt
>> indices
>>
>> Implement IRQ capability chain infrastructure. All interrupt
>> indexes beyond VFIO_PCI_NUM_IRQS are handled as extended
>> interrupts. They are registered with a specific type/subtype
>> and supported flags.
>>
>> Signed-off-by: Eric Auger <eric.au...@redhat.com>
>> ---
>>  drivers/vfio/pci/vfio_pci.c         | 99 +++++++++++++++++++++++------
>>  drivers/vfio/pci/vfio_pci_intrs.c   | 62 ++++++++++++++++++
>>  drivers/vfio/pci/vfio_pci_private.h | 14 ++++
>>  3 files changed, 157 insertions(+), 18 deletions(-)
>>
>> diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
>> index 2a6cc1a87323..93e03a4a5f32 100644
>> --- a/drivers/vfio/pci/vfio_pci.c
>> +++ b/drivers/vfio/pci/vfio_pci.c
>> @@ -608,6 +608,14 @@ static void vfio_pci_disable(struct vfio_pci_device
>> *vdev)
>>
>>      WARN_ON(iommu_unregister_device_fault_handler(&vdev->pdev->dev));
>>
>> +    for (i = 0; i < vdev->num_ext_irqs; i++)
>> +            vfio_pci_set_irqs_ioctl(vdev, VFIO_IRQ_SET_DATA_NONE |
>> +                                    VFIO_IRQ_SET_ACTION_TRIGGER,
>> +                                    VFIO_PCI_NUM_IRQS + i, 0, 0, NULL);
>> +    vdev->num_ext_irqs = 0;
>> +    kfree(vdev->ext_irqs);
>> +    vdev->ext_irqs = NULL;
>> +
>>      /* Device closed, don't need mutex here */
>>      list_for_each_entry_safe(ioeventfd, ioeventfd_tmp,
>>                               &vdev->ioeventfds_list, next) {
>> @@ -823,6 +831,9 @@ static int vfio_pci_get_irq_count(struct vfio_pci_device
>> *vdev, int irq_type)
>>                      return 1;
>>      } else if (irq_type == VFIO_PCI_REQ_IRQ_INDEX) {
>>              return 1;
>> +    } else if (irq_type >= VFIO_PCI_NUM_IRQS &&
>> +               irq_type < VFIO_PCI_NUM_IRQS + vdev->num_ext_irqs) {
>> +            return 1;
>>      }
>>
>>      return 0;
>> @@ -1008,7 +1019,7 @@ static long vfio_pci_ioctl(void *device_data,
>>                      info.flags |= VFIO_DEVICE_FLAGS_RESET;
>>
>>              info.num_regions = VFIO_PCI_NUM_REGIONS + vdev->num_regions;
>> -            info.num_irqs = VFIO_PCI_NUM_IRQS;
>> +            info.num_irqs = VFIO_PCI_NUM_IRQS + vdev->num_ext_irqs;
>>
>>              if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV)) {
>>                      int ret = vfio_pci_info_zdev_add_caps(vdev, &caps);
>> @@ -1187,36 +1198,87 @@ static long vfio_pci_ioctl(void *device_data,
>>
>>      } else if (cmd == VFIO_DEVICE_GET_IRQ_INFO) {
>>              struct vfio_irq_info info;
>> +            struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
>> +            unsigned long capsz;
>>
>>              minsz = offsetofend(struct vfio_irq_info, count);
>>
>> +            /* For backward compatibility, cannot require this */
>> +            capsz = offsetofend(struct vfio_irq_info, cap_offset);
>> +
>>              if (copy_from_user(&info, (void __user *)arg, minsz))
>>                      return -EFAULT;
>>
>> -            if (info.argsz < minsz || info.index >= VFIO_PCI_NUM_IRQS)
>> +            if (info.argsz < minsz ||
>> +                    info.index >= VFIO_PCI_NUM_IRQS + vdev->num_ext_irqs)
>>                      return -EINVAL;
>>
>> -            switch (info.index) {
>> -            case VFIO_PCI_INTX_IRQ_INDEX ... VFIO_PCI_MSIX_IRQ_INDEX:
>> -            case VFIO_PCI_REQ_IRQ_INDEX:
>> -                    break;
>> -            case VFIO_PCI_ERR_IRQ_INDEX:
>> -                    if (pci_is_pcie(vdev->pdev))
>> -                            break;
>> -                    fallthrough;
>> -            default:
>> -                    return -EINVAL;
>> -            }
>> +            if (info.argsz >= capsz)
>> +                    minsz = capsz;
>>
>>              info.flags = VFIO_IRQ_INFO_EVENTFD;
>>
>> -            info.count = vfio_pci_get_irq_count(vdev, info.index);
>> -
>> -            if (info.index == VFIO_PCI_INTX_IRQ_INDEX)
>> +            switch (info.index) {
>> +            case VFIO_PCI_INTX_IRQ_INDEX:
>>                      info.flags |= (VFIO_IRQ_INFO_MASKABLE |
>>                                     VFIO_IRQ_INFO_AUTOMASKED);
>> -            else
>> +                    break;
>> +            case VFIO_PCI_MSI_IRQ_INDEX ... VFIO_PCI_MSIX_IRQ_INDEX:
>> +            case VFIO_PCI_REQ_IRQ_INDEX:
>>                      info.flags |= VFIO_IRQ_INFO_NORESIZE;
>> +                    break;
>> +            case VFIO_PCI_ERR_IRQ_INDEX:
>> +                    info.flags |= VFIO_IRQ_INFO_NORESIZE;
>> +                    if (!pci_is_pcie(vdev->pdev))
>> +                            return -EINVAL;
>> +                    break;
>> +            default:
>> +            {
>> +                    struct vfio_irq_info_cap_type cap_type = {
>> +                            .header.id = VFIO_IRQ_INFO_CAP_TYPE,
>> +                            .header.version = 1 };
>> +                    int ret, i;
>> +
>> +                    if (info.index >= VFIO_PCI_NUM_IRQS +
>> +                                            vdev->num_ext_irqs)
>> +                            return -EINVAL;
>> +                    info.index = array_index_nospec(info.index,
>> +                                                    VFIO_PCI_NUM_IRQS +
>> +                                                    vdev->num_ext_irqs);
>> +                    i = info.index - VFIO_PCI_NUM_IRQS;
>> +
>> +                    info.flags = vdev->ext_irqs[i].flags;
>> +                    cap_type.type = vdev->ext_irqs[i].type;
>> +                    cap_type.subtype = vdev->ext_irqs[i].subtype;
>> +
>> +                    ret = vfio_info_add_capability(&caps,
>> +                                    &cap_type.header,
>> +                                    sizeof(cap_type));
>> +                    if (ret)
>> +                            return ret;
>> +            }
>> +            }
>> +
>> +            info.count = vfio_pci_get_irq_count(vdev, info.index);
>> +
>> +            if (caps.size) {
>> +                    info.flags |= VFIO_IRQ_INFO_FLAG_CAPS;
>> +                    if (info.argsz < sizeof(info) + caps.size) {
>> +                            info.argsz = sizeof(info) + caps.size;
>> +                            info.cap_offset = 0;
>> +                    } else {
>> +                            vfio_info_cap_shift(&caps, sizeof(info));
>> +                            if (copy_to_user((void __user *)arg +
>> +                                              sizeof(info), caps.buf,
>> +                                              caps.size)) {
>> +                                    kfree(caps.buf);
>> +                                    return -EFAULT;
>> +                            }
>> +                            info.cap_offset = sizeof(info);
>> +                    }
>> +
>> +                    kfree(caps.buf);
>> +            }
>>
>>              return copy_to_user((void __user *)arg, &info, minsz) ?
>>                      -EFAULT : 0;
>> @@ -1235,7 +1297,8 @@ static long vfio_pci_ioctl(void *device_data,
>>              max = vfio_pci_get_irq_count(vdev, hdr.index);
>>
>>              ret = vfio_set_irqs_validate_and_prepare(&hdr, max,
>> -                                             VFIO_PCI_NUM_IRQS, &data_size);
>> +                            VFIO_PCI_NUM_IRQS + vdev->num_ext_irqs,
>> +                            &data_size);
>>              if (ret)
>>                      return ret;
>>
>> diff --git a/drivers/vfio/pci/vfio_pci_intrs.c 
>> b/drivers/vfio/pci/vfio_pci_intrs.c
>> index 869dce5f134d..1e785a5f5fb2 100644
>> --- a/drivers/vfio/pci/vfio_pci_intrs.c
>> +++ b/drivers/vfio/pci/vfio_pci_intrs.c
>> @@ -19,6 +19,7 @@
>>  #include <linux/vfio.h>
>>  #include <linux/wait.h>
>>  #include <linux/slab.h>
>> +#include <linux/nospec.h>
>>
>>  #include "vfio_pci_private.h"
>>
>> @@ -635,6 +636,24 @@ static int vfio_pci_set_req_trigger(struct
>> vfio_pci_device *vdev,
>>                                             count, flags, data);
>>  }
>>
>> +static int vfio_pci_set_ext_irq_trigger(struct vfio_pci_device *vdev,
>> +                                    unsigned int index, unsigned int start,
>> +                                    unsigned int count, uint32_t flags,
>> +                                    void *data)
>> +{
>> +    int i;
>> +
>> +    if (start != 0 || count > 1)
>> +            return -EINVAL;
> 
> It looks like we need to add a !vdev->num_ext_irqs check above,
> as vdev->ext_irqs is allocated for the "nested" case only.
> 
> Thanks to qubingbing for reporting the crash below:
> 
> [ 5034.902114] Unable to handle kernel paging request at virtual address ffffffffffffff98
> [ 5034.927645] Mem abort info:
> ...
> [ 5035.088409] pc : vfio_pci_set_ctx_trigger_single+0x2c/0x108 [vfio_pci]
> [ 5035.102140] lr : vfio_pci_set_ext_irq_trigger+0x84/0xa0 [vfio_pci]
> ...
> [ 5035.289655] Call trace:
> [ 5035.294796]  vfio_pci_set_ctx_trigger_single+0x2c/0x108 [vfio_pci]
> [ 5035.307795]  vfio_pci_set_ext_irq_trigger+0x84/0xa0 [vfio_pci]
> [ 5035.320062]  vfio_pci_set_irqs_ioctl+0xf0/0x120 [vfio_pci]
> [ 5035.331601]  vfio_pci_disable+0x60/0x4e0 [vfio_pci]
> [ 5035.341859]  vfio_pci_release+0x74/0xb0 [vfio_pci]
> 
> This is observed when the guest is launched without vsmmu and then powered off with "poweroff".
> 
> estuary:/home/vsmmu$ ./qemu-system-aarch64_vsmmu5.20 \
>> -machine virt,gic-version=3 \
>> -cpu host \
>> -smp 1 \
>> -m 1024 \
>> -kernel Image_vsmmu  \
>> -initrd rootfs_vsmmu.cpio.gz  \
>> -device vfio-pci,host=7d:01.0,id=net0 \
>> -net none  \
>> -nographic -D -d -enable-kvm   \
>> -append "rdinit=init console=ttyAMA0 ealycon=pl0ll,0x90000000"
> 
> estuary:/$ poweroff
> 
> Could you please take a look?
Sure, I will.

Thanks

Eric
> 
> Thanks,
> Shameer
>  
> 
>> +    index = array_index_nospec(index,
>> +                               VFIO_PCI_NUM_IRQS + vdev->num_ext_irqs);
>> +    i = index - VFIO_PCI_NUM_IRQS;
>> +
>> +    return vfio_pci_set_ctx_trigger_single(&vdev->ext_irqs[i].trigger,
>> +                                           count, flags, data);
>> +}
>> +
>>  int vfio_pci_set_irqs_ioctl(struct vfio_pci_device *vdev, uint32_t flags,
>>                          unsigned index, unsigned start, unsigned count,
>>                          void *data)
>> @@ -684,6 +703,13 @@ int vfio_pci_set_irqs_ioctl(struct vfio_pci_device
>> *vdev, uint32_t flags,
>>                      break;
>>              }
>>              break;
>> +    default:
>> +            switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
>> +            case VFIO_IRQ_SET_ACTION_TRIGGER:
>> +                    func = vfio_pci_set_ext_irq_trigger;
>> +                    break;
>> +            }
>> +            break;
>>      }
>>
>>      if (!func)
>> @@ -691,3 +717,39 @@ int vfio_pci_set_irqs_ioctl(struct vfio_pci_device
>> *vdev, uint32_t flags,
>>
>>      return func(vdev, index, start, count, flags, data);
>>  }
>> +
>> +int vfio_pci_get_ext_irq_index(struct vfio_pci_device *vdev,
>> +                           unsigned int type, unsigned int subtype)
>> +{
>> +    int i;
>> +
>> +    for (i = 0; i <  vdev->num_ext_irqs; i++) {
>> +            if (vdev->ext_irqs[i].type == type &&
>> +                vdev->ext_irqs[i].subtype == subtype) {
>> +                    return i;
>> +            }
>> +    }
>> +    return -EINVAL;
>> +}
>> +
>> +int vfio_pci_register_irq(struct vfio_pci_device *vdev,
>> +                      unsigned int type, unsigned int subtype,
>> +                      u32 flags)
>> +{
>> +    struct vfio_ext_irq *ext_irqs;
>> +
>> +    ext_irqs = krealloc(vdev->ext_irqs,
>> +                        (vdev->num_ext_irqs + 1) * sizeof(*ext_irqs),
>> +                        GFP_KERNEL);
>> +    if (!ext_irqs)
>> +            return -ENOMEM;
>> +
>> +    vdev->ext_irqs = ext_irqs;
>> +
>> +    vdev->ext_irqs[vdev->num_ext_irqs].type = type;
>> +    vdev->ext_irqs[vdev->num_ext_irqs].subtype = subtype;
>> +    vdev->ext_irqs[vdev->num_ext_irqs].flags = flags;
>> +    vdev->ext_irqs[vdev->num_ext_irqs].trigger = NULL;
>> +    vdev->num_ext_irqs++;
>> +    return 0;
>> +}
>> diff --git a/drivers/vfio/pci/vfio_pci_private.h
>> b/drivers/vfio/pci/vfio_pci_private.h
>> index 1d9b0f648133..e180b5435c8f 100644
>> --- a/drivers/vfio/pci/vfio_pci_private.h
>> +++ b/drivers/vfio/pci/vfio_pci_private.h
>> @@ -77,6 +77,13 @@ struct vfio_pci_region {
>>      u32                             flags;
>>  };
>>
>> +struct vfio_ext_irq {
>> +    u32                             type;
>> +    u32                             subtype;
>> +    u32                             flags;
>> +    struct eventfd_ctx              *trigger;
>> +};
>> +
>>  struct vfio_pci_dummy_resource {
>>      struct resource         resource;
>>      int                     index;
>> @@ -111,6 +118,8 @@ struct vfio_pci_device {
>>      struct vfio_pci_irq_ctx *ctx;
>>      int                     num_ctx;
>>      int                     irq_type;
>> +    struct vfio_ext_irq     *ext_irqs;
>> +    int                     num_ext_irqs;
>>      int                     num_regions;
>>      struct vfio_pci_region  *region;
>>      u8                      msi_qmax;
>> @@ -154,6 +163,11 @@ struct vfio_pci_device {
>>
>>  extern void vfio_pci_intx_mask(struct vfio_pci_device *vdev);
>>  extern void vfio_pci_intx_unmask(struct vfio_pci_device *vdev);
>> +extern int vfio_pci_register_irq(struct vfio_pci_device *vdev,
>> +                             unsigned int type, unsigned int subtype,
>> +                             u32 flags);
>> +extern int vfio_pci_get_ext_irq_index(struct vfio_pci_device *vdev,
>> +                                  unsigned int type, unsigned int subtype);
>>
>>  extern int vfio_pci_set_irqs_ioctl(struct vfio_pci_device *vdev,
>>                                 uint32_t flags, unsigned index,
>> --
>> 2.21.3
> 

_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

Reply via email to