Re: [RFC V1 3/7] x86/ims: Add support for a new IMS irq domain

2019-09-13 Thread Alex Williamson
On Thu, 12 Sep 2019 18:32:04 -0700
Megha Dey  wrote:

> This patch adds support for the creation of a new IMS irq domain. It
> creates a new irq_chip associated with the IMS domain and adds the
> necessary domain operations to it.
> 
> Cc: Jacob Pan 
> Signed-off-by: Sanjay Kumar 
> Signed-off-by: Megha Dey 
> ---
>  arch/x86/include/asm/msi.h   |  4 ++
>  arch/x86/kernel/apic/Makefile|  1 +
>  arch/x86/kernel/apic/ims.c   | 93 
> 
>  arch/x86/kernel/apic/msi.c   |  4 +-
>  drivers/vfio/mdev/mdev_core.c|  6 +++
>  drivers/vfio/mdev/mdev_private.h |  1 -
>  include/linux/mdev.h |  2 +
>  7 files changed, 108 insertions(+), 3 deletions(-)
>  create mode 100644 arch/x86/kernel/apic/ims.c
> 
> diff --git a/arch/x86/include/asm/msi.h b/arch/x86/include/asm/msi.h
> index 25ddd09..51f9d25 100644
> --- a/arch/x86/include/asm/msi.h
> +++ b/arch/x86/include/asm/msi.h
> @@ -11,4 +11,8 @@ int pci_msi_prepare(struct irq_domain *domain, struct 
> device *dev, int nvec,
>  
>  void pci_msi_set_desc(msi_alloc_info_t *arg, struct msi_desc *desc);
>  
> +struct msi_domain_info;
> +
> +irq_hw_number_t msi_get_hwirq(struct msi_domain_info *info,
> + msi_alloc_info_t *arg);
>  #endif /* _ASM_X86_MSI_H */
> diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile
> index a6fcaf16..75a2270 100644
> --- a/arch/x86/kernel/apic/Makefile
> +++ b/arch/x86/kernel/apic/Makefile
> @@ -12,6 +12,7 @@ obj-y   += hw_nmi.o
>  
>  obj-$(CONFIG_X86_IO_APIC)+= io_apic.o
>  obj-$(CONFIG_PCI_MSI)+= msi.o
> +obj-$(CONFIG_MSI_IMS)+= ims.o
>  obj-$(CONFIG_SMP)+= ipi.o
>  
>  ifeq ($(CONFIG_X86_64),y)
> diff --git a/arch/x86/kernel/apic/ims.c b/arch/x86/kernel/apic/ims.c
> new file mode 100644
> index 000..d9808a5
> --- /dev/null
> +++ b/arch/x86/kernel/apic/ims.c
> @@ -0,0 +1,93 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Copyright © 2019 Intel Corporation.
> + *
> + * Author: Megha Dey 
> + */
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +/*
> + * Determine if a dev is mdev or not. Return NULL if not mdev device.
> + * Return mdev's parent dev if success.
> + */
> +static inline struct device *mdev_to_parent(struct device *dev)
> +{
> + struct device *ret = NULL;
> + struct device *(*fn)(struct device *dev);
> + struct bus_type *bus = symbol_get(mdev_bus_type);
> +
> + if (bus && dev->bus == bus) {
> + fn = symbol_get(mdev_dev_to_parent_dev);
> + ret = fn(dev);
> + symbol_put(mdev_dev_to_parent_dev);
> + symbol_put(mdev_bus_type);

Leaks a reference to the mdev module if dev->bus != bus.  The new
version of dev_is_mdev() unconditionally leaks a reference.  Thanks,

Alex

> + }
> +
> + return ret;
> +}
> +
> +static struct pci_dev *ims_get_pci_dev(struct device *dev)
> +{
> + struct pci_dev *pdev;
> +
> + if (dev_is_mdev(dev)) {
> + struct device *parent = mdev_to_parent(dev);
> +
> + pdev = to_pci_dev(parent);
> + } else {
> + pdev = to_pci_dev(dev);
> + }
> +
> + return pdev;
> +}
> +
> +int dev_ims_prepare(struct irq_domain *domain, struct device *dev, int nvec,
> + msi_alloc_info_t *arg)
> +{
> + struct pci_dev *pdev = ims_get_pci_dev(dev);
> +
> + init_irq_alloc_info(arg, NULL);
> + arg->msi_dev = pdev;
> + arg->type = X86_IRQ_ALLOC_TYPE_MSIX;
> +
> + return 0;
> +}
> +EXPORT_SYMBOL_GPL(dev_ims_prepare);
> +
> +#ifdef CONFIG_IRQ_REMAP
> +
> +static struct msi_domain_ops dev_ims_domain_ops = {
> + .get_hwirq  = msi_get_hwirq,
> + .msi_prepare= dev_ims_prepare,
> +};
> +
> +static struct irq_chip dev_ims_ir_controller = {
> + .name   = "IR-DEV-IMS",
> + .irq_unmask = dev_ims_unmask_irq,
> + .irq_mask   = dev_ims_mask_irq,
> + .irq_ack= irq_chip_ack_parent,
> + .irq_retrigger  = irq_chip_retrigger_hierarchy,
> + .irq_set_vcpu_affinity  = irq_chip_set_vcpu_affinity_parent,
> + .flags  = IRQCHIP_SKIP_SET_WAKE,
> + .irq_write_msi_msg  = dev_ims_write_msg,
> +};
> +
> +static struct msi_domain_info ims_ir_domain_info = {
> + .flags  = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
> +   MSI_FLAG_MULTI_PCI_MSI | MSI_FLAG_PCI_MSIX,
> + .ops= _ims_domain_ops,
> + .chip   = _ims_ir_controller,
> + .handler= handle_edge_irq,
> + .handler_name   = "edge",
> +};
> +
> +struct irq_domain *arch_create_ims_irq_domain(struct irq_domain *parent)
> +{
> + return pci_msi_create_irq_domain(NULL, _ir_domain_info, parent);
> +}
> +
> +#endif
> diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c
> index 435bcda..65da813 100644
> --- 

Re: [RFC V1 3/7] x86/ims: Add support for a new IMS irq domain

2019-09-13 Thread Jason Gunthorpe
On Thu, Sep 12, 2019 at 06:32:04PM -0700, Megha Dey wrote:
> +/*
> + * Determine if a dev is mdev or not. Return NULL if not mdev device.
> + * Return mdev's parent dev if success.
> + */
> +static inline struct device *mdev_to_parent(struct device *dev)
> +{
> + struct device *ret = NULL;
> + struct device *(*fn)(struct device *dev);
> + struct bus_type *bus = symbol_get(mdev_bus_type);
> +
> + if (bus && dev->bus == bus) {
> + fn = symbol_get(mdev_dev_to_parent_dev);
> + ret = fn(dev);
> + symbol_put(mdev_dev_to_parent_dev);
> + symbol_put(mdev_bus_type);
> + }

Yuk, arch code should not have special knowledge about vfio-mdev, and
in any case the above way to get it is really gross.

> +static struct msi_domain_ops dev_ims_domain_ops = {
> + .get_hwirq  = msi_get_hwirq,
> + .msi_prepare= dev_ims_prepare,
> +};
> +
> +static struct irq_chip dev_ims_ir_controller = {
> + .name   = "IR-DEV-IMS",
> + .irq_unmask = dev_ims_unmask_irq,
> + .irq_mask   = dev_ims_mask_irq,
> + .irq_ack= irq_chip_ack_parent,
> + .irq_retrigger  = irq_chip_retrigger_hierarchy,
> + .irq_set_vcpu_affinity  = irq_chip_set_vcpu_affinity_parent,
> + .flags  = IRQCHIP_SKIP_SET_WAKE,
> + .irq_write_msi_msg  = dev_ims_write_msg,
> +};

It seems really weird to wrapper an irq_chip like this, if the caller
is supposed to provide the functions more or less directly why not
have the caller create and own the irq chip? Is something in the way
of this? It looked like the platform version of this did the same API approach..

Jason


[RFC V1 3/7] x86/ims: Add support for a new IMS irq domain

2019-09-12 Thread Megha Dey
This patch adds support for the creation of a new IMS irq domain. It
creates a new irq_chip associated with the IMS domain and adds the
necessary domain operations to it.

Cc: Jacob Pan 
Signed-off-by: Sanjay Kumar 
Signed-off-by: Megha Dey 
---
 arch/x86/include/asm/msi.h   |  4 ++
 arch/x86/kernel/apic/Makefile|  1 +
 arch/x86/kernel/apic/ims.c   | 93 
 arch/x86/kernel/apic/msi.c   |  4 +-
 drivers/vfio/mdev/mdev_core.c|  6 +++
 drivers/vfio/mdev/mdev_private.h |  1 -
 include/linux/mdev.h |  2 +
 7 files changed, 108 insertions(+), 3 deletions(-)
 create mode 100644 arch/x86/kernel/apic/ims.c

diff --git a/arch/x86/include/asm/msi.h b/arch/x86/include/asm/msi.h
index 25ddd09..51f9d25 100644
--- a/arch/x86/include/asm/msi.h
+++ b/arch/x86/include/asm/msi.h
@@ -11,4 +11,8 @@ int pci_msi_prepare(struct irq_domain *domain, struct device 
*dev, int nvec,
 
 void pci_msi_set_desc(msi_alloc_info_t *arg, struct msi_desc *desc);
 
+struct msi_domain_info;
+
+irq_hw_number_t msi_get_hwirq(struct msi_domain_info *info,
+   msi_alloc_info_t *arg);
 #endif /* _ASM_X86_MSI_H */
diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile
index a6fcaf16..75a2270 100644
--- a/arch/x86/kernel/apic/Makefile
+++ b/arch/x86/kernel/apic/Makefile
@@ -12,6 +12,7 @@ obj-y += hw_nmi.o
 
 obj-$(CONFIG_X86_IO_APIC)  += io_apic.o
 obj-$(CONFIG_PCI_MSI)  += msi.o
+obj-$(CONFIG_MSI_IMS)  += ims.o
 obj-$(CONFIG_SMP)  += ipi.o
 
 ifeq ($(CONFIG_X86_64),y)
diff --git a/arch/x86/kernel/apic/ims.c b/arch/x86/kernel/apic/ims.c
new file mode 100644
index 000..d9808a5
--- /dev/null
+++ b/arch/x86/kernel/apic/ims.c
@@ -0,0 +1,93 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright © 2019 Intel Corporation.
+ *
+ * Author: Megha Dey 
+ */
+
+#include 
+#include 
+#include 
+#include 
+
+/*
+ * Determine if a dev is mdev or not. Return NULL if not mdev device.
+ * Return mdev's parent dev if success.
+ */
+static inline struct device *mdev_to_parent(struct device *dev)
+{
+   struct device *ret = NULL;
+   struct device *(*fn)(struct device *dev);
+   struct bus_type *bus = symbol_get(mdev_bus_type);
+
+   if (bus && dev->bus == bus) {
+   fn = symbol_get(mdev_dev_to_parent_dev);
+   ret = fn(dev);
+   symbol_put(mdev_dev_to_parent_dev);
+   symbol_put(mdev_bus_type);
+   }
+
+   return ret;
+}
+
+static struct pci_dev *ims_get_pci_dev(struct device *dev)
+{
+   struct pci_dev *pdev;
+
+   if (dev_is_mdev(dev)) {
+   struct device *parent = mdev_to_parent(dev);
+
+   pdev = to_pci_dev(parent);
+   } else {
+   pdev = to_pci_dev(dev);
+   }
+
+   return pdev;
+}
+
+int dev_ims_prepare(struct irq_domain *domain, struct device *dev, int nvec,
+   msi_alloc_info_t *arg)
+{
+   struct pci_dev *pdev = ims_get_pci_dev(dev);
+
+   init_irq_alloc_info(arg, NULL);
+   arg->msi_dev = pdev;
+   arg->type = X86_IRQ_ALLOC_TYPE_MSIX;
+
+   return 0;
+}
+EXPORT_SYMBOL_GPL(dev_ims_prepare);
+
+#ifdef CONFIG_IRQ_REMAP
+
+static struct msi_domain_ops dev_ims_domain_ops = {
+   .get_hwirq  = msi_get_hwirq,
+   .msi_prepare= dev_ims_prepare,
+};
+
+static struct irq_chip dev_ims_ir_controller = {
+   .name   = "IR-DEV-IMS",
+   .irq_unmask = dev_ims_unmask_irq,
+   .irq_mask   = dev_ims_mask_irq,
+   .irq_ack= irq_chip_ack_parent,
+   .irq_retrigger  = irq_chip_retrigger_hierarchy,
+   .irq_set_vcpu_affinity  = irq_chip_set_vcpu_affinity_parent,
+   .flags  = IRQCHIP_SKIP_SET_WAKE,
+   .irq_write_msi_msg  = dev_ims_write_msg,
+};
+
+static struct msi_domain_info ims_ir_domain_info = {
+   .flags  = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
+ MSI_FLAG_MULTI_PCI_MSI | MSI_FLAG_PCI_MSIX,
+   .ops= _ims_domain_ops,
+   .chip   = _ims_ir_controller,
+   .handler= handle_edge_irq,
+   .handler_name   = "edge",
+};
+
+struct irq_domain *arch_create_ims_irq_domain(struct irq_domain *parent)
+{
+   return pci_msi_create_irq_domain(NULL, _ir_domain_info, parent);
+}
+
+#endif
diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c
index 435bcda..65da813 100644
--- a/arch/x86/kernel/apic/msi.c
+++ b/arch/x86/kernel/apic/msi.c
@@ -84,7 +84,7 @@ void native_teardown_msi_irq(unsigned int irq)
irq_domain_free_irqs(irq, 1);
 }
 
-static irq_hw_number_t pci_msi_get_hwirq(struct msi_domain_info *info,
+irq_hw_number_t msi_get_hwirq(struct msi_domain_info *info,
 msi_alloc_info_t *arg)
 {
return arg->msi_hwirq;
@@ -116,7 +116,7 @@