Re: [patch RFC 13/38] PCI: MSI: Rework pci_msi_domain_calc_hwirq()

2020-08-25 Thread Bjorn Helgaas
On Fri, Aug 21, 2020 at 02:24:37AM +0200, Thomas Gleixner wrote:
> Retrieve the PCI device from the msi descriptor instead of doing so at the
> call sites.

I'd like it *better* with "PCI/MSI: " in the subject (to match history
and other patches in this series) and "MSI" here in the commit log,
but nice cleanup and:

Acked-by: Bjorn Helgaas 

Minor comments below.

> Signed-off-by: Thomas Gleixner 
> Cc: linux-...@vger.kernel.org
> ---
>  arch/x86/kernel/apic/msi.c |2 +-
>  drivers/pci/msi.c  |   13 ++---
>  include/linux/msi.h|3 +--
>  3 files changed, 8 insertions(+), 10 deletions(-)
> 
> --- a/arch/x86/kernel/apic/msi.c
> +++ b/arch/x86/kernel/apic/msi.c
> @@ -232,7 +232,7 @@ EXPORT_SYMBOL_GPL(pci_msi_prepare);
>  
>  void pci_msi_set_desc(msi_alloc_info_t *arg, struct msi_desc *desc)
>  {
> - arg->msi_hwirq = pci_msi_domain_calc_hwirq(arg->msi_dev, desc);
> + arg->msi_hwirq = pci_msi_domain_calc_hwirq(desc);

I guess it's safe to assume that "arg->msi_dev ==
msi_desc_to_pci_dev(desc)"?  I didn't try to verify that.

>  }
>  EXPORT_SYMBOL_GPL(pci_msi_set_desc);
>  
> --- a/drivers/pci/msi.c
> +++ b/drivers/pci/msi.c
> @@ -1346,17 +1346,17 @@ void pci_msi_domain_write_msg(struct irq
>  
>  /**
>   * pci_msi_domain_calc_hwirq - Generate a unique ID for an MSI source
> - * @dev: Pointer to the PCI device
>   * @desc:Pointer to the MSI descriptor
>   *
>   * The ID number is only used within the irqdomain.
>   */
> -irq_hw_number_t pci_msi_domain_calc_hwirq(struct pci_dev *dev,
> -   struct msi_desc *desc)
> +irq_hw_number_t pci_msi_domain_calc_hwirq(struct msi_desc *desc)
>  {
> + struct pci_dev *pdev = msi_desc_to_pci_dev(desc);

If you named this "struct pci_dev *dev" (not "pdev"), the diff would
be a little smaller and it would match other usage in the file.

>   return (irq_hw_number_t)desc->msi_attrib.entry_nr |
> - pci_dev_id(dev) << 11 |
> - (pci_domain_nr(dev->bus) & 0x) << 27;
> + pci_dev_id(pdev) << 11 |
> + (pci_domain_nr(pdev->bus) & 0x) << 27;
>  }
>  
>  static inline bool pci_msi_desc_is_multi_msi(struct msi_desc *desc)
> @@ -1406,8 +1406,7 @@ static void pci_msi_domain_set_desc(msi_
>   struct msi_desc *desc)
>  {
>   arg->desc = desc;
> - arg->hwirq = pci_msi_domain_calc_hwirq(msi_desc_to_pci_dev(desc),
> -desc);
> + arg->hwirq = pci_msi_domain_calc_hwirq(desc);
>  }
>  #else
>  #define pci_msi_domain_set_desc  NULL
> --- a/include/linux/msi.h
> +++ b/include/linux/msi.h
> @@ -369,8 +369,7 @@ void pci_msi_domain_write_msg(struct irq
>  struct irq_domain *pci_msi_create_irq_domain(struct fwnode_handle *fwnode,
>struct msi_domain_info *info,
>struct irq_domain *parent);
> -irq_hw_number_t pci_msi_domain_calc_hwirq(struct pci_dev *dev,
> -   struct msi_desc *desc);
> +irq_hw_number_t pci_msi_domain_calc_hwirq(struct msi_desc *desc);
>  int pci_msi_domain_check_cap(struct irq_domain *domain,
>struct msi_domain_info *info, struct device *dev);
>  u32 pci_msi_domain_get_msi_rid(struct irq_domain *domain, struct pci_dev 
> *pdev);
> 



Re: [patch RFC 20/38] PCI: vmd: Mark VMD irqdomain with DOMAIN_BUS_VMD_MSI

2020-08-25 Thread Bjorn Helgaas
On Fri, Aug 21, 2020 at 02:24:44AM +0200, Thomas Gleixner wrote:
> Devices on the VMD bus use their own MSI irq domain, but it is not
> distinguishable from regular PCI/MSI irq domains. This is required
> to exclude VMD devices from getting the irq domain pointer set by
> interrupt remapping.
> 
> Override the default bus token.
> 
> Signed-off-by: Thomas Gleixner 
> Cc: Bjorn Helgaas 
> Cc: Lorenzo Pieralisi 
> Cc: Jonathan Derrick 
> Cc: linux-...@vger.kernel.org

Acked-by: Bjorn Helgaas 

> ---
>  drivers/pci/controller/vmd.c |6 ++
>  1 file changed, 6 insertions(+)
> 
> --- a/drivers/pci/controller/vmd.c
> +++ b/drivers/pci/controller/vmd.c
> @@ -579,6 +579,12 @@ static int vmd_enable_domain(struct vmd_
>   return -ENODEV;
>   }
>  
> + /*
> +  * Override the irq domain bus token so the domain can be distinguished
> +  * from a regular PCI/MSI domain.
> +  */
> + irq_domain_update_bus_token(vmd->irq_domain, DOMAIN_BUS_VMD_MSI);
> +
>   pci_add_resource(&resources, &vmd->resources[0]);
>   pci_add_resource_offset(&resources, &vmd->resources[1], offset[0]);
>   pci_add_resource_offset(&resources, &vmd->resources[2], offset[1]);
> 



Re: [patch RFC 30/38] PCI/MSI: Allow to disable arch fallbacks

2020-08-25 Thread Bjorn Helgaas
On Fri, Aug 21, 2020 at 02:24:54AM +0200, Thomas Gleixner wrote:
> If an architecture does not require the MSI setup/teardown fallback
> functions, then allow them to be replaced by stub functions which emit a
> warning.
> 
> Signed-off-by: Thomas Gleixner 
> Cc: Bjorn Helgaas 
> Cc: linux-...@vger.kernel.org

Acked-by: Bjorn Helgaas 

Question/comment below.

> ---
>  drivers/pci/Kconfig |3 +++
>  drivers/pci/msi.c   |3 ++-
>  include/linux/msi.h |   31 ++-
>  3 files changed, 31 insertions(+), 6 deletions(-)
> 
> --- a/drivers/pci/Kconfig
> +++ b/drivers/pci/Kconfig
> @@ -56,6 +56,9 @@ config PCI_MSI_IRQ_DOMAIN
>   depends on PCI_MSI
>   select GENERIC_MSI_IRQ_DOMAIN
>  
> +config PCI_MSI_DISABLE_ARCH_FALLBACKS
> + bool
> +
>  config PCI_QUIRKS
>   default y
>   bool "Enable PCI quirk workarounds" if EXPERT
> --- a/drivers/pci/msi.c
> +++ b/drivers/pci/msi.c
> @@ -58,8 +58,8 @@ static void pci_msi_teardown_msi_irqs(st
>  #define pci_msi_teardown_msi_irqsarch_teardown_msi_irqs
>  #endif
>  
> +#ifndef CONFIG_PCI_MSI_DISABLE_ARCH_FALLBACKS
>  /* Arch hooks */
> -
>  int __weak arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
>  {
>   struct msi_controller *chip = dev->bus->msi;
> @@ -132,6 +132,7 @@ void __weak arch_teardown_msi_irqs(struc
>  {
>   return default_teardown_msi_irqs(dev);
>  }
> +#endif /* !CONFIG_PCI_MSI_DISABLE_ARCH_FALLBACKS */
>  
>  static void default_restore_msi_irq(struct pci_dev *dev, int irq)
>  {
> --- a/include/linux/msi.h
> +++ b/include/linux/msi.h
> @@ -193,17 +193,38 @@ void pci_msi_mask_irq(struct irq_data *d
>  void pci_msi_unmask_irq(struct irq_data *data);
>  
>  /*
> - * The arch hooks to setup up msi irqs. Those functions are
> - * implemented as weak symbols so that they /can/ be overriden by
> - * architecture specific code if needed.
> + * The arch hooks to setup up msi irqs. Default functions are implemented
> + * as weak symbols so that they /can/ be overriden by architecture specific
> + * code if needed.
> + *
> + * They can be replaced by stubs with warnings via
> + * CONFIG_PCI_MSI_DISABLE_ARCH_FALLBACKS when the architecture fully
> + * utilizes direct irqdomain based setup.

Do you expect *all* arches to eventually use direct irqdomain setup?
And in that case, to remove the config option?

If not, it seems like it'd be nicer to have the burden on the arches
that need/want to use arch-specific code instead of on the arches that
do things generically.

>   */
> +#ifndef CONFIG_PCI_MSI_DISABLE_ARCH_FALLBACKS
>  int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc);
>  void arch_teardown_msi_irq(unsigned int irq);
>  int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type);
>  void arch_teardown_msi_irqs(struct pci_dev *dev);
> -void arch_restore_msi_irqs(struct pci_dev *dev);
> -
>  void default_teardown_msi_irqs(struct pci_dev *dev);
> +#else
> +static inline int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int 
> type)
> +{
> + WARN_ON_ONCE(1);
> + return -ENODEV;
> +}
> +
> +static inline void arch_teardown_msi_irqs(struct pci_dev *dev)
> +{
> + WARN_ON_ONCE(1);
> +}
> +#endif
> +
> +/*
> + * The restore hooks are still available as they are useful even
> + * for fully irq domain based setups. Courtesy to XEN/X86.
> + */
> +void arch_restore_msi_irqs(struct pci_dev *dev);
>  void default_restore_msi_irqs(struct pci_dev *dev);
>  
>  struct msi_controller {
> 



Re: [patch RFC 21/38] PCI: MSI: Provide pci_dev_has_special_msi_domain() helper

2020-08-25 Thread Bjorn Helgaas
On Fri, Aug 21, 2020 at 02:24:45AM +0200, Thomas Gleixner wrote:
> Provide a helper function to check whether a PCI device is handled by a
> non-standard PCI/MSI domain. This will be used to exclude such devices
> which hang of a special bus, e.g. VMD, to be excluded from the irq domain
> override in irq remapping.
> 
> Signed-off-by: Thomas Gleixner 
> Cc: Bjorn Helgaas 
> Cc: linux-...@vger.kernel.org

Acked-by: Bjorn Helgaas 

s|PCI: MSI:|PCI/MSI:| in the subject if feasible.

> ---
>  drivers/pci/msi.c   |   22 ++
>  include/linux/msi.h |1 +
>  2 files changed, 23 insertions(+)
> 
> --- a/drivers/pci/msi.c
> +++ b/drivers/pci/msi.c
> @@ -1553,4 +1553,26 @@ struct irq_domain *pci_msi_get_device_do
>DOMAIN_BUS_PCI_MSI);
>   return dom;
>  }
> +
> +/**
> + * pci_dev_has_special_msi_domain - Check whether the device is handled by
> + *   a non-standard PCI-MSI domain
> + * @pdev:The PCI device to check.
> + *
> + * Returns: True if the device irqdomain or the bus irqdomain is
> + * non-standard PCI/MSI.
> + */
> +bool pci_dev_has_special_msi_domain(struct pci_dev *pdev)
> +{
> + struct irq_domain *dom = dev_get_msi_domain(&pdev->dev);
> +
> + if (!dom)
> + dom = dev_get_msi_domain(&pdev->bus->dev);
> +
> + if (!dom)
> + return true;
> +
> + return dom->bus_token != DOMAIN_BUS_PCI_MSI;
> +}
> +
>  #endif /* CONFIG_PCI_MSI_IRQ_DOMAIN */
> --- a/include/linux/msi.h
> +++ b/include/linux/msi.h
> @@ -374,6 +374,7 @@ int pci_msi_domain_check_cap(struct irq_
>struct msi_domain_info *info, struct device *dev);
>  u32 pci_msi_domain_get_msi_rid(struct irq_domain *domain, struct pci_dev 
> *pdev);
>  struct irq_domain *pci_msi_get_device_domain(struct pci_dev *pdev);
> +bool pci_dev_has_special_msi_domain(struct pci_dev *pdev);
>  #else
>  static inline struct irq_domain *pci_msi_get_device_domain(struct pci_dev 
> *pdev)
>  {
> 



Re: [patch RFC 17/38] x86/pci: Reducde #ifdeffery in PCI init code

2020-08-25 Thread Bjorn Helgaas
s/Reducde/Reduce/ (in subject)

On Fri, Aug 21, 2020 at 02:24:41AM +0200, Thomas Gleixner wrote:
> Adding a function call before the first #ifdef in arch_pci_init() triggers
> a 'mixed declarations and code' warning if PCI_DIRECT is enabled.
> 
> Use stub functions and move the #ifdeffery to the header file where it is
> not in the way.
> 
> Signed-off-by: Thomas Gleixner 
> Cc: linux-...@vger.kernel.org

Nice cleanup, thanks.  Glad to get rid of the useless initializer,
too.

Acked-by: Bjorn Helgaas 

> ---
>  arch/x86/include/asm/pci_x86.h |   11 +++
>  arch/x86/pci/init.c|   10 +++---
>  2 files changed, 14 insertions(+), 7 deletions(-)
> 
> --- a/arch/x86/include/asm/pci_x86.h
> +++ b/arch/x86/include/asm/pci_x86.h
> @@ -114,9 +114,20 @@ extern const struct pci_raw_ops pci_dire
>  extern bool port_cf9_safe;
>  
>  /* arch_initcall level */
> +#ifdef CONFIG_PCI_DIRECT
>  extern int pci_direct_probe(void);
>  extern void pci_direct_init(int type);
> +#else
> +static inline int pci_direct_probe(void) { return -1; }
> +static inline  void pci_direct_init(int type) { }
> +#endif
> +
> +#ifdef CONFIG_PCI_BIOS
>  extern void pci_pcbios_init(void);
> +#else
> +static inline void pci_pcbios_init(void) { }
> +#endif
> +
>  extern void __init dmi_check_pciprobe(void);
>  extern void __init dmi_check_skip_isa_align(void);
>  
> --- a/arch/x86/pci/init.c
> +++ b/arch/x86/pci/init.c
> @@ -8,11 +8,9 @@
> in the right sequence from here. */
>  static __init int pci_arch_init(void)
>  {
> -#ifdef CONFIG_PCI_DIRECT
> - int type = 0;
> + int type;
>  
>   type = pci_direct_probe();
> -#endif
>  
>   if (!(pci_probe & PCI_PROBE_NOEARLY))
>   pci_mmcfg_early_init();
> @@ -20,18 +18,16 @@ static __init int pci_arch_init(void)
>   if (x86_init.pci.arch_init && !x86_init.pci.arch_init())
>   return 0;
>  
> -#ifdef CONFIG_PCI_BIOS
>   pci_pcbios_init();
> -#endif
> +
>   /*
>* don't check for raw_pci_ops here because we want pcbios as last
>* fallback, yet it's needed to run first to set pcibios_last_bus
>* in case legacy PCI probing is used. otherwise detecting peer busses
>* fails.
>*/
> -#ifdef CONFIG_PCI_DIRECT
>   pci_direct_init(type);
> -#endif
> +
>   if (!raw_pci_ops && !raw_pci_ext_ops)
>   printk(KERN_ERR
>   "PCI: Fatal: No config space access function found\n");
> 



Re: [patch RFC 34/38] x86/msi: Let pci_msi_prepare() handle non-PCI MSI

2020-08-25 Thread Bjorn Helgaas
On Fri, Aug 21, 2020 at 02:24:58AM +0200, Thomas Gleixner wrote:
> Rename it to x86_msi_prepare() and handle the allocation type setup
> depending on the device type.

I see what you're doing, but the subject reads a little strangely
("pci_msi_prepare() handling non-PCI" stuff) since it doesn't mention
the rename.  Maybe not practical or worthwhile to split into a rename
+ make generic, I dunno.

> Add a new arch_msi_prepare define which will be utilized by the upcoming
> device MSI support. Define it to NULL if not provided by an architecture in
> the generic MSI header.
> 
> One arch specific function for MSI support is truly enough.
> 
> Signed-off-by: Thomas Gleixner 
> Cc: linux-...@vger.kernel.org
> Cc: linux-hyp...@vger.kernel.org
> ---
>  arch/x86/include/asm/msi.h  |4 +++-
>  arch/x86/kernel/apic/msi.c  |   27 ---
>  drivers/pci/controller/pci-hyperv.c |2 +-
>  include/linux/msi.h |4 
>  4 files changed, 28 insertions(+), 9 deletions(-)
> 
> --- a/arch/x86/include/asm/msi.h
> +++ b/arch/x86/include/asm/msi.h
> @@ -6,7 +6,9 @@
>  
>  typedef struct irq_alloc_info msi_alloc_info_t;
>  
> -int pci_msi_prepare(struct irq_domain *domain, struct device *dev, int nvec,
> +int x86_msi_prepare(struct irq_domain *domain, struct device *dev, int nvec,
>   msi_alloc_info_t *arg);
>  
> +#define arch_msi_prepare x86_msi_prepare
> +
>  #endif /* _ASM_X86_MSI_H */
> --- a/arch/x86/kernel/apic/msi.c
> +++ b/arch/x86/kernel/apic/msi.c
> @@ -182,26 +182,39 @@ static struct irq_chip pci_msi_controlle
>   .flags  = IRQCHIP_SKIP_SET_WAKE,
>  };
>  
> -int pci_msi_prepare(struct irq_domain *domain, struct device *dev, int nvec,
> - msi_alloc_info_t *arg)
> +static void pci_msi_prepare(struct device *dev, msi_alloc_info_t *arg)
>  {
> - struct pci_dev *pdev = to_pci_dev(dev);
> - struct msi_desc *desc = first_pci_msi_entry(pdev);
> + struct msi_desc *desc = first_msi_entry(dev);
>  
> - init_irq_alloc_info(arg, NULL);
>   if (desc->msi_attrib.is_msix) {
>   arg->type = X86_IRQ_ALLOC_TYPE_PCI_MSIX;
>   } else {
>   arg->type = X86_IRQ_ALLOC_TYPE_PCI_MSI;
>   arg->flags |= X86_IRQ_ALLOC_CONTIGUOUS_VECTORS;
>   }
> +}
> +
> +static void dev_msi_prepare(struct device *dev, msi_alloc_info_t *arg)
> +{
> + arg->type = X86_IRQ_ALLOC_TYPE_DEV_MSI;
> +}
> +
> +int x86_msi_prepare(struct irq_domain *domain, struct device *dev, int nvec,
> + msi_alloc_info_t *arg)
> +{
> + init_irq_alloc_info(arg, NULL);
> +
> + if (dev_is_pci(dev))
> + pci_msi_prepare(dev, arg);
> + else
> + dev_msi_prepare(dev, arg);
>  
>   return 0;
>  }
> -EXPORT_SYMBOL_GPL(pci_msi_prepare);
> +EXPORT_SYMBOL_GPL(x86_msi_prepare);
>  
>  static struct msi_domain_ops pci_msi_domain_ops = {
> - .msi_prepare= pci_msi_prepare,
> + .msi_prepare= x86_msi_prepare,
>  };
>  
>  static struct msi_domain_info pci_msi_domain_info = {
> --- a/drivers/pci/controller/pci-hyperv.c
> +++ b/drivers/pci/controller/pci-hyperv.c
> @@ -1532,7 +1532,7 @@ static struct irq_chip hv_msi_irq_chip =
>  };
>  
>  static struct msi_domain_ops hv_msi_ops = {
> - .msi_prepare= pci_msi_prepare,
> + .msi_prepare= arch_msi_prepare,
>   .msi_free   = hv_msi_free,
>  };
>  
> --- a/include/linux/msi.h
> +++ b/include/linux/msi.h
> @@ -430,4 +430,8 @@ static inline struct irq_domain *pci_msi
>  }
>  #endif /* CONFIG_PCI_MSI_IRQ_DOMAIN */
>  
> +#ifndef arch_msi_prepare
> +# define arch_msi_prepareNULL
> +#endif
> +
>  #endif /* LINUX_MSI_H */
> 



Re: [patch RFC 30/38] PCI/MSI: Allow to disable arch fallbacks

2020-08-25 Thread Bjorn Helgaas
On Tue, Aug 25, 2020 at 11:28:30PM +0200, Thomas Gleixner wrote:
> On Tue, Aug 25 2020 at 15:07, Bjorn Helgaas wrote:
> >> + * The arch hooks to setup up msi irqs. Default functions are implemented
> >> + * as weak symbols so that they /can/ be overriden by architecture 
> >> specific
> >> + * code if needed.
> >> + *
> >> + * They can be replaced by stubs with warnings via
> >> + * CONFIG_PCI_MSI_DISABLE_ARCH_FALLBACKS when the architecture fully
> >> + * utilizes direct irqdomain based setup.

> > If not, it seems like it'd be nicer to have the burden on the arches
> > that need/want to use arch-specific code instead of on the arches that
> > do things generically.
> 
> Right, but they still share the common code there and some of them
> provide only parts of the weak callbacks. I'm not sure whether it's a
> good idea to copy all of this into each affected architecture.
> 
> Or did you just mean that those architectures should select
> CONFIG_I_WANT_THE CRUFT instead of opting out on the fully irq domain
> based ones?

Yes, that was my real question -- can we confine the cruft in the
crufty arches?  If not, no big deal.

Bjorn



Re: [patch RFC 34/38] x86/msi: Let pci_msi_prepare() handle non-PCI MSI

2020-08-25 Thread Bjorn Helgaas
On Tue, Aug 25, 2020 at 11:30:41PM +0200, Thomas Gleixner wrote:
> On Tue, Aug 25 2020 at 15:24, Bjorn Helgaas wrote:
> > On Fri, Aug 21, 2020 at 02:24:58AM +0200, Thomas Gleixner wrote:
> >> Rename it to x86_msi_prepare() and handle the allocation type setup
> >> depending on the device type.
> >
> > I see what you're doing, but the subject reads a little strangely
> 
> Yes :(
> 
> > ("pci_msi_prepare() handling non-PCI" stuff) since it doesn't mention
> > the rename.  Maybe not practical or worthwhile to split into a rename
> > + make generic, I dunno.
> 
> What about
> 
> x86/msi: Rename and rework pci_msi_prepare() to cover non-PCI MSI

Perfect!



Re: [patch V2 34/46] PCI/MSI: Make arch_.*_msi_irq[s] fallbacks selectable

2020-08-27 Thread Bjorn Helgaas
[+cc Rob,
cover https://lore.kernel.org/r/20200826111628.794979...@linutronix.de/
this  https://lore.kernel.org/r/20200826112333.992429...@linutronix.de/]

On Wed, Aug 26, 2020 at 01:17:02PM +0200, Thomas Gleixner wrote:
> From: Thomas Gleixner 
> 
> The arch_.*_msi_irq[s] fallbacks are compiled in whether an architecture
> requires them or not. Architectures which are fully utilizing hierarchical
> irq domains should never call into that code.
> 
> It's not only architectures which depend on that by implementing one or
> more of the weak functions, there is also a bunch of drivers which relies
> on the weak functions which invoke msi_controller::setup_irq[s] and
> msi_controller::teardown_irq.
> 
> Make the architectures and drivers which rely on them select them in Kconfig
> and if not selected replace them by stub functions which emit a warning and
> fail the PCI/MSI interrupt allocation.

Sorry, I really don't understand this, so these are probably stupid
questions.

If CONFIG_PCI_MSI_ARCH_FALLBACKS is defined, we will supply
implementations of:

  arch_setup_msi_irq
  arch_teardown_msi_irq
  arch_setup_msi_irqs
  arch_teardown_msi_irqs
  default_teardown_msi_irqs# non-weak

You select CONFIG_PCI_MSI_ARCH_FALLBACKS for ia64, mips, powerpc,
s390, sparc, and x86.  I see that all of those arches implement at
least one of the functions above.  But x86 doesn't and I can't figure
out why it needs to select CONFIG_PCI_MSI_ARCH_FALLBACKS.

I assume there's a way to convert these arches to hierarchical irq
domains so they wouldn't need this at all?  Is there a sample
conversion to look at?

And I can't figure out what's special about tegra, rcar, and xilinx
that makes them need it as well.  Is there something I could grep for
to identify them?  Is there a way to convert them so they don't need
it?

> --- a/include/linux/msi.h
> +++ b/include/linux/msi.h
> @@ -193,17 +193,38 @@ void pci_msi_mask_irq(struct irq_data *d
>  void pci_msi_unmask_irq(struct irq_data *data);
>  
>  /*
> - * The arch hooks to setup up msi irqs. Those functions are
> - * implemented as weak symbols so that they /can/ be overriden by
> - * architecture specific code if needed.
> + * The arch hooks to setup up msi irqs. Default functions are implemented

s/msi/MSI/ to match the one below.

> + * as weak symbols so that they /can/ be overriden by architecture specific
> + * code if needed. These hooks must be enabled by the architecture or by
> + * drivers which depend on them via msi_controller based MSI handling.



Re: [PCI] 3233e41d3e: WARNING:at_drivers/pci/pci.c:#pci_reset_hotplug_slot

2020-09-09 Thread Bjorn Helgaas
On Thu, Jul 23, 2020 at 11:51:52AM +0200, Lukas Wunner wrote:
> On Thu, Jul 23, 2020 at 05:13:06PM +0800, kernel test robot wrote:
> > FYI, we noticed the following commit (built with gcc-9):
> [...]
> > commit: 3233e41d3e8ebcd44e92da47ffed97fd49b84278 ("[PATCH] PCI: pciehp: Fix 
> > AB-BA deadlock between reset_lock and device_lock")
> [...]
> > caused below changes (please refer to attached dmesg/kmsg for entire 
> > log/backtrace):
> > [0.971752] WARNING: CPU: 0 PID: 1 at drivers/pci/pci.c:4905 
> > pci_reset_hotplug_slot+0x70/0x80
> 
> Thank you, trusty robot.
> 
> I botched the call to lockdep_assert_held_write(), it should have been
> conditional on "if (probe)".
> 
> Happy to respin the patch, but I'd like to hear opinions on the locking
> issues surrounding xen and octeon (and the patch in general).

I wish liquidio/octeon weren't a special case.  Why should that driver
reset the device when unbinding when no other drivers do?

Looks like this was added by 70535350e26f ("liquidio: with embedded
f/w, don't reload f/w, issue pf flr at exit").  Maybe Rick will chime
in.

> In particular, would a solution be entertained wherein the pci_dev is
> reset by the PCI core after driver unbinding, contingent on a flag which
> is set by a PCI driver to indicate that the pci_dev is returned to the
> core in an unclean state?

How would we do this?  The PCI core isn't called after unbinding, is
it?  So I guess we'd have to have a queue and a worker thread to
process it?

Device removal also has nasty locking issues, and a queue might help
solve those, too.  Might also help in the problematic case of
40f11adc7cd9 ("PCI: Avoid race while enabling upstream bridges"),
which we had to revert.

> Also, why does xen require a device reset on bind?
> 
> Thanks!
> 
> Lukas



Re: [PATCH] x86/xen: drop an unused parameter gsi_override

2020-05-21 Thread Bjorn Helgaas
On Tue, Apr 28, 2020 at 03:36:40PM +, Wei Liu wrote:
> All callers within the same file pass in -1 (no override).
> 
> Signed-off-by: Wei Liu 

Applied to pci/virtualization for v5.8, thanks!

I don't see anything else in linux-next that touches this file, but if
somebody wants to merge this via another tree, just let me know.

> ---
>  arch/x86/pci/xen.c | 16 ++--
>  1 file changed, 6 insertions(+), 10 deletions(-)
> 
> diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
> index 91220cc25854..e3f1ca316068 100644
> --- a/arch/x86/pci/xen.c
> +++ b/arch/x86/pci/xen.c
> @@ -60,8 +60,7 @@ static int xen_pcifront_enable_irq(struct pci_dev *dev)
>  }
>  
>  #ifdef CONFIG_ACPI
> -static int xen_register_pirq(u32 gsi, int gsi_override, int triggering,
> -  bool set_pirq)
> +static int xen_register_pirq(u32 gsi, int triggering, bool set_pirq)
>  {
>   int rc, pirq = -1, irq = -1;
>   struct physdev_map_pirq map_irq;
> @@ -94,9 +93,6 @@ static int xen_register_pirq(u32 gsi, int gsi_override, int 
> triggering,
>   name = "ioapic-level";
>   }
>  
> - if (gsi_override >= 0)
> - gsi = gsi_override;
> -
>   irq = xen_bind_pirq_gsi_to_irq(gsi, map_irq.pirq, shareable, name);
>   if (irq < 0)
>   goto out;
> @@ -112,12 +108,12 @@ static int acpi_register_gsi_xen_hvm(struct device 
> *dev, u32 gsi,
>   if (!xen_hvm_domain())
>   return -1;
>  
> - return xen_register_pirq(gsi, -1 /* no GSI override */, trigger,
> + return xen_register_pirq(gsi, trigger,
>false /* no mapping of GSI to PIRQ */);
>  }
>  
>  #ifdef CONFIG_XEN_DOM0
> -static int xen_register_gsi(u32 gsi, int gsi_override, int triggering, int 
> polarity)
> +static int xen_register_gsi(u32 gsi, int triggering, int polarity)
>  {
>   int rc, irq;
>   struct physdev_setup_gsi setup_gsi;
> @@ -128,7 +124,7 @@ static int xen_register_gsi(u32 gsi, int gsi_override, 
> int triggering, int polar
>   printk(KERN_DEBUG "xen: registering gsi %u triggering %d polarity %d\n",
>   gsi, triggering, polarity);
>  
> - irq = xen_register_pirq(gsi, gsi_override, triggering, true);
> + irq = xen_register_pirq(gsi, triggering, true);
>  
>   setup_gsi.gsi = gsi;
>   setup_gsi.triggering = (triggering == ACPI_EDGE_SENSITIVE ? 0 : 1);
> @@ -148,7 +144,7 @@ static int xen_register_gsi(u32 gsi, int gsi_override, 
> int triggering, int polar
>  static int acpi_register_gsi_xen(struct device *dev, u32 gsi,
>int trigger, int polarity)
>  {
> - return xen_register_gsi(gsi, -1 /* no GSI override */, trigger, 
> polarity);
> + return xen_register_gsi(gsi, trigger, polarity);
>  }
>  #endif
>  #endif
> @@ -491,7 +487,7 @@ int __init pci_xen_initial_domain(void)
>   if (acpi_get_override_irq(irq, &trigger, &polarity) == -1)
>   continue;
>  
> - xen_register_pirq(irq, -1 /* no GSI override */,
> + xen_register_pirq(irq,
>   trigger ? ACPI_LEVEL_SENSITIVE : ACPI_EDGE_SENSITIVE,
>   true /* Map GSI to PIRQ */);
>   }
> -- 
> 2.20.1
> 



[PATCH 0/2] xen: Use dev_printk() when possible

2020-05-27 Thread Bjorn Helgaas
From: Bjorn Helgaas 

Use dev_printk() when possible to include device and driver information in
the conventional format.

Bjorn Helgaas (2):
  xen-pciback: Use dev_printk() when possible
  xenbus: Use dev_printk() when possible

 drivers/xen/xen-pciback/conf_space.c| 16 +
 drivers/xen/xen-pciback/conf_space_header.c | 40 +
 drivers/xen/xen-pciback/conf_space_quirks.c |  6 ++--
 drivers/xen/xen-pciback/pci_stub.c  | 38 +---
 drivers/xen/xen-pciback/pciback_ops.c   | 38 
 drivers/xen/xen-pciback/vpci.c  | 10 +++---
 drivers/xen/xenbus/xenbus_probe.c   | 11 +++---
 7 files changed, 70 insertions(+), 89 deletions(-)


base-commit: 8f3d9f354286745c751374f5f1fcafee6b3f3136
-- 
2.25.1




[PATCH 2/2] xenbus: Use dev_printk() when possible

2020-05-27 Thread Bjorn Helgaas
From: Bjorn Helgaas 

Use dev_printk() when possible to include device and driver information in
the conventional format.

Add "#define dev_fmt" to preserve KBUILD_MODNAME in messages.

No functional change intended.

Signed-off-by: Bjorn Helgaas 
---
 drivers/xen/xenbus/xenbus_probe.c | 11 +--
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/xen/xenbus/xenbus_probe.c 
b/drivers/xen/xenbus/xenbus_probe.c
index 8c4d05b687b7..ee9a9170b43a 100644
--- a/drivers/xen/xenbus/xenbus_probe.c
+++ b/drivers/xen/xenbus/xenbus_probe.c
@@ -31,6 +31,7 @@
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#define dev_fmt pr_fmt
 
 #define DPRINTK(fmt, args...)  \
pr_debug("xenbus_probe (%s:%d) " fmt ".\n", \
@@ -608,7 +609,7 @@ int xenbus_dev_suspend(struct device *dev)
if (drv->suspend)
err = drv->suspend(xdev);
if (err)
-   pr_warn("suspend %s failed: %i\n", dev_name(dev), err);
+   dev_warn(dev, "suspend failed: %i\n", err);
return 0;
 }
 EXPORT_SYMBOL_GPL(xenbus_dev_suspend);
@@ -627,8 +628,7 @@ int xenbus_dev_resume(struct device *dev)
drv = to_xenbus_driver(dev->driver);
err = talk_to_otherend(xdev);
if (err) {
-   pr_warn("resume (talk_to_otherend) %s failed: %i\n",
-   dev_name(dev), err);
+   dev_warn(dev, "resume (talk_to_otherend) failed: %i\n", err);
return err;
}
 
@@ -637,15 +637,14 @@ int xenbus_dev_resume(struct device *dev)
if (drv->resume) {
err = drv->resume(xdev);
if (err) {
-   pr_warn("resume %s failed: %i\n", dev_name(dev), err);
+   dev_warn(dev, "resume failed: %i\n", err);
return err;
}
}
 
err = watch_otherend(xdev);
if (err) {
-   pr_warn("resume (watch_otherend) %s failed: %d.\n",
-   dev_name(dev), err);
+   dev_warn(dev, "resume (watch_otherend) failed: %d\n", err);
return err;
}
 
-- 
2.25.1




[PATCH 1/2] xen-pciback: Use dev_printk() when possible

2020-05-27 Thread Bjorn Helgaas
From: Bjorn Helgaas 

Use dev_printk() when possible to include device and driver information in
the conventional format.

Add "#define dev_fmt" when needed to preserve DRV_NAME or KBUILD_MODNAME in
messages.

No functional change intended.

Signed-off-by: Bjorn Helgaas 
---
 drivers/xen/xen-pciback/conf_space.c| 16 +
 drivers/xen/xen-pciback/conf_space_header.c | 40 +
 drivers/xen/xen-pciback/conf_space_quirks.c |  6 ++--
 drivers/xen/xen-pciback/pci_stub.c  | 38 +---
 drivers/xen/xen-pciback/pciback_ops.c   | 38 
 drivers/xen/xen-pciback/vpci.c  | 10 +++---
 6 files changed, 65 insertions(+), 83 deletions(-)

diff --git a/drivers/xen/xen-pciback/conf_space.c 
b/drivers/xen/xen-pciback/conf_space.c
index da51a5d34e6e..f2df4e55fc1b 100644
--- a/drivers/xen/xen-pciback/conf_space.c
+++ b/drivers/xen/xen-pciback/conf_space.c
@@ -10,6 +10,8 @@
  * Author: Ryan Wilson 
  */
 
+#define dev_fmt(fmt) DRV_NAME ": " fmt
+
 #include 
 #include 
 #include 
@@ -155,8 +157,8 @@ int xen_pcibk_config_read(struct pci_dev *dev, int offset, 
int size,
u32 value = 0, tmp_val;
 
if (unlikely(verbose_request))
-   printk(KERN_DEBUG DRV_NAME ": %s: read %d bytes at 0x%x\n",
-  pci_name(dev), size, offset);
+   dev_printk(KERN_DEBUG, &dev->dev, "read %d bytes at 0x%x\n",
+  size, offset);
 
if (!valid_request(offset, size)) {
err = XEN_PCI_ERR_invalid_offset;
@@ -196,8 +198,8 @@ int xen_pcibk_config_read(struct pci_dev *dev, int offset, 
int size,
 
 out:
if (unlikely(verbose_request))
-   printk(KERN_DEBUG DRV_NAME ": %s: read %d bytes at 0x%x = %x\n",
-  pci_name(dev), size, offset, value);
+   dev_printk(KERN_DEBUG, &dev->dev,
+  "read %d bytes at 0x%x = %x\n", size, offset, value);
 
*ret_val = value;
return xen_pcibios_err_to_errno(err);
@@ -213,9 +215,9 @@ int xen_pcibk_config_write(struct pci_dev *dev, int offset, 
int size, u32 value)
int field_start, field_end;
 
if (unlikely(verbose_request))
-   printk(KERN_DEBUG
-  DRV_NAME ": %s: write request %d bytes at 0x%x = %x\n",
-  pci_name(dev), size, offset, value);
+   dev_printk(KERN_DEBUG, &dev->dev,
+  "write request %d bytes at 0x%x = %x\n", size,
+  offset, value);
 
if (!valid_request(offset, size))
return XEN_PCI_ERR_invalid_offset;
diff --git a/drivers/xen/xen-pciback/conf_space_header.c 
b/drivers/xen/xen-pciback/conf_space_header.c
index fb4fccb4aecc..b277b689f257 100644
--- a/drivers/xen/xen-pciback/conf_space_header.c
+++ b/drivers/xen/xen-pciback/conf_space_header.c
@@ -6,6 +6,7 @@
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#define dev_fmt pr_fmt
 
 #include 
 #include 
@@ -68,8 +69,7 @@ static int command_write(struct pci_dev *dev, int offset, u16 
value, void *data)
dev_data = pci_get_drvdata(dev);
if (!pci_is_enabled(dev) && is_enable_cmd(value)) {
if (unlikely(verbose_request))
-   printk(KERN_DEBUG DRV_NAME ": %s: enable\n",
-  pci_name(dev));
+   dev_printk(KERN_DEBUG, &dev->dev, "enable\n");
err = pci_enable_device(dev);
if (err)
return err;
@@ -77,8 +77,7 @@ static int command_write(struct pci_dev *dev, int offset, u16 
value, void *data)
dev_data->enable_intx = 1;
} else if (pci_is_enabled(dev) && !is_enable_cmd(value)) {
if (unlikely(verbose_request))
-   printk(KERN_DEBUG DRV_NAME ": %s: disable\n",
-  pci_name(dev));
+   dev_printk(KERN_DEBUG, &dev->dev, "disable\n");
pci_disable_device(dev);
if (dev_data)
dev_data->enable_intx = 0;
@@ -86,34 +85,30 @@ static int command_write(struct pci_dev *dev, int offset, 
u16 value, void *data)
 
if (!dev->is_busmaster && is_master_cmd(value)) {
if (unlikely(verbose_request))
-   printk(KERN_DEBUG DRV_NAME ": %s: set bus master\n",
-  pci_name(dev));
+   dev_printk(KERN_DEBUG, &dev->dev, "set bus master\n");
pci_set_master(dev);
} else if (dev->is_busmaster && !is_master_cmd(value)) {
if (unlikely(verbose_request))
-   printk(KERN_DEBUG DR

Re: [PATCH 1/2] xen-pciback: Use dev_printk() when possible

2020-05-27 Thread Bjorn Helgaas
On Wed, May 27, 2020 at 03:34:26PM -0700, Boris Ostrovsky wrote:
> On 5/27/20 1:43 PM, Bjorn Helgaas wrote:
> > @@ -155,8 +157,8 @@ int xen_pcibk_config_read(struct pci_dev *dev, int 
> > offset, int size,
> > u32 value = 0, tmp_val;
> >  
> > if (unlikely(verbose_request))
> > -   printk(KERN_DEBUG DRV_NAME ": %s: read %d bytes at 0x%x\n",
> > -  pci_name(dev), size, offset);
> > +   dev_printk(KERN_DEBUG, &dev->dev, "read %d bytes at 0x%x\n",
> > +  size, offset);
> 
> 
> Maybe then dev_dbg() ?

printk(KERN_DEBUG) always produces output, so I used
dev_printk(KERN_DEBUG) to retain that behavior.

dev_dbg() does not always produce output, since it depends on DEBUG or
CONFIG_DYNAMIC_DEBUG and the dynamic debug settings.

If dev_dbg() seems like the right thing, I would probably add a
separate patch on top to convert dev_printk(KERN_DEBUG) to dev_dbg().

Thanks for taking a look!  

Bjorn



[PATCH] xen-blkfront: Fix 'physical' typos

2021-01-26 Thread Bjorn Helgaas
From: Bjorn Helgaas 

Fix misspelling of "physical".

Signed-off-by: Bjorn Helgaas 
---
 drivers/block/xen-blkfront.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 5265975b3fba..876db9fcf388 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -2403,7 +2403,7 @@ static void blkfront_connect(struct blkfront_info *info)
}
 
/*
-* physcial-sector-size is a newer field, so old backends may not
+* physical-sector-size is a newer field, so old backends may not
 * provide this. Assume physical sector size to be the same as
 * sector_size in that case.
 */
-- 
2.25.1




Re: [PATCH -next] xen: Fix a previous prototype warning in xen.c

2020-09-28 Thread Bjorn Helgaas
On Thu, Sep 24, 2020 at 10:36:16PM +0800, Li Heng wrote:
> Fix the warning:
> arch/x86/pci/xen.c:423:13: warning:
> no previous prototype for ‘xen_msi_init’ [-Wmissing-prototypes]
> 
> Reported-by: Hulk Robot 
> Signed-off-by: Li Heng 

Applied to pci/misc for v5.10, thanks!

> ---
>  arch/x86/pci/xen.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
> index 89395a5..f663a5f 100644
> --- a/arch/x86/pci/xen.c
> +++ b/arch/x86/pci/xen.c
> @@ -420,7 +420,7 @@ int __init pci_xen_init(void)
>  }
> 
>  #ifdef CONFIG_PCI_MSI
> -void __init xen_msi_init(void)
> +static void __init xen_msi_init(void)
>  {
>   if (!disable_apic) {
>   /*
> --
> 2.7.4
> 



Re: [PATCH] xen/pci: remove redundant assignment to variable irq

2020-06-30 Thread Bjorn Helgaas
[+cc Juergen, Boris]

On Thu, Apr 09, 2020 at 12:41:18PM +0100, Colin King wrote:
> From: Colin Ian King 
> 
> The variable irq is being initialized with a value that is never read
> and it is being updated later with a new value.  The initialization is
> redundant and can be removed.
> 
> Addresses-Coverity: ("Unused value")
> Signed-off-by: Colin Ian King 

Applied to pci/virtualization for v5.9, thanks!

I don't see this in linux-next yet, but if anybody else would prefer
to take it, let me know and I'll drop it.  

> ---
>  arch/x86/pci/xen.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
> index 91220cc25854..80272eb49230 100644
> --- a/arch/x86/pci/xen.c
> +++ b/arch/x86/pci/xen.c
> @@ -63,7 +63,7 @@ static int xen_pcifront_enable_irq(struct pci_dev *dev)
>  static int xen_register_pirq(u32 gsi, int gsi_override, int triggering,
>bool set_pirq)
>  {
> - int rc, pirq = -1, irq = -1;
> + int rc, pirq = -1, irq;
>   struct physdev_map_pirq map_irq;
>   int shareable = 0;
>   char *name;
> -- 
> 2.25.1
> 



Re: [RFC PATCH 00/35] Move all PCIBIOS* definitions into arch/x86

2020-07-13 Thread Bjorn Helgaas
On Mon, Jul 13, 2020 at 02:22:12PM +0200, Saheed O. Bolarinwa wrote:
> This goal of these series is to move the definition of *all* PCIBIOS* from
> include/linux/pci.h to arch/x86 and limit their use within there.
> All other tree specific definition will be left for intact. Maybe they can
> be renamed.

More comments later, but a few trivial whitespace issues you can clean
up in the meantime.  Don't repost for at least a few days to avoid
spamming everybody.  I found these with:

  $ b4 am -om/ 20200713122247.10985-1-refactormys...@gmail.com
  $ git am 
m/20200713_refactormyself_move_all_pcibios_definitions_into_arch_x86.mbx

  Applying: atm: Change PCIBIOS_SUCCESSFUL to 0
  .git/rebase-apply/patch:11: trailing whitespace.
  iadev = INPH_IA_DEV(dev);
  .git/rebase-apply/patch:12: trailing whitespace.
  for(i=0; i<64; i++)
  .git/rebase-apply/patch:13: trailing whitespace.
if ((error = pci_read_config_dword(iadev->pci,
  .git/rebase-apply/patch:16: trailing whitespace, space before tab in indent.
return error;
  .git/rebase-apply/patch:17: trailing whitespace.
  writel(0, iadev->reg+IPHASE5575_EXT_RESET);
  warning: squelched 5 whitespace errors
  warning: 10 lines add whitespace errors.
  Applying: atm: Tidy Success/Failure checks
  .git/rebase-apply/patch:13: trailing whitespace.

  .git/rebase-apply/patch:14: trailing whitespace.
  iadev = INPH_IA_DEV(dev);
  .git/rebase-apply/patch:15: trailing whitespace.
  for(i=0; i<64; i++)
  .git/rebase-apply/patch:21: trailing whitespace.
  writel(0, iadev->reg+IPHASE5575_EXT_RESET);
  .git/rebase-apply/patch:22: trailing whitespace.
  for(i=0; i<64; i++)
  warning: squelched 3 whitespace errors
  warning: 8 lines add whitespace errors.
  Applying: atm: Fix Style ERROR- assignment in if condition
  .git/rebase-apply/patch:12: trailing whitespace.
  unsigned int pci[64];
  .git/rebase-apply/patch:13: trailing whitespace.

  .git/rebase-apply/patch:14: trailing whitespace.
  iadev = INPH_IA_DEV(dev);
  .git/rebase-apply/patch:23: trailing whitespace.
  writel(0, iadev->reg+IPHASE5575_EXT_RESET);
  .git/rebase-apply/patch:32: trailing whitespace.
  udelay(5);
  warning: squelched 2 whitespace errors
  warning: 7 lines add whitespace errors.
  Applying: PCI: Change PCIBIOS_SUCCESSFUL to 0
  .git/rebase-apply/patch:37: trailing whitespace.
  struct pci_ops apecs_pci_ops =
  .git/rebase-apply/patch:50: trailing whitespace.
  static int
  .git/rebase-apply/patch:59: trailing whitespace.
  struct pci_ops cia_pci_ops =
  .git/rebase-apply/patch:94: trailing whitespace.
  static int
  .git/rebase-apply/patch:103: trailing whitespace.
  struct pci_ops lca_pci_ops =
  warning: squelched 10 whitespace errors
  warning: 15 lines add whitespace errors.



Re: [PATCH v6 00/11] PCI: Drop duplicated tracking of a pci_dev's bound driver

2021-10-12 Thread Bjorn Helgaas
On Mon, Oct 04, 2021 at 02:59:24PM +0200, Uwe Kleine-König wrote:
> Hello,
> 
> this is v6 of the quest to drop the "driver" member from struct pci_dev
> which tracks the same data (apart from a constant offset) as dev.driver.

I like this a lot and applied it to pci/driver for v5.16, thanks!

I split some of the bigger patches apart so they only touched one
driver or subsystem at a time.  I also updated to_pci_driver() so it
returns NULL when given NULL, which makes some of the validations
quite a bit simpler, especially in the PM code in pci-driver.c.

Full interdiff from this v6 series:

diff --git a/arch/x86/kernel/probe_roms.c b/arch/x86/kernel/probe_roms.c
index deaaef6efe34..36e84d904260 100644
--- a/arch/x86/kernel/probe_roms.c
+++ b/arch/x86/kernel/probe_roms.c
@@ -80,17 +80,15 @@ static struct resource video_rom_resource = {
  */
 static bool match_id(struct pci_dev *pdev, unsigned short vendor, unsigned 
short device)
 {
+   struct pci_driver *drv = to_pci_driver(pdev->dev.driver);
const struct pci_device_id *id;
 
if (pdev->vendor == vendor && pdev->device == device)
return true;
 
-   if (pdev->dev.driver) {
-   struct pci_driver *drv = to_pci_driver(pdev->dev.driver);
-   for (id = drv->id_table; id && id->vendor; id++)
-   if (id->vendor == vendor && id->device == device)
-   break;
-   }
+   for (id = drv ? drv->id_table : NULL; id && id->vendor; id++)
+   if (id->vendor == vendor && id->device == device)
+   break;
 
return id && id->vendor;
 }
diff --git a/drivers/misc/cxl/guest.c b/drivers/misc/cxl/guest.c
index d997c9c3ebb5..7eb3706cf42d 100644
--- a/drivers/misc/cxl/guest.c
+++ b/drivers/misc/cxl/guest.c
@@ -20,38 +20,38 @@ static void pci_error_handlers(struct cxl_afu *afu,
pci_channel_state_t state)
 {
struct pci_dev *afu_dev;
+   struct pci_driver *afu_drv;
+   struct pci_error_handlers *err_handler;
 
if (afu->phb == NULL)
return;
 
list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) {
-   struct pci_driver *afu_drv;
-
-   if (!afu_dev->dev.driver)
-   continue;
-
afu_drv = to_pci_driver(afu_dev->dev.driver);
+   if (!afu_drv)
+   continue;
 
+   err_handler = afu_drv->err_handler;
switch (bus_error_event) {
case CXL_ERROR_DETECTED_EVENT:
afu_dev->error_state = state;
 
-   if (afu_drv->err_handler &&
-   afu_drv->err_handler->error_detected)
-   afu_drv->err_handler->error_detected(afu_dev, 
state);
-   break;
+   if (err_handler &&
+   err_handler->error_detected)
+   err_handler->error_detected(afu_dev, state);
+   break;
case CXL_SLOT_RESET_EVENT:
afu_dev->error_state = state;
 
-   if (afu_drv->err_handler &&
-   afu_drv->err_handler->slot_reset)
-   afu_drv->err_handler->slot_reset(afu_dev);
-   break;
+   if (err_handler &&
+   err_handler->slot_reset)
+   err_handler->slot_reset(afu_dev);
+   break;
case CXL_RESUME_EVENT:
-   if (afu_drv->err_handler &&
-   afu_drv->err_handler->resume)
-   afu_drv->err_handler->resume(afu_dev);
-   break;
+   if (err_handler &&
+   err_handler->resume)
+   err_handler->resume(afu_dev);
+   break;
}
}
 }
diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index 7e7545d01e27..08bd81854101 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -1795,6 +1795,8 @@ static pci_ers_result_t cxl_vphb_error_detected(struct 
cxl_afu *afu,
pci_channel_state_t state)
 {
struct pci_dev *afu_dev;
+   struct pci_driver *afu_drv;
+   struct pci_error_handlers *err_handler;
pci_ers_result_t result = PCI_ERS_RESULT_NEED_RESET;
pci_ers_result_t afu_result = PCI_ERS_RESULT_NEED_RESET;
 
@@ -1805,16 +1807,16 @@ static pci_ers_result_t cxl_vphb_error_detected(struct 
cxl_afu *afu,
return result;
 
list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) {
-   struct pci_driver *afu_drv;
-   if (!afu_dev->dev.driver)
-   continue;
-
afu_drv = to_pci_driver(afu_dev->dev.driv

Re: [PATCH v6 00/11] PCI: Drop duplicated tracking of a pci_dev's bound driver

2021-10-13 Thread Bjorn Helgaas
On Wed, Oct 13, 2021 at 10:51:31AM +0200, Uwe Kleine-König wrote:
> On Tue, Oct 12, 2021 at 06:32:12PM -0500, Bjorn Helgaas wrote:
> > On Mon, Oct 04, 2021 at 02:59:24PM +0200, Uwe Kleine-König wrote:
> > > Hello,
> > > 
> > > this is v6 of the quest to drop the "driver" member from struct pci_dev
> > > which tracks the same data (apart from a constant offset) as dev.driver.
> > 
> > I like this a lot and applied it to pci/driver for v5.16, thanks!
> > 
> > I split some of the bigger patches apart so they only touched one
> > driver or subsystem at a time.  I also updated to_pci_driver() so it
> > returns NULL when given NULL, which makes some of the validations
> > quite a bit simpler, especially in the PM code in pci-driver.c.
> 
> OK.
> 
> > Full interdiff from this v6 series:
> > 
> > diff --git a/arch/x86/kernel/probe_roms.c b/arch/x86/kernel/probe_roms.c
> > index deaaef6efe34..36e84d904260 100644
> > --- a/arch/x86/kernel/probe_roms.c
> > +++ b/arch/x86/kernel/probe_roms.c
> > @@ -80,17 +80,15 @@ static struct resource video_rom_resource = {
> >   */
> >  static bool match_id(struct pci_dev *pdev, unsigned short vendor, unsigned 
> > short device)
> >  {
> > +   struct pci_driver *drv = to_pci_driver(pdev->dev.driver);
> > const struct pci_device_id *id;
> >  
> > if (pdev->vendor == vendor && pdev->device == device)
> > return true;
> >  
> > -   if (pdev->dev.driver) {
> > -   struct pci_driver *drv = to_pci_driver(pdev->dev.driver);
> > -   for (id = drv->id_table; id && id->vendor; id++)
> > -   if (id->vendor == vendor && id->device == device)
> > -   break;
> > -   }
> > +   for (id = drv ? drv->id_table : NULL; id && id->vendor; id++)
> > +   if (id->vendor == vendor && id->device == device)
> > +   break;
> >  
> > return id && id->vendor;
> >  }
> > diff --git a/drivers/misc/cxl/guest.c b/drivers/misc/cxl/guest.c
> > index d997c9c3ebb5..7eb3706cf42d 100644
> > --- a/drivers/misc/cxl/guest.c
> > +++ b/drivers/misc/cxl/guest.c
> > @@ -20,38 +20,38 @@ static void pci_error_handlers(struct cxl_afu *afu,
> > pci_channel_state_t state)
> >  {
> > struct pci_dev *afu_dev;
> > +   struct pci_driver *afu_drv;
> > +   struct pci_error_handlers *err_handler;
> 
> These two could be moved into the for loop (where afu_drv was with my
> patch already). This is also possible in a few other drivers.

That's true, they could.  I tried to follow the prevailing style in
the file.  At least in cxl, I didn't see any other cases of
declarations being in the minimal scope like that.

Bjorn



Re: [PATCH v6 00/11] PCI: Drop duplicated tracking of a pci_dev's bound driver

2021-10-13 Thread Bjorn Helgaas
On Wed, Oct 13, 2021 at 12:26:42PM +0300, Andy Shevchenko wrote:
> On Wed, Oct 13, 2021 at 2:33 AM Bjorn Helgaas  wrote:
> > On Mon, Oct 04, 2021 at 02:59:24PM +0200, Uwe Kleine-König wrote:
> 
> > I split some of the bigger patches apart so they only touched one
> > driver or subsystem at a time.  I also updated to_pci_driver() so it
> > returns NULL when given NULL, which makes some of the validations
> > quite a bit simpler, especially in the PM code in pci-driver.c.
> 
> It's a bit unusual. Other to_*_dev() are not NULL-aware IIRC.

It is a little unusual.  I only found three of 77 that are NULL-aware:

  to_moxtet_driver()
  to_siox_driver()
  to_spi_driver()

It seems worthwhile to me because it makes the patch and the resulting
code significantly cleaner.  Here's one example without the NULL
check:

  @@ -493,12 +493,15 @@ static void pci_device_remove(struct device *dev)
   static void pci_device_shutdown(struct device *dev)
   {
  struct pci_dev *pci_dev = to_pci_dev(dev);
  -   struct pci_driver *drv = pci_dev->driver;

  pm_runtime_resume(dev);

  -   if (drv && drv->shutdown)
  -   drv->shutdown(pci_dev);
  +   if (pci_dev->dev.driver) {
  +   struct pci_driver *drv = to_pci_driver(pci_dev->dev.driver);
  +
  +   if (drv->shutdown)
  +   drv->shutdown(pci_dev);
  +   }

  static void pci_device_shutdown(struct device *dev)
  {
struct pci_dev *pci_dev = to_pci_dev(dev);

pm_runtime_resume(dev);

if (pci_dev->dev.driver) {
  struct pci_driver *drv = to_pci_driver(pci_dev->dev.driver);

  if (drv->shutdown)
drv->shutdown(pci_dev);
}

and here's the same thing with the NULL check:

  @@ -493,7 +493,7 @@ static void pci_device_remove(struct device *dev)
   static void pci_device_shutdown(struct device *dev)
   {
  struct pci_dev *pci_dev = to_pci_dev(dev);
  -   struct pci_driver *drv = pci_dev->driver;
  +   struct pci_driver *drv = to_pci_driver(dev->driver);

  static void pci_device_shutdown(struct device *dev)
  {
struct pci_dev *pci_dev = to_pci_dev(dev);
struct pci_driver *drv = to_pci_driver(dev->driver);

pm_runtime_resume(dev);

if (drv && drv->shutdown)
  drv->shutdown(pci_dev);

> >  static bool match_id(struct pci_dev *pdev, unsigned short vendor, unsigned 
> > short device)
> >  {
> > +   struct pci_driver *drv = to_pci_driver(pdev->dev.driver);
> > const struct pci_device_id *id;
> >
> > if (pdev->vendor == vendor && pdev->device == device)
> > return true;
> 
> > +   for (id = drv ? drv->id_table : NULL; id && id->vendor; id++)
> > +   if (id->vendor == vendor && id->device == device)
> 
> > +   break;
> 
> return true;
> 
> > return id && id->vendor;
> 
> return false;

Good cleanup for a follow-up patch, but doesn't seem directly related
to the objective here.  The current patch is:

  @@ -80,7 +80,7 @@ static struct resource video_rom_resource = {
*/
   static bool match_id(struct pci_dev *pdev, unsigned short vendor, unsigned 
short device)
   {
  -   struct pci_driver *drv = pdev->driver;
  +   struct pci_driver *drv = to_pci_driver(pdev->dev.driver);
  const struct pci_device_id *id;

  if (pdev->vendor == vendor && pdev->device == device)

> > device_lock(&vf_dev->dev);
> > -   if (vf_dev->dev.driver) {
> > +   if (to_pci_driver(vf_dev->dev.driver)) {
> 
> Hmm...

Yeah, it could be either of:

  if (to_pci_driver(vf_dev->dev.driver))
  if (vf_dev->dev.driver)

I went back and forth on that and went with to_pci_driver() on the
theory that we were testing the pci_driver * before and the patch is
more of a mechanical change and easier to review if we test the
pci_driver * after.

> > +   if (!pci_dev->state_saved && pci_dev->current_state != 
> > PCI_D0
> 
> > +   && pci_dev->current_state != PCI_UNKNOWN) {
> 
> Can we keep && on the previous line?

I think this is in pci_legacy_suspend(), and I didn't touch that line.
It shows up in the interdiff because without the NULL check in
to_pci_driver(), we had to indent this code another level.  With the
NULL check, we don't need that extra indentation.

> > +   pci_WARN_ONCE(pci_dev, pci_dev->current_state != 
> > prev,
> > + "PCI PM: Device state not saved by 
> > %pS\n",
> > + drv->

Re: [PATCH v6 00/11] PCI: Drop duplicated tracking of a pci_dev's bound driver

2021-10-15 Thread Bjorn Helgaas
On Wed, Oct 13, 2021 at 04:23:09PM +0300, Andy Shevchenko wrote:
> On Wed, Oct 13, 2021 at 06:33:56AM -0500, Bjorn Helgaas wrote:
> > On Wed, Oct 13, 2021 at 12:26:42PM +0300, Andy Shevchenko wrote:
> > > On Wed, Oct 13, 2021 at 2:33 AM Bjorn Helgaas  wrote:
> > > > On Mon, Oct 04, 2021 at 02:59:24PM +0200, Uwe Kleine-König wrote:

> > > > +   return drv && drv->resume ?
> > > > +   drv->resume(pci_dev) : 
> > > > pci_pm_reenable_device(pci_dev);
> > > 
> > > One line?
> > 
> > I don't think I touched that line.
> 
> Then why they are both in + section?

They're both in the + section of the interdiff because Uwe's v6 patch
looks like this:

  static int pci_legacy_resume(struct device *dev)
  {
  struct pci_dev *pci_dev = to_pci_dev(dev);
  -   return drv && drv->resume ?
  -   drv->resume(pci_dev) : 
pci_pm_reenable_device(pci_dev);
  +   if (pci_dev->dev.driver) {
  +   struct pci_driver *drv = to_pci_driver(pci_dev->dev.driver);
  +
  +   if (drv->resume)
  +   return drv->resume(pci_dev);
  +   }
  +
  +   return pci_pm_reenable_device(pci_dev);

and my revision looks like this:

   static int pci_legacy_resume(struct device *dev)
   {
  struct pci_dev *pci_dev = to_pci_dev(dev);
  -   struct pci_driver *drv = pci_dev->driver;
  +   struct pci_driver *drv = to_pci_driver(dev->driver);

so compared to Uwe's v6, I restored that section to the original code.
My goal here was to make the patch as simple and easy to review as
possible.

> > > > +   struct pci_driver *drv = to_pci_driver(dev->dev.driver);
> > > > const struct pci_error_handlers *err_handler =
> > > > -   dev->dev.driver ? 
> > > > to_pci_driver(dev->dev.driver)->err_handler : NULL;
> > > > +   drv ? drv->err_handler : NULL;
> > > 
> > > Isn't dev->driver == to_pci_driver(dev->dev.driver)?
> > 
> > Yes, I think so, but not sure what you're getting at here, can you
> > elaborate?
> 
> Getting pointer from another pointer seems waste of resources, why we
> can't simply
> 
>   struct pci_driver *drv = dev->driver;

I think this is in pci_dev_save_and_disable(), and "dev" here is a
struct pci_dev *.  We're removing the dev->driver member.  Let me know
if I'm still missing something.

> > > > -   "bad request in aer recovery "
> > > > -   "operation!\n");
> > > > +   "bad request in AER recovery 
> > > > operation!\n");

> > > Stray change? Or is it in a separate patch in your tree?
> > 
> > Could be skipped.  The string now fits on one line so I combined it to
> > make it more greppable.
> 
> This is inconsistency in your changes, in one case you are objecting of
> doing something close to the changed lines, in the other you are doing
> unrelated change.

You're right, this didn't make much sense in that patch.  I moved the
line join to the previous patch, which unindented this section and
made space for this to fit on one line.  Here's the revised commit:

  
https://git.kernel.org/cgit/linux/kernel/git/helgaas/pci.git/commit/?id=34ab316d7287




Re: [PATCH] PCI/MSI: Re-add checks for skip masking MSI-X on Xen PV

2021-10-19 Thread Bjorn Helgaas
[+cc Marc]

On Mon, Oct 18, 2021 at 08:22:32AM +0200, Josef Johansson wrote:
> From: Josef Johansson 
> 
> 
> PCI/MSI: Re-add checks for skip masking MSI-X on Xen PV
> 
> 'commit fcacdfbef5a1 ("PCI/MSI: Provide a new set of mask and unmask
> functions")' introduced functions pci_msi_update_mask() and 
> pci_msix_write_vector_ctrl() that were missing checks for
> pci_msi_ignore_mask that existed in 'commit 446a98b19fd6 ("PCI/MSI: Use
> new mask/unmask functions")'. This patch adds them back since it was
> causing softlocks in amdgpu drivers under Xen.
> 
> As explained in 'commit 1a519dc7a73c ("PCI/MSI: Skip masking MSI-X
> on Xen PV")', when running as Xen PV guest, masking MSI-X is a 
> responsibility of the hypervisor.
> 
> Fixes: fcacdfbef5a1 ("PCI/MSI: Provide a new set of mask and unmask
> functions")
> Suggested-by: Jason Andryuk 
> Signed-off-by: Josef Johansson 

fcacdfbef5a1 appeared in v5.15-rc1, and we should try to get this
fixed before v5.15.

I could merge it but would like an ack from Thomas since he wrote
fcacdfbef5a1.  He merged fcacdfbef5a1, so I wouldn't complain if the
fix followed the same path.

If I merged it (or if you were to repost it), I would drop the single
quotes around the commit citations and write the commit log in
imperative mood, as you did for the subject
(https://chris.beams.io/posts/git-commit/)

> ---
> 
> This patch solves a major issue with booting 5.15-rc1 under Xen
> with amdgpu drivers. Specifically Lenovo P14s Gen 1, AMD 4750U.
> 
> The softlock below takes about ~2-3 minutes to release, and will
> lock again if I switch between X and console, when staying in
> X I can do things without it softlock.

I don't actually see a softlock mentioned below; am I missing
something?  It's nice to include a couple lines of dmesg log to help
people connect the issue with the fix, but most of the below is not
relevant and can be easily found from the Link: tag.  Also, some of
the lines seem to be wrapped.  They're more useful when not wrapped
because grep can find them.

> I have to note that this is my first commit and PCI/MSI area is
> not my area of expertise. I tried to mimic what was
> obviously missing between the aforementioned commits. There may be
> better ways to solve this problem, or other places to put the checks.
> Should desc->msi_attrib.is_virtual be checked? It is not checked in
> 'commit 1a519dc7a73c ("PCI/MSI: Skip masking MSI-X on Xen PV")'
> 
> I should also note that my original problem with *flip done timeout*
> inside drm during suspend/resume is not solved, but with this patch
> at least the kernel boots properly.
> 
> The kernel is much more stable not running inside Xen, and much 
> more stable running pci=nomsi (under Xen). Are we missing something
> more regarding masking?

It does sound like something else is broken as well, but I have no
idea what that would be.

> The error that occurs is:
> 
> kernel: [ cut here ]
> kernel: WARNING: CPU: 6 PID: 3754 at
> drivers/gpu/drm/amd/amdgpu/../display/amdgp
> u_dm/amdgpu_dm.c:8630 amdgpu_dm_commit_planes+0x9b4/0x9c0 [amdgpu]
> kernel: Modules linked in: nf_tables nfnetlink vfat fat intel_rapl_msr 
> wmi_bmof
> intel_rapl_common pcspkr joydev uvcvideo k10temp sp5100_tco videobuf2_vmalloc 
> vi
> deobuf2_memops i2c_piix4 videobuf2_v4l2 videobuf2_common videodev mc iwlwifi 
> thi
> nkpad_acpi platform_profile ipmi_devintf ledtrig_audio ucsi_acpi cfg80211 
> ipmi_m
> sghandler r8169 snd typec_ucsi soundcore typec rfkill wmi video amd_pmc 
> i2c_scmi
>  fuse xenfs ip_tables dm_thin_pool dm_persistent_data dm_bio_prison dm_crypt 
> tru
> sted asn1_encoder hid_multitouch amdgpu crct10dif_pclmul iommu_v2 
> crc32_pclmul c
> rc32c_intel gpu_sched i2c_algo_bit drm_ttm_helper ttm drm_kms_helper ccp cec 
> gha
> sh_clmulni_intel sdhci_pci xhci_pci xhci_pci_renesas serio_raw cqhci drm 
> sdhci x
> hci_hcd mmc_core nvme ehci_pci ehci_hcd nvme_core xen_acpi_processor 
> xen_privcmd
>  xen_pciback xen_blkback xen_gntalloc xen_gntdev xen_evtchn uinput
> kernel: CPU: 6 PID: 3754 Comm: Xorg Tainted: G   W 5.15.0-1.fc32.qubes.x86_64 
> #1
> kernel: Hardware name: LENOVO 20Y1S02400/20Y1S02400, BIOS R1BET61W(1.30) 
> 12/21/
> 2020
> kernel: RIP: e030:amdgpu_dm_commit_planes+0x9b4/0x9c0 [amdgpu]
> kernel: Code: 8b 45 b0 48 c7 c7 4b fc 90 c0 4c 89 55 88 8b b0 f0 03 00 00 e8 
> 6d
> cb ca ff 4c 8b 55 88 0f b6 55 ab 49 8b 72 08 e9 2b fa ff ff <0f> 0b e9 fa fe 
> ff
> ff e8 40 2f 6e c1 0f 1f 44 00 00 55 b9 27 00 00
> kernel: RSP: e02b:c90042d93638 EFLAGS: 00010002
> kernel: RAX: 888110840210 RBX: 83c1 RCX: 0466
> kernel: RDX: 0001 RSI: 0204 RDI: 888110840170
> kernel: RBP: c90042d936f8 R08: 0002 R09: 0001
> kernel: R10:  R11: 88810cb2e118 R12: 888110840210
> kernel: R13: 88810cb2e000 R14: 888103d50400 R15: 888103bb2c00
> kernel: FS:  718c6de4da40() GS:88814078(

Re: [PATCH] PCI/MSI: Re-add checks for skip masking MSI-X on Xen PV

2021-10-19 Thread Bjorn Helgaas
On Tue, Oct 19, 2021 at 10:15:05PM +0200, Josef Johansson wrote:
> On 10/19/21 21:57, Bjorn Helgaas wrote:
> > On Mon, Oct 18, 2021 at 08:22:32AM +0200, Josef Johansson wrote:

> I'll make an effort to do a better commit log. Thanks for reviewing the
> entry!
> 
> What is the time frame for v5.15?

Soon.  v5.15-rc7 should happen Oct 24, final release likely Oct 31.
Ideally a fix would be in before the 24th.

> >> This patch solves a major issue with booting 5.15-rc1 under Xen
> >> with amdgpu drivers. Specifically Lenovo P14s Gen 1, AMD 4750U.
> >>
> >> The softlock below takes about ~2-3 minutes to release, and will
> >> lock again if I switch between X and console, when staying in
> >> X I can do things without it softlock.
> >
> > I don't actually see a softlock mentioned below; am I missing
> > something?  It's nice to include a couple lines of dmesg log to help
> > people connect the issue with the fix, but most of the below is not
> > relevant and can be easily found from the Link: tag.  Also, some of
> > the lines seem to be wrapped.  They're more useful when not wrapped
> > because grep can find them.
> 
> Sorry for my lack of words here. I used deadlock when I first
> described it, but since it was released after a while, I thought
> softlock would be more fitting.

I looked up the WARN_ON at amdgpu_dm.c:8630 but I didn't see anything
related to deadlock or soft lockup or any kind of delay:

  WARN_ON(acrtc_attach->pflip_status != AMDGPU_FLIP_NONE);

DRM folks might have an idea.

> I'll dig a bit to try to get a better dmesg around the stacktrace. Sorry
> about the wrapping,
> 
> I'm trying hard to keep those 80 chars ;-)

I appreciate that.  Commit logs should be 75 chars because git log
indents by 4.  Quotes of dmesg and the like should be unwrapped.

> >> I should also note that my original problem with *flip done timeout*
> >> inside drm during suspend/resume is not solved, but with this patch
> >> at least the kernel boots properly.
> >>
> >> The kernel is much more stable not running inside Xen, and much 
> >> more stable running pci=nomsi (under Xen). Are we missing something
> >> more regarding masking?
> > It does sound like something else is broken as well, but I have no
> > idea what that would be.
> 
> Should I take a stab at describing the issue better at hand or
> should we focus on getting this specific patch out of the way?

Unless the other issues seem related, we should dispose of this one
by itself ASAP.

> I have 'solved' my current problems a bit by kernel flags right now,
> but I would be happy to share my story so far.

Description of your workaround would be quite useful.  Probably not to
*me*, but to people who know the area (I'd start with the DRM folks).

> >> The error that occurs is:
> >>
> >> kernel: [ cut here ]
> >> kernel: WARNING: CPU: 6 PID: 3754 at
> >> drivers/gpu/drm/amd/amdgpu/../display/amdgp
> >> u_dm/amdgpu_dm.c:8630 amdgpu_dm_commit_planes+0x9b4/0x9c0 [amdgpu]



Re: [PATCH] PCI: Fix general code style

2021-08-25 Thread Bjorn Helgaas
On Thu, Aug 05, 2021 at 12:28:32AM +0200, Sergio Miguéns Iglesias wrote:
> The code style for most files was fixed. This means that blank lines
> were added when needed (normally after variable declarations), spaces
> before tabs were removed, some code alignment issues were solved, block
> comment style was fixed, every instance of "unsigned var" was replaced
> with "unsigned int var"... Etc.
> 
> This commit does not change the logic of the code, it just fixes
> aesthetic problems.

I generally *like* this, and it does fix some annoying things, but I
think it's a little too much all at once.  If we're working in a file
and doing actual bug fixes or new functionality, and we want to fix
some typos or something at the end, that might be OK, but I think the
churn in the git history outweighs the benefit of this huge patch.

So I would encourage you to use some of the PCI expertise you've
gained by looking at all this code to work on something with a little
more impact.  Here are a couple ideas:

  - There are only two uses of __ref and __refdata in drivers/pci/.
The fact that they're so rare makes me suspect that we don't need
them.  But I haven't investigated these to see.  Somebody could
check that out and remove them if we don't need them.  Be aware
that I will want a clear argument for why they're not needed :)

  - Coverity complains about several issues in drivers/pci/ [1].  Most
of the time these are false positives, but not always.  Sometimes
there's an actual bug, and sometimes there's a way to restructure
the code to avoid the warning (which usually means doing things
the same way they are done elsewhere).

  - "make C=2 drivers/pci/" (sparse checker, [2]) complains about a
few things.  Leave the pci_power_t ones alone for now, but there
are a couple other type issues that could be cleaned up.

[1] 
https://docs.google.com/spreadsheets/d/19eyNDou83JACzf44j0NRzEWysva6g44G2_Z9IEXGVNk/edit?usp=sharing
[2] 
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/dev-tools/sparse.rst?id=v5.13

> Signed-off-by: Sergio Miguéns Iglesias 
> ---
>  drivers/pci/access.c   | 22 +-
>  drivers/pci/bus.c  |  3 ++-
>  drivers/pci/msi.c  | 12 +++-
>  drivers/pci/pci-acpi.c |  3 ++-
>  drivers/pci/pci-driver.c   | 19 +--
>  drivers/pci/pci-sysfs.c| 14 --
>  drivers/pci/pci.c  | 16 
>  drivers/pci/proc.c | 15 +++
>  drivers/pci/quirks.c   | 35 ---
>  drivers/pci/remove.c   |  1 +
>  drivers/pci/rom.c  |  2 +-
>  drivers/pci/setup-bus.c|  5 -
>  drivers/pci/setup-irq.c| 12 +++-
>  drivers/pci/setup-res.c|  2 +-
>  drivers/pci/slot.c |  5 -
>  drivers/pci/syscall.c  |  5 +++--
>  drivers/pci/xen-pcifront.c | 20 
>  17 files changed, 133 insertions(+), 58 deletions(-)



Re: [PATCH] PCI/MSI: skip masking MSI on Xen PV

2021-08-26 Thread Bjorn Helgaas
If/when you repost this, please run "git log --oneline
drivers/pci/msi.c" and follow the convention of capitalizing the
subject line.

Also, I think this patch refers specifically to MSI-X, not MSI, so
please update the subject line and the "masking MSI" below to reflect
that.

On Thu, Aug 26, 2021 at 03:43:37PM +0200, Marek Marczykowski-Górecki wrote:
> When running as Xen PV guest, masking MSI is a responsibility of the
> hypervisor. Guest has no write access to relevant BAR at all - when it
> tries to, it results in a crash like this:
> 
> BUG: unable to handle page fault for address: c9004069100c
> #PF: supervisor write access in kernel mode
> #PF: error_code(0x0003) - permissions violation
> PGD 18f1c067 P4D 18f1c067 PUD 4dbd067 PMD 4fba067 PTE 8010febd4075
> Oops: 0003 [#1] SMP NOPTI
> CPU: 0 PID: 234 Comm: kworker/0:2 Tainted: GW 
> 5.14.0-rc7-1.fc32.qubes.x86_64 #15
> Workqueue: events work_for_cpu_fn
> RIP: e030:__pci_enable_msix_range.part.0+0x26b/0x5f0
> Code: 2f 96 ff 48 89 44 24 28 48 89 c7 48 85 c0 0f 84 f6 01 00 00 45 0f 
> b7 f6 48 8d 40 0c ba 01 00 00 00 49 c1 e6 04 4a 8d 4c 37 1c <89> 10 48 83 c0 
> 10 48 39 c1 75 f5 41 0f b6 44 24 6a 84 c0 0f 84 48
> RSP: e02b:c9004018bd50 EFLAGS: 00010212
> RAX: c9004069100c RBX: 88800ed412f8 RCX: c9004069105c
> RDX: 0001 RSI: 000febd4 RDI: c90040691000
> RBP: 0003 R08:  R09: febd404f
> R10: 7ff0 R11: 88800ee8ae40 R12: 88800ed41000
> R13:  R14: 0040 R15: feba
> FS:  () GS:88801840() 
> knlGS:
> CS:  e030 DS:  ES:  CR0: 80050033
> CR2: 807f5ea0 CR3: 12f6a000 CR4: 0660
> Call Trace:
>  e1000e_set_interrupt_capability+0xbf/0xd0 [e1000e]
>  e1000_probe+0x41f/0xdb0 [e1000e]
>  local_pci_probe+0x42/0x80
> (...)
> 
> There is pci_msi_ignore_mask variable for bypassing MSI masking on Xen
> PV, but msix_mask_all() missed checking it. Add the check there too.
> 
> Fixes: 7d5ec3d36123 ("PCI/MSI: Mask all unused MSI-X entries")
> Cc: sta...@vger.kernel.org

7d5ec3d36123 appeared in v5.14-rc6, so if this fix is merged before
v5.14, the stable tag will be unnecessary.  But we are running out of
time there.

> Signed-off-by: Marek Marczykowski-Górecki 
> ---
> Cc: xen-devel@lists.xenproject.org
> ---
>  drivers/pci/msi.c | 3 +++
>  1 file changed, 3 insertions(+)
> 
> diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
> index e5e75331b415..3a9f4f8ad8f9 100644
> --- a/drivers/pci/msi.c
> +++ b/drivers/pci/msi.c
> @@ -776,6 +776,9 @@ static void msix_mask_all(void __iomem *base, int tsize)
>   u32 ctrl = PCI_MSIX_ENTRY_CTRL_MASKBIT;
>   int i;
>  
> + if (pci_msi_ignore_mask)
> + return;
> +
>   for (i = 0; i < tsize; i++, base += PCI_MSIX_ENTRY_SIZE)
>   writel(ctrl, base + PCI_MSIX_ENTRY_VECTOR_CTRL);
>  }
> -- 
> 2.31.1
> 



Re: [PATCH] PCI/MSI: skip masking MSI on Xen PV

2021-08-26 Thread Bjorn Helgaas
On Thu, Aug 26, 2021 at 06:36:49PM +0200, Marek Marczykowski-Górecki wrote:
> On Thu, Aug 26, 2021 at 09:55:32AM -0500, Bjorn Helgaas wrote:
> > If/when you repost this, please run "git log --oneline
> > drivers/pci/msi.c" and follow the convention of capitalizing the
> > subject line.
> > 
> > Also, I think this patch refers specifically to MSI-X, not MSI, so
> > please update the subject line and the "masking MSI" below to reflect
> > that.
> 
> Sure, thanks for pointing this out. Is the patch otherwise ok? Should I
> post v2 with just updated commit message?

Wouldn't hurt to post a v2.  I don't have any objections to the patch,
but ultimately up to Thomas.

> > On Thu, Aug 26, 2021 at 03:43:37PM +0200, Marek Marczykowski-Górecki wrote:
> > > When running as Xen PV guest, masking MSI is a responsibility of the
> > > hypervisor. Guest has no write access to relevant BAR at all - when it
> > > tries to, it results in a crash like this:
> > > 
> > > BUG: unable to handle page fault for address: c9004069100c
> > > #PF: supervisor write access in kernel mode
> > > #PF: error_code(0x0003) - permissions violation
> > > PGD 18f1c067 P4D 18f1c067 PUD 4dbd067 PMD 4fba067 PTE 8010febd4075
> > > Oops: 0003 [#1] SMP NOPTI
> > > CPU: 0 PID: 234 Comm: kworker/0:2 Tainted: GW 
> > > 5.14.0-rc7-1.fc32.qubes.x86_64 #15
> > > Workqueue: events work_for_cpu_fn
> > > RIP: e030:__pci_enable_msix_range.part.0+0x26b/0x5f0
> > > Code: 2f 96 ff 48 89 44 24 28 48 89 c7 48 85 c0 0f 84 f6 01 00 00 45 
> > > 0f b7 f6 48 8d 40 0c ba 01 00 00 00 49 c1 e6 04 4a 8d 4c 37 1c <89> 10 48 
> > > 83 c0 10 48 39 c1 75 f5 41 0f b6 44 24 6a 84 c0 0f 84 48
> > > RSP: e02b:c9004018bd50 EFLAGS: 00010212
> > > RAX: c9004069100c RBX: 88800ed412f8 RCX: c9004069105c
> > > RDX: 0001 RSI: 000febd4 RDI: c90040691000
> > > RBP: 0003 R08:  R09: febd404f
> > > R10: 7ff0 R11: 88800ee8ae40 R12: 88800ed41000
> > > R13:  R14: 0040 R15: feba
> > > FS:  () GS:88801840() 
> > > knlGS:
> > > CS:  e030 DS:  ES:  CR0: 80050033
> > > CR2: 807f5ea0 CR3: 12f6a000 CR4: 0660
> > > Call Trace:
> > >  e1000e_set_interrupt_capability+0xbf/0xd0 [e1000e]
> > >  e1000_probe+0x41f/0xdb0 [e1000e]
> > >  local_pci_probe+0x42/0x80
> > > (...)
> > > 
> > > There is pci_msi_ignore_mask variable for bypassing MSI masking on Xen
> > > PV, but msix_mask_all() missed checking it. Add the check there too.
> > > 
> > > Fixes: 7d5ec3d36123 ("PCI/MSI: Mask all unused MSI-X entries")
> > > Cc: sta...@vger.kernel.org
> > 
> > 7d5ec3d36123 appeared in v5.14-rc6, so if this fix is merged before
> > v5.14, the stable tag will be unnecessary.  But we are running out of
> > time there.
> 
> 7d5ec3d36123 was already backported to stable branches (at least 5.10
> and 5.4), and in fact this is how I discovered the issue...

Oh, right, of course.  Sorry :)

> > > Signed-off-by: Marek Marczykowski-Górecki 
> > > 
> > > ---
> > > Cc: xen-devel@lists.xenproject.org
> > > ---
> > >  drivers/pci/msi.c | 3 +++
> > >  1 file changed, 3 insertions(+)
> > > 
> > > diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
> > > index e5e75331b415..3a9f4f8ad8f9 100644
> > > --- a/drivers/pci/msi.c
> > > +++ b/drivers/pci/msi.c
> > > @@ -776,6 +776,9 @@ static void msix_mask_all(void __iomem *base, int 
> > > tsize)
> > >   u32 ctrl = PCI_MSIX_ENTRY_CTRL_MASKBIT;
> > >   int i;
> > >  
> > > + if (pci_msi_ignore_mask)
> > > + return;
> > > +
> > >   for (i = 0; i < tsize; i++, base += PCI_MSIX_ENTRY_SIZE)
> > >   writel(ctrl, base + PCI_MSIX_ENTRY_VECTOR_CTRL);
> > >  }
> > > -- 
> > > 2.31.1
> > > 
> 
> -- 
> Best Regards,
> Marek Marczykowski-Górecki
> Invisible Things Lab





Re: [PATCH v2] PCI/MSI: Skip masking MSI-X on Xen PV

2021-08-26 Thread Bjorn Helgaas
On Thu, Aug 26, 2021 at 07:03:42PM +0200, Marek Marczykowski-Górecki wrote:
> When running as Xen PV guest, masking MSI-X is a responsibility of the
> hypervisor. Guest has no write access to relevant BAR at all - when it
> tries to, it results in a crash like this:
> 
> BUG: unable to handle page fault for address: c9004069100c
> #PF: supervisor write access in kernel mode
> #PF: error_code(0x0003) - permissions violation
> PGD 18f1c067 P4D 18f1c067 PUD 4dbd067 PMD 4fba067 PTE 8010febd4075
> Oops: 0003 [#1] SMP NOPTI
> CPU: 0 PID: 234 Comm: kworker/0:2 Tainted: GW 
> 5.14.0-rc7-1.fc32.qubes.x86_64 #15
> Workqueue: events work_for_cpu_fn
> RIP: e030:__pci_enable_msix_range.part.0+0x26b/0x5f0
> Code: 2f 96 ff 48 89 44 24 28 48 89 c7 48 85 c0 0f 84 f6 01 00 00 45 0f 
> b7 f6 48 8d 40 0c ba 01 00 00 00 49 c1 e6 04 4a 8d 4c 37 1c <89> 10 48 83 c0 
> 10 48 39 c1 75 f5 41 0f b6 44 24 6a 84 c0 0f 84 48
> RSP: e02b:c9004018bd50 EFLAGS: 00010212
> RAX: c9004069100c RBX: 88800ed412f8 RCX: c9004069105c
> RDX: 0001 RSI: 000febd4 RDI: c90040691000
> RBP: 0003 R08:  R09: febd404f
> R10: 7ff0 R11: 88800ee8ae40 R12: 88800ed41000
> R13:  R14: 0040 R15: feba
> FS:  () GS:88801840() 
> knlGS:
> CS:  e030 DS:  ES:  CR0: 80050033
> CR2: 807f5ea0 CR3: 12f6a000 CR4: 0660
> Call Trace:
>  e1000e_set_interrupt_capability+0xbf/0xd0 [e1000e]
>  e1000_probe+0x41f/0xdb0 [e1000e]
>  local_pci_probe+0x42/0x80
> (...)
> 
> There is pci_msi_ignore_mask variable for bypassing MSI(-X) masking on Xen
> PV, but msix_mask_all() missed checking it. Add the check there too.
> 
> Fixes: 7d5ec3d36123 ("PCI/MSI: Mask all unused MSI-X entries")
> Cc: sta...@vger.kernel.org
> Signed-off-by: Marek Marczykowski-Górecki 

Acked-by: Bjorn Helgaas 

> ---
> Cc: xen-devel@lists.xenproject.org
> 
> Changes in v2:
> - update commit message (MSI -> MSI-X)
> ---
>  drivers/pci/msi.c | 3 +++
>  1 file changed, 3 insertions(+)
> 
> diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
> index e5e75331b415..3a9f4f8ad8f9 100644
> --- a/drivers/pci/msi.c
> +++ b/drivers/pci/msi.c
> @@ -776,6 +776,9 @@ static void msix_mask_all(void __iomem *base, int tsize)
>   u32 ctrl = PCI_MSIX_ENTRY_CTRL_MASKBIT;
>   int i;
>  
> + if (pci_msi_ignore_mask)
> + return;
> +
>   for (i = 0; i < tsize; i++, base += PCI_MSIX_ENTRY_SIZE)
>   writel(ctrl, base + PCI_MSIX_ENTRY_VECTOR_CTRL);
>  }
> -- 
> 2.31.1
> 



Re: [PATCH v2] xen/pcifront: Removed unnecessary __ref annotation

2021-08-30 Thread Bjorn Helgaas
On Mon, Aug 30, 2021 at 07:53:05PM +0200, Sergio Miguéns Iglesias wrote:
> An unnecessary "__ref" annotation was removed from the
> "drivers/pci/xen_pcifront.c" file. The function where the annotation
> was used was "pcifront_backend_changed()", which does not call any
> functions annotated as "__*init" nor "__*exit". This makes "__ref"
> unnecessary since this annotation is used to make the compiler ignore
> section miss-matches when they are not happening here in the first
> place.
> 
> In addition to the aforementioned change, some code style issues were
> fixed in the same file.

One of the Xen folks may apply this, and they may not be as nit-picky
as I am :)

If I were to apply this, I would suggest:

  - Write subject line and commit message in imperative mood.  This is
a really good guide to this and other commit message this:
https://chris.beams.io/posts/git-commit/

For example, in the subject, say "Remove" (not "Removed").  Same
in the body.  In the body, I would mention the function but not
the filename since that's obvious from the diff.

  - Split the __ref change into a separate patch from the style
changes.  The __ref removal should come first and be the absolute
minimal patch.  That makes it much easier to review, backport, and
revert if necessary.  And, if the maintainer isn't wild about
style patches, it's trivial to just ignore that patch.

Commit logs that say "Also, ..." or "In addition, ..." are always
red flags to me because they usually indicate the patch could be
split into two or more simpler patches.

  - When reviewing changes like this, I assume __ref was added in the
first place for some good reason, so I want to know why, and I
want to know when that reason changed.  So I would look for the
commit that *introduced* __ref and for the commit that removed the
need for it.  It would save me time if the log said something
like:

  956a9202cd12 ("xen-pcifront: Xen PCI frontend driver.") added
  __initrefok because pcifront_backend_changed() called
  pcifront_try_connect() and pcifront_attach_devices(), which were
  both __devinit.

  The __devinit annotations were removed by 15856ad50bf5 ("PCI:
  Remove __dev* markings"), which made __initrefok unnecessary.

  Finally, bd721ea73e1f ("treewide: replace obsolete _refok by
  __ref") replaced __initrefok with __ref.

That might be too much for a commit log, but it shows that you've
done your homework and makes it easier to review (and helps people
make similar fixes elsewhere).  If it *is* too much, it's trivial
for a maintainer to cut it out.

More notes about my idiosyncracies:
https://lore.kernel.org/r/20171026223701.ga25...@bhelgaas-glaptop.roam.corp.google.com

> Signed-off-by: Sergio Miguéns Iglesias 
> ---
>  drivers/pci/xen-pcifront.c | 30 +++---
>  1 file changed, 19 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c
> index b7a8f3a1921f..427041c1e408 100644
> --- a/drivers/pci/xen-pcifront.c
> +++ b/drivers/pci/xen-pcifront.c
> @@ -115,7 +115,7 @@ static int do_pci_op(struct pcifront_device *pdev, struct 
> xen_pci_op *op)
>   struct xen_pci_op *active_op = &pdev->sh_info->op;
>   unsigned long irq_flags;
>   evtchn_port_t port = pdev->evtchn;
> - unsigned irq = pdev->irq;
> + unsigned int irq = pdev->irq;
>   s64 ns, ns_timeout;
>  
>   spin_lock_irqsave(&pdev->sh_info_lock, irq_flags);
> @@ -153,10 +153,10 @@ static int do_pci_op(struct pcifront_device *pdev, 
> struct xen_pci_op *op)
>   }
>  
>   /*
> - * We might lose backend service request since we
> - * reuse same evtchn with pci_conf backend response. So re-schedule
> - * aer pcifront service.
> - */
> +  * We might lose backend service request since we
> +  * reuse same evtchn with pci_conf backend response. So re-schedule
> +  * aer pcifront service.
> +  */
>   if (test_bit(_XEN_PCIB_active,
>   (unsigned long *)&pdev->sh_info->flags)) {
>   dev_err(&pdev->xdev->dev,
> @@ -414,7 +414,8 @@ static int pcifront_scan_bus(struct pcifront_device *pdev,
>   struct pci_dev *d;
>   unsigned int devfn;
>  
> - /* Scan the bus for functions and add.
> + /*
> +  * Scan the bus for functions and add.
>* We omit handling of PCI bridge attachment because pciback prevents
>* bridges from being exported.
>*/
> @@ -492,8 +493,10 @@ static int pcifront_scan_root(struct pcifront_device 
> *pdev,
>  
>   list_add(&bus_entry->list, &pdev->root_buses);
>  
> - /* pci_scan_root_bus skips devices which do not have a
> - * devfn==0. The pcifront_scan_bus enumerates all devfn. */
> + /*
> +  * pci_scan_root_bus skips devices which do not have a
> +  * devfn==0. The pcifront_scan_bus enumerates all devfn.
> +  */
>   err = p

Re: [PATCH v2] xen/pcifront: Removed unnecessary __ref annotation

2021-08-30 Thread Bjorn Helgaas
On Mon, Aug 30, 2021 at 10:14:26PM +0200, Sergio Miguéns Iglesias wrote:
> Thanks again for you answers!
> I am lerning a lot from your replys and I really appreciate it. Should I
> make a v3 patch and split that one into 2 different patches or would it
> be confusing?
> 
> I don't want to take more of your time with poor patches so I don't know
> if I should resend this one.

If it's already applied, it doesn't matter for this case.  But in this
situation I would generally post a v3 incorporating the feedback.  To
be respectful of reviewers' time, try to avoid posting more than one
or two revisions per week.  As long as you tag reposts appropriately
with v2, v3, etc (as you did), there's no confusion.

It's nice if you include a note about what changed between v1 and v2
in the cover letter or below the "---" line as was done here:

  
https://lore.kernel.org/linux-pci/8f9a13ac-8ab1-15ac-06cb-c131b488a...@siemens.com/



Re: [PATCH 10/12] xen-pcifront: this module is PV-only

2021-09-07 Thread Bjorn Helgaas
Update subject to follow conventions (use "git log --oneline
drivers/pci/Kconfig").  Should say what this patch does.

Commit log below should also say what this patch does.  Currently it's
part of the rationale for the change, but doesn't say what the patch
does.

On Tue, Sep 07, 2021 at 02:10:41PM +0200, Jan Beulich wrote:
> It's module init function does a xen_pv_domain() check first thing.
> Hence there's no point building it in non-PV configurations.

s/It's//   # xen-pcifront.o?

I see that CONFIG_XEN_PV is only mentioned in arch/x86, so
CONFIG_XEN_PV=y cannot be set on other arches.  Is the current
"depends on X86" just a reflection of that, or is it because of some
other x86 dependency in the code?

The connection between xen_pv_domain() and CONFIG_XEN_PV is not
completely obvious.

If you only build xen-pcifront.o when CONFIG_XEN_PV=y, and
xen_pv_domain() is true if and only if CONFIG_XEN_PV=y, why bother
calling xen_pv_domain() at all?

> Signed-off-by: Jan Beulich 
> 
> --- a/drivers/pci/Kconfig
> +++ b/drivers/pci/Kconfig
> @@ -110,7 +110,7 @@ config PCI_PF_STUB
>  
>  config XEN_PCIDEV_FRONTEND
>   tristate "Xen PCI Frontend"
> - depends on X86 && XEN
> + depends on XEN_PV
>   select PCI_XEN
>   select XEN_XENBUS_FRONTEND
>   default y
> 



Re: [PATCH 10/12] xen-pcifront: this module is PV-only

2021-09-07 Thread Bjorn Helgaas
On Tue, Sep 07, 2021 at 06:14:16PM +0200, Jan Beulich wrote:
> On 07.09.2021 17:30, Bjorn Helgaas wrote:
> > Update subject to follow conventions (use "git log --oneline
> > drivers/pci/Kconfig").  Should say what this patch does.
> 
> I can change that; I don't think it'll carry any different information.

It might not be different information, but if you use the same
sentence structure and formatting as all the other subject lines,
they're easier to read as a group.

> > Commit log below should also say what this patch does.  Currently it's
> > part of the rationale for the change, but doesn't say what the patch
> > does.
> 
> "There's no point building ..." to me is as good as "Don't build ...".
> But oh well, I can adjust ...
> 
> > On Tue, Sep 07, 2021 at 02:10:41PM +0200, Jan Beulich wrote:
> >> It's module init function does a xen_pv_domain() check first thing.
> >> Hence there's no point building it in non-PV configurations.
> > 
> > s/It's/ 
> I don't understand this - how is "module init function" not clear
> enough?

Saying "module init function" makes the reader do extra work to
figure out what function you are referring to.  I had to look
at drivers/pci/Makefile to find the module name, then look at
drivers/pci/xen-pcifront.c to look for the init function.
Saying pcifront_init() makes it trivial to look *there*.

> > s/building it/building /   # xen-pcifront.o?
> 
> The driver name is already part of the subject; I didn't think I
> need to repeat that one here.

Why be vague when it's so easy to be explicit and save everybody else
the effort?  Part of the disconnect here is that the subject line is
not *part* of the commit log, so the commit log should make sense even
if you can't see the subject line.  It's like an essay that should
make sense without its title.

Most of this *is* trivial, I agree.  Just minor hiccups in the process
of reading.

> > I see that CONFIG_XEN_PV is only mentioned in arch/x86, so
> > CONFIG_XEN_PV=y cannot be set on other arches.  Is the current
> > "depends on X86" just a reflection of that, or is it because of some
> > other x86 dependency in the code?
> > 
> > The connection between xen_pv_domain() and CONFIG_XEN_PV is not
> > completely obvious.
> > 
> > If you only build xen-pcifront.o when CONFIG_XEN_PV=y, and
> > xen_pv_domain() is true if and only if CONFIG_XEN_PV=y, why bother
> > calling xen_pv_domain() at all?
> 
> Because XEN_PV=y only _enables_ the kernel to run in PV mode. It
> may be enabled to also run in HVM and/or PVH modes. And it may
> then _run_ in any of the enabled modes. IOW xen_pv_domain() will
> always return false when !XEN_PV; no other implication is valid.
> I don't think this basic concept needs explaining in a simple
> patch like this. Instead I think the config option in question,
> despite living in drivers/pci/Kconfig, should be under "XEN
> HYPERVISOR INTERFACE" maintainership. I realize that's not even
> expressable in ./MAINTAINERS. I wonder why the option was put
> there in the first place, rather than in drivers/xen/Kconfig.

No doubt it's obvious to Xen experts.  Unfortunately I am not one.

Evidently there's no real dependency on the X86 arch, which makes me
wonder why the "depends on X86" was there in the first place.

Bjorn



Re: [PATCH v2 2/4] PCI: only build xen-pcifront in PV-enabled environments

2021-09-17 Thread Bjorn Helgaas
s/only/Only/ in subject

On Fri, Sep 17, 2021 at 12:48:03PM +0200, Jan Beulich wrote:
> The driver's module init function, pcifront_init(), invokes
> xen_pv_domain() first thing. That construct produces constant "false"
> when !CONFIG_XEN_PV. Hence there's no point building the driver in
> non-PV configurations.

Thanks for these bread crumbs.  xen_domain_type is set to
XEN_PV_DOMAIN only by xen_start_kernel() in enlighten_pv.c, which is
only built when CONFIG_XEN_PV=y, so even I can verify this :)

> Drop the (now implicit and generally wrong) X86 dependency: At present,
> XEN_PV con only be set when X86 is also enabled. In general an
> architecture supporting Xen PV (and PCI) would want to have this driver
> built.

s/con only/can only/

> Signed-off-by: Jan Beulich 
> Reviewed-by: Stefano Stabellini 

Acked-by: Bjorn Helgaas 

> ---
> v2: Title and description redone.
> 
> --- a/drivers/pci/Kconfig
> +++ b/drivers/pci/Kconfig
> @@ -110,7 +110,7 @@ config PCI_PF_STUB
>  
>  config XEN_PCIDEV_FRONTEND
>   tristate "Xen PCI Frontend"
> - depends on X86 && XEN
> + depends on XEN_PV
>   select PCI_XEN
>   select XEN_XENBUS_FRONTEND
>   default y
> 



Re: [PATCH] PCI/MSI: msix_setup_msi_descs: Restore logic for msi_attrib.can_mask

2022-02-10 Thread Bjorn Helgaas
[+cc Jason, since you reviewed the original commit]

On Sat, Jan 22, 2022 at 02:10:01AM +0100, Josef Johansson wrote:
> From: Josef Johansson 
> 
> PCI/MSI: msix_setup_msi_descs: Restore logic for msi_attrib.can_mask

Please match the form and style of previous subject lines (in
particular, omit "msix_setup_msi_descs:").

> Commit 71020a3c0dff4 ("PCI/MSI: Use msi_add_msi_desc()") modifies
> the logic of checking msi_attrib.can_mask, without any reason.
> 
> This commits restores that logic.

I agree, this looks like a typo in 71020a3c0dff4, but I might be
missing something, so Thomas should take a look, and I added Jason
since he reviewed it.

Since it was merged by Thomas, I'll let him take care of this, too.
If it *is* a typo, the fix looks like v5.17 material.

Before: 
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/pci/msi/msi.c?id=71020a3c0dff4%5E#n522
After:  
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/pci/msi/msi.c?id=71020a3c0dff4#n520

> Fixes: 71020a3c0dff4 ("PCI/MSI: Use msi_add_msi_desc()")
> Signed-off-by: Josef Johansson 
> 
> ---
> Trying to fix a NULL BUG in the NVMe MSIX implementation I stumbled upon this 
> code,
> which ironically was what my last MSI patch resulted into.
> 
> I don't see any reason why this logic was change, nor do I have the 
> possibility
> to see if anything works with my patch or without, since the kernel crashes
> in other places.
> 
> As such this is still untested, but as far as I can tell it should restore
> functionality.
> 
> Re-sending since it was rejected by linux-...@vger.kernel.org due to HTML 
> contents.
> Sorry about that.
> 
> CC xen-devel since it very much relates to Xen kernel (via 
> pci_msi_ignore_mask).
> ---
> 
> diff --git a/drivers/pci/msi/msi.c b/drivers/pci/msi/msi.c
> index c19c7ca58186..146e7b9a01cc 100644
> --- a/drivers/pci/msi/msi.c
> +++ b/drivers/pci/msi/msi.c
> @@ -526,7 +526,7 @@ static int msix_setup_msi_descs(struct pci_dev *dev, void 
> __iomem *base,
>   desc.pci.msi_attrib.can_mask = !pci_msi_ignore_mask &&
>  !desc.pci.msi_attrib.is_virtual;
>  
> - if (!desc.pci.msi_attrib.can_mask) {
> + if (desc.pci.msi_attrib.can_mask) {
>   addr = pci_msix_desc_addr(&desc);
>   desc.pci.msix_ctrl = readl(addr + 
> PCI_MSIX_ENTRY_VECTOR_CTRL);
>   }
> 
> --
> 2.31.1
> 



Re: [PATCH] PCI/MSI: msix_setup_msi_descs: Restore logic for msi_attrib.can_mask

2022-02-11 Thread Bjorn Helgaas
On Fri, Feb 11, 2022 at 01:10:22AM +0100, Josef Johansson wrote:
> On 2/11/22 00:55, Bjorn Helgaas wrote:
> > On Sat, Jan 22, 2022 at 02:10:01AM +0100, Josef Johansson wrote:
> >> From: Josef Johansson 
> >>
> >> PCI/MSI: msix_setup_msi_descs: Restore logic for msi_attrib.can_mask
> > Please match the form and style of previous subject lines (in
> > particular, omit "msix_setup_msi_descs:").
> Would this subject suffice?
> PCI/MSI: Correct use of can_mask in msi_add_msi_desc()

Looks good to me!

Bjorn



Re: [patch V2 02/23] PCI/MSI: Fix pci_irq_vector()/pci_irq_get_affinity()

2021-12-07 Thread Bjorn Helgaas
On Mon, Dec 06, 2021 at 11:27:26PM +0100, Thomas Gleixner wrote:
> pci_irq_vector() and pci_irq_get_affinity() use the list position to find the
> MSI-X descriptor at a given index. That's correct for the normal case where
> the entry number is the same as the list position.
> 
> But it's wrong for cases where MSI-X was allocated with an entries array
> describing sparse entry numbers into the hardware message descriptor
> table. That's inconsistent at best.
> 
> Make it always check the entry number because that's what the zero base
> index really means. This change won't break existing users which use a
> sparse entries array for allocation because these users retrieve the Linux
> interrupt number from the entries array after allocation and none of them
> uses pci_irq_vector() or pci_irq_get_affinity().
> 
> Fixes: aff171641d18 ("PCI: Provide sensible IRQ vector alloc/free routines")
> Signed-off-by: Thomas Gleixner 
> Tested-by: Juergen Gross 
> Reviewed-by: Jason Gunthorpe 

Acked-by: Bjorn Helgaas 

> ---
> V2: Fix typo in subject - Jason
> ---
>  drivers/pci/msi.c |   26 ++
>  1 file changed, 18 insertions(+), 8 deletions(-)
> 
> --- a/drivers/pci/msi.c
> +++ b/drivers/pci/msi.c
> @@ -1187,19 +1187,24 @@ EXPORT_SYMBOL(pci_free_irq_vectors);
>  
>  /**
>   * pci_irq_vector - return Linux IRQ number of a device vector
> - * @dev: PCI device to operate on
> - * @nr: device-relative interrupt vector index (0-based).
> + * @dev: PCI device to operate on
> + * @nr:  Interrupt vector index (0-based)
> + *
> + * @nr has the following meanings depending on the interrupt mode:
> + *   MSI-X:  The index in the MSI-X vector table
> + *   MSI:The index of the enabled MSI vectors
> + *   INTx:   Must be 0
> + *
> + * Return: The Linux interrupt number or -EINVAl if @nr is out of range.
>   */
>  int pci_irq_vector(struct pci_dev *dev, unsigned int nr)
>  {
>   if (dev->msix_enabled) {
>   struct msi_desc *entry;
> - int i = 0;
>  
>   for_each_pci_msi_entry(entry, dev) {
> - if (i == nr)
> + if (entry->msi_attrib.entry_nr == nr)
>   return entry->irq;
> - i++;
>   }
>   WARN_ON_ONCE(1);
>   return -EINVAL;
> @@ -1223,17 +1228,22 @@ EXPORT_SYMBOL(pci_irq_vector);
>   * pci_irq_get_affinity - return the affinity of a particular MSI vector
>   * @dev: PCI device to operate on
>   * @nr:  device-relative interrupt vector index (0-based).
> + *
> + * @nr has the following meanings depending on the interrupt mode:
> + *   MSI-X:  The index in the MSI-X vector table
> + *   MSI:The index of the enabled MSI vectors
> + *   INTx:   Must be 0
> + *
> + * Return: A cpumask pointer or NULL if @nr is out of range
>   */
>  const struct cpumask *pci_irq_get_affinity(struct pci_dev *dev, int nr)
>  {
>   if (dev->msix_enabled) {
>   struct msi_desc *entry;
> - int i = 0;
>  
>   for_each_pci_msi_entry(entry, dev) {
> - if (i == nr)
> + if (entry->msi_attrib.entry_nr == nr)
>   return &entry->affinity->mask;
> - i++;
>   }
>   WARN_ON_ONCE(1);
>   return NULL;
> 



Re: [patch V2 06/23] PCI/MSI: Make pci_msi_domain_write_msg() static

2021-12-07 Thread Bjorn Helgaas
On Mon, Dec 06, 2021 at 11:27:33PM +0100, Thomas Gleixner wrote:
> There is no point to have this function public as it is set by the PCI core
> anyway when a PCI/MSI irqdomain is created.
> 
> Signed-off-by: Thomas Gleixner 
> Tested-by: Juergen Gross 
> Reviewed-by: Jason Gunthorpe 

Acked-by: Bjorn Helgaas# PCI

> ---
>  drivers/irqchip/irq-gic-v2m.c|1 -
>  drivers/irqchip/irq-gic-v3-its-pci-msi.c |1 -
>  drivers/irqchip/irq-gic-v3-mbi.c |1 -
>  drivers/pci/msi.c|2 +-
>  include/linux/msi.h  |1 -
>  5 files changed, 1 insertion(+), 5 deletions(-)
> 
> --- a/drivers/irqchip/irq-gic-v2m.c
> +++ b/drivers/irqchip/irq-gic-v2m.c
> @@ -88,7 +88,6 @@ static struct irq_chip gicv2m_msi_irq_ch
>   .irq_mask   = gicv2m_mask_msi_irq,
>   .irq_unmask = gicv2m_unmask_msi_irq,
>   .irq_eoi= irq_chip_eoi_parent,
> - .irq_write_msi_msg  = pci_msi_domain_write_msg,
>  };
>  
>  static struct msi_domain_info gicv2m_msi_domain_info = {
> --- a/drivers/irqchip/irq-gic-v3-its-pci-msi.c
> +++ b/drivers/irqchip/irq-gic-v3-its-pci-msi.c
> @@ -28,7 +28,6 @@ static struct irq_chip its_msi_irq_chip
>   .irq_unmask = its_unmask_msi_irq,
>   .irq_mask   = its_mask_msi_irq,
>   .irq_eoi= irq_chip_eoi_parent,
> - .irq_write_msi_msg  = pci_msi_domain_write_msg,
>  };
>  
>  static int its_pci_msi_vec_count(struct pci_dev *pdev, void *data)
> --- a/drivers/irqchip/irq-gic-v3-mbi.c
> +++ b/drivers/irqchip/irq-gic-v3-mbi.c
> @@ -171,7 +171,6 @@ static struct irq_chip mbi_msi_irq_chip
>   .irq_unmask = mbi_unmask_msi_irq,
>   .irq_eoi= irq_chip_eoi_parent,
>   .irq_compose_msi_msg= mbi_compose_msi_msg,
> - .irq_write_msi_msg  = pci_msi_domain_write_msg,
>  };
>  
>  static struct msi_domain_info mbi_msi_domain_info = {
> --- a/drivers/pci/msi.c
> +++ b/drivers/pci/msi.c
> @@ -1281,7 +1281,7 @@ EXPORT_SYMBOL_GPL(msi_desc_to_pci_sysdat
>   * @irq_data:Pointer to interrupt data of the MSI interrupt
>   * @msg: Pointer to the message
>   */
> -void pci_msi_domain_write_msg(struct irq_data *irq_data, struct msi_msg *msg)
> +static void pci_msi_domain_write_msg(struct irq_data *irq_data, struct 
> msi_msg *msg)
>  {
>   struct msi_desc *desc = irq_data_get_msi_desc(irq_data);
>  
> --- a/include/linux/msi.h
> +++ b/include/linux/msi.h
> @@ -455,7 +455,6 @@ void *platform_msi_get_host_data(struct
>  #endif /* CONFIG_GENERIC_MSI_IRQ_DOMAIN */
>  
>  #ifdef CONFIG_PCI_MSI_IRQ_DOMAIN
> -void pci_msi_domain_write_msg(struct irq_data *irq_data, struct msi_msg 
> *msg);
>  struct irq_domain *pci_msi_create_irq_domain(struct fwnode_handle *fwnode,
>struct msi_domain_info *info,
>struct irq_domain *parent);
> 



Re: [patch V2 07/23] PCI/MSI: Remove msi_desc_to_pci_sysdata()

2021-12-07 Thread Bjorn Helgaas
On Mon, Dec 06, 2021 at 11:27:34PM +0100, Thomas Gleixner wrote:
> Last user is gone long ago.
> 
> Signed-off-by: Thomas Gleixner 
> Tested-by: Juergen Gross 
> Reviewed-by: Jason Gunthorpe 

Acked-by: Bjorn Helgaas 

> ---
>  drivers/pci/msi.c   |8 
>  include/linux/msi.h |5 -
>  2 files changed, 13 deletions(-)
> 
> --- a/drivers/pci/msi.c
> +++ b/drivers/pci/msi.c
> @@ -1267,14 +1267,6 @@ struct pci_dev *msi_desc_to_pci_dev(stru
>  }
>  EXPORT_SYMBOL(msi_desc_to_pci_dev);
>  
> -void *msi_desc_to_pci_sysdata(struct msi_desc *desc)
> -{
> - struct pci_dev *dev = msi_desc_to_pci_dev(desc);
> -
> - return dev->bus->sysdata;
> -}
> -EXPORT_SYMBOL_GPL(msi_desc_to_pci_sysdata);
> -
>  #ifdef CONFIG_PCI_MSI_IRQ_DOMAIN
>  /**
>   * pci_msi_domain_write_msg - Helper to write MSI message to PCI config space
> --- a/include/linux/msi.h
> +++ b/include/linux/msi.h
> @@ -218,13 +218,8 @@ static inline void msi_desc_set_iommu_co
>   for_each_msi_entry((desc), &(pdev)->dev)
>  
>  struct pci_dev *msi_desc_to_pci_dev(struct msi_desc *desc);
> -void *msi_desc_to_pci_sysdata(struct msi_desc *desc);
>  void pci_write_msi_msg(unsigned int irq, struct msi_msg *msg);
>  #else /* CONFIG_PCI_MSI */
> -static inline void *msi_desc_to_pci_sysdata(struct msi_desc *desc)
> -{
> - return NULL;
> -}
>  static inline void pci_write_msi_msg(unsigned int irq, struct msi_msg *msg)
>  {
>  }
> 



Re: [patch V2 08/23] PCI/sysfs: Use pci_irq_vector()

2021-12-07 Thread Bjorn Helgaas
On Mon, Dec 06, 2021 at 11:27:36PM +0100, Thomas Gleixner wrote:
> instead of fiddling with msi descriptors.
> 
> Signed-off-by: Thomas Gleixner 
> Tested-by: Juergen Gross 
> Reviewed-by: Jason Gunthorpe 

Acked-by: Bjorn Helgaas 

s/msi/MSI/ above if you have a chance.  Nice cleanup, thanks!

> ---
>  drivers/pci/pci-sysfs.c |7 ++-
>  1 file changed, 2 insertions(+), 5 deletions(-)
> 
> --- a/drivers/pci/pci-sysfs.c
> +++ b/drivers/pci/pci-sysfs.c
> @@ -62,11 +62,8 @@ static ssize_t irq_show(struct device *d
>* For MSI, show the first MSI IRQ; for all other cases including
>* MSI-X, show the legacy INTx IRQ.
>*/
> - if (pdev->msi_enabled) {
> - struct msi_desc *desc = first_pci_msi_entry(pdev);
> -
> - return sysfs_emit(buf, "%u\n", desc->irq);
> - }
> + if (pdev->msi_enabled)
> + return sysfs_emit(buf, "%u\n", pci_irq_vector(pdev, 0));
>  #endif
>  
>   return sysfs_emit(buf, "%u\n", pdev->irq);
> 



Re: [patch V2 12/23] PCI/MSI: Make arch_restore_msi_irqs() less horrible.

2021-12-07 Thread Bjorn Helgaas
On Mon, Dec 06, 2021 at 11:27:42PM +0100, Thomas Gleixner wrote:
> Make arch_restore_msi_irqs() return a boolean which indicates whether the
> core code should restore the MSI message or not. Get rid of the indirection
> in x86.
> 
> Signed-off-by: Thomas Gleixner 
> Tested-by: Juergen Gross 
> Reviewed-by: Jason Gunthorpe 
> Cc: x...@kernel.org
> Cc: xen-devel@lists.xenproject.org
> Cc: Christian Borntraeger 
> Cc: Heiko Carstens 

Acked-by: Bjorn Helgaas# PCI

> ---
>  arch/s390/pci/pci_irq.c   |4 +-
>  arch/x86/include/asm/x86_init.h   |6 ---
>  arch/x86/include/asm/xen/hypervisor.h |8 +
>  arch/x86/kernel/apic/msi.c|6 +++
>  arch/x86/kernel/x86_init.c|   12 ---
>  arch/x86/pci/xen.c|   13 
>  drivers/pci/msi.c |   54 
> +++---
>  include/linux/msi.h   |7 +---
>  8 files changed, 45 insertions(+), 65 deletions(-)
> 
> --- a/arch/s390/pci/pci_irq.c
> +++ b/arch/s390/pci/pci_irq.c
> @@ -387,13 +387,13 @@ void arch_teardown_msi_irqs(struct pci_d
>   airq_iv_free(zpci_ibv[0], zdev->msi_first_bit, 
> zdev->msi_nr_irqs);
>  }
>  
> -void arch_restore_msi_irqs(struct pci_dev *pdev)
> +bool arch_restore_msi_irqs(struct pci_dev *pdev)
>  {
>   struct zpci_dev *zdev = to_zpci(pdev);
>  
>   if (!zdev->irqs_registered)
>   zpci_set_irq(zdev);
> - default_restore_msi_irqs(pdev);
> + return true;
>  }
>  
>  static struct airq_struct zpci_airq = {
> --- a/arch/x86/include/asm/x86_init.h
> +++ b/arch/x86/include/asm/x86_init.h
> @@ -289,12 +289,6 @@ struct x86_platform_ops {
>   struct x86_hyper_runtime hyper;
>  };
>  
> -struct pci_dev;
> -
> -struct x86_msi_ops {
> - void (*restore_msi_irqs)(struct pci_dev *dev);
> -};
> -
>  struct x86_apic_ops {
>   unsigned int(*io_apic_read)   (unsigned int apic, unsigned int reg);
>   void(*restore)(void);
> --- a/arch/x86/include/asm/xen/hypervisor.h
> +++ b/arch/x86/include/asm/xen/hypervisor.h
> @@ -57,6 +57,14 @@ static inline bool __init xen_x2apic_par
>  }
>  #endif
>  
> +struct pci_dev;
> +
> +#ifdef CONFIG_XEN_DOM0
> +bool xen_initdom_restore_msi(struct pci_dev *dev);
> +#else
> +static inline bool xen_initdom_restore_msi(struct pci_dev *dev) { return 
> true; }
> +#endif
> +
>  #ifdef CONFIG_HOTPLUG_CPU
>  void xen_arch_register_cpu(int num);
>  void xen_arch_unregister_cpu(int num);
> --- a/arch/x86/kernel/apic/msi.c
> +++ b/arch/x86/kernel/apic/msi.c
> @@ -19,6 +19,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  struct irq_domain *x86_pci_msi_default_domain __ro_after_init;
>  
> @@ -345,3 +346,8 @@ void dmar_free_hwirq(int irq)
>   irq_domain_free_irqs(irq, 1);
>  }
>  #endif
> +
> +bool arch_restore_msi_irqs(struct pci_dev *dev)
> +{
> + return xen_initdom_restore_msi(dev);
> +}
> --- a/arch/x86/kernel/x86_init.c
> +++ b/arch/x86/kernel/x86_init.c
> @@ -145,18 +145,6 @@ struct x86_platform_ops x86_platform __r
>  
>  EXPORT_SYMBOL_GPL(x86_platform);
>  
> -#if defined(CONFIG_PCI_MSI)
> -struct x86_msi_ops x86_msi __ro_after_init = {
> - .restore_msi_irqs   = default_restore_msi_irqs,
> -};
> -
> -/* MSI arch specific hooks */
> -void arch_restore_msi_irqs(struct pci_dev *dev)
> -{
> - x86_msi.restore_msi_irqs(dev);
> -}
> -#endif
> -
>  struct x86_apic_ops x86_apic_ops __ro_after_init = {
>   .io_apic_read   = native_io_apic_read,
>   .restore= native_restore_boot_irq_mode,
> --- a/arch/x86/pci/xen.c
> +++ b/arch/x86/pci/xen.c
> @@ -351,10 +351,13 @@ static int xen_initdom_setup_msi_irqs(st
>   return ret;
>  }
>  
> -static void xen_initdom_restore_msi_irqs(struct pci_dev *dev)
> +bool xen_initdom_restore_msi(struct pci_dev *dev)
>  {
>   int ret = 0;
>  
> + if (!xen_initial_domain())
> + return true;
> +
>   if (pci_seg_supported) {
>   struct physdev_pci_device restore_ext;
>  
> @@ -375,10 +378,10 @@ static void xen_initdom_restore_msi_irqs
>   ret = HYPERVISOR_physdev_op(PHYSDEVOP_restore_msi, &restore);
>   WARN(ret && ret != -ENOSYS, "restore_msi -> %d\n", ret);
>   }
> + return false;
>  }
>  #else /* CONFIG_XEN_PV_DOM0 */
>  #define xen_initdom_setup_msi_irqs   NULL
> -#define xen_initdom_restore_msi_irqs NULL
>  #endif /* !CONFIG_XEN_PV_DOM0 */
>  
>  static void xen_teardown_msi_irqs(struct pci_dev *dev)
> @@ -466,12 +469,10 @

Re: [patch V2 13/23] PCI/MSI: Cleanup include zoo

2021-12-07 Thread Bjorn Helgaas
On Mon, Dec 06, 2021 at 11:27:44PM +0100, Thomas Gleixner wrote:
> Get rid of the pile of unneeded includes which accumulated over time.
> 
> Signed-off-by: Thomas Gleixner 
> Tested-by: Juergen Gross 
> Reviewed-by: Jason Gunthorpe 

Acked-by: Bjorn Helgaas 

Nice, thanks!

> ---
> V2: Address build fail on powerpc - Cedric
> ---
>  drivers/pci/msi.c |   16 
>  1 file changed, 4 insertions(+), 12 deletions(-)
> 
> --- a/drivers/pci/msi.c
> +++ b/drivers/pci/msi.c
> @@ -7,22 +7,14 @@
>   * Copyright (C) 2016 Christoph Hellwig.
>   */
>  
> +#include 
>  #include 
> -#include 
> -#include 
> -#include 
>  #include 
> -#include 
> -#include 
> -#include 
> -#include 
> -#include 
> -#include 
> -#include 
> -#include 
> -#include 
> +#include 
>  #include 
> +#include 
>  #include 
> +#include 
>  
>  #include "pci.h"
>  
> 



Re: [patch V2 14/23] PCI/MSI: Make msix_update_entries() smarter

2021-12-07 Thread Bjorn Helgaas
On Mon, Dec 06, 2021 at 11:27:46PM +0100, Thomas Gleixner wrote:
> No need to walk the descriptors and check for each one whether the entries
> pointer function argument is NULL. Do it once.
> 
> Signed-off-by: Thomas Gleixner 
> Tested-by: Juergen Gross 
> Reviewed-by: Jason Gunthorpe 

Acked-by: Bjorn Helgaas 

> ---
>  drivers/pci/msi.c |4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> --- a/drivers/pci/msi.c
> +++ b/drivers/pci/msi.c
> @@ -642,8 +642,8 @@ static void msix_update_entries(struct p
>  {
>   struct msi_desc *entry;
>  
> - for_each_pci_msi_entry(entry, dev) {
> - if (entries) {
> + if (entries) {
> + for_each_pci_msi_entry(entry, dev) {
>   entries->vector = entry->irq;
>   entries++;
>   }
> 



Re: [patch V2 17/23] PCI/MSI: Split out !IRQDOMAIN code

2021-12-07 Thread Bjorn Helgaas
On Mon, Dec 06, 2021 at 11:27:51PM +0100, Thomas Gleixner wrote:
> Split out the non irqdomain code into its own file.
> 
> Signed-off-by: Thomas Gleixner 
> Tested-by: Juergen Gross 
> Reviewed-by: Jason Gunthorpe 

Acked-by: Bjorn Helgaas 

> ---
> V2: Add proper includes and fix variable name - Cedric
> ---
>  drivers/pci/msi/Makefile |5 ++--
>  drivers/pci/msi/legacy.c |   52 
> +++
>  drivers/pci/msi/msi.c|   46 -
>  3 files changed, 55 insertions(+), 48 deletions(-)
> 
> --- a/drivers/pci/msi/Makefile
> +++ b/drivers/pci/msi/Makefile
> @@ -1,5 +1,6 @@
>  # SPDX-License-Identifier: GPL-2.0
>  #
>  # Makefile for the PCI/MSI
> -obj-$(CONFIG_PCI)+= pcidev_msi.o
> -obj-$(CONFIG_PCI_MSI)+= msi.o
> +obj-$(CONFIG_PCI)+= pcidev_msi.o
> +obj-$(CONFIG_PCI_MSI)+= msi.o
> +obj-$(CONFIG_PCI_MSI_ARCH_FALLBACKS) += legacy.o
> --- /dev/null
> +++ b/drivers/pci/msi/legacy.c
> @@ -0,0 +1,52 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * PCI Message Signaled Interrupt (MSI).
> + *
> + * Legacy architecture specific setup and teardown mechanism.
> + */
> +#include 
> +#include 
> +
> +/* Arch hooks */
> +int __weak arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
> +{
> + return -EINVAL;
> +}
> +
> +void __weak arch_teardown_msi_irq(unsigned int irq)
> +{
> +}
> +
> +int __weak arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
> +{
> + struct msi_desc *desc;
> + int ret;
> +
> + /*
> +  * If an architecture wants to support multiple MSI, it needs to
> +  * override arch_setup_msi_irqs()
> +  */
> + if (type == PCI_CAP_ID_MSI && nvec > 1)
> + return 1;
> +
> + for_each_pci_msi_entry(desc, dev) {
> + ret = arch_setup_msi_irq(dev, desc);
> + if (ret)
> + return ret < 0 ? ret : -ENOSPC;
> + }
> +
> + return 0;
> +}
> +
> +void __weak arch_teardown_msi_irqs(struct pci_dev *dev)
> +{
> + struct msi_desc *desc;
> + int i;
> +
> + for_each_pci_msi_entry(desc, dev) {
> + if (desc->irq) {
> + for (i = 0; i < desc->nvec_used; i++)
> + arch_teardown_msi_irq(desc->irq + i);
> + }
> + }
> +}
> --- a/drivers/pci/msi/msi.c
> +++ b/drivers/pci/msi/msi.c
> @@ -50,52 +50,6 @@ static void pci_msi_teardown_msi_irqs(st
>  #define pci_msi_teardown_msi_irqsarch_teardown_msi_irqs
>  #endif
>  
> -#ifdef CONFIG_PCI_MSI_ARCH_FALLBACKS
> -/* Arch hooks */
> -int __weak arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
> -{
> - return -EINVAL;
> -}
> -
> -void __weak arch_teardown_msi_irq(unsigned int irq)
> -{
> -}
> -
> -int __weak arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
> -{
> - struct msi_desc *entry;
> - int ret;
> -
> - /*
> -  * If an architecture wants to support multiple MSI, it needs to
> -  * override arch_setup_msi_irqs()
> -  */
> - if (type == PCI_CAP_ID_MSI && nvec > 1)
> - return 1;
> -
> - for_each_pci_msi_entry(entry, dev) {
> - ret = arch_setup_msi_irq(dev, entry);
> - if (ret < 0)
> - return ret;
> - if (ret > 0)
> - return -ENOSPC;
> - }
> -
> - return 0;
> -}
> -
> -void __weak arch_teardown_msi_irqs(struct pci_dev *dev)
> -{
> - int i;
> - struct msi_desc *entry;
> -
> - for_each_pci_msi_entry(entry, dev)
> - if (entry->irq)
> - for (i = 0; i < entry->nvec_used; i++)
> - arch_teardown_msi_irq(entry->irq + i);
> -}
> -#endif /* CONFIG_PCI_MSI_ARCH_FALLBACKS */
> -
>  /*
>   * PCI 2.3 does not specify mask bits for each MSI interrupt.  Attempting to
>   * mask all MSI interrupts by clearing the MSI enable bit does not work
> 



Re: [patch V2 18/23] PCI/MSI: Split out irqdomain code

2021-12-07 Thread Bjorn Helgaas
On Mon, Dec 06, 2021 at 11:27:52PM +0100, Thomas Gleixner wrote:
> Move the irqdomain specific code into it's own file.

s/it's/its/

> Signed-off-by: Thomas Gleixner 
> Tested-by: Juergen Gross 
> Reviewed-by: Jason Gunthorpe 

Acked-by: Bjorn Helgaas 

> ---
>  drivers/pci/msi/Makefile|1 
>  drivers/pci/msi/irqdomain.c |  279 ++
>  drivers/pci/msi/legacy.c|   13 +
>  drivers/pci/msi/msi.c   |  319 
> +---
>  drivers/pci/msi/msi.h   |   39 +
>  include/linux/msi.h |   11 -
>  6 files changed, 340 insertions(+), 322 deletions(-)
> 
> --- a/drivers/pci/msi/Makefile
> +++ b/drivers/pci/msi/Makefile
> @@ -3,4 +3,5 @@
>  # Makefile for the PCI/MSI
>  obj-$(CONFIG_PCI)+= pcidev_msi.o
>  obj-$(CONFIG_PCI_MSI)+= msi.o
> +obj-$(CONFIG_PCI_MSI_IRQ_DOMAIN) += irqdomain.o
>  obj-$(CONFIG_PCI_MSI_ARCH_FALLBACKS) += legacy.o
> --- /dev/null
> +++ b/drivers/pci/msi/irqdomain.c
> @@ -0,0 +1,279 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * PCI Message Signaled Interrupt (MSI) - irqdomain support
> + */
> +#include 
> +#include 
> +#include 
> +
> +#include "msi.h"
> +
> +int pci_msi_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
> +{
> + struct irq_domain *domain;
> +
> + domain = dev_get_msi_domain(&dev->dev);
> + if (domain && irq_domain_is_hierarchy(domain))
> + return msi_domain_alloc_irqs(domain, &dev->dev, nvec);
> +
> + return pci_msi_legacy_setup_msi_irqs(dev, nvec, type);
> +}
> +
> +void pci_msi_teardown_msi_irqs(struct pci_dev *dev)
> +{
> + struct irq_domain *domain;
> +
> + domain = dev_get_msi_domain(&dev->dev);
> + if (domain && irq_domain_is_hierarchy(domain))
> + msi_domain_free_irqs(domain, &dev->dev);
> + else
> + pci_msi_legacy_teardown_msi_irqs(dev);
> +}
> +
> +/**
> + * pci_msi_domain_write_msg - Helper to write MSI message to PCI config space
> + * @irq_data:Pointer to interrupt data of the MSI interrupt
> + * @msg: Pointer to the message
> + */
> +static void pci_msi_domain_write_msg(struct irq_data *irq_data, struct 
> msi_msg *msg)
> +{
> + struct msi_desc *desc = irq_data_get_msi_desc(irq_data);
> +
> + /*
> +  * For MSI-X desc->irq is always equal to irq_data->irq. For
> +  * MSI only the first interrupt of MULTI MSI passes the test.
> +  */
> + if (desc->irq == irq_data->irq)
> + __pci_write_msi_msg(desc, msg);
> +}
> +
> +/**
> + * pci_msi_domain_calc_hwirq - Generate a unique ID for an MSI source
> + * @desc:Pointer to the MSI descriptor
> + *
> + * The ID number is only used within the irqdomain.
> + */
> +static irq_hw_number_t pci_msi_domain_calc_hwirq(struct msi_desc *desc)
> +{
> + struct pci_dev *dev = msi_desc_to_pci_dev(desc);
> +
> + return (irq_hw_number_t)desc->pci.msi_attrib.entry_nr |
> + pci_dev_id(dev) << 11 |
> + (pci_domain_nr(dev->bus) & 0x) << 27;
> +}
> +
> +static inline bool pci_msi_desc_is_multi_msi(struct msi_desc *desc)
> +{
> + return !desc->pci.msi_attrib.is_msix && desc->nvec_used > 1;
> +}
> +
> +/**
> + * pci_msi_domain_check_cap - Verify that @domain supports the capabilities
> + * for @dev
> + * @domain:  The interrupt domain to check
> + * @info:The domain info for verification
> + * @dev: The device to check
> + *
> + * Returns:
> + *  0 if the functionality is supported
> + *  1 if Multi MSI is requested, but the domain does not support it
> + *  -ENOTSUPP otherwise
> + */
> +int pci_msi_domain_check_cap(struct irq_domain *domain,
> +  struct msi_domain_info *info, struct device *dev)
> +{
> + struct msi_desc *desc = first_pci_msi_entry(to_pci_dev(dev));
> +
> + /* Special handling to support __pci_enable_msi_range() */
> + if (pci_msi_desc_is_multi_msi(desc) &&
> + !(info->flags & MSI_FLAG_MULTI_PCI_MSI))
> + return 1;
> + else if (desc->pci.msi_attrib.is_msix && !(info->flags & 
> MSI_FLAG_PCI_MSIX))
> + return -ENOTSUPP;
> +
> + return 0;
> +}
> +
> +static int pci_msi_domain_handle_error(struct irq_domain *domain,
> +struct msi_desc *desc, int error)
> +{
> + /* Special handling to support __pci_enable_msi_range() */
> + if (pci_msi_desc_is_

Re: [patch V2 19/23] PCI/MSI: Sanitize MSIX table map handling

2021-12-07 Thread Bjorn Helgaas
On Mon, Dec 06, 2021 at 11:27:54PM +0100, Thomas Gleixner wrote:
> Unmapping the MSIX base mapping in the loops which allocate/free MSI
> desciptors is daft and in the way of allowing runtime expansion of MSI-X
> descriptors.

s/MSIX/MSI-X/ (subject and first use in commit log)
s/desciptors/descriptors/

> Store the mapping in struct pci_dev and free it after freeing the MSI-X
> descriptors.
> 
> Signed-off-by: Thomas Gleixner 
> Tested-by: Juergen Gross 
> Reviewed-by: Jason Gunthorpe 

Acked-by: Bjorn Helgaas 

> ---
>  drivers/pci/msi/msi.c |   18 --
>  include/linux/pci.h   |1 +
>  2 files changed, 9 insertions(+), 10 deletions(-)
> 
> --- a/drivers/pci/msi/msi.c
> +++ b/drivers/pci/msi/msi.c
> @@ -241,14 +241,14 @@ static void free_msi_irqs(struct pci_dev
>   pci_msi_teardown_msi_irqs(dev);
>  
>   list_for_each_entry_safe(entry, tmp, msi_list, list) {
> - if (entry->pci.msi_attrib.is_msix) {
> - if (list_is_last(&entry->list, msi_list))
> - iounmap(entry->pci.mask_base);
> - }
> -
>   list_del(&entry->list);
>   free_msi_entry(entry);
>   }
> +
> + if (dev->msix_base) {
> + iounmap(dev->msix_base);
> + dev->msix_base = NULL;
> + }
>  }
>  
>  static void pci_intx_for_msi(struct pci_dev *dev, int enable)
> @@ -501,10 +501,6 @@ static int msix_setup_entries(struct pci
>   for (i = 0, curmsk = masks; i < nvec; i++) {
>   entry = alloc_msi_entry(&dev->dev, 1, curmsk);
>   if (!entry) {
> - if (!i)
> - iounmap(base);
> - else
> - free_msi_irqs(dev);
>   /* No enough memory. Don't try again */
>   ret = -ENOMEM;
>   goto out;
> @@ -602,12 +598,14 @@ static int msix_capability_init(struct p
>   goto out_disable;
>   }
>  
> + dev->msix_base = base;
> +
>   /* Ensure that all table entries are masked. */
>   msix_mask_all(base, tsize);
>  
>   ret = msix_setup_entries(dev, base, entries, nvec, affd);
>   if (ret)
> - goto out_disable;
> + goto out_free;
>  
>   ret = pci_msi_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSIX);
>   if (ret)
> --- a/include/linux/pci.h
> +++ b/include/linux/pci.h
> @@ -473,6 +473,7 @@ struct pci_dev {
>   u8  ptm_granularity;
>  #endif
>  #ifdef CONFIG_PCI_MSI
> + void __iomem*msix_base;
>   const struct attribute_group **msi_irq_groups;
>  #endif
>   struct pci_vpd  vpd;
> 



Re: [patch V2 20/23] PCI/MSI: Move msi_lock to struct pci_dev

2021-12-07 Thread Bjorn Helgaas
On Mon, Dec 06, 2021 at 11:27:56PM +0100, Thomas Gleixner wrote:
> It's only required for PCI/MSI. So no point in having it in every struct
> device.
> 
> Signed-off-by: Thomas Gleixner 

Acked-by: Bjorn Helgaas 

> ---
> V2: New patch
> ---
>  drivers/base/core.c|1 -
>  drivers/pci/msi/msi.c  |2 +-
>  drivers/pci/probe.c|4 +++-
>  include/linux/device.h |2 --
>  include/linux/pci.h|1 +
>  5 files changed, 5 insertions(+), 5 deletions(-)
> 
> --- a/drivers/base/core.c
> +++ b/drivers/base/core.c
> @@ -2875,7 +2875,6 @@ void device_initialize(struct device *de
>   device_pm_init(dev);
>   set_dev_node(dev, NUMA_NO_NODE);
>  #ifdef CONFIG_GENERIC_MSI_IRQ
> - raw_spin_lock_init(&dev->msi_lock);
>   INIT_LIST_HEAD(&dev->msi_list);
>  #endif
>   INIT_LIST_HEAD(&dev->links.consumers);
> --- a/drivers/pci/msi/msi.c
> +++ b/drivers/pci/msi/msi.c
> @@ -18,7 +18,7 @@ int pci_msi_ignore_mask;
>  
>  static noinline void pci_msi_update_mask(struct msi_desc *desc, u32 clear, 
> u32 set)
>  {
> - raw_spinlock_t *lock = &desc->dev->msi_lock;
> + raw_spinlock_t *lock = &to_pci_dev(desc->dev)->msi_lock;
>   unsigned long flags;
>  
>   if (!desc->pci.msi_attrib.can_mask)
> --- a/drivers/pci/probe.c
> +++ b/drivers/pci/probe.c
> @@ -2311,7 +2311,9 @@ struct pci_dev *pci_alloc_dev(struct pci
>   INIT_LIST_HEAD(&dev->bus_list);
>   dev->dev.type = &pci_dev_type;
>   dev->bus = pci_bus_get(bus);
> -
> +#ifdef CONFIG_PCI_MSI
> + raw_spin_lock_init(&dev->msi_lock);
> +#endif
>   return dev;
>  }
>  EXPORT_SYMBOL(pci_alloc_dev);
> --- a/include/linux/device.h
> +++ b/include/linux/device.h
> @@ -407,7 +407,6 @@ struct dev_links_info {
>   * @em_pd:   device's energy model performance domain
>   * @pins:For device pin management.
>   *   See Documentation/driver-api/pin-control.rst for details.
> - * @msi_lock:Lock to protect MSI mask cache and mask register
>   * @msi_list:Hosts MSI descriptors
>   * @msi_domain: The generic MSI domain this device is using.
>   * @numa_node:   NUMA node this device is close to.
> @@ -508,7 +507,6 @@ struct device {
>   struct dev_pin_info *pins;
>  #endif
>  #ifdef CONFIG_GENERIC_MSI_IRQ
> - raw_spinlock_t  msi_lock;
>   struct list_headmsi_list;
>  #endif
>  #ifdef CONFIG_DMA_OPS
> --- a/include/linux/pci.h
> +++ b/include/linux/pci.h
> @@ -474,6 +474,7 @@ struct pci_dev {
>  #endif
>  #ifdef CONFIG_PCI_MSI
>   void __iomem*msix_base;
> + raw_spinlock_t  msi_lock;
>   const struct attribute_group **msi_irq_groups;
>  #endif
>   struct pci_vpd  vpd;
> 



Re: [patch V2 21/23] PCI/MSI: Make pci_msi_domain_check_cap() static

2021-12-07 Thread Bjorn Helgaas
On Mon, Dec 06, 2021 at 11:27:57PM +0100, Thomas Gleixner wrote:
> No users outside of that file.
> 
> Signed-off-by: Thomas Gleixner 
> Tested-by: Juergen Gross 
> Reviewed-by: Jason Gunthorpe 

Acked-by: Bjorn Helgaas 

> ---
>  drivers/pci/msi/irqdomain.c |5 +++--
>  include/linux/msi.h |2 --
>  2 files changed, 3 insertions(+), 4 deletions(-)
> 
> --- a/drivers/pci/msi/irqdomain.c
> +++ b/drivers/pci/msi/irqdomain.c
> @@ -79,8 +79,9 @@ static inline bool pci_msi_desc_is_multi
>   *  1 if Multi MSI is requested, but the domain does not support it
>   *  -ENOTSUPP otherwise
>   */
> -int pci_msi_domain_check_cap(struct irq_domain *domain,
> -  struct msi_domain_info *info, struct device *dev)
> +static int pci_msi_domain_check_cap(struct irq_domain *domain,
> + struct msi_domain_info *info,
> + struct device *dev)
>  {
>   struct msi_desc *desc = first_pci_msi_entry(to_pci_dev(dev));
>  
> --- a/include/linux/msi.h
> +++ b/include/linux/msi.h
> @@ -439,8 +439,6 @@ void *platform_msi_get_host_data(struct
>  struct irq_domain *pci_msi_create_irq_domain(struct fwnode_handle *fwnode,
>struct msi_domain_info *info,
>struct irq_domain *parent);
> -int pci_msi_domain_check_cap(struct irq_domain *domain,
> -  struct msi_domain_info *info, struct device *dev);
>  u32 pci_msi_domain_get_msi_rid(struct irq_domain *domain, struct pci_dev 
> *pdev);
>  struct irq_domain *pci_msi_get_device_domain(struct pci_dev *pdev);
>  bool pci_dev_has_special_msi_domain(struct pci_dev *pdev);
> 



Re: [patch V2 23/23] PCI/MSI: Move descriptor counting on allocation fail to the legacy code

2021-12-07 Thread Bjorn Helgaas
On Mon, Dec 06, 2021 at 11:28:00PM +0100, Thomas Gleixner wrote:
> The irqdomain code already returns the information. Move the loop to the
> legacy code.
> 
> Signed-off-by: Thomas Gleixner 
> Tested-by: Juergen Gross 
> Reviewed-by: Jason Gunthorpe 

Acked-by: Bjorn Helgaas 

> ---
>  drivers/pci/msi/legacy.c |   20 +++-
>  drivers/pci/msi/msi.c|   19 +--
>  2 files changed, 20 insertions(+), 19 deletions(-)
> 
> --- a/drivers/pci/msi/legacy.c
> +++ b/drivers/pci/msi/legacy.c
> @@ -50,9 +50,27 @@ void __weak arch_teardown_msi_irqs(struc
>   }
>  }
>  
> +static int pci_msi_setup_check_result(struct pci_dev *dev, int type, int ret)
> +{
> + struct msi_desc *entry;
> + int avail = 0;
> +
> + if (type != PCI_CAP_ID_MSIX || ret >= 0)
> + return ret;
> +
> + /* Scan the MSI descriptors for successfully allocated ones. */
> + for_each_pci_msi_entry(entry, dev) {
> + if (entry->irq != 0)
> + avail++;
> + }
> + return avail ? avail : ret;
> +}
> +
>  int pci_msi_legacy_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
>  {
> - return arch_setup_msi_irqs(dev, nvec, type);
> + int ret = arch_setup_msi_irqs(dev, nvec, type);
> +
> + return pci_msi_setup_check_result(dev, type, ret);
>  }
>  
>  void pci_msi_legacy_teardown_msi_irqs(struct pci_dev *dev)
> --- a/drivers/pci/msi/msi.c
> +++ b/drivers/pci/msi/msi.c
> @@ -609,7 +609,7 @@ static int msix_capability_init(struct p
>  
>   ret = pci_msi_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSIX);
>   if (ret)
> - goto out_avail;
> + goto out_free;
>  
>   /* Check if all MSI entries honor device restrictions */
>   ret = msi_verify_entries(dev);
> @@ -634,23 +634,6 @@ static int msix_capability_init(struct p
>   pcibios_free_irq(dev);
>   return 0;
>  
> -out_avail:
> - if (ret < 0) {
> - /*
> -  * If we had some success, report the number of IRQs
> -  * we succeeded in setting up.
> -  */
> - struct msi_desc *entry;
> - int avail = 0;
> -
> - for_each_pci_msi_entry(entry, dev) {
> - if (entry->irq != 0)
> - avail++;
> - }
> - if (avail != 0)
> - ret = avail;
> - }
> -
>  out_free:
>   free_msi_irqs(dev);
>  
> 



Re: [patch V2 03/36] PCI/MSI: Allocate MSI device data on first use

2021-12-07 Thread Bjorn Helgaas
On Mon, Dec 06, 2021 at 11:39:00PM +0100, Thomas Gleixner wrote:
> Allocate MSI device data on first use, i.e. when a PCI driver invokes one
> of the PCI/MSI enablement functions.
> 
> Signed-off-by: Thomas Gleixner 
> Reviewed-by: Greg Kroah-Hartman 
> Reviewed-by: Jason Gunthorpe 

Acked-by: Bjorn Helgaas 

> ---
>  drivers/pci/msi/msi.c |   20 +++-
>  1 file changed, 15 insertions(+), 5 deletions(-)
> 
> --- a/drivers/pci/msi/msi.c
> +++ b/drivers/pci/msi/msi.c
> @@ -889,10 +889,12 @@ static int __pci_enable_msi_range(struct
>  /* deprecated, don't use */
>  int pci_enable_msi(struct pci_dev *dev)
>  {
> - int rc = __pci_enable_msi_range(dev, 1, 1, NULL);
> - if (rc < 0)
> - return rc;
> - return 0;
> + int rc = msi_setup_device_data(&dev->dev);
> +
> + if (!rc)
> + rc = __pci_enable_msi_range(dev, 1, 1, NULL);
> +
> + return rc < 0 ? rc : 0;
>  }
>  EXPORT_SYMBOL(pci_enable_msi);
>  
> @@ -947,7 +949,11 @@ static int __pci_enable_msix_range(struc
>  int pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries,
>   int minvec, int maxvec)
>  {
> - return __pci_enable_msix_range(dev, entries, minvec, maxvec, NULL, 0);
> + int ret = msi_setup_device_data(&dev->dev);
> +
> + if (!ret)
> + ret = __pci_enable_msix_range(dev, entries, minvec, maxvec, 
> NULL, 0);
> + return ret;
>  }
>  EXPORT_SYMBOL(pci_enable_msix_range);
>  
> @@ -974,8 +980,12 @@ int pci_alloc_irq_vectors_affinity(struc
>  struct irq_affinity *affd)
>  {
>   struct irq_affinity msi_default_affd = {0};
> + int ret = msi_setup_device_data(&dev->dev);
>   int nvecs = -ENOSPC;
>  
> + if (ret)
> + return ret;
> +
>   if (flags & PCI_IRQ_AFFINITY) {
>   if (!affd)
>   affd = &msi_default_affd;
> 



Re: [patch V2 08/36] PCI/MSI: Let the irq code handle sysfs groups

2021-12-07 Thread Bjorn Helgaas
On Mon, Dec 06, 2021 at 11:39:09PM +0100, Thomas Gleixner wrote:
> Set the domain info flag which makes the core code handle sysfs groups and
> put an explicit invocation into the legacy code.
> 
> Signed-off-by: Thomas Gleixner 
> Reviewed-by: Greg Kroah-Hartman 
> Reviewed-by: Jason Gunthorpe 

Acked-by: Bjorn Helgaas 

> ---
>  drivers/pci/msi/irqdomain.c |2 +-
>  drivers/pci/msi/legacy.c|6 +-
>  drivers/pci/msi/msi.c   |   23 ---
>  include/linux/pci.h |1 -
>  4 files changed, 6 insertions(+), 26 deletions(-)
> 
> --- a/drivers/pci/msi/irqdomain.c
> +++ b/drivers/pci/msi/irqdomain.c
> @@ -159,7 +159,7 @@ struct irq_domain *pci_msi_create_irq_do
>   if (info->flags & MSI_FLAG_USE_DEF_CHIP_OPS)
>   pci_msi_domain_update_chip_ops(info);
>  
> - info->flags |= MSI_FLAG_ACTIVATE_EARLY;
> + info->flags |= MSI_FLAG_ACTIVATE_EARLY | MSI_FLAG_DEV_SYSFS;
>   if (IS_ENABLED(CONFIG_GENERIC_IRQ_RESERVATION_MODE))
>   info->flags |= MSI_FLAG_MUST_REACTIVATE;
>  
> --- a/drivers/pci/msi/legacy.c
> +++ b/drivers/pci/msi/legacy.c
> @@ -70,10 +70,14 @@ int pci_msi_legacy_setup_msi_irqs(struct
>  {
>   int ret = arch_setup_msi_irqs(dev, nvec, type);
>  
> - return pci_msi_setup_check_result(dev, type, ret);
> + ret = pci_msi_setup_check_result(dev, type, ret);
> + if (!ret)
> + ret = msi_device_populate_sysfs(&dev->dev);
> + return ret;
>  }
>  
>  void pci_msi_legacy_teardown_msi_irqs(struct pci_dev *dev)
>  {
> + msi_device_destroy_sysfs(&dev->dev);
>   arch_teardown_msi_irqs(dev);
>  }
> --- a/drivers/pci/msi/msi.c
> +++ b/drivers/pci/msi/msi.c
> @@ -233,11 +233,6 @@ static void free_msi_irqs(struct pci_dev
>   for (i = 0; i < entry->nvec_used; i++)
>   BUG_ON(irq_has_action(entry->irq + i));
>  
> - if (dev->msi_irq_groups) {
> - msi_destroy_sysfs(&dev->dev, dev->msi_irq_groups);
> - dev->msi_irq_groups = NULL;
> - }
> -
>   pci_msi_teardown_msi_irqs(dev);
>  
>   list_for_each_entry_safe(entry, tmp, msi_list, list) {
> @@ -417,7 +412,6 @@ static int msi_verify_entries(struct pci
>  static int msi_capability_init(struct pci_dev *dev, int nvec,
>  struct irq_affinity *affd)
>  {
> - const struct attribute_group **groups;
>   struct msi_desc *entry;
>   int ret;
>  
> @@ -441,14 +435,6 @@ static int msi_capability_init(struct pc
>   if (ret)
>   goto err;
>  
> - groups = msi_populate_sysfs(&dev->dev);
> - if (IS_ERR(groups)) {
> - ret = PTR_ERR(groups);
> - goto err;
> - }
> -
> - dev->msi_irq_groups = groups;
> -
>   /* Set MSI enabled bits */
>   pci_intx_for_msi(dev, 0);
>   pci_msi_set_enable(dev, 1);
> @@ -576,7 +562,6 @@ static void msix_mask_all(void __iomem *
>  static int msix_capability_init(struct pci_dev *dev, struct msix_entry 
> *entries,
>   int nvec, struct irq_affinity *affd)
>  {
> - const struct attribute_group **groups;
>   void __iomem *base;
>   int ret, tsize;
>   u16 control;
> @@ -618,14 +603,6 @@ static int msix_capability_init(struct p
>  
>   msix_update_entries(dev, entries);
>  
> - groups = msi_populate_sysfs(&dev->dev);
> - if (IS_ERR(groups)) {
> - ret = PTR_ERR(groups);
> - goto out_free;
> - }
> -
> - dev->msi_irq_groups = groups;
> -
>   /* Set MSI-X enabled bits and unmask the function */
>   pci_intx_for_msi(dev, 0);
>   dev->msix_enabled = 1;
> --- a/include/linux/pci.h
> +++ b/include/linux/pci.h
> @@ -475,7 +475,6 @@ struct pci_dev {
>  #ifdef CONFIG_PCI_MSI
>   void __iomem*msix_base;
>   raw_spinlock_t  msi_lock;
> - const struct attribute_group **msi_irq_groups;
>  #endif
>   struct pci_vpd  vpd;
>  #ifdef CONFIG_PCIE_DPC
> 



Re: [patch V2 17/36] PCI/MSI: Use msi_desc::msi_index

2021-12-07 Thread Bjorn Helgaas
On Mon, Dec 06, 2021 at 11:39:23PM +0100, Thomas Gleixner wrote:
> The usage of msi_desc::pci::entry_nr is confusing at best. It's the index
> into the MSI[X] descriptor table.
> 
> Use msi_desc::msi_index which is shared between all MSI incarnations
> instead of having a PCI specific storage for no value.
> 
> Signed-off-by: Thomas Gleixner 
> Reviewed-by: Greg Kroah-Hartman 
> Reviewed-by: Jason Gunthorpe 

Acked-by: Bjorn Helgaas 

> ---
>  arch/powerpc/platforms/pseries/msi.c |4 ++--
>  arch/x86/pci/xen.c   |2 +-
>  drivers/pci/msi/irqdomain.c  |2 +-
>  drivers/pci/msi/msi.c|   20 
>  drivers/pci/xen-pcifront.c   |2 +-
>  include/linux/msi.h  |2 --
>  6 files changed, 13 insertions(+), 19 deletions(-)
> 
> --- a/arch/powerpc/platforms/pseries/msi.c
> +++ b/arch/powerpc/platforms/pseries/msi.c
> @@ -332,7 +332,7 @@ static int check_msix_entries(struct pci
>  
>   expected = 0;
>   for_each_pci_msi_entry(entry, pdev) {
> - if (entry->pci.msi_attrib.entry_nr != expected) {
> + if (entry->msi_index != expected) {
>   pr_debug("rtas_msi: bad MSI-X entries.\n");
>   return -EINVAL;
>   }
> @@ -580,7 +580,7 @@ static int pseries_irq_domain_alloc(stru
>   int hwirq;
>   int i, ret;
>  
> - hwirq = rtas_query_irq_number(pci_get_pdn(pdev), 
> desc->pci.msi_attrib.entry_nr);
> + hwirq = rtas_query_irq_number(pci_get_pdn(pdev), desc->msi_index);
>   if (hwirq < 0) {
>   dev_err(&pdev->dev, "Failed to query HW IRQ: %d\n", hwirq);
>   return hwirq;
> --- a/arch/x86/pci/xen.c
> +++ b/arch/x86/pci/xen.c
> @@ -306,7 +306,7 @@ static int xen_initdom_setup_msi_irqs(st
>   return -EINVAL;
>  
>   map_irq.table_base = pci_resource_start(dev, bir);
> - map_irq.entry_nr = msidesc->pci.msi_attrib.entry_nr;
> + map_irq.entry_nr = msidesc->msi_index;
>   }
>  
>   ret = -EINVAL;
> --- a/drivers/pci/msi/irqdomain.c
> +++ b/drivers/pci/msi/irqdomain.c
> @@ -57,7 +57,7 @@ static irq_hw_number_t pci_msi_domain_ca
>  {
>   struct pci_dev *dev = msi_desc_to_pci_dev(desc);
>  
> - return (irq_hw_number_t)desc->pci.msi_attrib.entry_nr |
> + return (irq_hw_number_t)desc->msi_index |
>   pci_dev_id(dev) << 11 |
>   (pci_domain_nr(dev->bus) & 0x) << 27;
>  }
> --- a/drivers/pci/msi/msi.c
> +++ b/drivers/pci/msi/msi.c
> @@ -44,7 +44,7 @@ static inline void pci_msi_unmask(struct
>  
>  static inline void __iomem *pci_msix_desc_addr(struct msi_desc *desc)
>  {
> - return desc->pci.mask_base + desc->pci.msi_attrib.entry_nr * 
> PCI_MSIX_ENTRY_SIZE;
> + return desc->pci.mask_base + desc->msi_index * PCI_MSIX_ENTRY_SIZE;
>  }
>  
>  /*
> @@ -356,13 +356,10 @@ msi_setup_entry(struct pci_dev *dev, int
>   if (dev->dev_flags & PCI_DEV_FLAGS_HAS_MSI_MASKING)
>   control |= PCI_MSI_FLAGS_MASKBIT;
>  
> - entry->pci.msi_attrib.is_msix   = 0;
> - entry->pci.msi_attrib.is_64 = !!(control & 
> PCI_MSI_FLAGS_64BIT);
> - entry->pci.msi_attrib.is_virtual= 0;
> - entry->pci.msi_attrib.entry_nr  = 0;
> + entry->pci.msi_attrib.is_64 = !!(control & PCI_MSI_FLAGS_64BIT);
>   entry->pci.msi_attrib.can_mask  = !pci_msi_ignore_mask &&
> !!(control & PCI_MSI_FLAGS_MASKBIT);
> - entry->pci.msi_attrib.default_irq   = dev->irq; /* Save IOAPIC 
> IRQ */
> + entry->pci.msi_attrib.default_irq = dev->irq;
>   entry->pci.msi_attrib.multi_cap = (control & PCI_MSI_FLAGS_QMASK) >> 1;
>   entry->pci.msi_attrib.multiple  = ilog2(__roundup_pow_of_two(nvec));
>  
> @@ -496,12 +493,11 @@ static int msix_setup_entries(struct pci
>   entry->pci.msi_attrib.is_64 = 1;
>  
>   if (entries)
> - entry->pci.msi_attrib.entry_nr = entries[i].entry;
> + entry->msi_index = entries[i].entry;
>   else
> - entry->pci.msi_attrib.entry_nr = i;
> + entry->msi_index = i;
>  
> - entry->pci.msi_attrib.is_virtual =
> - entry->pci.msi_attrib.entry_nr >= vec_count;
> + entry->pci.msi_attrib.is_virtual = entry->msi_index >= 

Re: [patch V2 19/36] PCI/MSI: Store properties in device::msi::data

2021-12-07 Thread Bjorn Helgaas
On Mon, Dec 06, 2021 at 11:39:26PM +0100, Thomas Gleixner wrote:
> Store the properties which are interesting for various places so the MSI
> descriptor fiddling can be removed.
> 
> Signed-off-by: Thomas Gleixner 

Acked-by: Bjorn Helgaas 

> ---
> V2: Use the setter function
> ---
>  drivers/pci/msi/msi.c |8 
>  1 file changed, 8 insertions(+)
> 
> --- a/drivers/pci/msi/msi.c
> +++ b/drivers/pci/msi/msi.c
> @@ -244,6 +244,8 @@ static void free_msi_irqs(struct pci_dev
>   iounmap(dev->msix_base);
>   dev->msix_base = NULL;
>   }
> +
> + msi_device_set_properties(&dev->dev, 0);
>  }
>  
>  static void pci_intx_for_msi(struct pci_dev *dev, int enable)
> @@ -341,6 +343,7 @@ msi_setup_entry(struct pci_dev *dev, int
>  {
>   struct irq_affinity_desc *masks = NULL;
>   struct msi_desc *entry;
> + unsigned long prop;
>   u16 control;
>  
>   if (affd)
> @@ -372,6 +375,10 @@ msi_setup_entry(struct pci_dev *dev, int
>   if (entry->pci.msi_attrib.can_mask)
>   pci_read_config_dword(dev, entry->pci.mask_pos, 
> &entry->pci.msi_mask);
>  
> + prop = MSI_PROP_PCI_MSI;
> + if (entry->pci.msi_attrib.is_64)
> + prop |= MSI_PROP_64BIT;
> + msi_device_set_properties(&dev->dev, prop);
>  out:
>   kfree(masks);
>   return entry;
> @@ -514,6 +521,7 @@ static int msix_setup_entries(struct pci
>   if (masks)
>   curmsk++;
>   }
> + msi_device_set_properties(&dev->dev, MSI_PROP_PCI_MSIX | 
> MSI_PROP_64BIT);
>   ret = 0;
>  out:
>   kfree(masks);
> 



Re: [patch V2 25/36] PCI/MSI: Provide MSI_FLAG_MSIX_CONTIGUOUS

2021-12-07 Thread Bjorn Helgaas
On Mon, Dec 06, 2021 at 11:39:36PM +0100, Thomas Gleixner wrote:
> Provide a domain info flag which makes the core code check for a contiguous
> MSI-X index on allocation. That's simpler than checking it at some other
> domain callback in architecture code.
> 
> Signed-off-by: Thomas Gleixner 
> Reviewed-by: Greg Kroah-Hartman 
> Reviewed-by: Jason Gunthorpe 

Acked-by: Bjorn Helgaas 

> ---
>  drivers/pci/msi/irqdomain.c |   16 ++--
>  include/linux/msi.h |2 ++
>  2 files changed, 16 insertions(+), 2 deletions(-)
> 
> --- a/drivers/pci/msi/irqdomain.c
> +++ b/drivers/pci/msi/irqdomain.c
> @@ -89,9 +89,21 @@ static int pci_msi_domain_check_cap(stru
>   if (pci_msi_desc_is_multi_msi(desc) &&
>   !(info->flags & MSI_FLAG_MULTI_PCI_MSI))
>   return 1;
> - else if (desc->pci.msi_attrib.is_msix && !(info->flags & 
> MSI_FLAG_PCI_MSIX))
> - return -ENOTSUPP;
>  
> + if (desc->pci.msi_attrib.is_msix) {
> + if (!(info->flags & MSI_FLAG_PCI_MSIX))
> + return -ENOTSUPP;
> +
> + if (info->flags & MSI_FLAG_MSIX_CONTIGUOUS) {
> + unsigned int idx = 0;
> +
> + /* Check for gaps in the entry indices */
> + for_each_msi_entry(desc, dev) {
> + if (desc->msi_index != idx++)
> + return -ENOTSUPP;
> + }
> + }
> + }
>   return 0;
>  }
>  
> --- a/include/linux/msi.h
> +++ b/include/linux/msi.h
> @@ -376,6 +376,8 @@ enum {
>   MSI_FLAG_LEVEL_CAPABLE  = (1 << 6),
>   /* Populate sysfs on alloc() and destroy it on free() */
>   MSI_FLAG_DEV_SYSFS  = (1 << 7),
> + /* MSI-X entries must be contiguous */
> + MSI_FLAG_MSIX_CONTIGUOUS= (1 << 8),
>  };
>  
>  int msi_domain_set_affinity(struct irq_data *data, const struct cpumask 
> *mask,
> 



Re: [patch V2 28/36] PCI/MSI: Use __msi_get_virq() in pci_get_vector()

2021-12-07 Thread Bjorn Helgaas
On Mon, Dec 06, 2021 at 11:39:41PM +0100, Thomas Gleixner wrote:
> Use msi_get_vector() and handle the return value to be compatible.
> 
> No functional change intended.
> 
> Signed-off-by: Thomas Gleixner 

Acked-by: Bjorn Helgaas 

> ---
> V2: Handle the INTx case directly instead of trying to be overly smart - Marc
> ---
>  drivers/pci/msi/msi.c |   25 +
>  1 file changed, 5 insertions(+), 20 deletions(-)
> 
> --- a/drivers/pci/msi/msi.c
> +++ b/drivers/pci/msi/msi.c
> @@ -1032,28 +1032,13 @@ EXPORT_SYMBOL(pci_free_irq_vectors);
>   */
>  int pci_irq_vector(struct pci_dev *dev, unsigned int nr)
>  {
> - if (dev->msix_enabled) {
> - struct msi_desc *entry;
> + unsigned int irq;
>  
> - for_each_pci_msi_entry(entry, dev) {
> - if (entry->msi_index == nr)
> - return entry->irq;
> - }
> - WARN_ON_ONCE(1);
> - return -EINVAL;
> - }
> + if (!dev->msi_enabled && !dev->msix_enabled)
> + return !nr ? dev->irq : -EINVAL;
>  
> - if (dev->msi_enabled) {
> - struct msi_desc *entry = first_pci_msi_entry(dev);
> -
> - if (WARN_ON_ONCE(nr >= entry->nvec_used))
> - return -EINVAL;
> - } else {
> - if (WARN_ON_ONCE(nr > 0))
> - return -EINVAL;
> - }
> -
> - return dev->irq + nr;
> + irq = msi_get_virq(&dev->dev, nr);
> + return irq ? irq : -EINVAL;
>  }
>  EXPORT_SYMBOL(pci_irq_vector);
>  
> 



Re: [patch V2 22/23] genirq/msi: Handle PCI/MSI allocation fail in core code

2021-12-07 Thread Bjorn Helgaas
On Mon, Dec 06, 2021 at 11:27:59PM +0100, Thomas Gleixner wrote:
> Get rid of yet another irqdomain callback and let the core code return the
> already available information of how many descriptors could be allocated.
> 
> Signed-off-by: Thomas Gleixner 
> Tested-by: Juergen Gross 
> Reviewed-by: Jason Gunthorpe 

Acked-by: Bjorn Helgaas# PCI

> ---
>  drivers/pci/msi/irqdomain.c |   13 -
>  include/linux/msi.h |5 +
>  kernel/irq/msi.c|   29 +
>  3 files changed, 26 insertions(+), 21 deletions(-)
> 
> --- a/drivers/pci/msi/irqdomain.c
> +++ b/drivers/pci/msi/irqdomain.c
> @@ -95,16 +95,6 @@ static int pci_msi_domain_check_cap(stru
>   return 0;
>  }
>  
> -static int pci_msi_domain_handle_error(struct irq_domain *domain,
> -struct msi_desc *desc, int error)
> -{
> - /* Special handling to support __pci_enable_msi_range() */
> - if (pci_msi_desc_is_multi_msi(desc) && error == -ENOSPC)
> - return 1;
> -
> - return error;
> -}
> -
>  static void pci_msi_domain_set_desc(msi_alloc_info_t *arg,
>   struct msi_desc *desc)
>  {
> @@ -115,7 +105,6 @@ static void pci_msi_domain_set_desc(msi_
>  static struct msi_domain_ops pci_msi_domain_ops_default = {
>   .set_desc   = pci_msi_domain_set_desc,
>   .msi_check  = pci_msi_domain_check_cap,
> - .handle_error   = pci_msi_domain_handle_error,
>  };
>  
>  static void pci_msi_domain_update_dom_ops(struct msi_domain_info *info)
> @@ -129,8 +118,6 @@ static void pci_msi_domain_update_dom_op
>   ops->set_desc = pci_msi_domain_set_desc;
>   if (ops->msi_check == NULL)
>   ops->msi_check = pci_msi_domain_check_cap;
> - if (ops->handle_error == NULL)
> - ops->handle_error = pci_msi_domain_handle_error;
>   }
>  }
>  
> --- a/include/linux/msi.h
> +++ b/include/linux/msi.h
> @@ -286,7 +286,6 @@ struct msi_domain_info;
>   * @msi_check:   Callback for verification of the 
> domain/info/dev data
>   * @msi_prepare: Prepare the allocation of the interrupts in the domain
>   * @set_desc:Set the msi descriptor for an interrupt
> - * @handle_error:Optional error handler if the allocation fails
>   * @domain_alloc_irqs:   Optional function to override the default 
> allocation
>   *   function.
>   * @domain_free_irqs:Optional function to override the default free
> @@ -295,7 +294,7 @@ struct msi_domain_info;
>   * @get_hwirq, @msi_init and @msi_free are callbacks used by the underlying
>   * irqdomain.
>   *
> - * @msi_check, @msi_prepare, @handle_error and @set_desc are callbacks used 
> by
> + * @msi_check, @msi_prepare and @set_desc are callbacks used by
>   * msi_domain_alloc/free_irqs().
>   *
>   * @domain_alloc_irqs, @domain_free_irqs can be used to override the
> @@ -332,8 +331,6 @@ struct msi_domain_ops {
>  msi_alloc_info_t *arg);
>   void(*set_desc)(msi_alloc_info_t *arg,
>   struct msi_desc *desc);
> - int (*handle_error)(struct irq_domain *domain,
> - struct msi_desc *desc, int error);
>   int (*domain_alloc_irqs)(struct irq_domain *domain,
>struct device *dev, int nvec);
>   void(*domain_free_irqs)(struct irq_domain *domain,
> --- a/kernel/irq/msi.c
> +++ b/kernel/irq/msi.c
> @@ -538,6 +538,27 @@ static bool msi_check_reservation_mode(s
>   return desc->pci.msi_attrib.is_msix || desc->pci.msi_attrib.can_mask;
>  }
>  
> +static int msi_handle_pci_fail(struct irq_domain *domain, struct msi_desc 
> *desc,
> +int allocated)
> +{
> + switch(domain->bus_token) {
> + case DOMAIN_BUS_PCI_MSI:
> + case DOMAIN_BUS_VMD_MSI:
> + if (IS_ENABLED(CONFIG_PCI_MSI))
> + break;
> + fallthrough;
> + default:
> + return -ENOSPC;
> + }
> +
> + /* Let a failed PCI multi MSI allocation retry */
> + if (desc->nvec_used > 1)
> + return 1;
> +
> + /* If there was a successful allocation let the caller know */
> + return allocated ? allocated : -ENOSPC;
> +}
> +
>  int __msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
>   int nvec)
>  {
> @@ -546,6 +567,7 @@ int __msi_domain_alloc_irqs(struct irq_d
>

Re: [patch V2 16/23] PCI/MSI: Split out CONFIG_PCI_MSI independent part

2021-12-07 Thread Bjorn Helgaas
On Mon, Dec 06, 2021 at 11:27:49PM +0100, Thomas Gleixner wrote:
> These functions are required even when CONFIG_PCI_MSI is not set. Move them
> to their own file.
> 
> Signed-off-by: Thomas Gleixner 
> Tested-by: Juergen Gross 
> Reviewed-by: Jason Gunthorpe 

Acked-by: Bjorn Helgaas 

> ---
>  drivers/pci/msi/Makefile |3 ++-
>  drivers/pci/msi/msi.c|   39 ---
>  drivers/pci/msi/pcidev_msi.c |   43 
> +++
>  3 files changed, 45 insertions(+), 40 deletions(-)
> 
> --- a/drivers/pci/msi/Makefile
> +++ b/drivers/pci/msi/Makefile
> @@ -1,4 +1,5 @@
>  # SPDX-License-Identifier: GPL-2.0
>  #
>  # Makefile for the PCI/MSI
> -obj-$(CONFIG_PCI)+= msi.o
> +obj-$(CONFIG_PCI)+= pcidev_msi.o
> +obj-$(CONFIG_PCI_MSI)+= msi.o
> --- a/drivers/pci/msi/msi.c
> +++ b/drivers/pci/msi/msi.c
> @@ -18,8 +18,6 @@
>  
>  #include "../pci.h"
>  
> -#ifdef CONFIG_PCI_MSI
> -
>  static int pci_msi_enable = 1;
>  int pci_msi_ignore_mask;
>  
> @@ -1493,40 +1491,3 @@ bool pci_dev_has_special_msi_domain(stru
>  }
>  
>  #endif /* CONFIG_PCI_MSI_IRQ_DOMAIN */
> -#endif /* CONFIG_PCI_MSI */
> -
> -void pci_msi_init(struct pci_dev *dev)
> -{
> - u16 ctrl;
> -
> - /*
> -  * Disable the MSI hardware to avoid screaming interrupts
> -  * during boot.  This is the power on reset default so
> -  * usually this should be a noop.
> -  */
> - dev->msi_cap = pci_find_capability(dev, PCI_CAP_ID_MSI);
> - if (!dev->msi_cap)
> - return;
> -
> - pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &ctrl);
> - if (ctrl & PCI_MSI_FLAGS_ENABLE)
> - pci_write_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS,
> -   ctrl & ~PCI_MSI_FLAGS_ENABLE);
> -
> - if (!(ctrl & PCI_MSI_FLAGS_64BIT))
> - dev->no_64bit_msi = 1;
> -}
> -
> -void pci_msix_init(struct pci_dev *dev)
> -{
> - u16 ctrl;
> -
> - dev->msix_cap = pci_find_capability(dev, PCI_CAP_ID_MSIX);
> - if (!dev->msix_cap)
> - return;
> -
> - pci_read_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, &ctrl);
> - if (ctrl & PCI_MSIX_FLAGS_ENABLE)
> - pci_write_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS,
> -   ctrl & ~PCI_MSIX_FLAGS_ENABLE);
> -}
> --- /dev/null
> +++ b/drivers/pci/msi/pcidev_msi.c
> @@ -0,0 +1,43 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * MSI[X} related functions which are available unconditionally.
> + */
> +#include "../pci.h"
> +
> +/*
> + * Disable the MSI[X] hardware to avoid screaming interrupts during boot.
> + * This is the power on reset default so usually this should be a noop.
> + */
> +
> +void pci_msi_init(struct pci_dev *dev)
> +{
> + u16 ctrl;
> +
> + dev->msi_cap = pci_find_capability(dev, PCI_CAP_ID_MSI);
> + if (!dev->msi_cap)
> + return;
> +
> + pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &ctrl);
> + if (ctrl & PCI_MSI_FLAGS_ENABLE) {
> + pci_write_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS,
> +   ctrl & ~PCI_MSI_FLAGS_ENABLE);
> + }
> +
> + if (!(ctrl & PCI_MSI_FLAGS_64BIT))
> + dev->no_64bit_msi = 1;
> +}
> +
> +void pci_msix_init(struct pci_dev *dev)
> +{
> + u16 ctrl;
> +
> + dev->msix_cap = pci_find_capability(dev, PCI_CAP_ID_MSIX);
> + if (!dev->msix_cap)
> + return;
> +
> + pci_read_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, &ctrl);
> + if (ctrl & PCI_MSIX_FLAGS_ENABLE) {
> + pci_write_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS,
> +   ctrl & ~PCI_MSIX_FLAGS_ENABLE);
> + }
> +}
> 



Re: [patch V2 07/31] PCI/MSI: Protect MSI operations

2021-12-07 Thread Bjorn Helgaas
On Mon, Dec 06, 2021 at 11:51:13PM +0100, Thomas Gleixner wrote:
> To prepare for dynamic extension of MSI-X vectors, protect the MSI
> operations for MSI and MSI-X. This requires to move the invocation of
> irq_create_affinity_masks() out of the descriptor lock section to avoid
> reverse lock ordering vs. CPU hotplug lock as some callers of the PCI/MSI
> allocation interfaces already hold it.
> 
> Signed-off-by: Thomas Gleixner 

Acked-by: Bjorn Helgaas 

> ---
>  drivers/pci/msi/irqdomain.c |4 -
>  drivers/pci/msi/msi.c   |  120 
> ++--
>  2 files changed, 73 insertions(+), 51 deletions(-)
> 
> --- a/drivers/pci/msi/irqdomain.c
> +++ b/drivers/pci/msi/irqdomain.c
> @@ -14,7 +14,7 @@ int pci_msi_setup_msi_irqs(struct pci_de
>  
>   domain = dev_get_msi_domain(&dev->dev);
>   if (domain && irq_domain_is_hierarchy(domain))
> - return msi_domain_alloc_irqs(domain, &dev->dev, nvec);
> + return msi_domain_alloc_irqs_descs_locked(domain, &dev->dev, 
> nvec);
>  
>   return pci_msi_legacy_setup_msi_irqs(dev, nvec, type);
>  }
> @@ -25,7 +25,7 @@ void pci_msi_teardown_msi_irqs(struct pc
>  
>   domain = dev_get_msi_domain(&dev->dev);
>   if (domain && irq_domain_is_hierarchy(domain))
> - msi_domain_free_irqs(domain, &dev->dev);
> + msi_domain_free_irqs_descs_locked(domain, &dev->dev);
>   else
>   pci_msi_legacy_teardown_msi_irqs(dev);
>  }
> --- a/drivers/pci/msi/msi.c
> +++ b/drivers/pci/msi/msi.c
> @@ -322,11 +322,13 @@ static void __pci_restore_msix_state(str
>  
>   write_msg = arch_restore_msi_irqs(dev);
>  
> + msi_lock_descs(&dev->dev);
>   for_each_pci_msi_entry(entry, dev) {
>   if (write_msg)
>   __pci_write_msi_msg(entry, &entry->msg);
>   pci_msix_write_vector_ctrl(entry, entry->pci.msix_ctrl);
>   }
> + msi_unlock_descs(&dev->dev);
>  
>   pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_MASKALL, 0);
>  }
> @@ -339,20 +341,16 @@ void pci_restore_msi_state(struct pci_de
>  EXPORT_SYMBOL_GPL(pci_restore_msi_state);
>  
>  static struct msi_desc *
> -msi_setup_entry(struct pci_dev *dev, int nvec, struct irq_affinity *affd)
> +msi_setup_entry(struct pci_dev *dev, int nvec, struct irq_affinity_desc 
> *masks)
>  {
> - struct irq_affinity_desc *masks = NULL;
>   struct msi_desc *entry;
>   unsigned long prop;
>   u16 control;
>  
> - if (affd)
> - masks = irq_create_affinity_masks(nvec, affd);
> -
>   /* MSI Entry Initialization */
>   entry = alloc_msi_entry(&dev->dev, nvec, masks);
>   if (!entry)
> - goto out;
> + return NULL;
>  
>   pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control);
>   /* Lies, damned lies, and MSIs */
> @@ -379,8 +377,7 @@ msi_setup_entry(struct pci_dev *dev, int
>   if (entry->pci.msi_attrib.is_64)
>   prop |= MSI_PROP_64BIT;
>   msi_device_set_properties(&dev->dev, prop);
> -out:
> - kfree(masks);
> +
>   return entry;
>  }
>  
> @@ -416,14 +413,21 @@ static int msi_verify_entries(struct pci
>  static int msi_capability_init(struct pci_dev *dev, int nvec,
>  struct irq_affinity *affd)
>  {
> + struct irq_affinity_desc *masks = NULL;
>   struct msi_desc *entry;
>   int ret;
>  
>   pci_msi_set_enable(dev, 0); /* Disable MSI during set up */
>  
> - entry = msi_setup_entry(dev, nvec, affd);
> - if (!entry)
> - return -ENOMEM;
> + if (affd)
> + masks = irq_create_affinity_masks(nvec, affd);
> +
> + msi_lock_descs(&dev->dev);
> + entry = msi_setup_entry(dev, nvec, masks);
> + if (!entry) {
> + ret = -ENOMEM;
> + goto unlock;
> + }
>  
>   /* All MSIs are unmasked by default; mask them all */
>   pci_msi_mask(entry, msi_multi_mask(entry));
> @@ -446,11 +450,14 @@ static int msi_capability_init(struct pc
>  
>   pcibios_free_irq(dev);
>   dev->irq = entry->irq;
> - return 0;
> + goto unlock;
>  
>  err:
>   pci_msi_unmask(entry, msi_multi_mask(entry));
>   free_msi_irqs(dev);
> +unlock:
> + msi_unlock_descs(&dev->dev);
> + kfree(masks);
>   return ret;
>  }
>  
> @@ -477,23 +484,18 @@ static void __iomem *msix_map_region(str
>  
>  static int msix_setup_entries(struct pci_dev *dev, voi

Re: [patch V2 15/23] PCI/MSI: Move code into a separate directory

2021-12-07 Thread Bjorn Helgaas
On Mon, Dec 06, 2021 at 11:27:47PM +0100, Thomas Gleixner wrote:
> msi.c is getting larger and really could do with a splitup. Move it into
> it's own directory to prepare for that.

s/it's/its/

> Signed-off-by: Thomas Gleixner 
> Tested-by: Juergen Gross 
> Reviewed-by: Jason Gunthorpe 

Acked-by: Bjorn Helgaas 

> ---
>  Documentation/driver-api/pci/pci.rst |2 
>  drivers/pci/Makefile |3 
>  drivers/pci/msi.c| 1532 
> ---
>  drivers/pci/msi/Makefile |4 
>  drivers/pci/msi/msi.c| 1532 
> +++
>  5 files changed, 1539 insertions(+), 1534 deletions(-)
> 
> --- a/Documentation/driver-api/pci/pci.rst
> +++ b/Documentation/driver-api/pci/pci.rst
> @@ -13,7 +13,7 @@ PCI Support Library
>  .. kernel-doc:: drivers/pci/search.c
> :export:
>  
> -.. kernel-doc:: drivers/pci/msi.c
> +.. kernel-doc:: drivers/pci/msi/msi.c
> :export:
>  
>  .. kernel-doc:: drivers/pci/bus.c
> --- a/drivers/pci/Makefile
> +++ b/drivers/pci/Makefile
> @@ -5,8 +5,9 @@
>  obj-$(CONFIG_PCI)+= access.o bus.o probe.o host-bridge.o \
>  remove.o pci.o pci-driver.o search.o \
>  pci-sysfs.o rom.o setup-res.o irq.o vpd.o \
> -setup-bus.o vc.o mmap.o setup-irq.o msi.o
> +setup-bus.o vc.o mmap.o setup-irq.o
>  
> +obj-$(CONFIG_PCI)+= msi/
>  obj-$(CONFIG_PCI)+= pcie/
>  
>  ifdef CONFIG_PCI
> --- a/drivers/pci/msi.c
> +++ /dev/null
> @@ -1,1532 +0,0 @@
> -// SPDX-License-Identifier: GPL-2.0
> -/*
> - * PCI Message Signaled Interrupt (MSI)
> - *
> - * Copyright (C) 2003-2004 Intel
> - * Copyright (C) Tom Long Nguyen (tom.l.ngu...@intel.com)
> - * Copyright (C) 2016 Christoph Hellwig.
> - */
> -
> -#include 
> -#include 
> -#include 
> -#include 
> -#include 
> -#include 
> -#include 
> -#include 
> -
> -#include "pci.h"
> -
> -#ifdef CONFIG_PCI_MSI
> -
> -static int pci_msi_enable = 1;
> -int pci_msi_ignore_mask;
> -
> -#define msix_table_size(flags)   ((flags & PCI_MSIX_FLAGS_QSIZE) + 1)
> -
> -#ifdef CONFIG_PCI_MSI_IRQ_DOMAIN
> -static int pci_msi_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
> -{
> - struct irq_domain *domain;
> -
> - domain = dev_get_msi_domain(&dev->dev);
> - if (domain && irq_domain_is_hierarchy(domain))
> - return msi_domain_alloc_irqs(domain, &dev->dev, nvec);
> -
> - return arch_setup_msi_irqs(dev, nvec, type);
> -}
> -
> -static void pci_msi_teardown_msi_irqs(struct pci_dev *dev)
> -{
> - struct irq_domain *domain;
> -
> - domain = dev_get_msi_domain(&dev->dev);
> - if (domain && irq_domain_is_hierarchy(domain))
> - msi_domain_free_irqs(domain, &dev->dev);
> - else
> - arch_teardown_msi_irqs(dev);
> -}
> -#else
> -#define pci_msi_setup_msi_irqs   arch_setup_msi_irqs
> -#define pci_msi_teardown_msi_irqsarch_teardown_msi_irqs
> -#endif
> -
> -#ifdef CONFIG_PCI_MSI_ARCH_FALLBACKS
> -/* Arch hooks */
> -int __weak arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
> -{
> - return -EINVAL;
> -}
> -
> -void __weak arch_teardown_msi_irq(unsigned int irq)
> -{
> -}
> -
> -int __weak arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
> -{
> - struct msi_desc *entry;
> - int ret;
> -
> - /*
> -  * If an architecture wants to support multiple MSI, it needs to
> -  * override arch_setup_msi_irqs()
> -  */
> - if (type == PCI_CAP_ID_MSI && nvec > 1)
> - return 1;
> -
> - for_each_pci_msi_entry(entry, dev) {
> - ret = arch_setup_msi_irq(dev, entry);
> - if (ret < 0)
> - return ret;
> - if (ret > 0)
> - return -ENOSPC;
> - }
> -
> - return 0;
> -}
> -
> -void __weak arch_teardown_msi_irqs(struct pci_dev *dev)
> -{
> - int i;
> - struct msi_desc *entry;
> -
> - for_each_pci_msi_entry(entry, dev)
> - if (entry->irq)
> - for (i = 0; i < entry->nvec_used; i++)
> - arch_teardown_msi_irq(entry->irq + i);
> -}
> -#endif /* CONFIG_PCI_MSI_ARCH_FALLBACKS */
> -
> -/*
> - * PCI 2.3 does not specify mask bits for each MSI interrupt.  Attempting to
> - * mask all MSI interrupts by clearing the 

Re: [patch V2 08/31] PCI/MSI: Use msi_add_msi_desc()

2021-12-07 Thread Bjorn Helgaas
On Mon, Dec 06, 2021 at 11:51:15PM +0100, Thomas Gleixner wrote:
> Simplify the allocation of MSI descriptors by using msi_add_msi_desc()
> which moves the storage handling to core code and prepares for dynamic
> extension of the MSI-X vector space.
> 
> Signed-off-by: Thomas Gleixner 

Acked-by: Bjorn Helgaas 

> ---
>  drivers/pci/msi/msi.c |  122 
> --
>  1 file changed, 59 insertions(+), 63 deletions(-)
> 
> --- a/drivers/pci/msi/msi.c
> +++ b/drivers/pci/msi/msi.c
> @@ -340,45 +340,51 @@ void pci_restore_msi_state(struct pci_de
>  }
>  EXPORT_SYMBOL_GPL(pci_restore_msi_state);
>  
> -static struct msi_desc *
> -msi_setup_entry(struct pci_dev *dev, int nvec, struct irq_affinity_desc 
> *masks)
> +static int msi_setup_msi_desc(struct pci_dev *dev, int nvec,
> +   struct irq_affinity_desc *masks)
>  {
> - struct msi_desc *entry;
> + struct msi_desc desc;
>   unsigned long prop;
>   u16 control;
> + int ret;
>  
>   /* MSI Entry Initialization */
> - entry = alloc_msi_entry(&dev->dev, nvec, masks);
> - if (!entry)
> - return NULL;
> + memset(&desc, 0, sizeof(desc));
>  
>   pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control);
>   /* Lies, damned lies, and MSIs */
>   if (dev->dev_flags & PCI_DEV_FLAGS_HAS_MSI_MASKING)
>   control |= PCI_MSI_FLAGS_MASKBIT;
> + /* Respect XEN's mask disabling */
> + if (pci_msi_ignore_mask)
> + control &= ~PCI_MSI_FLAGS_MASKBIT;
>  
> - entry->pci.msi_attrib.is_64 = !!(control & PCI_MSI_FLAGS_64BIT);
> - entry->pci.msi_attrib.can_mask  = !pci_msi_ignore_mask &&
> -   !!(control & PCI_MSI_FLAGS_MASKBIT);
> - entry->pci.msi_attrib.default_irq = dev->irq;
> - entry->pci.msi_attrib.multi_cap = (control & PCI_MSI_FLAGS_QMASK) >> 1;
> - entry->pci.msi_attrib.multiple  = ilog2(__roundup_pow_of_two(nvec));
> + desc.nvec_used  = nvec;
> + desc.pci.msi_attrib.is_64   = !!(control & PCI_MSI_FLAGS_64BIT);
> + desc.pci.msi_attrib.can_mask= !!(control & PCI_MSI_FLAGS_MASKBIT);
> + desc.pci.msi_attrib.default_irq = dev->irq;
> + desc.pci.msi_attrib.multi_cap   = (control & PCI_MSI_FLAGS_QMASK) >> 1;
> + desc.pci.msi_attrib.multiple= ilog2(__roundup_pow_of_two(nvec));
> + desc.affinity   = masks;
>  
>   if (control & PCI_MSI_FLAGS_64BIT)
> - entry->pci.mask_pos = dev->msi_cap + PCI_MSI_MASK_64;
> + desc.pci.mask_pos = dev->msi_cap + PCI_MSI_MASK_64;
>   else
> - entry->pci.mask_pos = dev->msi_cap + PCI_MSI_MASK_32;
> + desc.pci.mask_pos = dev->msi_cap + PCI_MSI_MASK_32;
>  
>   /* Save the initial mask status */
> - if (entry->pci.msi_attrib.can_mask)
> - pci_read_config_dword(dev, entry->pci.mask_pos, 
> &entry->pci.msi_mask);
> + if (desc.pci.msi_attrib.can_mask)
> + pci_read_config_dword(dev, desc.pci.mask_pos, 
> &desc.pci.msi_mask);
>  
> - prop = MSI_PROP_PCI_MSI;
> - if (entry->pci.msi_attrib.is_64)
> - prop |= MSI_PROP_64BIT;
> - msi_device_set_properties(&dev->dev, prop);
> + ret = msi_add_msi_desc(&dev->dev, &desc);
> + if (!ret) {
> + prop = MSI_PROP_PCI_MSI;
> + if (desc.pci.msi_attrib.is_64)
> + prop |= MSI_PROP_64BIT;
> + msi_device_set_properties(&dev->dev, prop);
> + }
>  
> - return entry;
> + return ret;
>  }
>  
>  static int msi_verify_entries(struct pci_dev *dev)
> @@ -423,17 +429,14 @@ static int msi_capability_init(struct pc
>   masks = irq_create_affinity_masks(nvec, affd);
>  
>   msi_lock_descs(&dev->dev);
> - entry = msi_setup_entry(dev, nvec, masks);
> - if (!entry) {
> - ret = -ENOMEM;
> + ret = msi_setup_msi_desc(dev, nvec, masks);
> + if (ret)
>   goto unlock;
> - }
>  
>   /* All MSIs are unmasked by default; mask them all */
> + entry = first_pci_msi_entry(dev);
>   pci_msi_mask(entry, msi_multi_mask(entry));
>  
> - list_add_tail(&entry->list, dev_to_msi_list(&dev->dev));
> -
>   /* Configure MSI capability structure */
>   ret = pci_msi_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSI);
>   if (ret)
> @@ -482,49 +485,40 @@ static void __iomem *msix_map_region(str
>  

Re: [patch V2 09/31] PCI/MSI: Let core code free MSI descriptors

2021-12-07 Thread Bjorn Helgaas
On Mon, Dec 06, 2021 at 11:51:16PM +0100, Thomas Gleixner wrote:
> Set the domain info flag which tells the core code to free the MSI
> descriptors from msi_domain_free_irqs() and add an explicit call to the
> core function into the legacy code.
> 
> Signed-off-by: Thomas Gleixner 

Acked-by: Bjorn Helgaas 

> ---
>  drivers/pci/msi/irqdomain.c |3 ++-
>  drivers/pci/msi/legacy.c|1 +
>  drivers/pci/msi/msi.c   |   14 --
>  3 files changed, 3 insertions(+), 15 deletions(-)
> 
> --- a/drivers/pci/msi/irqdomain.c
> +++ b/drivers/pci/msi/irqdomain.c
> @@ -171,7 +171,8 @@ struct irq_domain *pci_msi_create_irq_do
>   if (info->flags & MSI_FLAG_USE_DEF_CHIP_OPS)
>   pci_msi_domain_update_chip_ops(info);
>  
> - info->flags |= MSI_FLAG_ACTIVATE_EARLY | MSI_FLAG_DEV_SYSFS;
> + info->flags |= MSI_FLAG_ACTIVATE_EARLY | MSI_FLAG_DEV_SYSFS |
> +MSI_FLAG_FREE_MSI_DESCS;
>   if (IS_ENABLED(CONFIG_GENERIC_IRQ_RESERVATION_MODE))
>   info->flags |= MSI_FLAG_MUST_REACTIVATE;
>  
> --- a/drivers/pci/msi/legacy.c
> +++ b/drivers/pci/msi/legacy.c
> @@ -80,4 +80,5 @@ void pci_msi_legacy_teardown_msi_irqs(st
>  {
>   msi_device_destroy_sysfs(&dev->dev);
>   arch_teardown_msi_irqs(dev);
> + msi_free_msi_descs(&dev->dev);
>  }
> --- a/drivers/pci/msi/msi.c
> +++ b/drivers/pci/msi/msi.c
> @@ -224,22 +224,8 @@ EXPORT_SYMBOL_GPL(pci_write_msi_msg);
>  
>  static void free_msi_irqs(struct pci_dev *dev)
>  {
> - struct list_head *msi_list = dev_to_msi_list(&dev->dev);
> - struct msi_desc *entry, *tmp;
> - int i;
> -
> - for_each_pci_msi_entry(entry, dev)
> - if (entry->irq)
> - for (i = 0; i < entry->nvec_used; i++)
> - BUG_ON(irq_has_action(entry->irq + i));
> -
>   pci_msi_teardown_msi_irqs(dev);
>  
> - list_for_each_entry_safe(entry, tmp, msi_list, list) {
> - list_del(&entry->list);
> - free_msi_entry(entry);
> - }
> -
>   if (dev->msix_base) {
>   iounmap(dev->msix_base);
>   dev->msix_base = NULL;
> 



Re: [patch V2 10/31] PCI/MSI: Use msi_on_each_desc()

2021-12-07 Thread Bjorn Helgaas
On Mon, Dec 06, 2021 at 11:51:18PM +0100, Thomas Gleixner wrote:
> Use the new iterator functions which pave the way for dynamically extending
> MSI-X vectors.
> 
> Signed-off-by: Thomas Gleixner 

Acked-by: Bjorn Helgaas 

> ---
>  drivers/pci/msi/irqdomain.c |4 ++--
>  drivers/pci/msi/legacy.c|   19 ---
>  drivers/pci/msi/msi.c   |   30 ++
>  3 files changed, 24 insertions(+), 29 deletions(-)
> 
> --- a/drivers/pci/msi/irqdomain.c
> +++ b/drivers/pci/msi/irqdomain.c
> @@ -83,7 +83,7 @@ static int pci_msi_domain_check_cap(stru
>   struct msi_domain_info *info,
>   struct device *dev)
>  {
> - struct msi_desc *desc = first_pci_msi_entry(to_pci_dev(dev));
> + struct msi_desc *desc = msi_first_desc(dev, MSI_DESC_ALL);
>  
>   /* Special handling to support __pci_enable_msi_range() */
>   if (pci_msi_desc_is_multi_msi(desc) &&
> @@ -98,7 +98,7 @@ static int pci_msi_domain_check_cap(stru
>   unsigned int idx = 0;
>  
>   /* Check for gaps in the entry indices */
> - for_each_msi_entry(desc, dev) {
> + msi_for_each_desc(desc, dev, MSI_DESC_ALL) {
>   if (desc->msi_index != idx++)
>   return -ENOTSUPP;
>   }
> --- a/drivers/pci/msi/legacy.c
> +++ b/drivers/pci/msi/legacy.c
> @@ -28,7 +28,7 @@ int __weak arch_setup_msi_irqs(struct pc
>   if (type == PCI_CAP_ID_MSI && nvec > 1)
>   return 1;
>  
> - for_each_pci_msi_entry(desc, dev) {
> + msi_for_each_desc(desc, &dev->dev, MSI_DESC_NOTASSOCIATED) {
>   ret = arch_setup_msi_irq(dev, desc);
>   if (ret)
>   return ret < 0 ? ret : -ENOSPC;
> @@ -42,27 +42,24 @@ void __weak arch_teardown_msi_irqs(struc
>   struct msi_desc *desc;
>   int i;
>  
> - for_each_pci_msi_entry(desc, dev) {
> - if (desc->irq) {
> - for (i = 0; i < desc->nvec_used; i++)
> - arch_teardown_msi_irq(desc->irq + i);
> - }
> + msi_for_each_desc(desc, &dev->dev, MSI_DESC_ASSOCIATED) {
> + for (i = 0; i < desc->nvec_used; i++)
> + arch_teardown_msi_irq(desc->irq + i);
>   }
>  }
>  
>  static int pci_msi_setup_check_result(struct pci_dev *dev, int type, int ret)
>  {
> - struct msi_desc *entry;
> + struct msi_desc *desc;
>   int avail = 0;
>  
>   if (type != PCI_CAP_ID_MSIX || ret >= 0)
>   return ret;
>  
>   /* Scan the MSI descriptors for successfully allocated ones. */
> - for_each_pci_msi_entry(entry, dev) {
> - if (entry->irq != 0)
> - avail++;
> - }
> + msi_for_each_desc(desc, &dev->dev, MSI_DESC_ASSOCIATED)
> + avail++;
> +
>   return avail ? avail : ret;
>  }
>  
> --- a/drivers/pci/msi/msi.c
> +++ b/drivers/pci/msi/msi.c
> @@ -299,7 +299,6 @@ static void __pci_restore_msix_state(str
>  
>   if (!dev->msix_enabled)
>   return;
> - BUG_ON(list_empty(dev_to_msi_list(&dev->dev)));
>  
>   /* route the table */
>   pci_intx_for_msi(dev, 0);
> @@ -309,7 +308,7 @@ static void __pci_restore_msix_state(str
>   write_msg = arch_restore_msi_irqs(dev);
>  
>   msi_lock_descs(&dev->dev);
> - for_each_pci_msi_entry(entry, dev) {
> + msi_for_each_desc(entry, &dev->dev, MSI_DESC_ALL) {
>   if (write_msg)
>   __pci_write_msi_msg(entry, &entry->msg);
>   pci_msix_write_vector_ctrl(entry, entry->pci.msix_ctrl);
> @@ -378,14 +377,14 @@ static int msi_verify_entries(struct pci
>   if (!dev->no_64bit_msi)
>   return 0;
>  
> - for_each_pci_msi_entry(entry, dev) {
> + msi_for_each_desc(entry, &dev->dev, MSI_DESC_ALL) {
>   if (entry->msg.address_hi) {
>   pci_err(dev, "arch assigned 64-bit MSI address %#x%08x 
> but device only supports 32 bits\n",
>   entry->msg.address_hi, entry->msg.address_lo);
> - return -EIO;
> + break;
>   }
>   }
> - return 0;
> + return !entry ? 0 : -EIO;
>  }
>  
>  /**
> @@ -418,7 +417,7 @@ static int msi_capability_init(struct pc
>   goto unlock;
>  
>   /* All MSIs are

Re: [patch V2 19/31] PCI: hv: Rework MSI handling

2021-12-07 Thread Bjorn Helgaas
On Mon, Dec 06, 2021 at 11:51:33PM +0100, Thomas Gleixner wrote:
> Replace the about to vanish iterators and make use of the filtering. Take
> the descriptor lock around the iterators.
> 
> Signed-off-by: Thomas Gleixner 

Acked-by: Bjorn Helgaas 

> ---
>  drivers/pci/controller/pci-hyperv.c |   15 ++-
>  1 file changed, 10 insertions(+), 5 deletions(-)
> 
> --- a/drivers/pci/controller/pci-hyperv.c
> +++ b/drivers/pci/controller/pci-hyperv.c
> @@ -3445,18 +3445,23 @@ static int hv_pci_suspend(struct hv_devi
>  
>  static int hv_pci_restore_msi_msg(struct pci_dev *pdev, void *arg)
>  {
> - struct msi_desc *entry;
>   struct irq_data *irq_data;
> + struct msi_desc *entry;
> + int ret = 0;
>  
> - for_each_pci_msi_entry(entry, pdev) {
> + msi_lock_descs(&pdev->dev);
> + msi_for_each_desc(entry, &pdev->dev, MSI_DESC_ASSOCIATED) {
>   irq_data = irq_get_irq_data(entry->irq);
> - if (WARN_ON_ONCE(!irq_data))
> - return -EINVAL;
> + if (WARN_ON_ONCE(!irq_data)) {
> + ret = -EINVAL;
> + break;
> + }
>  
>   hv_compose_msi_msg(irq_data, &entry->msg);
>   }
> + msi_unlock_descs(&pdev->dev);
>  
> - return 0;
> + return ret;
>  }
>  
>  /*
> 



Re: [PATCH v2 0/6] PCI: Drop duplicated tracking of a pci_dev's bound driver

2021-08-05 Thread Bjorn Helgaas
On Tue, Aug 03, 2021 at 12:01:44PM +0200, Uwe Kleine-König wrote:
> Hello,
> 
> changes since v1 
> (https://lore.kernel.org/linux-pci/20210729203740.1377045-1-u.kleine-koe...@pengutronix.de):
> 
> - New patch to simplify drivers/pci/xen-pcifront.c, spotted and
>   suggested by Boris Ostrovsky
> - Fix a possible NULL pointer dereference I introduced in xen-pcifront.c
> - A few whitespace improvements
> - Add a commit log to patch #6 (formerly #5)
> 
> I also expanded the audience for patches #4 and #6 to allow affected
> people to actually see the changes to their drivers.
> 
> Interdiff can be found below.
> 
> The idea is still the same: After a few cleanups (#1 - #3) a new macro
> is introduced abstracting access to struct pci_dev->driver. All users
> are then converted to use this and in the last patch the macro is
> changed to make use of struct pci_dev::dev->driver to get rid of the
> duplicated tracking.

I love the idea of this series!

I looked at all the bus_type.probe() methods, it looks like pci_dev is
not the only offender here.  At least the following also have a driver
pointer in the device struct:

  parisc_device.driver
  acpi_device.driver
  dio_dev.driver
  hid_device.driver
  pci_dev.driver
  pnp_dev.driver
  rio_dev.driver
  zorro_dev.driver

Do you plan to do the same for all of them, or is there some reason
why they need the pointer and PCI doesn't?

In almost all cases, other buses define a "to__driver()"
interface.  In fact, PCI already has a to_pci_driver().

This series adds pci_driver_of_dev(), which basically just means we
can do this:

  pdrv = pci_driver_of_dev(pdev);

instead of this:

  pdrv = to_pci_driver(pdev->dev.driver);

I don't see any other "_driver_of_dev()" interfaces, so I assume
other buses just live with the latter style?  I'd rather not be
different and have two ways to get the "struct pci_driver *" unless
there's a good reason.

Looking through the places that care about pci_dev.driver (the ones
updated by patch 5/6), many of them are ... a little dubious to begin
with.  A few need the "struct pci_error_handlers *err_handler"
pointer, so that's probably legitimate.  But many just need a name,
and should probably be using dev_driver_string() instead.

Bjorn



Re: [PATCH v2 0/6] PCI: Drop duplicated tracking of a pci_dev's bound driver

2021-08-06 Thread Bjorn Helgaas
On Fri, Aug 06, 2021 at 08:46:23AM +0200, Uwe Kleine-König wrote:
> On Thu, Aug 05, 2021 at 06:42:34PM -0500, Bjorn Helgaas wrote:

> > I looked at all the bus_type.probe() methods, it looks like pci_dev is
> > not the only offender here.  At least the following also have a driver
> > pointer in the device struct:
> > 
> >   parisc_device.driver
> >   acpi_device.driver
> >   dio_dev.driver
> >   hid_device.driver
> >   pci_dev.driver
> >   pnp_dev.driver
> >   rio_dev.driver
> >   zorro_dev.driver
> 
> Right, when I converted zorro_dev it was pointed out that the code was
> copied from pci and the latter has the same construct. :-)
> See
> https://lore.kernel.org/r/20210730191035.1455248-5-u.kleine-koe...@pengutronix.de
> for the patch, I don't find where pci was pointed out, maybe it was on
> irc only.

Oh, thanks!  I looked to see if you'd done something similar
elsewhere, but I missed this one.

> > Looking through the places that care about pci_dev.driver (the ones
> > updated by patch 5/6), many of them are ... a little dubious to begin
> > with.  A few need the "struct pci_error_handlers *err_handler"
> > pointer, so that's probably legitimate.  But many just need a name,
> > and should probably be using dev_driver_string() instead.
> 
> Yeah, I considered adding a function to get the driver name from a
> pci_dev and a function to get the error handlers. Maybe it's an idea to
> introduce these two and then use to_pci_driver(pdev->dev.driver) for the
> few remaining users? Maybe doing that on top of my current series makes
> sense to have a clean switch from pdev->driver to pdev->dev.driver?!

I'd propose using dev_driver_string() for these places:

  eeh_driver_name() (could change callers to use dev_driver_string())
  bcma_host_pci_probe()
  qm_alloc_uacce()
  hns3_get_drvinfo()
  prestera_pci_probe()
  mlxsw_pci_probe()
  nfp_get_drvinfo()
  ssb_pcihost_probe()

The use in mpt_device_driver_register() looks unnecessary: it's only
to get a struct pci_device_id *, which is passed to ->probe()
functions that don't need it.

The use in adf_enable_aer() looks wrong: it sets the err_handler
pointer in one of the adf_driver structs.  I think those structs
should be basically immutable, and the drivers that call
adf_enable_aer() from their .probe() methods should set
".err_handler = &adf_err_handler" in their static adf_driver
definitions instead.

I think that basically leaves these:

  uncore_pci_probe() # .id_table, custom driver "registration"
  match_id() # .id_table, arch/x86/kernel/probe_roms.c
  xhci_pci_quirks()  # .id_table
  pci_error_handlers()   # roll-your-own AER handling, drivers/misc/cxl/guest.c

I think it would be fine to use to_pci_driver(pdev->dev.driver) for
these few.

Bjorn



Re: [PATCH v2 0/6] PCI: Drop duplicated tracking of a pci_dev's bound driver

2021-08-09 Thread Bjorn Helgaas
On Sat, Aug 07, 2021 at 11:26:45AM +0200, Uwe Kleine-König wrote:
> On Fri, Aug 06, 2021 at 04:24:52PM -0500, Bjorn Helgaas wrote:
> > On Fri, Aug 06, 2021 at 08:46:23AM +0200, Uwe Kleine-König wrote:
> > > On Thu, Aug 05, 2021 at 06:42:34PM -0500, Bjorn Helgaas wrote:
> > 
> > > > I looked at all the bus_type.probe() methods, it looks like pci_dev is
> > > > not the only offender here.  At least the following also have a driver
> > > > pointer in the device struct:
> > > > 
> > > >   parisc_device.driver
> > > >   acpi_device.driver
> > > >   dio_dev.driver
> > > >   hid_device.driver
> > > >   pci_dev.driver
> > > >   pnp_dev.driver
> > > >   rio_dev.driver
> > > >   zorro_dev.driver
> > > 
> > > Right, when I converted zorro_dev it was pointed out that the code was
> > > copied from pci and the latter has the same construct. :-)
> > > See
> > > https://lore.kernel.org/r/20210730191035.1455248-5-u.kleine-koe...@pengutronix.de
> > > for the patch, I don't find where pci was pointed out, maybe it was on
> > > irc only.
> > 
> > Oh, thanks!  I looked to see if you'd done something similar
> > elsewhere, but I missed this one.
> > 
> > > > Looking through the places that care about pci_dev.driver (the ones
> > > > updated by patch 5/6), many of them are ... a little dubious to begin
> > > > with.  A few need the "struct pci_error_handlers *err_handler"
> > > > pointer, so that's probably legitimate.  But many just need a name,
> > > > and should probably be using dev_driver_string() instead.
> > > 
> > > Yeah, I considered adding a function to get the driver name from a
> > > pci_dev and a function to get the error handlers. Maybe it's an idea to
> > > introduce these two and then use to_pci_driver(pdev->dev.driver) for the
> > > few remaining users? Maybe doing that on top of my current series makes
> > > sense to have a clean switch from pdev->driver to pdev->dev.driver?!
> > 
> > I'd propose using dev_driver_string() for these places:
> > 
> >   eeh_driver_name() (could change callers to use dev_driver_string())
> >   bcma_host_pci_probe()
> >   qm_alloc_uacce()
> >   hns3_get_drvinfo()
> >   prestera_pci_probe()
> >   mlxsw_pci_probe()
> >   nfp_get_drvinfo()
> >   ssb_pcihost_probe()
> 
> So the idea is:
> 
>   PCI: Simplify pci_device_remove()
>   PCI: Drop useless check from pci_device_probe()
>   xen/pci: Drop some checks that are always true
> 
> are kept as is as preparation. (Do you want to take them from this v2,
> or should I include them again in v3?)

Easiest if you include them until we merge the series.

> Then convert the list of functions above to use dev_driver_string() in a
> 4th patch.
> 
> > The use in mpt_device_driver_register() looks unnecessary: it's only
> > to get a struct pci_device_id *, which is passed to ->probe()
> > functions that don't need it.
> 
> This is patch #5.
> 
> > The use in adf_enable_aer() looks wrong: it sets the err_handler
> > pointer in one of the adf_driver structs.  I think those structs
> > should be basically immutable, and the drivers that call
> > adf_enable_aer() from their .probe() methods should set
> > ".err_handler = &adf_err_handler" in their static adf_driver
> > definitions instead.
> 
> I don't understand that one without some research, probably this yields
> at least one patch.

Yeah, it's a little messy because you'd have to make adf_err_handler
non-static and add an extern for it.  Sample below.

> > I think that basically leaves these:
> > 
> >   uncore_pci_probe() # .id_table, custom driver "registration"
> >   match_id() # .id_table, arch/x86/kernel/probe_roms.c
> >   xhci_pci_quirks()  # .id_table
> >   pci_error_handlers()   # roll-your-own AER handling, 
> > drivers/misc/cxl/guest.c
> > 
> > I think it would be fine to use to_pci_driver(pdev->dev.driver) for
> > these few.
> 
> Converting these will be patch 7 then and patch 8 can then drop the
> duplicated handling.
> 
> Sounds reasonable?

Sounds good to me.  Thanks for working on this!

Bjorn


diff --git a/drivers/crypto/qat/qat_4xxx/adf_drv.c 
b/drivers/crypto/qat/qat_4xxx/adf_drv.c
index a8805c815d16..75e6c5540523 100644
--- a/drivers/crypto/qat/qat_4xxx/adf_drv.c
+++ b/drivers/crypto/qat/qat_4xxx/adf_drv.c
@@ -310,6 +310,7 @@ s

Re: [PATCH v2] PCI/MSI: Correct use of can_mask in msi_add_msi_desc()

2022-08-05 Thread Bjorn Helgaas
On Fri, Aug 05, 2022 at 09:10:41AM -0300, Jason Gunthorpe wrote:
> On Fri, Aug 05, 2022 at 12:03:15PM +0200, Josef Johansson wrote:
> > On 2/14/22 11:07, Josef Johansson wrote:
> > > From: Josef Johansson 
> > > 
> > > PCI/MSI: Correct use of can_mask in msi_add_msi_desc()
> > > Commit 71020a3c0dff4 ("PCI/MSI: Use msi_add_msi_desc()") modifies
> > > the logic of checking msi_attrib.can_mask, without any reason.
> > > This commits restores that logic.
> > >
> > > Fixes: 71020a3c0dff4 ("PCI/MSI: Use msi_add_msi_desc()")
> > > Signed-off-by: Josef Johansson 
> > > 
> > > ---
> > > v2: Changing subject line to fit earlier commits.
> > > 
> > > Trying to fix a NULL BUG in the NVMe MSIX implementation I stumbled upon 
> > > this code,
> > > which ironically was what my last MSI patch resulted into.
> > > 
> > > I don't see any reason why this logic was change, and it did not break 
> > > anything
> > > correcting the logic.
> > > 
> > > CC xen-devel since it very much relates to Xen kernel (via 
> > > pci_msi_ignore_mask).
> > > ---
> > > 
> > > diff --git a/drivers/pci/msi/msi.c b/drivers/pci/msi/msi.c
> > > index c19c7ca58186..146e7b9a01cc 100644
> > > --- a/drivers/pci/msi/msi.c
> > > +++ b/drivers/pci/msi/msi.c
> > > @@ -526,7 +526,7 @@ static int msix_setup_msi_descs(struct pci_dev *dev, 
> > > void __iomem *base,
> > >   desc.pci.msi_attrib.can_mask = !pci_msi_ignore_mask &&
> > >  
> > > !desc.pci.msi_attrib.is_virtual;
> > > - if (!desc.pci.msi_attrib.can_mask) {
> > > + if (desc.pci.msi_attrib.can_mask) {
> > >   addr = pci_msix_desc_addr(&desc);
> > >   desc.pci.msix_ctrl = readl(addr + 
> > > PCI_MSIX_ENTRY_VECTOR_CTRL);
> > >   }
> > > 
> 
> Reviewed-by: Jason Gunthorpe 
> 
> Bjorn, please take it?

Thanks for the ping.  Since 71020a3c0dff4 is by Thomas, and he merged
that along with a whole series of MSI work, I think I probably
expected him to take care of this.

This looks like a simple typo, so I think the commit log should be
reworded along that line, e.g., something like:

  71020a3c0dff4 ("PCI/MSI: Use msi_add_msi_desc()") inadvertently
  reversed the sense of "msi_attrib.can_mask" in one use:

- if (entry->pci.msi_attrib.can_mask) {
- addr = pci_msix_desc_addr(entry);
- entry->pci.msix_ctrl = readl(addr + PCI_MSIX_ENTRY_VECTOR_CTRL);
+ if (!desc.pci.msi_attrib.can_mask) {
+ addr = pci_msix_desc_addr(&desc);
+ desc.pci.msix_ctrl = readl(addr + PCI_MSIX_ENTRY_VECTOR_CTRL);

  Restore the original test.

Thomas, do you want to take this?  I'm happy to merge it, but would
like your reviewed-by or ack first.

Bjorn



Re: [Xen-devel] [PATCH V3 1/2] Drivers/PCI: Export pcie_has_flr() interface

2017-12-08 Thread Bjorn Helgaas
On Thu, Dec 07, 2017 at 05:21:44PM -0500, Govinda Tatti wrote:
> This patch exports pcie_has_flr() and it is being used by Xen pciback
> driver to reset (flr/slot/bus) PCI devices based on 'reset' SysFS
> attribute.
> 
> Signed-off-by: Govinda Tatti 
> ---
> v3: -New
> 
>  drivers/pci/pci.c   | 3 ++-
>  include/linux/pci.h | 1 +
>  2 files changed, 3 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
> index 6078dfc..499e922 100644
> --- a/drivers/pci/pci.c
> +++ b/drivers/pci/pci.c
> @@ -3872,7 +3872,7 @@ static void pci_flr_wait(struct pci_dev *dev)
>   * Returns true if the device advertises support for PCIe function level
>   * resets.
>   */
> -static bool pcie_has_flr(struct pci_dev *dev)
> +bool pcie_has_flr(struct pci_dev *dev)
>  {
>   u32 cap;
>  
> @@ -3882,6 +3882,7 @@ static bool pcie_has_flr(struct pci_dev *dev)
>   pcie_capability_read_dword(dev, PCI_EXP_DEVCAP, &cap);
>   return cap & PCI_EXP_DEVCAP_FLR;
>  }
> +EXPORT_SYMBOL_GPL(pcie_has_flr);

I'd rather change pcie_flr() so you could *always* call it, and it
would return 0, -ENOTTY, or whatever, based on whether FLR is
supported.  Is that feasible?

I don't like the "Can I do this? Ok, do this" style of interfaces.
It's racy (not really applicable in this case) and seems clunky.

>  /**
>   * pcie_flr - initiate a PCIe function level reset
> diff --git a/include/linux/pci.h b/include/linux/pci.h
> index d16a7c0..44bf2b5 100644
> --- a/include/linux/pci.h
> +++ b/include/linux/pci.h
> @@ -1089,6 +1089,7 @@ int pcie_get_mps(struct pci_dev *dev);
>  int pcie_set_mps(struct pci_dev *dev, int mps);
>  int pcie_get_minimum_link(struct pci_dev *dev, enum pci_bus_speed *speed,
> enum pcie_link_width *width);
> +bool pcie_has_flr(struct pci_dev *dev);
>  void pcie_flr(struct pci_dev *dev);
>  int __pci_reset_function(struct pci_dev *dev);
>  int __pci_reset_function_locked(struct pci_dev *dev);
> -- 
> 2.9.5
> 

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] [PATCH V3 1/2] Drivers/PCI: Export pcie_has_flr() interface

2017-12-11 Thread Bjorn Helgaas
On Mon, Dec 11, 2017 at 06:29:29PM -0600, Govinda Tatti wrote:
> 
> Thanks Bjorn for your review comments. Please see below for my comments.
> 
> On 12/8/2017 2:24 PM, Bjorn Helgaas wrote:
> >On Thu, Dec 07, 2017 at 05:21:44PM -0500, Govinda Tatti wrote:
> >>This patch exports pcie_has_flr() and it is being used by Xen pciback
> >>driver to reset (flr/slot/bus) PCI devices based on 'reset' SysFS
> >>attribute.
> >>
> >>Signed-off-by: Govinda Tatti 
> >>---
> >>v3: -New
> >>
> >>  drivers/pci/pci.c   | 3 ++-
> >>  include/linux/pci.h | 1 +
> >>  2 files changed, 3 insertions(+), 1 deletion(-)
> >>
> >>diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
> >>index 6078dfc..499e922 100644
> >>--- a/drivers/pci/pci.c
> >>+++ b/drivers/pci/pci.c
> >>@@ -3872,7 +3872,7 @@ static void pci_flr_wait(struct pci_dev *dev)
> >>   * Returns true if the device advertises support for PCIe function level
> >>   * resets.
> >>   */
> >>-static bool pcie_has_flr(struct pci_dev *dev)
> >>+bool pcie_has_flr(struct pci_dev *dev)
> >>  {
> >>u32 cap;
> >>@@ -3882,6 +3882,7 @@ static bool pcie_has_flr(struct pci_dev *dev)
> >>pcie_capability_read_dword(dev, PCI_EXP_DEVCAP, &cap);
> >>return cap & PCI_EXP_DEVCAP_FLR;
> >>  }
> >>+EXPORT_SYMBOL_GPL(pcie_has_flr);
> >I'd rather change pcie_flr() so you could *always* call it, and it
> >would return 0, -ENOTTY, or whatever, based on whether FLR is
> >supported.  Is that feasible?
> Sure, I will add pcie_has_flr() logic inside pcie_flr() and return
> appropriate
> values as suggested by you. Do we still want to retain pcie_has_flr() and
> its usage inside pci.c?.Otherwise, I will remove it and do required cleanup.

If you can restructure the code and remove pcie_has_flr() while
retaining the existing behavior of its callers, that would be great.

Bjorn

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] [PATCH] x86/PCI: limit the size of the 64bit BAR to 256GB

2017-12-12 Thread Bjorn Helgaas
[+cc Boris, Juergen, xen-devel]

On Mon, Dec 11, 2017 at 04:04:52PM +0100, Christian König wrote:
> Xen hides a bit of system memory from the OS for its own purpose by
> intercepting e820. This memory is unfortunately not reported as
> reserved, but rather completely invisible.
> 
> Avoid this address space collision and possible similar problems by
> limiting the size of the newly allocated root hub window to 256GB which
> should be sufficient for the short term.

It sounds like Boris is working on a more complete fix, so I'm going
to drop this for now.  This changelog includes a few more details, but
I think it makes implicit assumptions about where memory and holes
are and how big they are, and it's still not clear why 256GB is the
right number.  Is it connected to the expected size of the BAR, or
related somehow to the size of the hole?

If we need this as a short-term workaround, we can do that, but I
would like to include a reference to f5775e0b6116 ("x86/xen: discard
RAM regions above the maximum reservation") and somehow make what's
going on here a little more explicit.

> Signed-off-by: Christian König 
> ---
>  arch/x86/pci/fixup.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
> index 8f86060f5cf6..ed8bc6ab0573 100644
> --- a/arch/x86/pci/fixup.c
> +++ b/arch/x86/pci/fixup.c
> @@ -702,7 +702,7 @@ static void pci_amd_enable_64bit_bar(struct pci_dev *dev)
>   res->name = "PCI Bus :00";
>   res->flags = IORESOURCE_PREFETCH | IORESOURCE_MEM |
>   IORESOURCE_MEM_64 | IORESOURCE_WINDOW;
> - res->start = 0x1ull;
> + res->start = 0xbdull;
>   res->end = 0xfdull - 1;
>  
>   /* Just grab the free area behind system memory for this */
> -- 
> 2.11.0
> 

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] [PATCH V3 1/2] Drivers/PCI: Export pcie_has_flr() interface

2017-12-13 Thread Bjorn Helgaas
[+cc Christoph]

On Wed, Dec 13, 2017 at 02:46:57PM -0600, Govinda Tatti wrote:
> 
> -static bool pcie_has_flr(struct pci_dev *dev)
> +bool pcie_has_flr(struct pci_dev *dev)
>   {
>   u32 cap;
> @@ -3882,6 +3882,7 @@ static bool pcie_has_flr(struct pci_dev *dev)
>   pcie_capability_read_dword(dev, PCI_EXP_DEVCAP, &cap);
>   return cap & PCI_EXP_DEVCAP_FLR;
>   }
> +EXPORT_SYMBOL_GPL(pcie_has_flr);
> >>>I'd rather change pcie_flr() so you could *always* call it, and it
> >>>would return 0, -ENOTTY, or whatever, based on whether FLR is
> >>>supported.  Is that feasible?
> >>Sure, I will add pcie_has_flr() logic inside pcie_flr() and return
> >>appropriate
> >>values as suggested by you. Do we still want to retain pcie_has_flr() and
> >>its usage inside pci.c?.Otherwise, I will remove it and do required cleanup.
> >If you can restructure the code and remove pcie_has_flr() while
> >retaining the existing behavior of its callers, that would be great.
> I checked the current usage of pcie_has_flr() and pcie_flr(). I have
> a couple
> of questions or need some clarification.
> 
> 1. pcie_has_flr() usage inside pci_probe_reset_function().
> 
>    This function is only calling pcie_has_flr() but not pcie_flr().
>    Rest of the code is trying to do specific type of reset except
> pcie_flr().
> 
>     rc = pci_dev_specific_reset(dev, 1);
>     if (rc != -ENOTTY)
>     return rc;
>     if (pcie_has_flr(dev))
>     return 0;
>     rc = pci_af_flr(dev, 1);
>     if (rc != -ENOTTY)
>     return rc;
> 
>    In other-words, I can remove usage of pcie_has_flr() in all other places
>    in pci.c except in above function.

I think we should keep the EXPORT_SYMBOL_GPL() part of a60a2b73ba69
("PCI: Export pcie_flr()"), but revert the restructuring part.

Prior to a60a2b73ba69, we had

  int pcie_flr(struct pci_dev *dev, int probe);

like all the other reset methods.  AFAICT, the addition of
pcie_has_flr() was to optimize the path slightly because when drivers
call pcie_flr(), they should already know that their hardware supports
FLR.  But I don't think that optimization is worth the extra code
complexity.  If we do need to optimize it, we can check this in the
core during enumeration and set PCI_DEV_FLAGS_NO_FLR_RESET
accordingly.

Christoph, chime in if I'm missing something here.

> 2. W.r.t pcie_flr(), I am planning to return error code. Currently,
> the following
>    file/modules are calling this function. My plan is to add a check
> for return
>    code and print a WANRING message if return code is NON-ZERO. I
> hope this is
>    sufficient for this patch.
> 
>    drivers/crypto/qat/qat_common/adf_aer.c
>    drivers/infiniband/hw/hfi1/chip.c (2 places)
>    drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
>    drivers/net/ethernet/intel/ixgbe/ixgbe_main.c (2 places)
>    drivers/pci/quirks.c (2 places)

Checking the return code is probably overkill, since pcie_flr() is
void and returns no errors now.  The only point of the return value is
to tell whether the device supports FLR.  If we call it with "probe ==
0" there's no useful error to return.

Bjorn

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] [PATCH V3 1/2] Drivers/PCI: Export pcie_has_flr() interface

2017-12-14 Thread Bjorn Helgaas
On Thu, Dec 14, 2017 at 04:52:06AM -0800, Christoph Hellwig wrote:
> On Wed, Dec 13, 2017 at 03:24:21PM -0600, Bjorn Helgaas wrote:
> > Prior to a60a2b73ba69, we had
> > 
> >   int pcie_flr(struct pci_dev *dev, int probe);
> > 
> > like all the other reset methods.  AFAICT, the addition of
> > pcie_has_flr() was to optimize the path slightly because when drivers
> > call pcie_flr(), they should already know that their hardware supports
> > FLR.  But I don't think that optimization is worth the extra code
> > complexity.  If we do need to optimize it, we can check this in the
> > core during enumeration and set PCI_DEV_FLAGS_NO_FLR_RESET
> > accordingly.
> > 
> > Christoph, chime in if I'm missing something here.
> 
> Didn't we just have that discussion in another thread a few days
> ago?

Probably, but I didn't have a clear picture of what you were suggesting.

> I think that the pcie_has_flr was a mistake in retrospective but I
> think the bool probe API was an even bigger mistake.  The only use
> of it is to hide the reset attribute in sysfs.  I'd much rather always
> have it and have it return EOPNOTSUPP if no reset method is supported.
> 
> I can send a patch for that if it sounds fine to you.

If you can get rid of the whole probe infrastructure, that sounds
good to me.

Bjorn

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] [PATCH V3 1/2] Drivers/PCI: Export pcie_has_flr() interface

2017-12-15 Thread Bjorn Helgaas
[+cc Russell, Sinan, Herbert, Srikanth, Derek, Satanand, Felix, Raghu]

On Fri, Dec 15, 2017 at 09:48:02AM -0600, Govinda Tatti wrote:
> On 12/13/2017 3:24 PM, Bjorn Helgaas wrote:
> >On Wed, Dec 13, 2017 at 02:46:57PM -0600, Govinda Tatti wrote:

> >>>>>>-static bool pcie_has_flr(struct pci_dev *dev)
> >>>>>>+bool pcie_has_flr(struct pci_dev *dev)
> >>>>>>  {
> >>>>>>u32 cap;
> >>>>>>@@ -3882,6 +3882,7 @@ static bool pcie_has_flr(struct pci_dev *dev)
> >>>>>>pcie_capability_read_dword(dev, PCI_EXP_DEVCAP, &cap);
> >>>>>>return cap & PCI_EXP_DEVCAP_FLR;
> >>>>>>  }
> >>>>>>+EXPORT_SYMBOL_GPL(pcie_has_flr);

> >>>>>I'd rather change pcie_flr() so you could *always* call it, and
> >>>>>it would return 0, -ENOTTY, or whatever, based on whether FLR
> >>>>>is supported.  Is that feasible?

> >>>>Sure, I will add pcie_has_flr() logic inside pcie_flr() and
> >>>>return appropriate values as suggested by you. Do we still want
> >>>>to retain pcie_has_flr() and its usage inside pci.c?.Otherwise,
> >>>>I will remove it and do required cleanup.

> >>>If you can restructure the code and remove pcie_has_flr() while
> >>>retaining the existing behavior of its callers, that would be
> >>>great.

> >>I checked the current usage of pcie_has_flr() and pcie_flr(). I
> >>have a couple of questions or need some clarification.
> >>
> >>1. pcie_has_flr() usage inside pci_probe_reset_function().
> >>
> >>    This function is only calling pcie_has_flr() but not pcie_flr().
> >>    Rest of the code is trying to do specific type of reset except
> >>pcie_flr().
> >>
> >>     rc = pci_dev_specific_reset(dev, 1);
> >>     if (rc != -ENOTTY)
> >>     return rc;
> >>     if (pcie_has_flr(dev))
> >>     return 0;
> >>     rc = pci_af_flr(dev, 1);
> >>     if (rc != -ENOTTY)
> >>     return rc;
> >>
> >>    In other-words, I can remove usage of pcie_has_flr() in all
> >>other places in pci.c except in above function.

> >I think we should keep the EXPORT_SYMBOL_GPL() part of a60a2b73ba69
> >("PCI: Export pcie_flr()"), but revert the restructuring part.
> >
> >Prior to a60a2b73ba69, we had
> >
> >   int pcie_flr(struct pci_dev *dev, int probe);
> >
> >like all the other reset methods.  AFAICT, the addition of
> >pcie_has_flr() was to optimize the path slightly because when
> >drivers call pcie_flr(), they should already know that their
> >hardware supports FLR.  But I don't think that optimization is
> >worth the extra code complexity.  If we do need to optimize it, we
> >can check this in the core during enumeration and set
> >PCI_DEV_FLAGS_NO_FLR_RESET accordingly.

> Not all code paths are aware of FLR capability and also, not
> using pcie_flr().  For example,
> 
> arch/powerpc/platforms/powernv/eeh-powernv.c

I assume you're referring to pnv_eeh_do_flr() (which contains code similar
to pcie_flr()) and pnv_eeh_do_af_flr() (which has code similar to
pci_af_flr()).  I agree that those are problematic and would ideally be
unified with the PCI core implementations.

Powerpc has quite a bit of this sort of special-case code for several
reasons, some just historical and some more concrete, so I don't know how
feasible this is.

> drivers/crypto/cavium/nitrox/nitrox_main.c

This has nitrox_reset_device(), which should definitely be replaced with a
core interface.

> drivers/net/ethernet/cavium/liquidio/octeon_mailbox.c

And this has octeon_mbox_process_cmd() which also does a home-grown
PCI_EXP_DEVCTL_BCR_FLR request and also should definitely use a core
interface.

> So, we should consider one of these options.
> 
> - set PCI_DEV_FLAGS_NO_FLR_RESET if it is not supported.
> - pcie_flr() should return if it is not supported
> 
> If we modify pcie_flr() to return error codes, then we need to modify
> all existing modules that are calling this function.

Yes, of course.

> Please let me know your preference, so that I can move accordingly. Thanks.

I think Christoph volunteered to do some restructuring, but I don't
know his timeframe.  If you can, I would probably wait for that
because there's so much overlap here.

The other paths that use PCI_EXP_DEVCTL_BCR_FLR are definitely issues
and should be fixed, but again should wait for the revised pcie_flr()
interface.  And if they're not actually required for your Xen issue,
they sound like "nice to have" cleanups that will not gate your Xen
fixes.  I added this to my ever-growing list of cleanups to do.

Bjorn

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] [PATCH] PCI: Mark expected switch fall-throughs

2019-03-20 Thread Bjorn Helgaas
On Wed, Mar 20, 2019 at 01:27:15PM -0500, Gustavo A. R. Silva wrote:
> In preparation to enabling -Wimplicit-fallthrough, mark switch
> cases where we are expecting to fall through.

Does this fix all the remaining cases in drivers/pci?  I'd like to fix
them all at once.

What's the best way to watch for new warnings being added?  I fiddled
with "make W=2" etc but it didn't seem useful.  Does the 0-day robot
warn when -Wimplicit-fallthrough warnings are added?

Bjorn

> This patch fixes the following warnings:
> 
> drivers/pci/proc.c: In function ‘proc_bus_pci_ioctl’:
> drivers/pci/proc.c:216:6: warning: this statement may fall through 
> [-Wimplicit-fallthrough=]
>if (arch_can_pci_mmap_wc()) {
>   ^
> drivers/pci/proc.c:225:2: note: here
>   default:
>   ^~~
> 
> drivers/pci/xen-pcifront.c: In function ‘pcifront_backend_changed’:
> drivers/pci/xen-pcifront.c:1105:6: warning: this statement may fall through 
> [-Wimplicit-fallthrough=]
>if (xdev->state == XenbusStateClosed)
>   ^
> drivers/pci/xen-pcifront.c:1108:2: note: here
>   case XenbusStateClosing:
>   ^~~~
> 
> Notice that, in this particular case, the /* fall through */
> comment is placed at the very bottom of the case statement,
> which is what GCC is expecting to find.
> 
> Warning level 3 was used: -Wimplicit-fallthrough=3
> 
> This patch is part of the ongoing efforts to enable
> -Wimplicit-fallthrough.
> 
> Signed-off-by: Gustavo A. R. Silva 
> ---
>  drivers/pci/proc.c | 1 +
>  drivers/pci/xen-pcifront.c | 2 +-
>  2 files changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/pci/proc.c b/drivers/pci/proc.c
> index 6fa1627ce08d..445b51db75b0 100644
> --- a/drivers/pci/proc.c
> +++ b/drivers/pci/proc.c
> @@ -222,6 +222,7 @@ static long proc_bus_pci_ioctl(struct file *file, 
> unsigned int cmd,
>   }
>   /* If arch decided it can't, fall through... */
>  #endif /* HAVE_PCI_MMAP */
> + /* fall through */
>   default:
>   ret = -EINVAL;
>   break;
> diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c
> index eba6e33147a2..14cf0f41ecf0 100644
> --- a/drivers/pci/xen-pcifront.c
> +++ b/drivers/pci/xen-pcifront.c
> @@ -1104,7 +1104,7 @@ static void __ref pcifront_backend_changed(struct 
> xenbus_device *xdev,
>   case XenbusStateClosed:
>   if (xdev->state == XenbusStateClosed)
>   break;
> - /* Missed the backend's CLOSING state -- fallthrough */
> + /* fall through - Missed the backend's CLOSING state. */
>   case XenbusStateClosing:
>   dev_warn(&xdev->dev, "backend going away!\n");
>   pcifront_try_disconnect(pdev);
> -- 
> 2.21.0
> 

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] [PATCH] PCI: Mark expected switch fall-throughs

2019-03-20 Thread Bjorn Helgaas
On Wed, Mar 20, 2019 at 01:27:15PM -0500, Gustavo A. R. Silva wrote:
> In preparation to enabling -Wimplicit-fallthrough, mark switch
> cases where we are expecting to fall through.
> 
> This patch fixes the following warnings:
> 
> drivers/pci/proc.c: In function ‘proc_bus_pci_ioctl’:
> drivers/pci/proc.c:216:6: warning: this statement may fall through 
> [-Wimplicit-fallthrough=]
>if (arch_can_pci_mmap_wc()) {
>   ^
> drivers/pci/proc.c:225:2: note: here
>   default:
>   ^~~
> 
> drivers/pci/xen-pcifront.c: In function ‘pcifront_backend_changed’:
> drivers/pci/xen-pcifront.c:1105:6: warning: this statement may fall through 
> [-Wimplicit-fallthrough=]
>if (xdev->state == XenbusStateClosed)
>   ^
> drivers/pci/xen-pcifront.c:1108:2: note: here
>   case XenbusStateClosing:
>   ^~~~
> 
> Notice that, in this particular case, the /* fall through */
> comment is placed at the very bottom of the case statement,
> which is what GCC is expecting to find.
> 
> Warning level 3 was used: -Wimplicit-fallthrough=3
> 
> This patch is part of the ongoing efforts to enable
> -Wimplicit-fallthrough.
> 
> Signed-off-by: Gustavo A. R. Silva 

Applied to pci/misc for v5.2, thanks!

> ---
>  drivers/pci/proc.c | 1 +
>  drivers/pci/xen-pcifront.c | 2 +-
>  2 files changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/pci/proc.c b/drivers/pci/proc.c
> index 6fa1627ce08d..445b51db75b0 100644
> --- a/drivers/pci/proc.c
> +++ b/drivers/pci/proc.c
> @@ -222,6 +222,7 @@ static long proc_bus_pci_ioctl(struct file *file, 
> unsigned int cmd,
>   }
>   /* If arch decided it can't, fall through... */
>  #endif /* HAVE_PCI_MMAP */
> + /* fall through */
>   default:
>   ret = -EINVAL;
>   break;
> diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c
> index eba6e33147a2..14cf0f41ecf0 100644
> --- a/drivers/pci/xen-pcifront.c
> +++ b/drivers/pci/xen-pcifront.c
> @@ -1104,7 +1104,7 @@ static void __ref pcifront_backend_changed(struct 
> xenbus_device *xdev,
>   case XenbusStateClosed:
>   if (xdev->state == XenbusStateClosed)
>   break;
> - /* Missed the backend's CLOSING state -- fallthrough */
> + /* fall through - Missed the backend's CLOSING state. */
>   case XenbusStateClosing:
>   dev_warn(&xdev->dev, "backend going away!\n");
>   pcifront_try_disconnect(pdev);
> -- 
> 2.21.0
> 

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [Xen-devel] [PATCH v2 1/1] treewide: Switch printk users from %pf and %pF to %ps and %pS, respectively

2019-03-25 Thread Bjorn Helgaas
On Mon, Mar 25, 2019 at 09:32:28PM +0200, Sakari Ailus wrote:
> %pF and %pf are functionally equivalent to %pS and %ps conversion
> specifiers. The former are deprecated, therefore switch the current users
> to use the preferred variant.
> 
> The changes have been produced by the following command:
> 
>   git grep -l '%p[fF]' | grep -v '^\(tools\|Documentation\)/' | \
>   while read i; do perl -i -pe 's/%pf/%ps/g; s/%pF/%pS/g;' $i; done
> 
> And verifying the result.
> 
> Signed-off-by: Sakari Ailus 
> Acked-by: David Sterba  (for btrfs)
> Acked-by: Mike Rapoport  (for mm/memblock.c)
> Acked-by: Rafael J. Wysocki 

Acked-by: Bjorn Helgaas  (for drivers/pci)

Thanks a lot for cleaning this up.  This has been annoying for a long time.

___
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

Re: [PATCH v2] PCI/MSI: Correct use of can_mask in msi_add_msi_desc()

2022-08-26 Thread Bjorn Helgaas
On Mon, Feb 14, 2022 at 11:07:47AM +0100, Josef Johansson wrote:
> From: Josef Johansson 
> 
> PCI/MSI: Correct use of can_mask in msi_add_msi_desc()
> 
> Commit 71020a3c0dff4 ("PCI/MSI: Use msi_add_msi_desc()") modifies
> the logic of checking msi_attrib.can_mask, without any reason.
> 
> This commits restores that logic.
> 
> Fixes: 71020a3c0dff4 ("PCI/MSI: Use msi_add_msi_desc()")
> Signed-off-by: Josef Johansson 

Applied to pci/misc for v6.1 with commit log below, thanks!

  PCI/MSI: Correct 'can_mask' test in msi_add_msi_desc()

  71020a3c0dff4 ("PCI/MSI: Use msi_add_msi_desc()") inadvertently reversed
  the sense of "msi_attrib.can_mask" in one use:

- if (entry->pci.msi_attrib.can_mask) {
- addr = pci_msix_desc_addr(entry);
- entry->pci.msix_ctrl = readl(addr + PCI_MSIX_ENTRY_VECTOR_CTRL);
+ if (!desc.pci.msi_attrib.can_mask) {
+ addr = pci_msix_desc_addr(&desc);
+ desc.pci.msix_ctrl = readl(addr + PCI_MSIX_ENTRY_VECTOR_CTRL);

  Restore the original test.

> ---
> v2: Changing subject line to fit earlier commits.
> 
> Trying to fix a NULL BUG in the NVMe MSIX implementation I stumbled upon this 
> code,
> which ironically was what my last MSI patch resulted into.
> 
> I don't see any reason why this logic was change, and it did not break 
> anything
> correcting the logic.
> 
> CC xen-devel since it very much relates to Xen kernel (via 
> pci_msi_ignore_mask).
> ---
> 
> diff --git a/drivers/pci/msi/msi.c b/drivers/pci/msi/msi.c
> index c19c7ca58186..146e7b9a01cc 100644
> --- a/drivers/pci/msi/msi.c
> +++ b/drivers/pci/msi/msi.c
> @@ -526,7 +526,7 @@ static int msix_setup_msi_descs(struct pci_dev *dev, void 
> __iomem *base,
>   desc.pci.msi_attrib.can_mask = !pci_msi_ignore_mask &&
>  !desc.pci.msi_attrib.is_virtual;
>  
> - if (!desc.pci.msi_attrib.can_mask) {
> + if (desc.pci.msi_attrib.can_mask) {
>   addr = pci_msix_desc_addr(&desc);
>   desc.pci.msix_ctrl = readl(addr + 
> PCI_MSIX_ENTRY_VECTOR_CTRL);
>   }
> 
> --
> 2.31.1
> 
> 



Re: [PATCH] xen-pcifront: Handle missed Connected state

2022-09-02 Thread Bjorn Helgaas
The conventional style for subject (from "git log --oneline") is:

  xen/pcifront: Handle ...

On Mon, Aug 29, 2022 at 11:15:36AM -0400, Jason Andryuk wrote:
> An HVM guest with linux stubdom and 2 PCI devices failed to start as

"stubdom" might be handy shorthand in the Xen world, but I think
it would be nice to consistently spell out "stubdomain" since you use
both forms randomly in this commit log and newbies like me have to
wonder whether they're the same or different.

> libxl timed out waiting for the PCI devices to be added.  It happens
> intermittently but with some regularity.  libxl wrote the two xenstore
> entries for the devices, but then timed out waiting for backend state 4
> (Connected) - the state stayed at 7 (Reconfiguring).  (PCI passthrough
> to an HVM with stubdomain is PV passthrough to the stubdomain and then
> HVM passthrough with the QEMU inside the stubdomain.)
> 
> The stubdom kernel never printed "pcifront pci-0: Installing PCI
> frontend", so it seems to have missed state 4 which would have
> called pcifront_try_connect -> pcifront_connect_and_init_dma

Add "()" after function names for clarity.

> Have pcifront_detach_devices special-case state Initialised and call
> pcifront_connect_and_init_dma.  Don't use pcifront_try_connect because
> that sets the xenbus state which may throw off the backend.  After
> connecting, skip the remainder of detach_devices since none have been
> initialized yet.  When the backend switches to Reconfigured,
> pcifront_attach_devices will pick them up again.

Bjorn



Re: [PATCH v6 1/4] PCI: Introduce pci_dev_for_each_resource()

2023-03-22 Thread Bjorn Helgaas
Hi Andy and Mika,

I really like the improvements here.  They make the code read much
better.

On Mon, Mar 20, 2023 at 03:16:30PM +0200, Andy Shevchenko wrote:
> From: Mika Westerberg 
> ...

>  static void fixup_winbond_82c105(struct pci_dev* dev)
>  {
> - int i;
> + struct resource *r;
>   unsigned int reg;
>  
>   if (!machine_is(pseries))
> @@ -251,14 +251,14 @@ static void fixup_winbond_82c105(struct pci_dev* dev)
>   /* Enable LEGIRQ to use INTC instead of ISA interrupts */
>   pci_write_config_dword(dev, 0x40, reg | (1<<11));
>  
> - for (i = 0; i < DEVICE_COUNT_RESOURCE; ++i) {
> + pci_dev_for_each_resource_p(dev, r) {
>   /* zap the 2nd function of the winbond chip */
> - if (dev->resource[i].flags & IORESOURCE_IO
> - && dev->bus->number == 0 && dev->devfn == 0x81)
> - dev->resource[i].flags &= ~IORESOURCE_IO;
> - if (dev->resource[i].start == 0 && dev->resource[i].end) {
> - dev->resource[i].flags = 0;
> - dev->resource[i].end = 0;
> + if (dev->bus->number == 0 && dev->devfn == 0x81 &&
> + r->flags & IORESOURCE_IO)

This is a nice literal conversion, but it's kind of lame to test
bus->number and devfn *inside* the loop here, since they can't change
inside the loop.

> + r->flags &= ~IORESOURCE_IO;
> + if (r->start == 0 && r->end) {
> + r->flags = 0;
> + r->end = 0;
>   }
>   }

>  #define pci_resource_len(dev,bar) \
>   ((pci_resource_end((dev), (bar)) == 0) ? 0 :\
>   \
> -  (pci_resource_end((dev), (bar)) -  \
> -   pci_resource_start((dev), (bar)) + 1))
> +  resource_size(pci_resource_n((dev), (bar

I like this change, but it's unrelated to pci_dev_for_each_resource()
and unmentioned in the commit log.

> +#define __pci_dev_for_each_resource(dev, res, __i, vartype)  \
> + for (vartype __i = 0;   \
> +  res = pci_resource_n(dev, __i), __i < PCI_NUM_RESOURCES;   \
> +  __i++)
> +
> +#define pci_dev_for_each_resource(dev, res, i)   
> \
> +   __pci_dev_for_each_resource(dev, res, i, )
> +
> +#define pci_dev_for_each_resource_p(dev, res)
> \
> + __pci_dev_for_each_resource(dev, res, __i, unsigned int)

This series converts many cases to drop the iterator variable ("i"),
which is fantastic.

Several of the remaining places need the iterator variable only to
call pci_claim_resource(), which could be converted to take a "struct
resource *" directly without much trouble.

We don't have to do that pci_claim_resource() conversion now, but
since we're converging on the "(dev, res)" style, I think we should
reverse the names so we have something like:

  pci_dev_for_each_resource(dev, res)
  pci_dev_for_each_resource_idx(dev, res, i)

Not sure __pci_dev_for_each_resource() is worthwhile since it only
avoids repeating that single "for" statement, and passing in "vartype"
(sometimes empty to implicitly avoid the declaration) is a little
complicated to read.  I think it'd be easier to read like this:

  #define pci_dev_for_each_resource(dev, res)  \
for (unsigned int __i = 0; \
 res = pci_resource_n(dev, __i), __i < PCI_NUM_RESOURCES;  \
 __i++)

  #define pci_dev_for_each_resource_idx(dev, res, idx) \
for (idx = 0;  \
 res = pci_resource_n(dev, idx), idx < PCI_NUM_RESOURCES;  \
 idx++)

Bjorn



Re: [PATCH v6 2/4] PCI: Split pci_bus_for_each_resource_p() out of pci_bus_for_each_resource()

2023-03-22 Thread Bjorn Helgaas
On Mon, Mar 20, 2023 at 03:16:31PM +0200, Andy Shevchenko wrote:
> ...

> -#define pci_bus_for_each_resource(bus, res, i)   
> \
> - for (i = 0; \
> - (res = pci_bus_resource_n(bus, i)) || i < PCI_BRIDGE_RESOURCE_NUM; \
> -  i++)
> +#define __pci_bus_for_each_resource(bus, res, __i, vartype)  
> \
> + for (vartype __i = 0;   
> \
> +  res = pci_bus_resource_n(bus, __i), __i < PCI_BRIDGE_RESOURCE_NUM; 
> \
> +  __i++)
> +
> +#define pci_bus_for_each_resource(bus, res, i)   
> \
> + __pci_bus_for_each_resource(bus, res, i, )
> +
> +#define pci_bus_for_each_resource_p(bus, res)
> \
> + __pci_bus_for_each_resource(bus, res, __i, unsigned int)

I like these changes a lot, too!

Same comments about _p vs _idx and __pci_bus_for_each_resource(...,
vartype).

Also would prefer 80 char max instead of 81.



Re: [PATCH v6 1/4] PCI: Introduce pci_dev_for_each_resource()

2023-03-23 Thread Bjorn Helgaas
On Thu, Mar 23, 2023 at 04:30:01PM +0200, Andy Shevchenko wrote:
> On Wed, Mar 22, 2023 at 02:28:04PM -0500, Bjorn Helgaas wrote:
> > On Mon, Mar 20, 2023 at 03:16:30PM +0200, Andy Shevchenko wrote:
> ...
> 
> > > + pci_dev_for_each_resource_p(dev, r) {
> > >   /* zap the 2nd function of the winbond chip */
> > > - if (dev->resource[i].flags & IORESOURCE_IO
> > > - && dev->bus->number == 0 && dev->devfn == 0x81)
> > > - dev->resource[i].flags &= ~IORESOURCE_IO;
> > > - if (dev->resource[i].start == 0 && dev->resource[i].end) {
> > > - dev->resource[i].flags = 0;
> > > - dev->resource[i].end = 0;
> > > + if (dev->bus->number == 0 && dev->devfn == 0x81 &&
> > > + r->flags & IORESOURCE_IO)
> > 
> > This is a nice literal conversion, but it's kind of lame to test
> > bus->number and devfn *inside* the loop here, since they can't change
> > inside the loop.
> 
> Hmm... why are you asking me, even if I may agree on that? It's
> in the original code and out of scope of this series.

Yeah, I don't think it would be *unreasonable* to clean this up at the
same time so the maintainers can look at both at the same time (this
is arch/powerpc/platforms/pseries/pci.c, so Michael, et al), but no
need for you to do anything, certainly.  I can post a follow-up patch.

> > but
> > since we're converging on the "(dev, res)" style, I think we should
> > reverse the names so we have something like:
> > 
> >   pci_dev_for_each_resource(dev, res)
> >   pci_dev_for_each_resource_idx(dev, res, i)
> 
> Wouldn't it be more churn, including pci_bus_for_each_resource() correction?

Yes, it definitely is a little more churn because we already have
pci_bus_for_each_resource() that would have to be changed.

I poked around looking for similar patterns elsewhere with:

  git grep "#define.*for_each_.*_p("
  git grep "#define.*for_each_.*_idx("

I didn't find any other "_p" iterators and just a few "_idx" ones, so
my hope is to follow what little precedent there is, as well as
converge on the basic "*_for_each_resource()" iterators and remove the
"_idx()" versions over time by doing things like the
pci_claim_resource() change.

What do you think?  If it seems like excessive churn, we can do it
as-is and still try to reduce the use of the index variable over time.

Bjorn



Re: [PATCH v8 0/7] Add pci_dev_for_each_resource() helper and update users

2023-04-04 Thread Bjorn Helgaas
On Thu, Mar 30, 2023 at 07:24:27PM +0300, Andy Shevchenko wrote:
> Provide two new helper macros to iterate over PCI device resources and
> convert users.
> 
> Looking at it, refactor existing pci_bus_for_each_resource() and convert
> users accordingly.
> 
> Note, the amount of lines grew due to the documentation update.
> 
> Changelog v8:
> - fixed issue with pci_bus_for_each_resource() macro (LKP)
> - due to above added a new patch to document how it works
> - moved the last patch to be #2 (Philippe)
> - added tags (Philippe)
> 
> Changelog v7:
> - made both macros to share same name (Bjorn)

I didn't actually request the same name for both; I would have had no
idea how to even do that :)

v6 had:

  pci_dev_for_each_resource_p(dev, res)
  pci_dev_for_each_resource(dev, res, i)

and I suggested:

  pci_dev_for_each_resource(dev, res)
  pci_dev_for_each_resource_idx(dev, res, i)

because that pattern is used elsewhere.  But you figured out how to do
it, and having one name is even better, so thanks for that extra work!

> - split out the pci_resource_n() conversion (Bjorn)
> 
> Changelog v6:
> - dropped unused variable in PPC code (LKP)
> 
> Changelog v5:
> - renamed loop variable to minimize the clash (Keith)
> - addressed smatch warning (Dan)
> - addressed 0-day bot findings (LKP)
> 
> Changelog v4:
> - rebased on top of v6.3-rc1
> - added tag (Krzysztof)
> 
> Changelog v3:
> - rebased on top of v2 by Mika, see above
> - added tag to pcmcia patch (Dominik)
> 
> Changelog v2:
> - refactor to have two macros
> - refactor existing pci_bus_for_each_resource() in the same way and
>   convert users
> 
> Andy Shevchenko (6):
>   kernel.h: Split out COUNT_ARGS() and CONCATENATE()
>   PCI: Introduce pci_resource_n()
>   PCI: Document pci_bus_for_each_resource() to avoid confusion
>   PCI: Allow pci_bus_for_each_resource() to take less arguments
>   EISA: Convert to use less arguments in pci_bus_for_each_resource()
>   pcmcia: Convert to use less arguments in pci_bus_for_each_resource()
> 
> Mika Westerberg (1):
>   PCI: Introduce pci_dev_for_each_resource()
> 
>  .clang-format |  1 +
>  arch/alpha/kernel/pci.c   |  5 +-
>  arch/arm/kernel/bios32.c  | 16 +++--
>  arch/arm/mach-dove/pcie.c | 10 ++--
>  arch/arm/mach-mv78xx0/pcie.c  | 10 ++--
>  arch/arm/mach-orion5x/pci.c   | 10 ++--
>  arch/mips/pci/ops-bcm63xx.c   |  8 +--
>  arch/mips/pci/pci-legacy.c|  3 +-
>  arch/powerpc/kernel/pci-common.c  | 21 +++
>  arch/powerpc/platforms/4xx/pci.c  |  8 +--
>  arch/powerpc/platforms/52xx/mpc52xx_pci.c |  5 +-
>  arch/powerpc/platforms/pseries/pci.c  | 16 ++---
>  arch/sh/drivers/pci/pcie-sh7786.c | 10 ++--
>  arch/sparc/kernel/leon_pci.c  |  5 +-
>  arch/sparc/kernel/pci.c   | 10 ++--
>  arch/sparc/kernel/pcic.c  |  5 +-
>  drivers/eisa/pci_eisa.c   |  4 +-
>  drivers/pci/bus.c |  7 +--
>  drivers/pci/hotplug/shpchp_sysfs.c|  8 +--
>  drivers/pci/pci.c |  3 +-
>  drivers/pci/probe.c   |  2 +-
>  drivers/pci/remove.c  |  5 +-
>  drivers/pci/setup-bus.c   | 37 +---
>  drivers/pci/setup-res.c   |  4 +-
>  drivers/pci/vgaarb.c  | 17 ++
>  drivers/pci/xen-pcifront.c|  4 +-
>  drivers/pcmcia/rsrc_nonstatic.c   |  9 +--
>  drivers/pcmcia/yenta_socket.c |  3 +-
>  drivers/pnp/quirks.c  | 29 -
>  include/linux/args.h  | 13 
>  include/linux/kernel.h|  8 +--
>  include/linux/pci.h   | 72 +++
>  32 files changed, 190 insertions(+), 178 deletions(-)
>  create mode 100644 include/linux/args.h

Applied 2-7 to pci/resource for v6.4, thanks, I really like this!

I omitted

  [1/7] kernel.h: Split out COUNT_ARGS() and CONCATENATE()"

only because it's not essential to this series and has only a trivial
one-line impact on include/linux/pci.h.

Bjorn



Re: [PATCH v8 5/7] PCI: Allow pci_bus_for_each_resource() to take less arguments

2023-04-05 Thread Bjorn Helgaas
On Wed, Apr 05, 2023 at 02:50:47PM +0300, Andy Shevchenko wrote:
> On Thu, Mar 30, 2023 at 07:24:32PM +0300, Andy Shevchenko wrote:
> > Refactor pci_bus_for_each_resource() in the same way as it's done in
> > pci_dev_for_each_resource() case. This will allow to hide iterator
> > inside the loop, where it's not used otherwise.
> > 
> > No functional changes intended.
> 
> Bjorn, this has wrong author in your tree:
> 
> https://git.kernel.org/pub/scm/linux/kernel/git/pci/pci.git/commit/?h=resource&id=46dbad19a59e0dd8f1e7065e5281345797fbb365

I botched it, sorry, should be fixed now.

Bjorn



Re: [PATCH v8 0/7] Add pci_dev_for_each_resource() helper and update users

2023-04-05 Thread Bjorn Helgaas
On Wed, Apr 05, 2023 at 11:28:27AM +0300, Andy Shevchenko wrote:
> On Tue, Apr 04, 2023 at 11:11:01AM -0500, Bjorn Helgaas wrote:
> > On Thu, Mar 30, 2023 at 07:24:27PM +0300, Andy Shevchenko wrote:
> > > Provide two new helper macros to iterate over PCI device resources and
> > > convert users.
> > > 
> > > Looking at it, refactor existing pci_bus_for_each_resource() and convert
> > > users accordingly.

> > Applied 2-7 to pci/resource for v6.4, thanks, I really like this!
> 
> Btw, can you actually drop patch 7, please?

Done.

> > I omitted
> > 
> >   [1/7] kernel.h: Split out COUNT_ARGS() and CONCATENATE()"
> > 
> > only because it's not essential to this series and has only a trivial
> > one-line impact on include/linux/pci.h.
> 
> I'm not sure I understood what exactly "essentiality" means to you, but
> I included that because it makes the split which can be used later by
> others and not including kernel.h in the header is the objective I want
> to achieve. Without this patch the achievement is going to be deferred.
> Yet, this, as you have noticed, allows to compile and use the macros in
> the rest of the patches.

I haven't followed the kernel.h splitting, and I try to avoid
incidental changes outside of the files I maintain, so I just wanted
to keep this series purely PCI and avoid any possible objections to a
new include file or discussion about how it should be done.



Re: [PATCH v8 0/7] Add pci_dev_for_each_resource() helper and update users

2023-05-09 Thread Bjorn Helgaas
On Tue, Apr 04, 2023 at 11:11:01AM -0500, Bjorn Helgaas wrote:
> On Thu, Mar 30, 2023 at 07:24:27PM +0300, Andy Shevchenko wrote:
> > Provide two new helper macros to iterate over PCI device resources and
> > convert users.

> Applied 2-7 to pci/resource for v6.4, thanks, I really like this!

This is 09cc90063240 ("PCI: Introduce pci_dev_for_each_resource()")
upstream now.

Coverity complains about each use, sample below from
drivers/pci/vgaarb.c.  I didn't investigate at all, so it might be a
false positive; just FYI.

  1. Condition screen_info.capabilities & (2U /* 1 << 1 */), taking 
true branch.
  556if (screen_info.capabilities & VIDEO_CAPABILITY_64BIT_BASE)
  557base |= (u64)screen_info.ext_lfb_base << 32;
  558
  559limit = base + size;
  560
  561/* Does firmware framebuffer belong to us? */
  2. Condition __b < PCI_NUM_RESOURCES, taking true branch.
  3. Condition (r = &pdev->resource[__b]) , (__b < PCI_NUM_RESOURCES), 
taking true branch.
  6. Condition __b < PCI_NUM_RESOURCES, taking true branch.
  7. cond_at_most: Checking __b < PCI_NUM_RESOURCES implies that __b 
may be up to 16 on the true branch.
  8. Condition (r = &pdev->resource[__b]) , (__b < PCI_NUM_RESOURCES), 
taking true branch.
  11. incr: Incrementing __b. The value of __b may now be up to 17.
  12. alias: Assigning: r = &pdev->resource[__b]. r may now point to as 
high as element 17 of pdev->resource (which consists of 17 64-byte elements).
  13. Condition __b < PCI_NUM_RESOURCES, taking true branch.
  14. Condition (r = &pdev->resource[__b]) , (__b < PCI_NUM_RESOURCES), 
taking true branch.
  562pci_dev_for_each_resource(pdev, r) {
  4. Condition resource_type(r) != 512, taking true branch.
  9. Condition resource_type(r) != 512, taking true branch.

  CID 1529911 (#1 of 1): Out-of-bounds read (OVERRUN)
  15. overrun-local: Overrunning array of 1088 bytes at byte offset 1088 by 
dereferencing pointer r. [show details]
  563if (resource_type(r) != IORESOURCE_MEM)
  5. Continuing loop.
  10. Continuing loop.
  564continue;



Re: [PATCH v8 0/7] Add pci_dev_for_each_resource() helper and update users

2023-05-12 Thread Bjorn Helgaas
On Fri, May 12, 2023 at 01:56:29PM +0300, Andy Shevchenko wrote:
> On Tue, May 09, 2023 at 01:21:22PM -0500, Bjorn Helgaas wrote:
> > On Tue, Apr 04, 2023 at 11:11:01AM -0500, Bjorn Helgaas wrote:
> > > On Thu, Mar 30, 2023 at 07:24:27PM +0300, Andy Shevchenko wrote:
> > > > Provide two new helper macros to iterate over PCI device resources and
> > > > convert users.
> > 
> > > Applied 2-7 to pci/resource for v6.4, thanks, I really like this!
> > 
> > This is 09cc90063240 ("PCI: Introduce pci_dev_for_each_resource()")
> > upstream now.
> > 
> > Coverity complains about each use,
> 
> It needs more clarification here. Use of reduced variant of the
> macro or all of them? If the former one, then I can speculate that
> Coverity (famous for false positives) simply doesn't understand `for
> (type var; var ...)` code.

True, Coverity finds false positives.  It flagged every use in
drivers/pci and drivers/pnp.  It didn't mention the arch/alpha, arm,
mips, powerpc, sh, or sparc uses, but I think it just didn't look at
those.

It flagged both:

  pbus_size_iopci_dev_for_each_resource(dev, r)
  pbus_size_mem   pci_dev_for_each_resource(dev, r, i)

Here's a spreadsheet with a few more details (unfortunately I don't
know how to make it dump the actual line numbers or analysis like I
pasted below, so "pci_dev_for_each_resource" doesn't appear).  These
are mostly in the "Drivers-PCI" component.

https://docs.google.com/spreadsheets/d/1ohOJwxqXXoDUA0gwopgk-z-6ArLvhN7AZn4mIlDkHhQ/edit?usp=sharing

These particular reports are in the "High Impact Outstanding" tab.

> > sample below from
> > drivers/pci/vgaarb.c.  I didn't investigate at all, so it might be a
> > false positive; just FYI.
> > 
> >   1. Condition screen_info.capabilities & (2U /* 1 << 1 */), taking 
> > true branch.
> >   556if (screen_info.capabilities & VIDEO_CAPABILITY_64BIT_BASE)
> >   557base |= (u64)screen_info.ext_lfb_base << 32;
> >   558
> >   559limit = base + size;
> >   560
> >   561/* Does firmware framebuffer belong to us? */
> >   2. Condition __b < PCI_NUM_RESOURCES, taking true branch.
> >   3. Condition (r = &pdev->resource[__b]) , (__b < PCI_NUM_RESOURCES), 
> > taking true branch.
> >   6. Condition __b < PCI_NUM_RESOURCES, taking true branch.
> >   7. cond_at_most: Checking __b < PCI_NUM_RESOURCES implies that __b 
> > may be up to 16 on the true branch.
> >   8. Condition (r = &pdev->resource[__b]) , (__b < PCI_NUM_RESOURCES), 
> > taking true branch.
> >   11. incr: Incrementing __b. The value of __b may now be up to 17.
> >   12. alias: Assigning: r = &pdev->resource[__b]. r may now point to as 
> > high as element 17 of pdev->resource (which consists of 17 64-byte 
> > elements).
> >   13. Condition __b < PCI_NUM_RESOURCES, taking true branch.
> >   14. Condition (r = &pdev->resource[__b]) , (__b < PCI_NUM_RESOURCES), 
> > taking true branch.
> >   562pci_dev_for_each_resource(pdev, r) {
> >   4. Condition resource_type(r) != 512, taking true branch.
> >   9. Condition resource_type(r) != 512, taking true branch.
> > 
> >   CID 1529911 (#1 of 1): Out-of-bounds read (OVERRUN)
> >   15. overrun-local: Overrunning array of 1088 bytes at byte offset 1088 by 
> > dereferencing pointer r. [show details]
> >   563if (resource_type(r) != IORESOURCE_MEM)
> >   5. Continuing loop.
> >   10. Continuing loop.
> >   564continue;
> 
> -- 
> With Best Regards,
> Andy Shevchenko
> 
> 



Re: [PATCH v8 0/7] Add pci_dev_for_each_resource() helper and update users

2023-05-30 Thread Bjorn Helgaas
On Fri, May 12, 2023 at 02:48:51PM -0500, Bjorn Helgaas wrote:
> On Fri, May 12, 2023 at 01:56:29PM +0300, Andy Shevchenko wrote:
> > On Tue, May 09, 2023 at 01:21:22PM -0500, Bjorn Helgaas wrote:
> > > On Tue, Apr 04, 2023 at 11:11:01AM -0500, Bjorn Helgaas wrote:
> > > > On Thu, Mar 30, 2023 at 07:24:27PM +0300, Andy Shevchenko wrote:
> > > > > Provide two new helper macros to iterate over PCI device resources and
> > > > > convert users.
> > > 
> > > > Applied 2-7 to pci/resource for v6.4, thanks, I really like this!
> > > 
> > > This is 09cc90063240 ("PCI: Introduce pci_dev_for_each_resource()")
> > > upstream now.
> > > 
> > > Coverity complains about each use,
> > 
> > It needs more clarification here. Use of reduced variant of the
> > macro or all of them? If the former one, then I can speculate that
> > Coverity (famous for false positives) simply doesn't understand `for
> > (type var; var ...)` code.
> 
> True, Coverity finds false positives.  It flagged every use in
> drivers/pci and drivers/pnp.  It didn't mention the arch/alpha, arm,
> mips, powerpc, sh, or sparc uses, but I think it just didn't look at
> those.
> 
> It flagged both:
> 
>   pbus_size_iopci_dev_for_each_resource(dev, r)
>   pbus_size_mem   pci_dev_for_each_resource(dev, r, i)
> 
> Here's a spreadsheet with a few more details (unfortunately I don't
> know how to make it dump the actual line numbers or analysis like I
> pasted below, so "pci_dev_for_each_resource" doesn't appear).  These
> are mostly in the "Drivers-PCI" component.
> 
> https://docs.google.com/spreadsheets/d/1ohOJwxqXXoDUA0gwopgk-z-6ArLvhN7AZn4mIlDkHhQ/edit?usp=sharing
> 
> These particular reports are in the "High Impact Outstanding" tab.

Where are we at?  Are we going to ignore this because some Coverity
reports are false positives?

Bjorn



Re: [PATCH v8 0/7] Add pci_dev_for_each_resource() helper and update users

2023-05-31 Thread Bjorn Helgaas
On Wed, May 31, 2023 at 08:48:35PM +0200, Jonas Gorski wrote:
> ...

> Looking at the code I understand where coverity is coming from:
> 
> #define __pci_dev_for_each_res0(dev, res, ...) \
>for (unsigned int __b = 0;  \
> res = pci_resource_n(dev, __b), __b < PCI_NUM_RESOURCES;   \
> __b++)
> 
>  res will be assigned before __b is checked for being less than
> PCI_NUM_RESOURCES, making it point to behind the array at the end of
> the last loop iteration.
> 
> Rewriting the test expression as
> 
> __b < PCI_NUM_RESOURCES && (res = pci_resource_n(dev, __b));
> 
> should avoid the (coverity) warning by making use of lazy evaluation.
> 
> It probably makes the code slightly less performant as res will now be
> checked for being not NULL (which will always be true), but I doubt it
> will be significant (or in any hot paths).

Thanks a lot for looking into this!  I think you're right, and I think
the rewritten expression is more logical as well.  Do you want to post
a patch for it?

Bjorn



Re: [RFC KERNEL PATCH v4 3/3] PCI/sysfs: Add gsi sysfs for pci_dev

2024-04-09 Thread Bjorn Helgaas
[+to Rafael]

On Mon, Apr 08, 2024 at 06:42:31AM +, Chen, Jiqian wrote:
> Hi Bjorn,
> It has been almost two months since we received your reply last time.
> This series are blocking on this patch, since there are patches on Xen and 
> Qemu side depending on it.
> Do you still have any confusion about this patch? Or do you have other 
> suggestions?
> If no, may I get your Reviewed-by?

  - This is ACPI-specific, but exposes /sys/.../gsi for all systems,
including non-ACPI systems.  I don't think we want that.

  - Do you care about similar Xen configurations on non-ACPI systems?
If so, maybe the commit log could mention how you learn about PCI
INTx routing on them in case there's some way to unify this in the
future.

  - Missing an update to Documentation/ABI/.

  - A nit: I asked about s/dumU/DomU/ in the commit log earlier,
haven't seen any response.

  - Commit log mentions "and for other potential scenarios."  It's
another nit, but unless you have another concrete use for this,
that phrase is meaningless hand waving and should be dropped.

  - A _PRT entry may refer directly to a GSI or to an interrupt link
device (PNP0C0F) that can be routed to one of several GSIs:

  ACPI: PCI Interrupt Link [LNKA] (IRQs 3 4 5 6 7 9 10 *11 12 14 15)
 
I don't think the kernel reconfigures interrupt links after
enumeration, but if they are reconfigured at run-time (via _SRS),
the cached GSI will be wrong.  I think setpnp could do this, but
that tool is dead.  So maybe this isn't a concern anymore, but I
*would* like to get Rafael's take on this.  If we don't care
enough, I think we should mention it in the commit log just in
case.

Bjorn



Re: [RFC KERNEL PATCH v4 3/3] PCI/sysfs: Add gsi sysfs for pci_dev

2024-01-29 Thread Bjorn Helgaas
On Thu, Jan 25, 2024 at 07:17:24AM +, Chen, Jiqian wrote:
> On 2024/1/24 00:02, Bjorn Helgaas wrote:
> > On Tue, Jan 23, 2024 at 10:13:52AM +, Chen, Jiqian wrote:
> >> On 2024/1/23 07:37, Bjorn Helgaas wrote:
> >>> On Fri, Jan 05, 2024 at 02:22:17PM +0800, Jiqian Chen wrote:
> >>>> There is a need for some scenarios to use gsi sysfs.
> >>>> For example, when xen passthrough a device to dumU, it will
> >>>> use gsi to map pirq, but currently userspace can't get gsi
> >>>> number.
> >>>> So, add gsi sysfs for that and for other potential scenarios.
> >> ...
> > 
> >>> I don't know enough about Xen to know why it needs the GSI in
> >>> userspace.  Is this passthrough brand new functionality that can't be
> >>> done today because we don't expose the GSI yet?

I assume this must be new functionality, i.e., this kind of
passthrough does not work today, right?

> >> has ACPI support and is responsible for detecting and controlling
> >> the hardware, also it performs privileged operations such as the
> >> creation of normal (unprivileged) domains DomUs. When we give to a
> >> DomU direct access to a device, we need also to route the physical
> >> interrupts to the DomU. In order to do so Xen needs to setup and map
> >> the interrupts appropriately.
> > 
> > What kernel interfaces are used for this setup and mapping?
>
> For passthrough devices, the setup and mapping of routing physical
> interrupts to DomU are done on Xen hypervisor side, hypervisor only
> need userspace to provide the GSI info, see Xen code:
> xc_physdev_map_pirq require GSI and then will call hypercall to pass
> GSI into hypervisor and then hypervisor will do the mapping and
> routing, kernel doesn't do the setup and mapping.

So we have to expose the GSI to userspace not because userspace itself
uses it, but so userspace can turn around and pass it back into the
kernel?

It seems like it would be better for userspace to pass an identifier
of the PCI device itself back into the hypervisor.  Then the interface
could be generic and potentially work even on non-ACPI systems where
the GSI concept doesn't apply.

> For devices on PVH Dom0, Dom0 setups interrupts for devices as the
> baremetal Linux kernel does, through using acpi_pci_irq_enable->
> acpi_register_gsi-> __acpi_register_gsi->acpi_register_gsi_ioapic.

This case sounds like it's all inside Linux, so I assume there's no
problem with this one?  If you can call acpi_pci_irq_enable(), you
have the pci_dev, so I assume there's no need to expose the GSI in
sysfs?

Bjorn



Re: [RFC KERNEL PATCH v4 3/3] PCI/sysfs: Add gsi sysfs for pci_dev

2024-01-30 Thread Bjorn Helgaas
On Tue, Jan 30, 2024 at 10:07:36AM +0100, Roger Pau Monné wrote:
> On Mon, Jan 29, 2024 at 04:01:13PM -0600, Bjorn Helgaas wrote:
> > On Thu, Jan 25, 2024 at 07:17:24AM +, Chen, Jiqian wrote:
> > > On 2024/1/24 00:02, Bjorn Helgaas wrote:
> > > > On Tue, Jan 23, 2024 at 10:13:52AM +, Chen, Jiqian wrote:
> > > >> On 2024/1/23 07:37, Bjorn Helgaas wrote:
> > > >>> On Fri, Jan 05, 2024 at 02:22:17PM +0800, Jiqian Chen wrote:
> > > >>>> There is a need for some scenarios to use gsi sysfs.
> > > >>>> For example, when xen passthrough a device to dumU, it will
> > > >>>> use gsi to map pirq, but currently userspace can't get gsi
> > > >>>> number.
> > > >>>> So, add gsi sysfs for that and for other potential scenarios.
> > > >> ...
> > > > 
> > > >>> I don't know enough about Xen to know why it needs the GSI in
> > > >>> userspace.  Is this passthrough brand new functionality that can't be
> > > >>> done today because we don't expose the GSI yet?
> > 
> > I assume this must be new functionality, i.e., this kind of
> > passthrough does not work today, right?
> > 
> > > >> has ACPI support and is responsible for detecting and controlling
> > > >> the hardware, also it performs privileged operations such as the
> > > >> creation of normal (unprivileged) domains DomUs. When we give to a
> > > >> DomU direct access to a device, we need also to route the physical
> > > >> interrupts to the DomU. In order to do so Xen needs to setup and map
> > > >> the interrupts appropriately.
> > > > 
> > > > What kernel interfaces are used for this setup and mapping?
> > >
> > > For passthrough devices, the setup and mapping of routing physical
> > > interrupts to DomU are done on Xen hypervisor side, hypervisor only
> > > need userspace to provide the GSI info, see Xen code:
> > > xc_physdev_map_pirq require GSI and then will call hypercall to pass
> > > GSI into hypervisor and then hypervisor will do the mapping and
> > > routing, kernel doesn't do the setup and mapping.
> > 
> > So we have to expose the GSI to userspace not because userspace itself
> > uses it, but so userspace can turn around and pass it back into the
> > kernel?
> 
> No, the point is to pass it back to Xen, which doesn't know the
> mapping between GSIs and PCI devices because it can't execute the ACPI
> AML resource methods that provide such information.
> 
> The (Linux) kernel is just a proxy that forwards the hypercalls from
> user-space tools into Xen.

But I guess Xen knows how to interpret a GSI even though it doesn't
have access to AML?

> > It seems like it would be better for userspace to pass an identifier
> > of the PCI device itself back into the hypervisor.  Then the interface
> > could be generic and potentially work even on non-ACPI systems where
> > the GSI concept doesn't apply.
> 
> We would still need a way to pass the GSI to PCI device relation to
> the hypervisor, and then cache such data in the hypervisor.
> 
> I don't think we have any preference of where such information should
> be exposed, but given GSIs are an ACPI concept not specific to Xen
> they should be exposed by a non-Xen specific interface.

AFAIK Linux doesn't expose GSIs directly to userspace yet.  The GSI
concept relies on ACPI MADT, _MAT, _PRT, etc.  A GSI is associated
with some device (PCI in this case) and some interrupt controller
entry.  I don't understand how a GSI value is useful without knowing
something about that framework in which GSIs exist.

Obviously I know less than nothing about Xen, so I apologize for
asking all these stupid questions, but it just doesn't all make sense
to me yet.

Bjorn



Re: [RFC KERNEL PATCH v4 3/3] PCI/sysfs: Add gsi sysfs for pci_dev

2024-01-31 Thread Bjorn Helgaas
On Wed, Jan 31, 2024 at 09:58:19AM +0100, Roger Pau Monné wrote:
> On Tue, Jan 30, 2024 at 02:44:03PM -0600, Bjorn Helgaas wrote:
> > On Tue, Jan 30, 2024 at 10:07:36AM +0100, Roger Pau Monné wrote:
> > > On Mon, Jan 29, 2024 at 04:01:13PM -0600, Bjorn Helgaas wrote:
> > > > On Thu, Jan 25, 2024 at 07:17:24AM +, Chen, Jiqian wrote:
> > > > > On 2024/1/24 00:02, Bjorn Helgaas wrote:
> > > > > > On Tue, Jan 23, 2024 at 10:13:52AM +0000, Chen, Jiqian wrote:
> > > > > >> On 2024/1/23 07:37, Bjorn Helgaas wrote:
> > > > > >>> On Fri, Jan 05, 2024 at 02:22:17PM +0800, Jiqian Chen wrote:
> > > > > >>>> There is a need for some scenarios to use gsi sysfs.
> > > > > >>>> For example, when xen passthrough a device to dumU, it will
> > > > > >>>> use gsi to map pirq, but currently userspace can't get gsi
> > > > > >>>> number.
> > > > > >>>> So, add gsi sysfs for that and for other potential scenarios.
> > > > > >> ...
> > > > > > 
> > > > > >>> I don't know enough about Xen to know why it needs the GSI in
> > > > > >>> userspace.  Is this passthrough brand new functionality that 
> > > > > >>> can't be
> > > > > >>> done today because we don't expose the GSI yet?
> > > > 
> > > > I assume this must be new functionality, i.e., this kind of
> > > > passthrough does not work today, right?
> > > > 
> > > > > >> has ACPI support and is responsible for detecting and controlling
> > > > > >> the hardware, also it performs privileged operations such as the
> > > > > >> creation of normal (unprivileged) domains DomUs. When we give to a
> > > > > >> DomU direct access to a device, we need also to route the physical
> > > > > >> interrupts to the DomU. In order to do so Xen needs to setup and 
> > > > > >> map
> > > > > >> the interrupts appropriately.
> > > > > > 
> > > > > > What kernel interfaces are used for this setup and mapping?
> > > > >
> > > > > For passthrough devices, the setup and mapping of routing physical
> > > > > interrupts to DomU are done on Xen hypervisor side, hypervisor only
> > > > > need userspace to provide the GSI info, see Xen code:
> > > > > xc_physdev_map_pirq require GSI and then will call hypercall to pass
> > > > > GSI into hypervisor and then hypervisor will do the mapping and
> > > > > routing, kernel doesn't do the setup and mapping.
> > > > 
> > > > So we have to expose the GSI to userspace not because userspace itself
> > > > uses it, but so userspace can turn around and pass it back into the
> > > > kernel?
> > > 
> > > No, the point is to pass it back to Xen, which doesn't know the
> > > mapping between GSIs and PCI devices because it can't execute the ACPI
> > > AML resource methods that provide such information.
> > > 
> > > The (Linux) kernel is just a proxy that forwards the hypercalls from
> > > user-space tools into Xen.
> > 
> > But I guess Xen knows how to interpret a GSI even though it doesn't
> > have access to AML?
> 
> On x86 Xen does know how to map a GSI into an IO-APIC pin, in order
> configure the RTE as requested.

IIUC, mapping a GSI to an IO-APIC pin requires information from the
MADT.  So I guess Xen does use the static ACPI tables, but not the AML
_PRT methods that would connect a GSI with a PCI device?

I guess this means Xen would not be able to deal with _MAT methods,
which also contains MADT entries?  I don't know the implications of
this -- maybe it means Xen might not be able to use with hot-added
devices?

The tables (including DSDT and SSDTS that contain the AML) are exposed
to userspace via /sys/firmware/acpi/tables/, but of course that
doesn't mean Xen knows how to interpret the AML, and even if it did,
Xen probably wouldn't be able to *evaluate* it since that could
conflict with the host kernel's use of AML.

Bjorn



Re: [RFC KERNEL PATCH v4 3/3] PCI/sysfs: Add gsi sysfs for pci_dev

2024-02-09 Thread Bjorn Helgaas
On Thu, Feb 01, 2024 at 09:39:49AM +0100, Roger Pau Monné wrote:
> On Wed, Jan 31, 2024 at 01:00:14PM -0600, Bjorn Helgaas wrote:
> > On Wed, Jan 31, 2024 at 09:58:19AM +0100, Roger Pau Monné wrote:
> > > On Tue, Jan 30, 2024 at 02:44:03PM -0600, Bjorn Helgaas wrote:
> > > > On Tue, Jan 30, 2024 at 10:07:36AM +0100, Roger Pau Monné wrote:
> > > > > On Mon, Jan 29, 2024 at 04:01:13PM -0600, Bjorn Helgaas wrote:
> > > > > > On Thu, Jan 25, 2024 at 07:17:24AM +0000, Chen, Jiqian wrote:
> > > > > > > On 2024/1/24 00:02, Bjorn Helgaas wrote:
> > > > > > > > On Tue, Jan 23, 2024 at 10:13:52AM +, Chen, Jiqian wrote:
> > > > > > > >> On 2024/1/23 07:37, Bjorn Helgaas wrote:
> > > > > > > >>> On Fri, Jan 05, 2024 at 02:22:17PM +0800, Jiqian Chen wrote:
> > > > > > > >>>> There is a need for some scenarios to use gsi sysfs.
> > > > > > > >>>> For example, when xen passthrough a device to dumU, it will
> > > > > > > >>>> use gsi to map pirq, but currently userspace can't get gsi
> > > > > > > >>>> number.
> > > > > > > >>>> So, add gsi sysfs for that and for other potential scenarios.
> > > > > > > >> ...
> > > > > > > > 
> > > > > > > >>> I don't know enough about Xen to know why it needs the GSI in
> > > > > > > >>> userspace.  Is this passthrough brand new functionality that 
> > > > > > > >>> can't be
> > > > > > > >>> done today because we don't expose the GSI yet?
> > > > > > 
> > > > > > I assume this must be new functionality, i.e., this kind of
> > > > > > passthrough does not work today, right?
> > > > > > 
> > > > > > > >> has ACPI support and is responsible for detecting and 
> > > > > > > >> controlling
> > > > > > > >> the hardware, also it performs privileged operations such as 
> > > > > > > >> the
> > > > > > > >> creation of normal (unprivileged) domains DomUs. When we give 
> > > > > > > >> to a
> > > > > > > >> DomU direct access to a device, we need also to route the 
> > > > > > > >> physical
> > > > > > > >> interrupts to the DomU. In order to do so Xen needs to setup 
> > > > > > > >> and map
> > > > > > > >> the interrupts appropriately.
> > > > > > > > 
> > > > > > > > What kernel interfaces are used for this setup and mapping?
> > > > > > >
> > > > > > > For passthrough devices, the setup and mapping of routing physical
> > > > > > > interrupts to DomU are done on Xen hypervisor side, hypervisor 
> > > > > > > only
> > > > > > > need userspace to provide the GSI info, see Xen code:
> > > > > > > xc_physdev_map_pirq require GSI and then will call hypercall to 
> > > > > > > pass
> > > > > > > GSI into hypervisor and then hypervisor will do the mapping and
> > > > > > > routing, kernel doesn't do the setup and mapping.
> > > > > > 
> > > > > > So we have to expose the GSI to userspace not because userspace 
> > > > > > itself
> > > > > > uses it, but so userspace can turn around and pass it back into the
> > > > > > kernel?
> > > > > 
> > > > > No, the point is to pass it back to Xen, which doesn't know the
> > > > > mapping between GSIs and PCI devices because it can't execute the ACPI
> > > > > AML resource methods that provide such information.
> > > > > 
> > > > > The (Linux) kernel is just a proxy that forwards the hypercalls from
> > > > > user-space tools into Xen.
> > > > 
> > > > But I guess Xen knows how to interpret a GSI even though it doesn't
> > > > have access to AML?
> > > 
> > > On x86 Xen does know how to map a GSI into an IO-APIC pin, in order
> > > configure the RTE as requested.
> > 
> > IIUC, mapping a GSI to an IO-APIC pin requires information from the
> > MADT

Re: [RFC KERNEL PATCH v4 3/3] PCI/sysfs: Add gsi sysfs for pci_dev

2024-02-12 Thread Bjorn Helgaas
On Mon, Feb 12, 2024 at 10:13:28AM +0100, Roger Pau Monné wrote:
> On Fri, Feb 09, 2024 at 03:05:49PM -0600, Bjorn Helgaas wrote:
> > On Thu, Feb 01, 2024 at 09:39:49AM +0100, Roger Pau Monné wrote:
> > > On Wed, Jan 31, 2024 at 01:00:14PM -0600, Bjorn Helgaas wrote:
> > > > On Wed, Jan 31, 2024 at 09:58:19AM +0100, Roger Pau Monné wrote:
> > > > > On Tue, Jan 30, 2024 at 02:44:03PM -0600, Bjorn Helgaas wrote:
> > > > > > On Tue, Jan 30, 2024 at 10:07:36AM +0100, Roger Pau Monné wrote:
> > > > > > > On Mon, Jan 29, 2024 at 04:01:13PM -0600, Bjorn Helgaas wrote:
> > > > > > > > On Thu, Jan 25, 2024 at 07:17:24AM +, Chen, Jiqian wrote:
> > > > > > > > > On 2024/1/24 00:02, Bjorn Helgaas wrote:
> > > > > > > > > > On Tue, Jan 23, 2024 at 10:13:52AM +, Chen, Jiqian 
> > > > > > > > > > wrote:
> > > > > > > > > >> On 2024/1/23 07:37, Bjorn Helgaas wrote:
> > > > > > > > > >>> On Fri, Jan 05, 2024 at 02:22:17PM +0800, Jiqian Chen 
> > > > > > > > > >>> wrote:
> > > > > > > > > >>>> There is a need for some scenarios to use gsi sysfs.
> > > > > > > > > >>>> For example, when xen passthrough a device to dumU, it 
> > > > > > > > > >>>> will
> > > > > > > > > >>>> use gsi to map pirq, but currently userspace can't get 
> > > > > > > > > >>>> gsi
> > > > > > > > > >>>> number.
> > > > > > > > > >>>> So, add gsi sysfs for that and for other potential 
> > > > > > > > > >>>> scenarios.
> > > > > > > > > >> ...
> > > > > > > > > > 
> > > > > > > > > >>> I don't know enough about Xen to know why it needs the 
> > > > > > > > > >>> GSI in
> > > > > > > > > >>> userspace.  Is this passthrough brand new functionality 
> > > > > > > > > >>> that can't be
> > > > > > > > > >>> done today because we don't expose the GSI yet?
> > > > > > > > 
> > > > > > > > I assume this must be new functionality, i.e., this kind of
> > > > > > > > passthrough does not work today, right?
> > > > > > > > 
> > > > > > > > > >> has ACPI support and is responsible for detecting and 
> > > > > > > > > >> controlling
> > > > > > > > > >> the hardware, also it performs privileged operations such 
> > > > > > > > > >> as the
> > > > > > > > > >> creation of normal (unprivileged) domains DomUs. When we 
> > > > > > > > > >> give to a
> > > > > > > > > >> DomU direct access to a device, we need also to route the 
> > > > > > > > > >> physical
> > > > > > > > > >> interrupts to the DomU. In order to do so Xen needs to 
> > > > > > > > > >> setup and map
> > > > > > > > > >> the interrupts appropriately.
> > > > > > > > > > 
> > > > > > > > > > What kernel interfaces are used for this setup and mapping?
> > > > > > > > >
> > > > > > > > > For passthrough devices, the setup and mapping of routing 
> > > > > > > > > physical
> > > > > > > > > interrupts to DomU are done on Xen hypervisor side, 
> > > > > > > > > hypervisor only
> > > > > > > > > need userspace to provide the GSI info, see Xen code:
> > > > > > > > > xc_physdev_map_pirq require GSI and then will call hypercall 
> > > > > > > > > to pass
> > > > > > > > > GSI into hypervisor and then hypervisor will do the mapping 
> > > > > > > > > and
> > > > > > > > > routing, kernel doesn't do the setup and mapping.
> > > > > > > > 
> > > > > > > > So we have to expose the GSI t

Re: [RFC KERNEL PATCH v4 3/3] PCI/sysfs: Add gsi sysfs for pci_dev

2024-01-22 Thread Bjorn Helgaas
On Fri, Jan 05, 2024 at 02:22:17PM +0800, Jiqian Chen wrote:
> There is a need for some scenarios to use gsi sysfs.
> For example, when xen passthrough a device to dumU, it will
> use gsi to map pirq, but currently userspace can't get gsi
> number.
> So, add gsi sysfs for that and for other potential scenarios.

Isn't GSI really an ACPI-specific concept?

I don't know enough about Xen to know why it needs the GSI in
userspace.  Is this passthrough brand new functionality that can't be
done today because we don't expose the GSI yet?

How does userspace use the GSI?  I see "to map pirq", but I think we
should have more concrete details about exactly what is needed and how
it is used before adding something new in sysfs.

Is there some more generic kernel interface we could use
for this?

s/dumU/DomU/ ?  (I dunno, but https://www.google.com/search?q=xen+dumu
suggests it :))

> Co-developed-by: Huang Rui 
> Signed-off-by: Jiqian Chen 
> ---
>  drivers/acpi/pci_irq.c  |  1 +
>  drivers/pci/pci-sysfs.c | 11 +++
>  include/linux/pci.h |  2 ++
>  3 files changed, 14 insertions(+)
> 
> diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c
> index 630fe0a34bc6..739a58755df2 100644
> --- a/drivers/acpi/pci_irq.c
> +++ b/drivers/acpi/pci_irq.c
> @@ -449,6 +449,7 @@ int acpi_pci_irq_enable(struct pci_dev *dev)
>   kfree(entry);
>   return 0;
>   }
> + dev->gsi = gsi;
>  
>   rc = acpi_register_gsi(&dev->dev, gsi, triggering, polarity);
>   if (rc < 0) {
> diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
> index 2321fdfefd7d..c51df88d079e 100644
> --- a/drivers/pci/pci-sysfs.c
> +++ b/drivers/pci/pci-sysfs.c
> @@ -71,6 +71,16 @@ static ssize_t irq_show(struct device *dev,
>  }
>  static DEVICE_ATTR_RO(irq);
>  
> +static ssize_t gsi_show(struct device *dev,
> + struct device_attribute *attr,
> + char *buf)
> +{
> + struct pci_dev *pdev = to_pci_dev(dev);
> +
> + return sysfs_emit(buf, "%u\n", pdev->gsi);
> +}
> +static DEVICE_ATTR_RO(gsi);
> +
>  static ssize_t broken_parity_status_show(struct device *dev,
>struct device_attribute *attr,
>char *buf)
> @@ -596,6 +606,7 @@ static struct attribute *pci_dev_attrs[] = {
>   &dev_attr_revision.attr,
>   &dev_attr_class.attr,
>   &dev_attr_irq.attr,
> + &dev_attr_gsi.attr,
>   &dev_attr_local_cpus.attr,
>   &dev_attr_local_cpulist.attr,
>   &dev_attr_modalias.attr,
> diff --git a/include/linux/pci.h b/include/linux/pci.h
> index dea043bc1e38..0618d4a87a50 100644
> --- a/include/linux/pci.h
> +++ b/include/linux/pci.h
> @@ -529,6 +529,8 @@ struct pci_dev {
>  
>   /* These methods index pci_reset_fn_methods[] */
>   u8 reset_methods[PCI_NUM_RESET_METHODS]; /* In priority order */
> +
> + unsigned intgsi;
>  };
>  
>  static inline struct pci_dev *pci_physfn(struct pci_dev *dev)
> -- 
> 2.34.1
> 
> 



Re: [RFC KERNEL PATCH v4 3/3] PCI/sysfs: Add gsi sysfs for pci_dev

2024-01-23 Thread Bjorn Helgaas
On Tue, Jan 23, 2024 at 10:13:52AM +, Chen, Jiqian wrote:
> On 2024/1/23 07:37, Bjorn Helgaas wrote:
> > On Fri, Jan 05, 2024 at 02:22:17PM +0800, Jiqian Chen wrote:
> >> There is a need for some scenarios to use gsi sysfs.
> >> For example, when xen passthrough a device to dumU, it will
> >> use gsi to map pirq, but currently userspace can't get gsi
> >> number.
> >> So, add gsi sysfs for that and for other potential scenarios.
> ...

> > I don't know enough about Xen to know why it needs the GSI in
> > userspace.  Is this passthrough brand new functionality that can't be
> > done today because we don't expose the GSI yet?
>
> In Xen architecture, there is a privileged domain named Dom0 that
> has ACPI support and is responsible for detecting and controlling
> the hardware, also it performs privileged operations such as the
> creation of normal (unprivileged) domains DomUs. When we give to a
> DomU direct access to a device, we need also to route the physical
> interrupts to the DomU. In order to do so Xen needs to setup and map
> the interrupts appropriately.

What kernel interfaces are used for this setup and mapping?