Re: [RFC PATCH] irq/affinity: Mark the pre/post vectors as regular interrupts

2018-09-20 Thread Dou Liyang

Hi Kashyap,

On 2018/9/20 16:39, Kashyap Desai wrote:

Dou - Do you want me to test your patch, or shall I wait for the next version?


I'm sorry; please wait for the next version.

Thanks,
dou



RE: [RFC PATCH] irq/affinity: Mark the pre/post vectors as regular interrupts

2018-09-20 Thread Kashyap Desai
> This is the wrong direction as it does not allow to do initial affinity
> assignment for the non-managed interrupts at allocation time. And that's
> what Kashyap and Sumit are looking for.
>
> The trivial fix for the possible breakage when irq_default_affinity !=
> cpu_possible_mask is to set the affinity for the pre/post vectors to
> cpu_possible_mask and be done with it.
>
> One other thing I noticed while staring at that is the fact that the PCI
> code does not care about the return value of irq_create_affinity_masks() at
> all. It just proceeds if masks is NULL as if the stuff was requested with
> the affinity descriptor pointer being NULL. I don't think that's a
> brilliant idea because the drivers might rely on the interrupt being
> managed, but it might be a non-issue and just result in bad locality.
>
> Christoph?
>
> So back to the problem at hand. We need to change the affinity management
> scheme in a way which lets us differentiate between managed and unmanaged
> interrupts, so it allows to automatically assign (initial) affinity to all
> of them.
>
> Right now everything is bound to the cpumasks array, which does not allow
> to convey more information. So we need to come up with something different.
>
> Something like the below (does not compile and is just for reference)
> should do the trick. I'm not sure whether we want to convey the information
> (masks and bitmap) in a single data structure or not, but that's an
> implementation detail.


Dou - Do you want me to test your patch, or shall I wait for the next version?

>
> Thanks,
>
> 	tglx
>
> 8<-
> --- a/drivers/pci/msi.c
> +++ b/drivers/pci/msi.c
> @@ -535,15 +535,16 @@ static struct msi_desc *
>  msi_setup_entry(struct pci_dev *dev, int nvec, const struct irq_affinity *affd)
>  {
> 	struct cpumask *masks = NULL;
> +	unsigned long managed = 0;
> 	struct msi_desc *entry;
> 	u16 control;
>
> 	if (affd)
> -		masks = irq_create_affinity_masks(nvec, affd);
> +		masks = irq_create_affinity_masks(nvec, affd, &managed);
>
>
> 	/* MSI Entry Initialization */
> -	entry = alloc_msi_entry(&dev->dev, nvec, masks);
> +	entry = alloc_msi_entry(&dev->dev, nvec, masks, managed);
> 	if (!entry)
> 		goto out;
>
> @@ -672,15 +673,22 @@ static int msix_setup_entries(struct pci
> 			      struct msix_entry *entries, int nvec,
> 			      const struct irq_affinity *affd)
>  {
> +	/*
> +	 * MSIX_MAX_VECTORS = 2048, i.e. 256 bytes. Might need runtime
> +	 * allocation. OTOH, are 2048 vectors realistic?
> +	 */
> +	DECLARE_BITMAP(managed, MSIX_MAX_VECTORS);
> 	struct cpumask *curmsk, *masks = NULL;
> 	struct msi_desc *entry;
> 	int ret, i;
>
> 	if (affd)
> -		masks = irq_create_affinity_masks(nvec, affd);
> +		masks = irq_create_affinity_masks(nvec, affd, managed);
>
> 	for (i = 0, curmsk = masks; i < nvec; i++) {
> -		entry = alloc_msi_entry(&dev->dev, 1, curmsk);
> +		unsigned long m = test_bit(i, managed) ? 1 : 0;
> +
> +		entry = alloc_msi_entry(&dev->dev, 1, curmsk, m);
> 		if (!entry) {
> 			if (!i)
> 				iounmap(base);
> --- a/kernel/irq/msi.c
> +++ b/kernel/irq/msi.c
> @@ -27,7 +27,8 @@
>   * and the affinity masks from @affinity are copied.
>   */
>  struct msi_desc *
> -alloc_msi_entry(struct device *dev, int nvec, const struct cpumask *affinity)
> +alloc_msi_entry(struct device *dev, int nvec, const struct cpumask *affinity,
> +		unsigned long managed)
>  {
> 	struct msi_desc *desc;
>
> @@ -38,6 +39,7 @@ alloc_msi_entry(struct device *dev, int
> 	INIT_LIST_HEAD(&desc->list);
> 	desc->dev = dev;
> 	desc->nvec_used = nvec;
> +	desc->managed = managed;
> 	if (affinity) {
> 		desc->affinity = kmemdup(affinity,
> 				nvec * sizeof(*desc->affinity), GFP_KERNEL);
> @@ -416,7 +418,7 @@ int msi_domain_alloc_irqs(struct irq_dom
>
> 	virq = __irq_domain_alloc_irqs(domain, -1, desc->nvec_used,
> 				       dev_to_node(dev), &arg, false,
> -				       desc->affinity);
> +				       desc->affinity, desc->managed);
> 	if (virq < 0) {
> 		ret = -ENOSPC;
> 		if (ops->handle_error)


Re: [RFC PATCH] irq/affinity: Mark the pre/post vectors as regular interrupts

2018-09-13 Thread Thomas Gleixner
On Thu, 13 Sep 2018, Dou Liyang wrote:
> So, clear these affinity mask and check it in alloc_desc() to leave them
> as regular interrupts which can be affinity controlled and also can move
> freely on hotplug.

This is the wrong direction as it does not allow to do initial affinity
assignment for the non-managed interrupts at allocation time. And that's
what Kashyap and Sumit are looking for.

The trivial fix for the possible breakage when irq_default_affinity !=
cpu_possible_mask is to set the affinity for the pre/post vectors to
cpu_possible_mask and be done with it.

One other thing I noticed while staring at that is the fact that the PCI
code does not care about the return value of irq_create_affinity_masks() at
all. It just proceeds if masks is NULL as if the stuff was requested with
the affinity descriptor pointer being NULL. I don't think that's a
brilliant idea because the drivers might rely on the interrupt being
managed, but it might be a non-issue and just result in bad locality.

Christoph?

So back to the problem at hand. We need to change the affinity management
scheme in a way which lets us differentiate between managed and unmanaged
interrupts, so it allows to automatically assign (initial) affinity to all
of them.

Right now everything is bound to the cpumasks array, which does not allow
to convey more information. So we need to come up with something different.

Something like the below (does not compile and is just for reference)
should do the trick. I'm not sure whether we want to convey the information
(masks and bitmap) in a single data structure or not, but that's an
implementation detail.

Thanks,

tglx

8<-
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -535,15 +535,16 @@ static struct msi_desc *
 msi_setup_entry(struct pci_dev *dev, int nvec, const struct irq_affinity *affd)
 {
struct cpumask *masks = NULL;
+   unsigned long managed = 0;
struct msi_desc *entry;
u16 control;
 
if (affd)
-   masks = irq_create_affinity_masks(nvec, affd);
+   masks = irq_create_affinity_masks(nvec, affd, &managed);
 
 
/* MSI Entry Initialization */
-   entry = alloc_msi_entry(&dev->dev, nvec, masks);
+   entry = alloc_msi_entry(&dev->dev, nvec, masks, managed);
if (!entry)
goto out;
 
@@ -672,15 +673,22 @@ static int msix_setup_entries(struct pci
  struct msix_entry *entries, int nvec,
  const struct irq_affinity *affd)
 {
+   /*
+* MSIX_MAX_VECTORS = 2048, i.e. 256 bytes. Might need runtime
+* allocation. OTOH, are 2048 vectors realistic?
+*/
+   DECLARE_BITMAP(managed, MSIX_MAX_VECTORS);
struct cpumask *curmsk, *masks = NULL;
struct msi_desc *entry;
int ret, i;
 
if (affd)
-   masks = irq_create_affinity_masks(nvec, affd);
+   masks = irq_create_affinity_masks(nvec, affd, managed);
 
for (i = 0, curmsk = masks; i < nvec; i++) {
-   entry = alloc_msi_entry(&dev->dev, 1, curmsk);
+   unsigned long m = test_bit(i, managed) ? 1 : 0;
+
+   entry = alloc_msi_entry(&dev->dev, 1, curmsk, m);
if (!entry) {
if (!i)
iounmap(base);
--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
@@ -27,7 +27,8 @@
  * and the affinity masks from @affinity are copied.
  */
 struct msi_desc *
-alloc_msi_entry(struct device *dev, int nvec, const struct cpumask *affinity)
+alloc_msi_entry(struct device *dev, int nvec, const struct cpumask *affinity,
+   unsigned long managed)
 {
struct msi_desc *desc;
 
@@ -38,6 +39,7 @@ alloc_msi_entry(struct device *dev, int
	INIT_LIST_HEAD(&desc->list);
desc->dev = dev;
desc->nvec_used = nvec;
+   desc->managed = managed;
if (affinity) {
desc->affinity = kmemdup(affinity,
nvec * sizeof(*desc->affinity), GFP_KERNEL);
@@ -416,7 +418,7 @@ int msi_domain_alloc_irqs(struct irq_dom
 
virq = __irq_domain_alloc_irqs(domain, -1, desc->nvec_used,
   dev_to_node(dev), &arg, false,
-  desc->affinity);
+  desc->affinity, desc->managed);
if (virq < 0) {
ret = -ENOSPC;
if (ops->handle_error)
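
For readers who want to experiment with the idea outside the kernel, below is a
minimal standalone C sketch of the bookkeeping the reference patch describes:
a bitmap carried next to the per-vector affinity masks records which vectors
are managed, while the pre/post vectors stay clear in the bitmap and fall back
to the "all possible CPUs" default, so they remain regular, user-movable
interrupts. The struct and function names (affd_sketch, mark_managed) are
illustrative stand-ins, not the kernel's API, and the bitmap is capped at 64
vectors for simplicity.

/*
 * Standalone sketch (plain C, not kernel code) of the managed-vector
 * bitmap scheme. Only the spread vectors between pre_vectors and
 * post_vectors are marked managed; everything else is a regular
 * interrupt whose initial affinity would be cpu_possible_mask.
 */
#include <stdio.h>

struct affd_sketch {                    /* stand-in for struct irq_affinity */
	unsigned int pre_vectors;       /* vectors before the managed range */
	unsigned int post_vectors;      /* vectors after the managed range */
};

/* Build the managed bitmap; supports up to 64 vectors in this sketch. */
static unsigned long mark_managed(unsigned int nvec,
				  const struct affd_sketch *affd)
{
	unsigned long managed = 0;
	unsigned int i;

	for (i = affd->pre_vectors; i < nvec - affd->post_vectors; i++)
		managed |= 1UL << i;
	return managed;
}

int main(void)
{
	const struct affd_sketch affd = { .pre_vectors = 2, .post_vectors = 1 };
	unsigned int nvec = 8, i;
	unsigned long managed = mark_managed(nvec, &affd);

	for (i = 0; i < nvec; i++)
		printf("vector %u: %s\n", i,
		       (managed >> i) & 1UL ?
		       "managed, kernel-assigned spread affinity" :
		       "regular, initial affinity = cpu_possible_mask");
	return 0;
}

With nvec = 8, pre_vectors = 2 and post_vectors = 1, the sketch reports
vectors 0, 1 and 7 as regular and vectors 2 through 6 as managed, which is the
split the pre/post handling in the patch above is meant to produce.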

