Re: [RFC PATCH v4 4/7] PCI: Modify resource_alignment to support multiple devices

2016-03-20 Thread Alex Williamson
On Thu, 17 Mar 2016 19:28:34 +0800
Yongji Xie  wrote:

> On 2016/3/17 0:30, Alex Williamson wrote:
> > On Mon,  7 Mar 2016 15:48:35 +0800
> > Yongji Xie  wrote:
> >  
> >> When vfio passthrough a PCI device of which MMIO BARs
> >> are smaller than PAGE_SIZE, guest will not handle the
> >> mmio accesses to the BARs which leads to mmio emulations
> >> in host.
> >>
> >> This is because vfio will not allow to passthrough one
> >> BAR's mmio page which may be shared with other BARs.
> >>
> >> To solve this performance issue, this patch modifies
> >> resource_alignment to support syntax where multiple
> >> devices get the same alignment. So we can use something
> >> like "pci=resource_alignment=*:*:*.*:noresize" to
> >> enforce the alignment of all MMIO BARs to be at least
> >> PAGE_SIZE so that one BAR's mmio page would not be
> >> shared with other BARs.
> >>
> >> Signed-off-by: Yongji Xie 
> >> ---
> >>   Documentation/kernel-parameters.txt |2 +
> >>   drivers/pci/pci.c   |   90 
> >> ++-
> >>   include/linux/pci.h |4 ++
> >>   3 files changed, 85 insertions(+), 11 deletions(-)
> >>
> >> diff --git a/Documentation/kernel-parameters.txt 
> >> b/Documentation/kernel-parameters.txt
> >> index 8028631..74b38ab 100644
> >> --- a/Documentation/kernel-parameters.txt
> >> +++ b/Documentation/kernel-parameters.txt
> >> @@ -2918,6 +2918,8 @@ bytes respectively. Such letter suffixes can also be 
> >> entirely omitted.
> >>aligned memory resources.
> >>If  is not specified,
> >>PAGE_SIZE is used as alignment.
> >> +  , ,  and  can be set to
> >> +  "*" which means match all values.  
> > I don't see anywhere that you're automatically enabling this for your
> > platform, so presumably you're expecting users to determine on their
> > own that they have a performance problem and hoping that they'll figure
> > out that they need to use this option to resolve it.  The first irate
> > question you'll get back is why doesn't this happen automatically?  
> 
> Actually, I just want to make the code simple. Maybe it is
> not a good idea to let users enable this option manually.
> I will try to fix it like that:

That's not entirely what I meant, I think having a way for a user to
enable it is a good thing, but maybe there need to be cases where it's
enabled automatically.  With 4k pages, this is often not an issue since
the PCI spec recommends 4k or 8k alignment for resources, but that
doesn't preclude an unusual device where a user might want to enable it
anyway.  At 64k page size, problems become more common, so we need to
think about either enabling it automatically or somehow making it more
apparent to the user that the option is available for this purpose.

> 
> diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h
> index 6f8065a..6659752 100644
> --- a/arch/powerpc/include/asm/pci.h
> +++ b/arch/powerpc/include/asm/pci.h
> @@ -30,6 +30,8 @@
>   #define PCIBIOS_MIN_IO 0x1000
>   #define PCIBIOS_MIN_MEM0x1000
> 
> +#define PCIBIOS_MIN_ALIGNMENT  PAGE_SIZE
> +
>   struct pci_dev;
> 
>   /* Values for the `which' argument to sys_pciconfig_iobase syscall.  */
> diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
> index dadd28a..9f644e4 100644
> --- a/drivers/pci/pci.c
> +++ b/drivers/pci/pci.c
> @@ -4593,6 +4593,8 @@ EXPORT_SYMBOL_GPL(pci_ignore_hotplug);
>   static char resource_alignment_param[RESOURCE_ALIGNMENT_PARAM_SIZE] = {0};
>   static DEFINE_SPINLOCK(resource_alignment_lock);
> 
> +#define DISABLE_ARCH_ALIGNMENT -1
> +#define DEFAULT_ALIGNMENT  -2
>   /**
>* pci_specified_resource_alignment - get resource alignment specified 
> by user.
>* @dev: the PCI device to get
> @@ -4609,6 +4611,9 @@ static resource_size_t 
> pci_specified_resource_alignment(struct pci_dev *dev,
>  char *p;
>  bool invalid = false;
> 
> +#ifdef PCIBIOS_MIN_ALIGNMENT
> +   align = PCIBIOS_MIN_ALIGNMENT;
> +#endif
>  spin_lock(&resource_alignment_lock);
>  p = resource_alignment_param;
>  while (*p) {
> @@ -4617,7 +4622,7 @@ static resource_size_t 
> pci_specified_resource_alignment(struct pci_dev *dev,
>  p[count] == '@') {
>  p += count + 1;
>  } else {
> -   align_order = -1;
> +   align_order = DEFAULT_ALIGNMENT;
>  }
>  if (p[0] == '*' && p[1] == ':') {
>  seg = -1;
> @@ -4673,8 +4678,10 @@ static resource_size_t 
> pci_specified_resource_alignment(struct pci_dev *dev,
>  (bus == dev->bus->number || bus == -1) &&
>  (slot == PCI_SLOT(dev->devfn) || slot == -1) &&
>  (func == PCI

Re: [RFC PATCH v4 4/7] PCI: Modify resource_alignment to support multiple devices

2016-03-19 Thread Yongji Xie

On 2016/3/17 20:40, Alex Williamson wrote:

On Thu, 17 Mar 2016 19:28:34 +0800
Yongji Xie  wrote:


On 2016/3/17 0:30, Alex Williamson wrote:

On Mon,  7 Mar 2016 15:48:35 +0800
Yongji Xie  wrote:
  

When vfio passthrough a PCI device of which MMIO BARs
are smaller than PAGE_SIZE, guest will not handle the
mmio accesses to the BARs which leads to mmio emulations
in host.

This is because vfio will not allow to passthrough one
BAR's mmio page which may be shared with other BARs.

To solve this performance issue, this patch modifies
resource_alignment to support syntax where multiple
devices get the same alignment. So we can use something
like "pci=resource_alignment=*:*:*.*:noresize" to
enforce the alignment of all MMIO BARs to be at least
PAGE_SIZE so that one BAR's mmio page would not be
shared with other BARs.

Signed-off-by: Yongji Xie 
---
   Documentation/kernel-parameters.txt |2 +
   drivers/pci/pci.c   |   90 
++-
   include/linux/pci.h |4 ++
   3 files changed, 85 insertions(+), 11 deletions(-)

diff --git a/Documentation/kernel-parameters.txt 
b/Documentation/kernel-parameters.txt
index 8028631..74b38ab 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2918,6 +2918,8 @@ bytes respectively. Such letter suffixes can also be 
entirely omitted.
aligned memory resources.
If  is not specified,
PAGE_SIZE is used as alignment.
+   , ,  and  can be set to
+   "*" which means match all values.

I don't see anywhere that you're automatically enabling this for your
platform, so presumably you're expecting users to determine on their
own that they have a performance problem and hoping that they'll figure
out that they need to use this option to resolve it.  The first irate
question you'll get back is why doesn't this happen automatically?

Actually, I just want to make the code simple. Maybe it is
not a good idea to let users enable this option manually.
I will try to fix it like that:

That's not entirely what I meant, I think having a way for a user to
enable it is a good thing, but maybe there need to be cases where it's
enabled automatically.  With 4k pages, this is often not an issue since
the PCI spec recommends 4k or 8k alignment for resources, but that
doesn't preclude an unusual device where a user might want to enable it
anyway.  At 64k page size, problems become more common, so we need to
think about either enabling it automatically or somehow making it more
apparent to the user that the option is available for this purpose.


OK. I see. I will add a new arch-independent macro to indicate
the min alignments of all PCI BARs. And we can also specify the
alignments through the resource_alignment.


diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h
index 6f8065a..6659752 100644
--- a/arch/powerpc/include/asm/pci.h
+++ b/arch/powerpc/include/asm/pci.h
@@ -30,6 +30,8 @@
   #define PCIBIOS_MIN_IO 0x1000
   #define PCIBIOS_MIN_MEM0x1000

+#define PCIBIOS_MIN_ALIGNMENT  PAGE_SIZE
+
   struct pci_dev;

   /* Values for the `which' argument to sys_pciconfig_iobase syscall.  */
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index dadd28a..9f644e4 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -4593,6 +4593,8 @@ EXPORT_SYMBOL_GPL(pci_ignore_hotplug);
   static char resource_alignment_param[RESOURCE_ALIGNMENT_PARAM_SIZE] = {0};
   static DEFINE_SPINLOCK(resource_alignment_lock);

+#define DISABLE_ARCH_ALIGNMENT -1
+#define DEFAULT_ALIGNMENT  -2
   /**
* pci_specified_resource_alignment - get resource alignment specified
by user.
* @dev: the PCI device to get
@@ -4609,6 +4611,9 @@ static resource_size_t
pci_specified_resource_alignment(struct pci_dev *dev,
  char *p;
  bool invalid = false;

+#ifdef PCIBIOS_MIN_ALIGNMENT
+   align = PCIBIOS_MIN_ALIGNMENT;
+#endif
  spin_lock(&resource_alignment_lock);
  p = resource_alignment_param;
  while (*p) {
@@ -4617,7 +4622,7 @@ static resource_size_t
pci_specified_resource_alignment(struct pci_dev *dev,
  p[count] == '@') {
  p += count + 1;
  } else {
-   align_order = -1;
+   align_order = DEFAULT_ALIGNMENT;
  }
  if (p[0] == '*' && p[1] == ':') {
  seg = -1;
@@ -4673,8 +4678,10 @@ static resource_size_t
pci_specified_resource_alignment(struct pci_dev *dev,
  (bus == dev->bus->number || bus == -1) &&
  (slot == PCI_SLOT(dev->devfn) || slot == -1) &&
  (func == PCI_FUNC(dev->devfn) || func == -1)) {
-   

Re: [RFC PATCH v4 4/7] PCI: Modify resource_alignment to support multiple devices

2016-03-19 Thread Alex Williamson
On Mon,  7 Mar 2016 15:48:35 +0800
Yongji Xie  wrote:

> When vfio passthrough a PCI device of which MMIO BARs
> are smaller than PAGE_SIZE, guest will not handle the
> mmio accesses to the BARs which leads to mmio emulations
> in host.
> 
> This is because vfio will not allow to passthrough one
> BAR's mmio page which may be shared with other BARs.
> 
> To solve this performance issue, this patch modifies
> resource_alignment to support syntax where multiple
> devices get the same alignment. So we can use something
> like "pci=resource_alignment=*:*:*.*:noresize" to
> enforce the alignment of all MMIO BARs to be at least
> PAGE_SIZE so that one BAR's mmio page would not be
> shared with other BARs.
> 
> Signed-off-by: Yongji Xie 
> ---
>  Documentation/kernel-parameters.txt |2 +
>  drivers/pci/pci.c   |   90 
> ++-
>  include/linux/pci.h |4 ++
>  3 files changed, 85 insertions(+), 11 deletions(-)
> 
> diff --git a/Documentation/kernel-parameters.txt 
> b/Documentation/kernel-parameters.txt
> index 8028631..74b38ab 100644
> --- a/Documentation/kernel-parameters.txt
> +++ b/Documentation/kernel-parameters.txt
> @@ -2918,6 +2918,8 @@ bytes respectively. Such letter suffixes can also be 
> entirely omitted.
>   aligned memory resources.
>   If  is not specified,
>   PAGE_SIZE is used as alignment.
> + , ,  and  can be set to
> + "*" which means match all values.

I don't see anywhere that you're automatically enabling this for your
platform, so presumably you're expecting users to determine on their
own that they have a performance problem and hoping that they'll figure
out that they need to use this option to resolve it.  The first irate
question you'll get back is why doesn't this happen automatically?

>   PCI-PCI bridge can be specified, if resource
>   windows need to be expanded.
>   noresize: Don't change the resources' sizes when
> diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
> index 760cce5..44ab59f 100644
> --- a/drivers/pci/pci.c
> +++ b/drivers/pci/pci.c
> @@ -102,6 +102,8 @@ unsigned int pcibios_max_latency = 255;
>  /* If set, the PCIe ARI capability will not be used. */
>  static bool pcie_ari_disabled;
>  
> +bool pci_resources_page_aligned;
> +
>  /**
>   * pci_bus_max_busnr - returns maximum PCI bus number of given bus' children
>   * @bus: pointer to PCI bus structure to search
> @@ -4604,6 +4606,7 @@ static resource_size_t 
> pci_specified_resource_alignment(struct pci_dev *dev,
>   int seg, bus, slot, func, align_order, count;
>   resource_size_t align = 0;
>   char *p;
> + bool invalid = false;
>  
>   spin_lock(&resource_alignment_lock);
>   p = resource_alignment_param;
> @@ -4615,16 +4618,49 @@ static resource_size_t 
> pci_specified_resource_alignment(struct pci_dev *dev,
>   } else {
>   align_order = -1;
>   }
> - if (sscanf(p, "%x:%x:%x.%x%n",
> - &seg, &bus, &slot, &func, &count) != 4) {
> + if (p[0] == '*' && p[1] == ':') {
> + seg = -1;
> + count = 1;
> + } else if (sscanf(p, "%x%n", &seg, &count) != 1 ||
> + p[count] != ':') {
> + invalid = true;
> + break;
> + }
> + p += count + 1;
> + if (*p == '*') {
> + bus = -1;
> + count = 1;
> + } else if (sscanf(p, "%x%n", &bus, &count) != 1) {
> + invalid = true;
> + break;
> + }
> + p += count;
> + if (*p == '.') {
> + slot = bus;
> + bus = seg;
>   seg = 0;
> - if (sscanf(p, "%x:%x.%x%n",
> - &bus, &slot, &func, &count) != 3) {
> - /* Invalid format */
> - printk(KERN_ERR "PCI: Can't parse 
> resource_alignment parameter: %s\n",
> - p);
> + p++;
> + } else if (*p == ':') {
> + p++;
> + if (p[0] == '*' && p[1] == '.') {
> + slot = -1;
> + count = 1;
> + } else if (sscanf(p, "%x%n", &slot, &count) != 1 ||
> + p[count] != '.') {
> + invalid = true;
>   break;
>   }
> + p += count + 1;
> + } else {
> + invalid = true;
> + 

Re: [RFC PATCH v4 4/7] PCI: Modify resource_alignment to support multiple devices

2016-03-19 Thread Yongji Xie

On 2016/3/17 0:30, Alex Williamson wrote:

On Mon,  7 Mar 2016 15:48:35 +0800
Yongji Xie  wrote:


When vfio passthrough a PCI device of which MMIO BARs
are smaller than PAGE_SIZE, guest will not handle the
mmio accesses to the BARs which leads to mmio emulations
in host.

This is because vfio will not allow to passthrough one
BAR's mmio page which may be shared with other BARs.

To solve this performance issue, this patch modifies
resource_alignment to support syntax where multiple
devices get the same alignment. So we can use something
like "pci=resource_alignment=*:*:*.*:noresize" to
enforce the alignment of all MMIO BARs to be at least
PAGE_SIZE so that one BAR's mmio page would not be
shared with other BARs.

Signed-off-by: Yongji Xie 
---
  Documentation/kernel-parameters.txt |2 +
  drivers/pci/pci.c   |   90 ++-
  include/linux/pci.h |4 ++
  3 files changed, 85 insertions(+), 11 deletions(-)

diff --git a/Documentation/kernel-parameters.txt 
b/Documentation/kernel-parameters.txt
index 8028631..74b38ab 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2918,6 +2918,8 @@ bytes respectively. Such letter suffixes can also be 
entirely omitted.
aligned memory resources.
If  is not specified,
PAGE_SIZE is used as alignment.
+   , ,  and  can be set to
+   "*" which means match all values.

I don't see anywhere that you're automatically enabling this for your
platform, so presumably you're expecting users to determine on their
own that they have a performance problem and hoping that they'll figure
out that they need to use this option to resolve it.  The first irate
question you'll get back is why doesn't this happen automatically?


Actually, I just want to make the code simple. Maybe it is
not a good idea to let users enable this option manually.
I will try to fix it like that:

diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h
index 6f8065a..6659752 100644
--- a/arch/powerpc/include/asm/pci.h
+++ b/arch/powerpc/include/asm/pci.h
@@ -30,6 +30,8 @@
 #define PCIBIOS_MIN_IO 0x1000
 #define PCIBIOS_MIN_MEM0x1000

+#define PCIBIOS_MIN_ALIGNMENT  PAGE_SIZE
+
 struct pci_dev;

 /* Values for the `which' argument to sys_pciconfig_iobase syscall.  */
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index dadd28a..9f644e4 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -4593,6 +4593,8 @@ EXPORT_SYMBOL_GPL(pci_ignore_hotplug);
 static char resource_alignment_param[RESOURCE_ALIGNMENT_PARAM_SIZE] = {0};
 static DEFINE_SPINLOCK(resource_alignment_lock);

+#define DISABLE_ARCH_ALIGNMENT -1
+#define DEFAULT_ALIGNMENT  -2
 /**
  * pci_specified_resource_alignment - get resource alignment specified 
by user.

  * @dev: the PCI device to get
@@ -4609,6 +4611,9 @@ static resource_size_t 
pci_specified_resource_alignment(struct pci_dev *dev,

char *p;
bool invalid = false;

+#ifdef PCIBIOS_MIN_ALIGNMENT
+   align = PCIBIOS_MIN_ALIGNMENT;
+#endif
spin_lock(&resource_alignment_lock);
p = resource_alignment_param;
while (*p) {
@@ -4617,7 +4622,7 @@ static resource_size_t 
pci_specified_resource_alignment(struct pci_dev *dev,

p[count] == '@') {
p += count + 1;
} else {
-   align_order = -1;
+   align_order = DEFAULT_ALIGNMENT;
}
if (p[0] == '*' && p[1] == ':') {
seg = -1;
@@ -4673,8 +4678,10 @@ static resource_size_t 
pci_specified_resource_alignment(struct pci_dev *dev,

(bus == dev->bus->number || bus == -1) &&
(slot == PCI_SLOT(dev->devfn) || slot == -1) &&
(func == PCI_FUNC(dev->devfn) || func == -1)) {
-   if (align_order == -1)
+   if (align_order == DEFAULT_ALIGNMENT)
align = PAGE_SIZE;
+   else if (align_order == DISABLE_ARCH_ALIGNMENT)
+   align = 0;
else
align = 1 << align_order;
if (!pci_resources_page_aligned &&

PCI-PCI bridge can be specified, if resource
windows need to be expanded.
noresize: Don't change the resources' sizes when
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 760cce5..44ab59f 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -102,6 +102,8 @@ unsigned int pcibios_max_latency = 255;
  /* If set, the PCIe ARI capability will not be used. */
  static bool 

[RFC PATCH v4 4/7] PCI: Modify resource_alignment to support multiple devices

2016-03-06 Thread Yongji Xie
When vfio passthrough a PCI device of which MMIO BARs
are smaller than PAGE_SIZE, guest will not handle the
mmio accesses to the BARs which leads to mmio emulations
in host.

This is because vfio will not allow to passthrough one
BAR's mmio page which may be shared with other BARs.

To solve this performance issue, this patch modifies
resource_alignment to support syntax where multiple
devices get the same alignment. So we can use something
like "pci=resource_alignment=*:*:*.*:noresize" to
enforce the alignment of all MMIO BARs to be at least
PAGE_SIZE so that one BAR's mmio page would not be
shared with other BARs.

Signed-off-by: Yongji Xie 
---
 Documentation/kernel-parameters.txt |2 +
 drivers/pci/pci.c   |   90 ++-
 include/linux/pci.h |4 ++
 3 files changed, 85 insertions(+), 11 deletions(-)

diff --git a/Documentation/kernel-parameters.txt 
b/Documentation/kernel-parameters.txt
index 8028631..74b38ab 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2918,6 +2918,8 @@ bytes respectively. Such letter suffixes can also be 
entirely omitted.
aligned memory resources.
If  is not specified,
PAGE_SIZE is used as alignment.
+   , ,  and  can be set to
+   "*" which means match all values.
PCI-PCI bridge can be specified, if resource
windows need to be expanded.
noresize: Don't change the resources' sizes when
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 760cce5..44ab59f 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -102,6 +102,8 @@ unsigned int pcibios_max_latency = 255;
 /* If set, the PCIe ARI capability will not be used. */
 static bool pcie_ari_disabled;
 
+bool pci_resources_page_aligned;
+
 /**
  * pci_bus_max_busnr - returns maximum PCI bus number of given bus' children
  * @bus: pointer to PCI bus structure to search
@@ -4604,6 +4606,7 @@ static resource_size_t 
pci_specified_resource_alignment(struct pci_dev *dev,
int seg, bus, slot, func, align_order, count;
resource_size_t align = 0;
char *p;
+   bool invalid = false;
 
spin_lock(&resource_alignment_lock);
p = resource_alignment_param;
@@ -4615,16 +4618,49 @@ static resource_size_t 
pci_specified_resource_alignment(struct pci_dev *dev,
} else {
align_order = -1;
}
-   if (sscanf(p, "%x:%x:%x.%x%n",
-   &seg, &bus, &slot, &func, &count) != 4) {
+   if (p[0] == '*' && p[1] == ':') {
+   seg = -1;
+   count = 1;
+   } else if (sscanf(p, "%x%n", &seg, &count) != 1 ||
+   p[count] != ':') {
+   invalid = true;
+   break;
+   }
+   p += count + 1;
+   if (*p == '*') {
+   bus = -1;
+   count = 1;
+   } else if (sscanf(p, "%x%n", &bus, &count) != 1) {
+   invalid = true;
+   break;
+   }
+   p += count;
+   if (*p == '.') {
+   slot = bus;
+   bus = seg;
seg = 0;
-   if (sscanf(p, "%x:%x.%x%n",
-   &bus, &slot, &func, &count) != 3) {
-   /* Invalid format */
-   printk(KERN_ERR "PCI: Can't parse 
resource_alignment parameter: %s\n",
-   p);
+   p++;
+   } else if (*p == ':') {
+   p++;
+   if (p[0] == '*' && p[1] == '.') {
+   slot = -1;
+   count = 1;
+   } else if (sscanf(p, "%x%n", &slot, &count) != 1 ||
+   p[count] != '.') {
+   invalid = true;
break;
}
+   p += count + 1;
+   } else {
+   invalid = true;
+   break;
+   }
+   if (*p == '*') {
+   func = -1;
+   count = 1;
+   } else if (sscanf(p, "%x%n", &func, &count) != 1) {
+   invalid = true;
+   break;
}
p += count;
if (!strncmp(p, ":noresize", 9)) {
@@ -4632,23 +4668,34 @@ static resource_size_t 
pci_specified_resource_alignment(struct pci_dev *dev,
p += 9;