Re: [PATCH v2 02/12] iommu/vt-d: Manage scalalble mode PASID tables

2018-09-06 Thread Lu Baolu

Hi,

On 09/07/2018 07:43 AM, Jacob Pan wrote:

On Thu, 6 Sep 2018 10:46:03 +0800
Lu Baolu  wrote:


@@ -224,7 +271,14 @@ struct pasid_entry
*intel_pasid_get_entry(struct device *dev, int pasid)
*/
   static inline void pasid_clear_entry(struct pasid_entry *pe)
   {
-   WRITE_ONCE(pe->val, 0);
+   WRITE_ONCE(pe->val[0], 0);
+   WRITE_ONCE(pe->val[1], 0);
+   WRITE_ONCE(pe->val[2], 0);
+   WRITE_ONCE(pe->val[3], 0);
+   WRITE_ONCE(pe->val[4], 0);
+   WRITE_ONCE(pe->val[5], 0);
+   WRITE_ONCE(pe->val[6], 0);
+   WRITE_ONCE(pe->val[7], 0);


memset?


The order is important here. Otherwise, the PRESENT bit of this pasid
entry might still set while other fields contains invalid values.


WRITE_ONCE/READ_ONCE will switch to __builtin_memcpy() in if the size
exceeds word size, ie. 64bit in this case. I don;t think compiler will
reorder built-in function. Beside, we only need to clear present and
FDP bit, right?


Clear present and FDP bit is enough for hardare. But from software point
of view, it's better to clear all bits with 0.

Best regards,
Lu Baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 02/12] iommu/vt-d: Manage scalalble mode PASID tables

2018-09-06 Thread Jacob Pan
On Thu, 6 Sep 2018 10:46:03 +0800
Lu Baolu  wrote:

> >> @@ -224,7 +271,14 @@ struct pasid_entry
> >> *intel_pasid_get_entry(struct device *dev, int pasid)
> >>*/
> >>   static inline void pasid_clear_entry(struct pasid_entry *pe)
> >>   {
> >> -  WRITE_ONCE(pe->val, 0);
> >> +  WRITE_ONCE(pe->val[0], 0);
> >> +  WRITE_ONCE(pe->val[1], 0);
> >> +  WRITE_ONCE(pe->val[2], 0);
> >> +  WRITE_ONCE(pe->val[3], 0);
> >> +  WRITE_ONCE(pe->val[4], 0);
> >> +  WRITE_ONCE(pe->val[5], 0);
> >> +  WRITE_ONCE(pe->val[6], 0);
> >> +  WRITE_ONCE(pe->val[7], 0);  
> > 
> > memset?  
> 
> The order is important here. Otherwise, the PRESENT bit of this pasid
> entry might still set while other fields contains invalid values.

WRITE_ONCE/READ_ONCE will switch to __builtin_memcpy() in if the size
exceeds word size, ie. 64bit in this case. I don;t think compiler will
reorder built-in function. Beside, we only need to clear present and
FDP bit, right?

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 02/12] iommu/vt-d: Manage scalalble mode PASID tables

2018-09-05 Thread Lu Baolu

Hi,

On 09/06/2018 10:52 AM, Tian, Kevin wrote:

From: Lu Baolu [mailto:baolu...@linux.intel.com]
Sent: Thursday, September 6, 2018 10:46 AM


[...]

@@ -143,8 +142,9 @@ int intel_pasid_alloc_table(struct device *dev)
return -ENOMEM;
INIT_LIST_HEAD(_table->dev);

-   size = sizeof(struct pasid_entry);
+   size = sizeof(struct pasid_dir_entry);
count = min_t(int, pci_max_pasids(to_pci_dev(dev)),
intel_pasid_max_id);
+   count >>= PASID_PDE_SHIFT;
order = get_order(size * count);
pages = alloc_pages_node(info->iommu->node,
 GFP_ATOMIC | __GFP_ZERO,
@@ -154,7 +154,7 @@ int intel_pasid_alloc_table(struct device *dev)

pasid_table->table = page_address(pages);
pasid_table->order = order;
-   pasid_table->max_pasid = count;
+   pasid_table->max_pasid = count << PASID_PDE_SHIFT;


are you sure of that count is PDE_SHIFT aligned? otherwise >>
then << would lose some bits. If sure, then better add some check.


I am making the max_pasid PDE_SHIFT aligned as the result of shift
operations.



earlier:

count = min_t(int, pci_max_pasids(to_pci_dev(dev)),
intel_pasid_max_id);


so you decided to truncate count to be PDE_SHIFT aligned. Is PASID
value user configurable? if not, then it's fine.


Here @count is the count of PASID directory entries, so it must be
truncated from the original max_pasid. PASID value is not configurable
anyway.







   attach_out:
device_attach_pasid_table(info, pasid_table);
@@ -162,14 +162,33 @@ int intel_pasid_alloc_table(struct device *dev)
return 0;
   }

+/* Get PRESENT bit of a PASID directory entry. */
+static inline bool
+pasid_pde_is_present(struct pasid_dir_entry *pde)
+{
+   return READ_ONCE(pde->val) & PASID_PTE_PRESENT;


curious why adding READ_ONCE specifically for PASID structure,
but not used for any other existing vtd structures? Is it to address
some specific requirement on PASID structure as defined in spec?


READ/WRITE_ONCE are used in pasid entry read/write to prevent the
compiler from merging, refetching or reordering successive instances of
read/write.



that's fine. I'm just curious why this is the first user of such macros
in intel-iommu driver. Even before with ecs we have PASID table too.



Best regards,
Lu Baolu
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


RE: [PATCH v2 02/12] iommu/vt-d: Manage scalalble mode PASID tables

2018-09-05 Thread Tian, Kevin
> From: Lu Baolu [mailto:baolu...@linux.intel.com]
> Sent: Thursday, September 6, 2018 10:46 AM
>
[...] 
> >> @@ -143,8 +142,9 @@ int intel_pasid_alloc_table(struct device *dev)
> >>return -ENOMEM;
> >>INIT_LIST_HEAD(_table->dev);
> >>
> >> -  size = sizeof(struct pasid_entry);
> >> +  size = sizeof(struct pasid_dir_entry);
> >>count = min_t(int, pci_max_pasids(to_pci_dev(dev)),
> >> intel_pasid_max_id);
> >> +  count >>= PASID_PDE_SHIFT;
> >>order = get_order(size * count);
> >>pages = alloc_pages_node(info->iommu->node,
> >> GFP_ATOMIC | __GFP_ZERO,
> >> @@ -154,7 +154,7 @@ int intel_pasid_alloc_table(struct device *dev)
> >>
> >>pasid_table->table = page_address(pages);
> >>pasid_table->order = order;
> >> -  pasid_table->max_pasid = count;
> >> +  pasid_table->max_pasid = count << PASID_PDE_SHIFT;
> >
> > are you sure of that count is PDE_SHIFT aligned? otherwise >>
> > then << would lose some bits. If sure, then better add some check.
> 
> I am making the max_pasid PDE_SHIFT aligned as the result of shift
> operations.
> 

earlier:
> >>count = min_t(int, pci_max_pasids(to_pci_dev(dev)),
> >> intel_pasid_max_id);

so you decided to truncate count to be PDE_SHIFT aligned. Is PASID
value user configurable? if not, then it's fine.

> >
> >>
> >>   attach_out:
> >>device_attach_pasid_table(info, pasid_table);
> >> @@ -162,14 +162,33 @@ int intel_pasid_alloc_table(struct device *dev)
> >>return 0;
> >>   }
> >>
> >> +/* Get PRESENT bit of a PASID directory entry. */
> >> +static inline bool
> >> +pasid_pde_is_present(struct pasid_dir_entry *pde)
> >> +{
> >> +  return READ_ONCE(pde->val) & PASID_PTE_PRESENT;
> >
> > curious why adding READ_ONCE specifically for PASID structure,
> > but not used for any other existing vtd structures? Is it to address
> > some specific requirement on PASID structure as defined in spec?
> 
> READ/WRITE_ONCE are used in pasid entry read/write to prevent the
> compiler from merging, refetching or reordering successive instances of
> read/write.
> 

that's fine. I'm just curious why this is the first user of such macros
in intel-iommu driver. Even before with ecs we have PASID table too.

Thanks
Kevin
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 02/12] iommu/vt-d: Manage scalalble mode PASID tables

2018-09-05 Thread Lu Baolu

Hi,

On 09/06/2018 10:14 AM, Tian, Kevin wrote:

From: Lu Baolu [mailto:baolu...@linux.intel.com]
Sent: Thursday, August 30, 2018 9:35 AM

In scalable mode, pasid structure is a two level table with
a pasid directory table and a pasid table. Any pasid entry
can be identified by a pasid value in below way.

1
9   6 5  0
 .---.---.
 |  PASID|   |
 '---'---'.-.
  ||  | |
  ||  | |
  ||  | |
  | .---.  |  .-.
  | |   |  |->| PASID Entry |
  | |   |  |  '-'
  | |   |  |Plus  | |
  | .---.  |  | |
  |>| DIR Entry |>| |
  | '---' '-'
.-.  |Plus |   |
| Context |  | |   |
|  Entry  |--->|   |
'-''---'

This changes the pasid table APIs to support scalable mode
PASID directory and PASID table. It also adds a helper to
get the PASID table entry according to the pasid value.

Cc: Ashok Raj 
Cc: Jacob Pan 
Cc: Kevin Tian 
Cc: Liu Yi L 
Signed-off-by: Sanjay Kumar 
Signed-off-by: Lu Baolu 
Reviewed-by: Ashok Raj 
---
  drivers/iommu/intel-iommu.c |  2 +-
  drivers/iommu/intel-pasid.c | 72 
-
  drivers/iommu/intel-pasid.h | 10 +-
  drivers/iommu/intel-svm.c   |  6 +---
  4 files changed, 74 insertions(+), 16 deletions(-)

diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 5845edf4dcf9..b0da4f765274 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -2507,7 +2507,7 @@ static struct dmar_domain
*dmar_insert_one_dev_info(struct intel_iommu *iommu,
if (dev)
dev->archdata.iommu = info;

-   if (dev && dev_is_pci(dev) && info->pasid_supported) {
+   if (dev && dev_is_pci(dev) && sm_supported(iommu)) {


worthy of a comment here that PASID table now is mandatory in
scalable mode, instead of optional for 1st level usage before.


Fair enough. Will add in the next version.




ret = intel_pasid_alloc_table(dev);
if (ret) {
__dmar_remove_one_dev_info(info);
diff --git a/drivers/iommu/intel-pasid.c b/drivers/iommu/intel-pasid.c
index fe95c9bd4d33..d6e90cd5b062 100644
--- a/drivers/iommu/intel-pasid.c
+++ b/drivers/iommu/intel-pasid.c
@@ -127,8 +127,7 @@ int intel_pasid_alloc_table(struct device *dev)
int ret, order;

info = dev->archdata.iommu;
-   if (WARN_ON(!info || !dev_is_pci(dev) ||
-   !info->pasid_supported || info->pasid_table))
+   if (WARN_ON(!info || !dev_is_pci(dev) || info->pasid_table))
return -EINVAL;


following same logic should you check sm_supported here?


If not sm_supported, info->pasid_table should be NULL. Checking
info->pasid_table is better since even sm_supported, the pasid
table pointer could still possible to be empty.





/* DMA alias device already has a pasid table, use it: */
@@ -143,8 +142,9 @@ int intel_pasid_alloc_table(struct device *dev)
return -ENOMEM;
INIT_LIST_HEAD(_table->dev);

-   size = sizeof(struct pasid_entry);
+   size = sizeof(struct pasid_dir_entry);
count = min_t(int, pci_max_pasids(to_pci_dev(dev)),
intel_pasid_max_id);
+   count >>= PASID_PDE_SHIFT;
order = get_order(size * count);
pages = alloc_pages_node(info->iommu->node,
 GFP_ATOMIC | __GFP_ZERO,
@@ -154,7 +154,7 @@ int intel_pasid_alloc_table(struct device *dev)

pasid_table->table = page_address(pages);
pasid_table->order = order;
-   pasid_table->max_pasid = count;
+   pasid_table->max_pasid = count << PASID_PDE_SHIFT;


are you sure of that count is PDE_SHIFT aligned? otherwise >>
then << would lose some bits. If sure, then better add some check.


I am making the max_pasid PDE_SHIFT aligned as the result of shift
operations.





  attach_out:
device_attach_pasid_table(info, pasid_table);
@@ -162,14 +162,33 @@ int intel_pasid_alloc_table(struct device *dev)
return 0;
  }

+/* Get PRESENT bit of a PASID directory entry. */
+static inline bool
+pasid_pde_is_present(struct pasid_dir_entry *pde)
+{
+   return READ_ONCE(pde->val) & PASID_PTE_PRESENT;


curious why adding READ_ONCE specifically for PASID structure,
but not used for any other existing vtd structures? Is it to address
some specific requirement on PASID structure as defined in spec?


READ/WRITE_ONCE are used in pasid entry read/write to prevent the
compiler from merging, refetching or 

RE: [PATCH v2 02/12] iommu/vt-d: Manage scalalble mode PASID tables

2018-09-05 Thread Tian, Kevin
> From: Lu Baolu [mailto:baolu...@linux.intel.com]
> Sent: Thursday, August 30, 2018 9:35 AM
> 
> In scalable mode, pasid structure is a two level table with
> a pasid directory table and a pasid table. Any pasid entry
> can be identified by a pasid value in below way.
> 
>1
>9   6 5  0
> .---.---.
> |  PASID|   |
> '---'---'.-.
>  ||  | |
>  ||  | |
>  ||  | |
>  | .---.  |  .-.
>  | |   |  |->| PASID Entry |
>  | |   |  |  '-'
>  | |   |  |Plus  | |
>  | .---.  |  | |
>  |>| DIR Entry |>| |
>  | '---' '-'
> .-.  |Plus |   |
> | Context |  | |   |
> |  Entry  |--->|   |
> '-''---'
> 
> This changes the pasid table APIs to support scalable mode
> PASID directory and PASID table. It also adds a helper to
> get the PASID table entry according to the pasid value.
> 
> Cc: Ashok Raj 
> Cc: Jacob Pan 
> Cc: Kevin Tian 
> Cc: Liu Yi L 
> Signed-off-by: Sanjay Kumar 
> Signed-off-by: Lu Baolu 
> Reviewed-by: Ashok Raj 
> ---
>  drivers/iommu/intel-iommu.c |  2 +-
>  drivers/iommu/intel-pasid.c | 72 
> -
>  drivers/iommu/intel-pasid.h | 10 +-
>  drivers/iommu/intel-svm.c   |  6 +---
>  4 files changed, 74 insertions(+), 16 deletions(-)
> 
> diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
> index 5845edf4dcf9..b0da4f765274 100644
> --- a/drivers/iommu/intel-iommu.c
> +++ b/drivers/iommu/intel-iommu.c
> @@ -2507,7 +2507,7 @@ static struct dmar_domain
> *dmar_insert_one_dev_info(struct intel_iommu *iommu,
>   if (dev)
>   dev->archdata.iommu = info;
> 
> - if (dev && dev_is_pci(dev) && info->pasid_supported) {
> + if (dev && dev_is_pci(dev) && sm_supported(iommu)) {

worthy of a comment here that PASID table now is mandatory in
scalable mode, instead of optional for 1st level usage before.

>   ret = intel_pasid_alloc_table(dev);
>   if (ret) {
>   __dmar_remove_one_dev_info(info);
> diff --git a/drivers/iommu/intel-pasid.c b/drivers/iommu/intel-pasid.c
> index fe95c9bd4d33..d6e90cd5b062 100644
> --- a/drivers/iommu/intel-pasid.c
> +++ b/drivers/iommu/intel-pasid.c
> @@ -127,8 +127,7 @@ int intel_pasid_alloc_table(struct device *dev)
>   int ret, order;
> 
>   info = dev->archdata.iommu;
> - if (WARN_ON(!info || !dev_is_pci(dev) ||
> - !info->pasid_supported || info->pasid_table))
> + if (WARN_ON(!info || !dev_is_pci(dev) || info->pasid_table))
>   return -EINVAL;

following same logic should you check sm_supported here?

> 
>   /* DMA alias device already has a pasid table, use it: */
> @@ -143,8 +142,9 @@ int intel_pasid_alloc_table(struct device *dev)
>   return -ENOMEM;
>   INIT_LIST_HEAD(_table->dev);
> 
> - size = sizeof(struct pasid_entry);
> + size = sizeof(struct pasid_dir_entry);
>   count = min_t(int, pci_max_pasids(to_pci_dev(dev)),
> intel_pasid_max_id);
> + count >>= PASID_PDE_SHIFT;
>   order = get_order(size * count);
>   pages = alloc_pages_node(info->iommu->node,
>GFP_ATOMIC | __GFP_ZERO,
> @@ -154,7 +154,7 @@ int intel_pasid_alloc_table(struct device *dev)
> 
>   pasid_table->table = page_address(pages);
>   pasid_table->order = order;
> - pasid_table->max_pasid = count;
> + pasid_table->max_pasid = count << PASID_PDE_SHIFT;

are you sure of that count is PDE_SHIFT aligned? otherwise >>
then << would lose some bits. If sure, then better add some check.

> 
>  attach_out:
>   device_attach_pasid_table(info, pasid_table);
> @@ -162,14 +162,33 @@ int intel_pasid_alloc_table(struct device *dev)
>   return 0;
>  }
> 
> +/* Get PRESENT bit of a PASID directory entry. */
> +static inline bool
> +pasid_pde_is_present(struct pasid_dir_entry *pde)
> +{
> + return READ_ONCE(pde->val) & PASID_PTE_PRESENT;

curious why adding READ_ONCE specifically for PASID structure,
but not used for any other existing vtd structures? Is it to address
some specific requirement on PASID structure as defined in spec?

> +}
> +
> +/* Get PASID table from a PASID directory entry. */
> +static inline struct pasid_entry *
> +get_pasid_table_from_pde(struct pasid_dir_entry *pde)
> +{
> + if (!pasid_pde_is_present(pde))
> + return NULL;
> +
> + return phys_to_virt(READ_ONCE(pde->val) & PDE_PFN_MASK);
> +}
> +
>  void intel_pasid_free_table(struct 

[PATCH v2 02/12] iommu/vt-d: Manage scalalble mode PASID tables

2018-08-29 Thread Lu Baolu
In scalable mode, pasid structure is a two level table with
a pasid directory table and a pasid table. Any pasid entry
can be identified by a pasid value in below way.

   1
   9   6 5  0
.---.---.
|  PASID|   |
'---'---'.-.
 ||  | |
 ||  | |
 ||  | |
 | .---.  |  .-.
 | |   |  |->| PASID Entry |
 | |   |  |  '-'
 | |   |  |Plus  | |
 | .---.  |  | |
 |>| DIR Entry |>| |
 | '---' '-'
.-.  |Plus |   |
| Context |  | |   |
|  Entry  |--->|   |
'-''---'

This changes the pasid table APIs to support scalable mode
PASID directory and PASID table. It also adds a helper to
get the PASID table entry according to the pasid value.

Cc: Ashok Raj 
Cc: Jacob Pan 
Cc: Kevin Tian 
Cc: Liu Yi L 
Signed-off-by: Sanjay Kumar 
Signed-off-by: Lu Baolu 
Reviewed-by: Ashok Raj 
---
 drivers/iommu/intel-iommu.c |  2 +-
 drivers/iommu/intel-pasid.c | 72 -
 drivers/iommu/intel-pasid.h | 10 +-
 drivers/iommu/intel-svm.c   |  6 +---
 4 files changed, 74 insertions(+), 16 deletions(-)

diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 5845edf4dcf9..b0da4f765274 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -2507,7 +2507,7 @@ static struct dmar_domain 
*dmar_insert_one_dev_info(struct intel_iommu *iommu,
if (dev)
dev->archdata.iommu = info;
 
-   if (dev && dev_is_pci(dev) && info->pasid_supported) {
+   if (dev && dev_is_pci(dev) && sm_supported(iommu)) {
ret = intel_pasid_alloc_table(dev);
if (ret) {
__dmar_remove_one_dev_info(info);
diff --git a/drivers/iommu/intel-pasid.c b/drivers/iommu/intel-pasid.c
index fe95c9bd4d33..d6e90cd5b062 100644
--- a/drivers/iommu/intel-pasid.c
+++ b/drivers/iommu/intel-pasid.c
@@ -127,8 +127,7 @@ int intel_pasid_alloc_table(struct device *dev)
int ret, order;
 
info = dev->archdata.iommu;
-   if (WARN_ON(!info || !dev_is_pci(dev) ||
-   !info->pasid_supported || info->pasid_table))
+   if (WARN_ON(!info || !dev_is_pci(dev) || info->pasid_table))
return -EINVAL;
 
/* DMA alias device already has a pasid table, use it: */
@@ -143,8 +142,9 @@ int intel_pasid_alloc_table(struct device *dev)
return -ENOMEM;
INIT_LIST_HEAD(_table->dev);
 
-   size = sizeof(struct pasid_entry);
+   size = sizeof(struct pasid_dir_entry);
count = min_t(int, pci_max_pasids(to_pci_dev(dev)), intel_pasid_max_id);
+   count >>= PASID_PDE_SHIFT;
order = get_order(size * count);
pages = alloc_pages_node(info->iommu->node,
 GFP_ATOMIC | __GFP_ZERO,
@@ -154,7 +154,7 @@ int intel_pasid_alloc_table(struct device *dev)
 
pasid_table->table = page_address(pages);
pasid_table->order = order;
-   pasid_table->max_pasid = count;
+   pasid_table->max_pasid = count << PASID_PDE_SHIFT;
 
 attach_out:
device_attach_pasid_table(info, pasid_table);
@@ -162,14 +162,33 @@ int intel_pasid_alloc_table(struct device *dev)
return 0;
 }
 
+/* Get PRESENT bit of a PASID directory entry. */
+static inline bool
+pasid_pde_is_present(struct pasid_dir_entry *pde)
+{
+   return READ_ONCE(pde->val) & PASID_PTE_PRESENT;
+}
+
+/* Get PASID table from a PASID directory entry. */
+static inline struct pasid_entry *
+get_pasid_table_from_pde(struct pasid_dir_entry *pde)
+{
+   if (!pasid_pde_is_present(pde))
+   return NULL;
+
+   return phys_to_virt(READ_ONCE(pde->val) & PDE_PFN_MASK);
+}
+
 void intel_pasid_free_table(struct device *dev)
 {
struct device_domain_info *info;
struct pasid_table *pasid_table;
+   struct pasid_dir_entry *dir;
+   struct pasid_entry *table;
+   int i, max_pde;
 
info = dev->archdata.iommu;
-   if (!info || !dev_is_pci(dev) ||
-   !info->pasid_supported || !info->pasid_table)
+   if (!info || !dev_is_pci(dev) || !info->pasid_table)
return;
 
pasid_table = info->pasid_table;
@@ -178,6 +197,14 @@ void intel_pasid_free_table(struct device *dev)
if (!list_empty(_table->dev))
return;
 
+   /* Free scalable mode PASID directory tables: */
+   dir = pasid_table->table;
+   max_pde = pasid_table->max_pasid >> PASID_PDE_SHIFT;
+   for (i = 0; i <