[PATCH v4 16/28] powerpc/iommu/powernv: Release replaced TCE

2015-02-16 Thread Alexey Kardashevskiy
At the moment writing new TCE value to the IOMMU table fails with EBUSY
if there is a valid entry already. However PAPR specification allows
the guest to write new TCE value without clearing it first.

Another problem this patch is addressing is the use of pool locks for
external IOMMU users such as VFIO. The pool locks are to protect
DMA page allocator rather than entries and since the host kernel does
not control what pages are in use, there is no point in pool locks and
exchange()+put_page(oldtce) is sufficient to avoid possible races.

This adds an exchange() callback to iommu_table_ops which does the same
thing as set() plus it returns replaced TCE(s) so the caller can release
the pages afterwards.

The returned old TCE value is a virtual address as the new TCE value.
This is different from tce_clear() which returns a physical address.

This implements exchange() for IODA2 only. This adds a requirement
for a platform to have exchange() implemented so from now on IODA2 is
the only supported PHB for VFIO-SPAPR.

This replaces iommu_tce_build() and iommu_clear_tce() with
a single iommu_tce_xchg().

Signed-off-by: Alexey Kardashevskiy 
---
 arch/powerpc/include/asm/iommu.h  | 13 +---
 arch/powerpc/kernel/iommu.c   | 52 ++-
 arch/powerpc/platforms/powernv/pci-ioda.c | 16 ++
 arch/powerpc/platforms/powernv/pci.c  | 22 +
 arch/powerpc/platforms/powernv/pci.h  |  4 +++
 drivers/vfio/vfio_iommu_spapr_tce.c   | 17 +++---
 6 files changed, 87 insertions(+), 37 deletions(-)

diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
index ba16aa0..bf26d47 100644
--- a/arch/powerpc/include/asm/iommu.h
+++ b/arch/powerpc/include/asm/iommu.h
@@ -49,6 +49,12 @@ struct iommu_table_ops {
unsigned long uaddr,
enum dma_data_direction direction,
struct dma_attrs *attrs);
+   int (*exchange)(struct iommu_table *tbl,
+   long index, long npages,
+   unsigned long uaddr,
+   unsigned long *old_tces,
+   enum dma_data_direction direction,
+   struct dma_attrs *attrs);
void (*clear)(struct iommu_table *tbl,
long index, long npages);
unsigned long (*get)(struct iommu_table *tbl, long index);
@@ -225,10 +231,9 @@ extern int iommu_tce_clear_param_check(struct iommu_table 
*tbl,
unsigned long npages);
 extern int iommu_tce_put_param_check(struct iommu_table *tbl,
unsigned long ioba, unsigned long tce);
-extern int iommu_tce_build(struct iommu_table *tbl, unsigned long entry,
-   unsigned long hwaddr, enum dma_data_direction direction);
-extern unsigned long iommu_clear_tce(struct iommu_table *tbl,
-   unsigned long entry);
+extern long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry,
+   unsigned long hwaddr, unsigned long *oldtce,
+   enum dma_data_direction direction);
 
 extern void iommu_flush_tce(struct iommu_table *tbl);
 extern int iommu_take_ownership(struct powerpc_iommu *iommu);
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index 9d06425..e4b89bf 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -974,44 +974,30 @@ int iommu_tce_put_param_check(struct iommu_table *tbl,
 }
 EXPORT_SYMBOL_GPL(iommu_tce_put_param_check);
 
-unsigned long iommu_clear_tce(struct iommu_table *tbl, unsigned long entry)
+static void iommu_tce_mk_dirty(unsigned long tce)
 {
-   unsigned long oldtce;
-   struct iommu_pool *pool = get_pool(tbl, entry);
+   if (tce & TCE_PCI_WRITE) {
+   struct page *pg = pfn_to_page(__pa(tce) >> PAGE_SHIFT);
 
-   spin_lock(&(pool->lock));
-
-   oldtce = tbl->it_ops->get(tbl, entry);
-   if (oldtce & (TCE_PCI_WRITE | TCE_PCI_READ))
-   tbl->it_ops->clear(tbl, entry, 1);
-   else
-   oldtce = 0;
-
-   spin_unlock(&(pool->lock));
-
-   return oldtce;
+   SetPageDirty(pg);
+   }
 }
-EXPORT_SYMBOL_GPL(iommu_clear_tce);
 
 /*
  * hwaddr is a kernel virtual address here (0xc... bazillion),
  * tce_build converts it to a physical address.
  */
-int iommu_tce_build(struct iommu_table *tbl, unsigned long entry,
-   unsigned long hwaddr, enum dma_data_direction direction)
+long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry,
+   unsigned long hwaddr, unsigned long *oldtce,
+   enum dma_data_direction direction)
 {
-   int ret = -EBUSY;
-   unsigned long oldtce;
-   struct iommu_pool *pool = get_pool(tbl, entry);
+   long ret;
 
-   spin_lock(&(pool->lock));
+   ret = tbl->it_ops->exchange(tbl, entry, 1, hwaddr, oldtce,
+   direction, NULL);
 
-   oldtce = tbl->it_ops->get(tbl, 

[PATCH v4 16/28] powerpc/iommu/powernv: Release replaced TCE

2015-02-16 Thread Alexey Kardashevskiy
At the moment writing new TCE value to the IOMMU table fails with EBUSY
if there is a valid entry already. However PAPR specification allows
the guest to write new TCE value without clearing it first.

Another problem this patch is addressing is the use of pool locks for
external IOMMU users such as VFIO. The pool locks are to protect
DMA page allocator rather than entries and since the host kernel does
not control what pages are in use, there is no point in pool locks and
exchange()+put_page(oldtce) is sufficient to avoid possible races.

This adds an exchange() callback to iommu_table_ops which does the same
thing as set() plus it returns replaced TCE(s) so the caller can release
the pages afterwards.

The returned old TCE value is a virtual address as the new TCE value.
This is different from tce_clear() which returns a physical address.

This implements exchange() for IODA2 only. This adds a requirement
for a platform to have exchange() implemented so from now on IODA2 is
the only supported PHB for VFIO-SPAPR.

This replaces iommu_tce_build() and iommu_clear_tce() with
a single iommu_tce_xchg().

Signed-off-by: Alexey Kardashevskiy a...@ozlabs.ru
---
 arch/powerpc/include/asm/iommu.h  | 13 +---
 arch/powerpc/kernel/iommu.c   | 52 ++-
 arch/powerpc/platforms/powernv/pci-ioda.c | 16 ++
 arch/powerpc/platforms/powernv/pci.c  | 22 +
 arch/powerpc/platforms/powernv/pci.h  |  4 +++
 drivers/vfio/vfio_iommu_spapr_tce.c   | 17 +++---
 6 files changed, 87 insertions(+), 37 deletions(-)

diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
index ba16aa0..bf26d47 100644
--- a/arch/powerpc/include/asm/iommu.h
+++ b/arch/powerpc/include/asm/iommu.h
@@ -49,6 +49,12 @@ struct iommu_table_ops {
unsigned long uaddr,
enum dma_data_direction direction,
struct dma_attrs *attrs);
+   int (*exchange)(struct iommu_table *tbl,
+   long index, long npages,
+   unsigned long uaddr,
+   unsigned long *old_tces,
+   enum dma_data_direction direction,
+   struct dma_attrs *attrs);
void (*clear)(struct iommu_table *tbl,
long index, long npages);
unsigned long (*get)(struct iommu_table *tbl, long index);
@@ -225,10 +231,9 @@ extern int iommu_tce_clear_param_check(struct iommu_table 
*tbl,
unsigned long npages);
 extern int iommu_tce_put_param_check(struct iommu_table *tbl,
unsigned long ioba, unsigned long tce);
-extern int iommu_tce_build(struct iommu_table *tbl, unsigned long entry,
-   unsigned long hwaddr, enum dma_data_direction direction);
-extern unsigned long iommu_clear_tce(struct iommu_table *tbl,
-   unsigned long entry);
+extern long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry,
+   unsigned long hwaddr, unsigned long *oldtce,
+   enum dma_data_direction direction);
 
 extern void iommu_flush_tce(struct iommu_table *tbl);
 extern int iommu_take_ownership(struct powerpc_iommu *iommu);
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index 9d06425..e4b89bf 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -974,44 +974,30 @@ int iommu_tce_put_param_check(struct iommu_table *tbl,
 }
 EXPORT_SYMBOL_GPL(iommu_tce_put_param_check);
 
-unsigned long iommu_clear_tce(struct iommu_table *tbl, unsigned long entry)
+static void iommu_tce_mk_dirty(unsigned long tce)
 {
-   unsigned long oldtce;
-   struct iommu_pool *pool = get_pool(tbl, entry);
+   if (tce  TCE_PCI_WRITE) {
+   struct page *pg = pfn_to_page(__pa(tce)  PAGE_SHIFT);
 
-   spin_lock((pool-lock));
-
-   oldtce = tbl-it_ops-get(tbl, entry);
-   if (oldtce  (TCE_PCI_WRITE | TCE_PCI_READ))
-   tbl-it_ops-clear(tbl, entry, 1);
-   else
-   oldtce = 0;
-
-   spin_unlock((pool-lock));
-
-   return oldtce;
+   SetPageDirty(pg);
+   }
 }
-EXPORT_SYMBOL_GPL(iommu_clear_tce);
 
 /*
  * hwaddr is a kernel virtual address here (0xc... bazillion),
  * tce_build converts it to a physical address.
  */
-int iommu_tce_build(struct iommu_table *tbl, unsigned long entry,
-   unsigned long hwaddr, enum dma_data_direction direction)
+long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry,
+   unsigned long hwaddr, unsigned long *oldtce,
+   enum dma_data_direction direction)
 {
-   int ret = -EBUSY;
-   unsigned long oldtce;
-   struct iommu_pool *pool = get_pool(tbl, entry);
+   long ret;
 
-   spin_lock((pool-lock));
+   ret = tbl-it_ops-exchange(tbl, entry, 1, hwaddr, oldtce,
+   direction, NULL);
 
-   oldtce = tbl-it_ops-get(tbl, entry);