Re: powernv/npu-dma.c: Add explicit flush when sending an ATSD

2017-06-27 Thread Michael Ellerman
On Tue, 2017-06-20 at 08:37:28 UTC, Alistair Popple wrote:
> NPU2 requires an extra explicit flush to an active GPU PID when sending
> address translation shoot downs (ATSDs) to reliably flush the GPU TLB. This
> patch adds just such a flush at the end of each sequence of ATSDs.
> 
> We can safely use PID 0 which is always reserved and active on the GPU. PID
> 0 is only used for init_mm which will never be a user mm on the GPU. To
> enforce this we add a check in pnv_npu2_init_context() just in case someone
> tries to use PID 0 on the GPU.
> 
> Signed-off-by: Alistair Popple 

Applied to powerpc fixes, thanks.

https://git.kernel.org/powerpc/c/bbd5ff50afffcf4a01d05367524736

cheers


[PATCH] powernv/npu-dma.c: Add explicit flush when sending an ATSD

2017-06-20 Thread Alistair Popple
NPU2 requires an extra explicit flush to an active GPU PID when sending
address translation shoot downs (ATSDs) to reliably flush the GPU TLB. This
patch adds just such a flush at the end of each sequence of ATSDs.

We can safely use PID 0 which is always reserved and active on the GPU. PID
0 is only used for init_mm which will never be a user mm on the GPU. To
enforce this we add a check in pnv_npu2_init_context() just in case someone
tries to use PID 0 on the GPU.

Signed-off-by: Alistair Popple 
---

Michael,

It turns out my assumptions about MMU_NO_CONTEXT were wrong so we have
reverted to using HW context id/PID 0 (init_mm) as that is quite clearly
reserved on hash and radix and it seems unlikely PID 0 would ever be used
for anything else.

That said if you feel strongly it would be easy enough to add functions to
reserve a PID and an exported function for device drivers to call to find
out what the reserved PID is. I was avoiding it because it would be more
invasive adding code and an external API for something that I'm not sure
will ever change, although if it does there is a check in
pnv2_npu2_init_context() to flag it so it won't result in weird bugs.

Anyway let me know which way you would like us to go here and I can update
the patch as required, thanks!

- Alistair

arch/powerpc/platforms/powernv/npu-dma.c | 93 ++--
 1 file changed, 64 insertions(+), 29 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/npu-dma.c 
b/arch/powerpc/platforms/powernv/npu-dma.c
index e6f444b..9468064 100644
--- a/arch/powerpc/platforms/powernv/npu-dma.c
+++ b/arch/powerpc/platforms/powernv/npu-dma.c
@@ -449,7 +449,7 @@ static int mmio_launch_invalidate(struct npu *npu, unsigned 
long launch,
return mmio_atsd_reg;
 }
 
-static int mmio_invalidate_pid(struct npu *npu, unsigned long pid)
+static int mmio_invalidate_pid(struct npu *npu, unsigned long pid, bool flush)
 {
unsigned long launch;
 
@@ -465,12 +465,15 @@ static int mmio_invalidate_pid(struct npu *npu, unsigned 
long pid)
/* PID */
launch |= pid << PPC_BITLSHIFT(38);
 
+   /* No flush */
+   launch |= !flush << PPC_BITLSHIFT(39);
+
/* Invalidating the entire process doesn't use a va */
return mmio_launch_invalidate(npu, launch, 0);
 }
 
 static int mmio_invalidate_va(struct npu *npu, unsigned long va,
-   unsigned long pid)
+   unsigned long pid, bool flush)
 {
unsigned long launch;
 
@@ -486,26 +489,60 @@ static int mmio_invalidate_va(struct npu *npu, unsigned 
long va,
/* PID */
launch |= pid << PPC_BITLSHIFT(38);
 
+   /* No flush */
+   launch |= !flush << PPC_BITLSHIFT(39);
+
return mmio_launch_invalidate(npu, launch, va);
 }
 
 #define mn_to_npu_context(x) container_of(x, struct npu_context, mn)
 
+struct mmio_atsd_reg {
+   struct npu *npu;
+   int reg;
+};
+
+static void mmio_invalidate_wait(
+   struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS], bool flush)
+{
+   struct npu *npu;
+   int i, reg;
+
+   /* Wait for all invalidations to complete */
+   for (i = 0; i <= max_npu2_index; i++) {
+   if (mmio_atsd_reg[i].reg < 0)
+   continue;
+
+   /* Wait for completion */
+   npu = mmio_atsd_reg[i].npu;
+   reg = mmio_atsd_reg[i].reg;
+   while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT))
+   cpu_relax();
+
+   put_mmio_atsd_reg(npu, reg);
+
+   /*
+* The GPU requires two flush ATSDs to ensure all entries have
+* been flushed. We use PID 0 as it will never be used for a
+* process on the GPU.
+*/
+   if (flush)
+   mmio_invalidate_pid(npu, 0, 1);
+   }
+}
+
 /*
  * Invalidate either a single address or an entire PID depending on
  * the value of va.
  */
 static void mmio_invalidate(struct npu_context *npu_context, int va,
-   unsigned long address)
+   unsigned long address, bool flush)
 {
-   int i, j, reg;
+   int i, j;
struct npu *npu;
struct pnv_phb *nphb;
struct pci_dev *npdev;
-   struct {
-   struct npu *npu;
-   int reg;
-   } mmio_atsd_reg[NV_MAX_NPUS];
+   struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS];
unsigned long pid = npu_context->mm->context.id;
 
/*
@@ -525,10 +562,11 @@ static void mmio_invalidate(struct npu_context 
*npu_context, int va,
 
if (va)
mmio_atsd_reg[i].reg =
-   mmio_invalidate_va(npu, address, pid);
+   mmio_invalidate_va(npu, address, pid,
+   flush);