On Tuesday, 16 January 2018 3:15:05 PM AEDT Alistair Popple wrote:
> Thanks Balbir, one question below. I have no way of testing this at present,
> but it looks ok to me. Thanks!

The comments below are more about future optimisations for once we can test. So in the meantime:

Acked-by: Alistair Popple <alist...@popple.id.au>

> On Thursday, 14 December 2017 12:10:08 PM AEDT Balbir Singh wrote:
> > While reviewing the code I found that the flush assumes all
> > pages are of mmu_linear_psize, which is not correct. The patch
> > uses find_linux_pte to find the right page size and uses that
> > for launching the ATSD invalidation. A new helper is added
> > to abstract the invalidation from the various notifiers.
> > 
> > The patch also cleans up a bit by removing AP size from PID
> > flushes.
> > 
> > Signed-off-by: Balbir Singh <bsinghar...@gmail.com>
> > ---
> > 
> > Changelog:
> >   Refactor the handling of return values from find_linux_pte
> >   as suggested by Aneesh
> > 
> >  arch/powerpc/platforms/powernv/npu-dma.c | 55 
> > ++++++++++++++++++++++----------
> >  1 file changed, 38 insertions(+), 17 deletions(-)
> > 
> > diff --git a/arch/powerpc/platforms/powernv/npu-dma.c 
> > b/arch/powerpc/platforms/powernv/npu-dma.c
> > index f6cbc1a71472..e8caa3e2019d 100644
> > --- a/arch/powerpc/platforms/powernv/npu-dma.c
> > +++ b/arch/powerpc/platforms/powernv/npu-dma.c
> > @@ -17,6 +17,7 @@
> >  #include <linux/pci.h>
> >  #include <linux/memblock.h>
> >  #include <linux/iommu.h>
> > +#include <linux/huge_mm.h>
> >  
> >  #include <asm/tlb.h>
> >  #include <asm/powernv.h>
> > @@ -27,6 +28,7 @@
> >  #include <asm/pnv-pci.h>
> >  #include <asm/msi_bitmap.h>
> >  #include <asm/opal.h>
> > +#include <asm/pte-walk.h>
> >  
> >  #include "powernv.h"
> >  #include "pci.h"
> > @@ -460,9 +462,6 @@ static int mmio_invalidate_pid(struct npu *npu, 
> > unsigned long pid, bool flush)
> >     /* PRS set to process-scoped */
> >     launch |= PPC_BIT(13);
> >  
> > -   /* AP */
> > -   launch |= (u64) mmu_get_ap(mmu_virtual_psize) << PPC_BITLSHIFT(17);
> >
> >     /* PID */
> >     launch |= pid << PPC_BITLSHIFT(38);
> >  
> > @@ -474,7 +473,8 @@ static int mmio_invalidate_pid(struct npu *npu, 
> > unsigned long pid, bool flush)
> >  }
> >  
> >  static int mmio_invalidate_va(struct npu *npu, unsigned long va,
> > -                   unsigned long pid, bool flush)
> > +                   unsigned long pid, bool flush,
> > +                   unsigned int shift)
> >  {
> >     unsigned long launch;
> >  
> > @@ -485,9 +485,8 @@ static int mmio_invalidate_va(struct npu *npu, unsigned 
> > long va,
> >     launch |= PPC_BIT(13);
> >  
> >     /* AP */
> > -   launch |= (u64) mmu_get_ap(mmu_virtual_psize) << PPC_BITLSHIFT(17);
> > +   launch |= (u64) mmu_get_ap(shift) << PPC_BITLSHIFT(17);
> >  
> > -   /* PID */
> >     launch |= pid << PPC_BITLSHIFT(38);
> >  
> >     /* No flush */
> > @@ -504,7 +503,8 @@ struct mmio_atsd_reg {
> >  };
> >  
> >  static void mmio_invalidate_wait(
> > -   struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS], bool flush)
> > +   struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS], bool flush,
> > +   unsigned int shift)
> >  {
> >     struct npu *npu;
> >     int i, reg;
> > @@ -537,7 +537,8 @@ static void mmio_invalidate_wait(
> >   * the value of va.
> >   */
> >  static void mmio_invalidate(struct npu_context *npu_context, int va,
> > -                   unsigned long address, bool flush)
> > +                   unsigned long address, bool flush,
> > +                   unsigned int shift)
> >  {
> >     int i, j;
> >     struct npu *npu;
> > @@ -572,7 +573,7 @@ static void mmio_invalidate(struct npu_context 
> > *npu_context, int va,
> >                     if (va)
> >                             mmio_atsd_reg[i].reg =
> >                                     mmio_invalidate_va(npu, address, pid,
> > -                                                   flush);
> > +                                                   flush, shift);
> >                     else
> >                             mmio_atsd_reg[i].reg =
> >                                     mmio_invalidate_pid(npu, pid, flush);
> > @@ -585,10 +586,32 @@ static void mmio_invalidate(struct npu_context 
> > *npu_context, int va,
> >             }
> >     }
> >  
> > -   mmio_invalidate_wait(mmio_atsd_reg, flush);
> > +   mmio_invalidate_wait(mmio_atsd_reg, flush, shift);
> >     if (flush)
> >             /* Wait for the flush to complete */
> > -           mmio_invalidate_wait(mmio_atsd_reg, false);
> > +           mmio_invalidate_wait(mmio_atsd_reg, false, shift);
> > +}
> > +
> > +static void pnv_npu2_invalidate_helper(struct npu_context *npu_context,
> > +           struct mm_struct *mm, unsigned long start,
> > +           unsigned long end, bool flush)
> > +{
> > +   unsigned long address;
> > +   unsigned int hshift = 0, shift;
> > +
> > +   address = start;
> > +   do {
> > +           local_irq_disable();
> > +           find_linux_pte(mm->pgd, address, NULL, &hshift);
> > +           if (hshift)
> > +                   shift = hshift;
> > +           else
> > +                   shift = PAGE_SHIFT;
> 
> Looks better, thanks!
> 
> Also in future we might be able to further optimise this, as I don't think the
> shift needs to match the actual page size as we are directly issuing ATSDs to
> the GPU, i.e. we could bump shift to cover the whole of (start, end) or to
> invalidate larger chunks at a time - I don't think we need to limit it to
> PAGE_SHIFT.
> 
> > +           mmio_invalidate(npu_context, address > 0, address, flush,
> > +                           shift);
> 
> If address == 0 we end up invalidating the entire PID rather than just the
> page at address 0. Probably not a big issue though, as I'm guessing we wouldn't
> actually see invalidations for the page@0 very often?
> 
> > +           local_irq_enable();
> > +           address += (1ull << shift);
> > +   } while (address < end);
> >  }
> >  
> >  static void pnv_npu2_mn_release(struct mmu_notifier *mn,
> > @@ -604,7 +627,7 @@ static void pnv_npu2_mn_release(struct mmu_notifier *mn,
> >      * There should be no more translation requests for this PID, but we
> >      * need to ensure any entries for it are removed from the TLB.
> >      */
> > -   mmio_invalidate(npu_context, 0, 0, true);
> > +   pnv_npu2_invalidate_helper(npu_context, mm, 0, PAGE_SIZE, true);
> >  }
> >  
> >  static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn,
> > @@ -614,7 +637,7 @@ static void pnv_npu2_mn_change_pte(struct mmu_notifier 
> > *mn,
> >  {
> >     struct npu_context *npu_context = mn_to_npu_context(mn);
> >  
> > -   mmio_invalidate(npu_context, 1, address, true);
> > +   pnv_npu2_invalidate_helper(npu_context, mm, address, address, true);
> >  }
> >  
> >  static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
> > @@ -622,13 +645,11 @@ static void pnv_npu2_mn_invalidate_range(struct 
> > mmu_notifier *mn,
> >                                     unsigned long start, unsigned long end)
> >  {
> >     struct npu_context *npu_context = mn_to_npu_context(mn);
> > -   unsigned long address;
> >  
> > -   for (address = start; address < end; address += PAGE_SIZE)
> > -           mmio_invalidate(npu_context, 1, address, false);
> > +   pnv_npu2_invalidate_helper(npu_context, mm, start, end, false);
> >  
> >     /* Do the flush only on the final addess == end */
> > -   mmio_invalidate(npu_context, 1, address, true);
> > +   pnv_npu2_invalidate_helper(npu_context, mm, end, end, true);
> >  }
> >  
> >  static const struct mmu_notifier_ops nv_nmmu_notifier_ops = {
> > 
> 
> 


Reply via email to