As far as I can tell, the whole series is a small cleanup and big 
refactor to enable CPU clearing of PTs without a lot of ugliness or code 
duplication. It looks good to me. I haven't reviewed all the moved SDMA 
update code to make sure it all works correctly, but at least the 
prepare and commit functions look sane to me.

For this patch I have a suggestion (inline) to remove params->ib, which 
seems redundant with params->job. That would also ripple through the 
remaining patches.

Other than that, the series is Reviewed-by: Felix Kuehling 
<felix.kuehl...@amd.com>

[+Kent], look out for this patch series in an upcoming merge to 
amd-kfd-staging. I don't think it'll cause conflicts, but it has a risk of 
regressions (like all big amdgpu_vm changes IME).

Regards,
   Felix

On 3/19/2019 8:44 AM, Christian König wrote:
> Separate out all functions for SDMA and CPU based page table
> updates into separate backends.
>
> This way we can keep most of the complexity of those from the
> core VM code.
>
> Signed-off-by: Christian König <christian.koe...@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/Makefile         |   3 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c      |   7 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h      |  30 ++-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c  | 116 +++++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c | 248 ++++++++++++++++++++
>   5 files changed, 401 insertions(+), 3 deletions(-)
>   create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
>   create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
[snip]
> +/**
> + * amdgpu_vm_sdma_prepare - prepare SDMA command submission
> + *
> + * @p: see amdgpu_vm_update_params definition
> + * @owner: owner we need to sync to
> + * @exclusive: exclusive move fence we need to sync to
> + *
> + * Returns:
> + * Negativ errno, 0 for success.
> + */
> +static int amdgpu_vm_sdma_prepare(struct amdgpu_vm_update_params *p,
> +                               void *owner, struct dma_fence *exclusive)
> +{
> +     struct amdgpu_bo *root = p->vm->root.base.bo;
> +     unsigned int ndw = AMDGPU_VM_SDMA_MIN_NUM_DW;
> +     int r;
> +
> +     r = amdgpu_job_alloc_with_ib(p->adev, ndw * 4, &p->job);
> +     if (r)
> +             return r;
> +
> +     r = amdgpu_sync_fence(p->adev, &p->job->sync, exclusive, false);
> +     if (r)
> +             return r;
> +
> +     r = amdgpu_sync_resv(p->adev, &p->job->sync, root->tbo.resv,
> +                          owner, false);
> +     if (r)
> +             return r;
> +
> +     p->num_dw_left = ndw;
> +     p->ib = &p->job->ibs[0];

With p->job added, do we still need p->ib? We could just use 
&p->job->ibs[0] directly, which should perform the same or be more 
efficient since it's just a constant offset from p->job.


> +     return 0;
> +}
> +
> +/**
> + * amdgpu_vm_sdma_commit - commit SDMA command submission
> + *
> + * @p: see amdgpu_vm_update_params definition
> + * @fence: resulting fence
> + *
> + * Returns:
> + * Negativ errno, 0 for success.
> + */
> +static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p,
> +                              struct dma_fence **fence)
> +{
> +     struct amdgpu_bo *root = p->vm->root.base.bo;
> +     struct amdgpu_ring *ring;
> +     struct dma_fence *f;
> +     int r;
> +
> +     ring = container_of(p->vm->entity.rq->sched, struct amdgpu_ring, sched);
> +
> +     WARN_ON(p->ib->length_dw == 0);
> +     amdgpu_ring_pad_ib(ring, p->ib);
> +     WARN_ON(p->ib->length_dw > p->num_dw_left);
> +     r = amdgpu_job_submit(p->job, &p->vm->entity,
> +                           AMDGPU_FENCE_OWNER_VM, &f);
> +     if (r)
> +             goto error;
> +
> +     amdgpu_bo_fence(root, f, true);
> +     if (fence)
> +             swap(*fence, f);
> +     dma_fence_put(f);
> +     return 0;
> +
> +error:
> +     amdgpu_job_free(p->job);
> +     return r;
> +}
> +
> +
> +/**
> + * amdgpu_vm_sdma_copy_ptes - copy the PTEs from mapping
> + *
> + * @p: see amdgpu_vm_update_params definition
> + * @bo: PD/PT to update
> + * @pe: addr of the page entry
> + * @count: number of page entries to copy
> + *
> + * Traces the parameters and calls the DMA function to copy the PTEs.
> + */
> +static void amdgpu_vm_sdma_copy_ptes(struct amdgpu_vm_update_params *p,
> +                                  struct amdgpu_bo *bo, uint64_t pe,
> +                                  unsigned count)
> +{
> +     uint64_t src = p->ib->gpu_addr;
> +
> +     src += p->num_dw_left * 4;
> +
> +     pe += amdgpu_bo_gpu_offset(bo);
> +     trace_amdgpu_vm_copy_ptes(pe, src, count);
> +
> +     amdgpu_vm_copy_pte(p->adev, p->ib, pe, src, count);
> +}
> +
> +/**
> + * amdgpu_vm_sdma_set_ptes - helper to call the right asic function
> + *
> + * @p: see amdgpu_vm_update_params definition
> + * @bo: PD/PT to update
> + * @pe: addr of the page entry
> + * @addr: dst addr to write into pe
> + * @count: number of page entries to update
> + * @incr: increase next addr by incr bytes
> + * @flags: hw access flags
> + *
> + * Traces the parameters and calls the right asic functions
> + * to setup the page table using the DMA.
> + */
> +static void amdgpu_vm_sdma_set_ptes(struct amdgpu_vm_update_params *p,
> +                                 struct amdgpu_bo *bo, uint64_t pe,
> +                                 uint64_t addr, unsigned count,
> +                                 uint32_t incr, uint64_t flags)
> +{
> +     pe += amdgpu_bo_gpu_offset(bo);
> +     trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags);
> +     if (count < 3) {
> +             amdgpu_vm_write_pte(p->adev, p->ib, pe, addr | flags,
> +                                 count, incr);
> +     } else {
> +             amdgpu_vm_set_pte_pde(p->adev, p->ib, pe, addr,
> +                                   count, incr, flags);
> +     }
> +}
> +
> +/**
> + * amdgpu_vm_sdma_update - execute VM update
> + *
> + * @p: see amdgpu_vm_update_params definition
> + * @bo: PD/PT to update
> + * @pe: addr of the page entry
> + * @addr: dst addr to write into pe
> + * @count: number of page entries to update
> + * @incr: increase next addr by incr bytes
> + * @flags: hw access flags
> + *
> + * Reserve space in the IB, setup mapping buffer on demand and write 
> commands to
> + * the IB.
> + */
> +static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p,
> +                              struct amdgpu_bo *bo, uint64_t pe,
> +                              uint64_t addr, unsigned count, uint32_t incr,
> +                              uint64_t flags)
> +{
> +     unsigned int i, ndw, nptes;
> +     uint64_t *pte;
> +     int r;
> +
> +     do {
> +             ndw = p->num_dw_left;
> +             ndw -= p->ib->length_dw;
> +
> +             if (ndw < 32) {
> +                     r = amdgpu_vm_sdma_commit(p, NULL);
> +                     if (r)
> +                             return r;
> +
> +                     /* estimate how many dw we need */
> +                     ndw = 32;
> +                     if (p->pages_addr)
> +                             ndw += count * 2;
> +                     ndw = max(ndw, AMDGPU_VM_SDMA_MIN_NUM_DW);
> +                     ndw = min(ndw, AMDGPU_VM_SDMA_MAX_NUM_DW);
> +
> +                     r = amdgpu_job_alloc_with_ib(p->adev, ndw * 4, &p->job);
> +                     if (r)
> +                             return r;
> +
> +                     p->num_dw_left = ndw;
> +                     p->ib = &p->job->ibs[0];
> +             }
> +
> +             if (!p->pages_addr) {
> +                     /* set page commands needed */
> +                     if (bo->shadow)
> +                             amdgpu_vm_sdma_set_ptes(p, bo->shadow, pe, addr,
> +                                                     count, incr, flags);
> +                     amdgpu_vm_sdma_set_ptes(p, bo, pe, addr, count,
> +                                             incr, flags);
> +                     return 0;
> +             }
> +
> +             /* copy commands needed */
> +             ndw -= p->adev->vm_manager.vm_pte_funcs->copy_pte_num_dw *
> +                     (bo->shadow ? 2 : 1);
> +
> +             /* for padding */
> +             ndw -= 7;
> +
> +             nptes = min(count, ndw / 2);
> +
> +             /* Put the PTEs at the end of the IB. */
> +             p->num_dw_left -= nptes * 2;
> +             pte = (uint64_t *)&(p->ib->ptr[p->num_dw_left]);
> +             for (i = 0; i < nptes; ++i, addr += incr) {
> +                     pte[i] = amdgpu_vm_map_gart(p->pages_addr, addr);
> +                     pte[i] |= flags;
> +             }
> +
> +             if (bo->shadow)
> +                     amdgpu_vm_sdma_copy_ptes(p, bo->shadow, pe, nptes);
> +             amdgpu_vm_sdma_copy_ptes(p, bo, pe, nptes);
> +
> +             pe += nptes * 8;
> +             count -= nptes;
> +     } while (count);
> +
> +     return 0;
> +}
> +
> +const struct amdgpu_vm_update_funcs amdgpu_vm_sdma_funcs = {
> +     .prepare = amdgpu_vm_sdma_prepare,
> +     .update = amdgpu_vm_sdma_update,
> +     .commit = amdgpu_vm_sdma_commit
> +};
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Reply via email to