Regards,
David Zhou
> + else
> + flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
> + AMDGPU_GEM_CREATE_SHADOW);
> +
> /* walk over the address space and allocate the page tables */
> for (pt_idx = from; pt_idx <= to; ++pt_idx) {
> struct reservation_object *resv = vm->root.bo->tbo.resv;
> @@ -310,10 +319,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
> amdgpu_vm_bo_size(adev, level),
> AMDGPU_GPU_PAGE_SIZE, true,
> AMDGPU_GEM_DOMAIN_VRAM,
> - AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
> - AMDGPU_GEM_CREATE_SHADOW |
> - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
> - AMDGPU_GEM_CREATE_VRAM_CLEARED,
> + flags,
> NULL, resv, &pt);
> if (r)
> return r;
> @@ -952,6 +958,43 @@ static uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr)
> return result;
> }
>
> +/**
> + * amdgpu_vm_cpu_set_ptes - helper to update page tables via CPU
> + *
> + * @params: see amdgpu_pte_update_params definition
> + * @pe: kmap addr of the page entry
> + * @addr: dst addr to write into pe
> + * @count: number of page entries to update
> + * @incr: increase next addr by incr bytes
> + * @flags: hw access flags
> + */
> +static void amdgpu_vm_cpu_set_ptes(struct amdgpu_pte_update_params *params,
> + uint64_t pe, uint64_t addr,
> + unsigned count, uint32_t incr,
> + uint64_t flags)
> +{
> + unsigned int i;
> +
> + for (i = 0; i < count; i++) {
> + amdgpu_gart_set_pte_pde(params->adev, (void *)pe,
> + i, addr, flags);
> + addr += incr;
> + }
> +
> + mb();
> + amdgpu_gart_flush_gpu_tlb(params->adev, 0);
> +}
> +
> +static void amdgpu_vm_bo_wait(struct amdgpu_device *adev, struct amdgpu_bo *bo)
> +{
> + struct amdgpu_sync sync;
> +
> + amdgpu_sync_create(&sync);
> + amdgpu_sync_resv(adev, &sync, bo->tbo.resv, AMDGPU_FENCE_OWNER_VM);
> + amdgpu_sync_wait(&sync);
> + amdgpu_sync_free(&sync);
> +}
> +
> /*
> * amdgpu_vm_update_level - update a single level in the hierarchy
> *
> @@ -981,34 +1024,50 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
>
> if (!parent->entries)
> return 0;
> - ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
>
> - /* padding, etc. */
> - ndw = 64;
> + memset(&params, 0, sizeof(params));
> + params.adev = adev;
> + shadow = parent->bo->shadow;
>
> - /* assume the worst case */
> - ndw += parent->last_entry_used * 6;
> + WARN_ON(vm->use_cpu_for_update && shadow);
> + if (vm->use_cpu_for_update && !shadow) {
> + r = amdgpu_bo_kmap(parent->bo, (void **)&pd_addr);
> + if (r)
> + return r;
> + amdgpu_vm_bo_wait(adev, parent->bo);
> + params.func = amdgpu_vm_cpu_set_ptes;
> + } else {
> + if (shadow) {
> + r = amdgpu_ttm_bind(&shadow->tbo, &shadow->tbo.mem);
> + if (r)
> + return r;
> + }
> + ring = container_of(vm->entity.sched, struct amdgpu_ring,
> + sched);
>
> - pd_addr = amdgpu_bo_gpu_offset(parent->bo);
> + /* padding, etc. */
> + ndw = 64;
>
> - shadow = parent->bo->shadow;
> - if (shadow) {
> - r = amdgpu_ttm_bind(&shadow->tbo, &shadow->tbo.mem);
> + /* assume the worst case */
> + ndw += parent->last_entry_used * 6;
> +
> + pd_addr = amdgpu_bo_gpu_offset(parent->bo);
> +
> + if (shadow) {
> + shadow_addr = amdgpu_bo_gpu_offset(shadow);
> + ndw *= 2;
> + } else {
> + shadow_addr = 0;
> + }
> +
> + r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
> if (r)
> return r;
> - shadow_addr = amdgpu_bo_gpu_offset(shadow);
> - ndw *= 2;
> - } else {
> - shadow_addr = 0;
> - }
>
> - r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
> - if (r)
> - return r;
> + params.ib = &job->ibs[0];
> + params.func = amdgpu_vm_do_set_ptes;
> + }
>
> - memset(&params, 0, sizeof(params));
> - params.adev = adev;
> - params.ib = &job->ibs[0];
>
> /* walk over the address space and update the directory */
> for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) {
> @@ -1043,15 +1102,15 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
> amdgpu_vm_adjust_mc_addr(adev, last_pt);
>
> if (shadow)
> - amdgpu_vm_do_set_ptes(&params,
> - last_shadow,
> - pt_addr, count,
> - incr,
> - AMDGPU_PTE_VALID);
> -
> - amdgpu_vm_do_set_ptes(&params, last_pde,
> - pt_addr, count, incr,
> - AMDGPU_PTE_VALID);
> + params.func(&params,
> + last_shadow,
> + pt_addr, count,
> + incr,
> + AMDGPU_PTE_VALID);
> +
> + params.func(&params, last_pde,
> + pt_addr, count, incr,
> + AMDGPU_PTE_VALID);
> }
>
> count = 1;
> @@ -1067,14 +1126,16 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
> uint64_t pt_addr = amdgpu_vm_adjust_mc_addr(adev, last_pt);
>
> if (vm->root.bo->shadow)
> - amdgpu_vm_do_set_ptes(&params, last_shadow, pt_addr,
> - count, incr, AMDGPU_PTE_VALID);
> + params.func(&params, last_shadow, pt_addr,
> + count, incr, AMDGPU_PTE_VALID);
>
> - amdgpu_vm_do_set_ptes(&params, last_pde, pt_addr,
> - count, incr, AMDGPU_PTE_VALID);
> + params.func(&params, last_pde, pt_addr,
> + count, incr, AMDGPU_PTE_VALID);
> }
>
> - if (params.ib->length_dw == 0) {
> + if (params.func == amdgpu_vm_cpu_set_ptes)
> + amdgpu_bo_kunmap(parent->bo);
> + else if (params.ib->length_dw == 0) {
> amdgpu_job_free(job);
> } else {
> amdgpu_ring_pad_ib(ring, params.ib);
> @@ -2309,6 +2370,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> struct amdgpu_ring *ring;
> struct amd_sched_rq *rq;
> int r, i;
> + u64 flags;
>
> vm->va = RB_ROOT;
> vm->client_id = atomic64_inc_return(&adev->vm_manager.client_counter);
> @@ -2342,12 +2404,17 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> "CPU update of VM recommended only for large BAR system\n");
> vm->last_dir_update = NULL;
>
> + flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
> + AMDGPU_GEM_CREATE_VRAM_CLEARED;
> + if (vm->use_cpu_for_update)
> + flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
> + else
> + flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
> + AMDGPU_GEM_CREATE_SHADOW);
> +
> r = amdgpu_bo_create(adev, amdgpu_vm_bo_size(adev, 0), align, true,
> AMDGPU_GEM_DOMAIN_VRAM,
> - AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
> - AMDGPU_GEM_CREATE_SHADOW |
> - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
> - AMDGPU_GEM_CREATE_VRAM_CLEARED,
> + flags,
> NULL, NULL, &vm->root.bo);
> if (r)
> goto error_free_sched_entity;