Hi,
On Fri, Aug 09, 2019 at 02:04:24PM +0200, Lucas Stach wrote:
> This builds on top of the MMU contexts introduced earlier. Instead of having
> one context per GPU core, each GPU client receives its own context.
> 
> On MMUv1 this still means a single shared pagetable set is used by all
> clients, but on MMUv2 there is now a distinct set of pagetables for each
> client. As the command fetch is also translated via the MMU on MMUv2 the
> kernel command ringbuffer is mapped into each of the client pagetables.
> 
> As the MMU context switch is a bit of a heavy operation, due to the needed
> cache and TLB flushing, this patch implements a lazy way of switching the
> MMU context. The kernel does not have its own MMU context, but reuses the
> last client context for all of its operations. This has some visible impact,
> as the GPU can now only be started once a client has submitted some work and
> we got the client MMU context assigned. Also the MMU context has a different
> lifetime than the general client context, as the GPU might still execute the
> kernel command buffer in the context of a client even after the client has
> completed all GPU work and has been terminated. Only when the GPU is runtime
> suspended or switches to another client's MMU context is the old context
> freed up.
> 
> Signed-off-by: Lucas Stach <l.st...@pengutronix.de>

Reviewed-by: Guido Günther <a...@sigxcpu.org> 

> ---
> v3: Don't call etnaviv_cmdbuf_suballoc_unmap when mapping failed.
> ---
>  drivers/gpu/drm/etnaviv/etnaviv_buffer.c     |  64 ++++++++---
>  drivers/gpu/drm/etnaviv/etnaviv_drv.c        |  38 ++++++-
>  drivers/gpu/drm/etnaviv/etnaviv_drv.h        |   6 +-
>  drivers/gpu/drm/etnaviv/etnaviv_dump.c       |   4 +-
>  drivers/gpu/drm/etnaviv/etnaviv_gem.c        |   7 +-
>  drivers/gpu/drm/etnaviv/etnaviv_gem.h        |   4 +-
>  drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c |  11 +-
>  drivers/gpu/drm/etnaviv/etnaviv_gpu.c        | 105 ++++++++-----------
>  drivers/gpu/drm/etnaviv/etnaviv_gpu.h        |   4 -
>  drivers/gpu/drm/etnaviv/etnaviv_iommu.c      |  10 +-
>  drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c   |  17 ++-
>  drivers/gpu/drm/etnaviv/etnaviv_mmu.c        |  42 ++++++--
>  drivers/gpu/drm/etnaviv/etnaviv_mmu.h        |  11 +-
>  13 files changed, 208 insertions(+), 115 deletions(-)
> 
> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_buffer.c 
> b/drivers/gpu/drm/etnaviv/etnaviv_buffer.c
> index 4324b098689f..876a035ee1a2 100644
> --- a/drivers/gpu/drm/etnaviv/etnaviv_buffer.c
> +++ b/drivers/gpu/drm/etnaviv/etnaviv_buffer.c
> @@ -118,7 +118,8 @@ static void etnaviv_buffer_dump(struct etnaviv_gpu *gpu,
>       u32 *ptr = buf->vaddr + off;
>  
>       dev_info(gpu->dev, "virt %p phys 0x%08x free 0x%08x\n",
> -                     ptr, etnaviv_cmdbuf_get_va(buf, &gpu->cmdbuf_mapping) +
> +                     ptr, etnaviv_cmdbuf_get_va(buf,
> +                     &gpu->mmu_context->cmdbuf_mapping) +
>                       off, size - len * 4 - off);
>  
>       print_hex_dump(KERN_INFO, "cmd ", DUMP_PREFIX_OFFSET, 16, 4,
> @@ -152,7 +153,8 @@ static u32 etnaviv_buffer_reserve(struct etnaviv_gpu *gpu,
>       if (buffer->user_size + cmd_dwords * sizeof(u64) > buffer->size)
>               buffer->user_size = 0;
>  
> -     return etnaviv_cmdbuf_get_va(buffer, &gpu->cmdbuf_mapping) +
> +     return etnaviv_cmdbuf_get_va(buffer,
> +                                  &gpu->mmu_context->cmdbuf_mapping) +
>              buffer->user_size;
>  }
>  
> @@ -166,7 +168,8 @@ u16 etnaviv_buffer_init(struct etnaviv_gpu *gpu)
>       buffer->user_size = 0;
>  
>       CMD_WAIT(buffer);
> -     CMD_LINK(buffer, 2, etnaviv_cmdbuf_get_va(buffer, &gpu->cmdbuf_mapping)
> +     CMD_LINK(buffer, 2,
> +              etnaviv_cmdbuf_get_va(buffer, 
> &gpu->mmu_context->cmdbuf_mapping)
>                + buffer->user_size - 4);
>  
>       return buffer->user_size / 8;
> @@ -293,7 +296,8 @@ void etnaviv_sync_point_queue(struct etnaviv_gpu *gpu, 
> unsigned int event)
>  
>       /* Append waitlink */
>       CMD_WAIT(buffer);
> -     CMD_LINK(buffer, 2, etnaviv_cmdbuf_get_va(buffer, &gpu->cmdbuf_mapping)
> +     CMD_LINK(buffer, 2,
> +              etnaviv_cmdbuf_get_va(buffer, 
> &gpu->mmu_context->cmdbuf_mapping)
>                + buffer->user_size - 4);
>  
>       /*
> @@ -308,7 +312,8 @@ void etnaviv_sync_point_queue(struct etnaviv_gpu *gpu, 
> unsigned int event)
>  
>  /* Append a command buffer to the ring buffer. */
>  void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, u32 exec_state,
> -     unsigned int event, struct etnaviv_cmdbuf *cmdbuf)
> +     struct etnaviv_iommu_context *mmu_context, unsigned int event,
> +     struct etnaviv_cmdbuf *cmdbuf)
>  {
>       struct etnaviv_cmdbuf *buffer = &gpu->buffer;
>       unsigned int waitlink_offset = buffer->user_size - 16;
> @@ -317,17 +322,19 @@ void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, u32 
> exec_state,
>       bool switch_context = gpu->exec_state != exec_state;
>       unsigned int new_flush_seq = READ_ONCE(gpu->mmu_context->flush_seq);
>       bool need_flush = gpu->flush_seq != new_flush_seq;
> +     bool switch_mmu_context = gpu->mmu_context != mmu_context;
>  
>       lockdep_assert_held(&gpu->lock);
>  
>       if (drm_debug & DRM_UT_DRIVER)
>               etnaviv_buffer_dump(gpu, buffer, 0, 0x50);
>  
> -     link_target = etnaviv_cmdbuf_get_va(cmdbuf, &gpu->cmdbuf_mapping);
> +     link_target = etnaviv_cmdbuf_get_va(cmdbuf,
> +                                         &gpu->mmu_context->cmdbuf_mapping);
>       link_dwords = cmdbuf->size / 8;
>  
>       /*
> -      * If we need maintanence prior to submitting this buffer, we will
> +      * If we need maintenance prior to submitting this buffer, we will
>        * need to append a mmu flush load state, followed by a new
>        * link to this buffer - a total of four additional words.
>        */
> @@ -349,7 +356,24 @@ void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, u32 
> exec_state,
>               if (switch_context)
>                       extra_dwords += 4;
>  
> +             /* PTA load command */
> +             if (switch_mmu_context && gpu->sec_mode == ETNA_SEC_KERNEL)
> +                     extra_dwords += 1;
> +
>               target = etnaviv_buffer_reserve(gpu, buffer, extra_dwords);
> +             /*
> +              * Switch MMU context if necessary. Must be done after the
> +              * link target has been calculated, as the jump forward in the
> +              * kernel ring still uses the last active MMU context before
> +              * the switch.
> +              */
> +             if (switch_mmu_context) {
> +                     struct etnaviv_iommu_context *old_context = 
> gpu->mmu_context;
> +
> +                     etnaviv_iommu_context_get(mmu_context);
> +                     gpu->mmu_context = mmu_context;
> +                     etnaviv_iommu_context_put(old_context);
> +             }
>  
>               if (need_flush) {
>                       /* Add the MMU flush */
> @@ -361,10 +385,23 @@ void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, u32 
> exec_state,
>                                              VIVS_GL_FLUSH_MMU_FLUSH_PEMMU |
>                                              VIVS_GL_FLUSH_MMU_FLUSH_UNK4);
>                       } else {
> +                             u32 flush = VIVS_MMUv2_CONFIGURATION_MODE_MASK |
> +                                         
> VIVS_MMUv2_CONFIGURATION_FLUSH_FLUSH;
> +
> +                             if (switch_mmu_context &&
> +                                 gpu->sec_mode == ETNA_SEC_KERNEL) {
> +                                     unsigned short id =
> +                                             
> etnaviv_iommuv2_get_pta_id(gpu->mmu_context);
> +                                     CMD_LOAD_STATE(buffer,
> +                                             VIVS_MMUv2_PTA_CONFIG,
> +                                             
> VIVS_MMUv2_PTA_CONFIG_INDEX(id));
> +                             }
> +
> +                             if (gpu->sec_mode == ETNA_SEC_NONE)
> +                                     flush |= 
> etnaviv_iommuv2_get_mtlb_addr(gpu->mmu_context);
> +
>                               CMD_LOAD_STATE(buffer, VIVS_MMUv2_CONFIGURATION,
> -                                     VIVS_MMUv2_CONFIGURATION_MODE_MASK |
> -                                     VIVS_MMUv2_CONFIGURATION_ADDRESS_MASK |
> -                                     VIVS_MMUv2_CONFIGURATION_FLUSH_FLUSH);
> +                                            flush);
>                               CMD_SEM(buffer, SYNC_RECIPIENT_FE,
>                                       SYNC_RECIPIENT_PE);
>                               CMD_STALL(buffer, SYNC_RECIPIENT_FE,
> @@ -380,6 +417,8 @@ void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, u32 
> exec_state,
>               }
>  
>               /* And the link to the submitted buffer */
> +             link_target = etnaviv_cmdbuf_get_va(cmdbuf,
> +                                     &gpu->mmu_context->cmdbuf_mapping);
>               CMD_LINK(buffer, link_dwords, link_target);
>  
>               /* Update the link target to point to above instructions */
> @@ -416,13 +455,14 @@ void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, u32 
> exec_state,
>       CMD_LOAD_STATE(buffer, VIVS_GL_EVENT, VIVS_GL_EVENT_EVENT_ID(event) |
>                      VIVS_GL_EVENT_FROM_PE);
>       CMD_WAIT(buffer);
> -     CMD_LINK(buffer, 2, etnaviv_cmdbuf_get_va(buffer, &gpu->cmdbuf_mapping)
> +     CMD_LINK(buffer, 2,
> +              etnaviv_cmdbuf_get_va(buffer, 
> &gpu->mmu_context->cmdbuf_mapping)
>                + buffer->user_size - 4);
>  
>       if (drm_debug & DRM_UT_DRIVER)
>               pr_info("stream link to 0x%08x @ 0x%08x %p\n",
>                       return_target,
> -                     etnaviv_cmdbuf_get_va(cmdbuf, &gpu->cmdbuf_mapping),
> +                     etnaviv_cmdbuf_get_va(cmdbuf, 
> &gpu->mmu_context->cmdbuf_mapping),
>                       cmdbuf->vaddr);
>  
>       if (drm_debug & DRM_UT_DRIVER) {
> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.c 
> b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
> index eb0c23fe979a..80f1edcbbea0 100644
> --- a/drivers/gpu/drm/etnaviv/etnaviv_drv.c
> +++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
> @@ -50,12 +50,19 @@ static int etnaviv_open(struct drm_device *dev, struct 
> drm_file *file)
>  {
>       struct etnaviv_drm_private *priv = dev->dev_private;
>       struct etnaviv_file_private *ctx;
> -     int i;
> +     int ret, i;
>  
>       ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
>       if (!ctx)
>               return -ENOMEM;
>  
> +     ctx->mmu = etnaviv_iommu_context_init(priv->mmu_global,
> +                                           priv->cmdbuf_suballoc);
> +     if (!ctx->mmu) {
> +             ret = -ENOMEM;
> +             goto out_free;
> +     }
> +
>       for (i = 0; i < ETNA_MAX_PIPES; i++) {
>               struct etnaviv_gpu *gpu = priv->gpu[i];
>               struct drm_sched_rq *rq;
> @@ -70,6 +77,10 @@ static int etnaviv_open(struct drm_device *dev, struct 
> drm_file *file)
>       file->driver_priv = ctx;
>  
>       return 0;
> +
> +out_free:
> +     kfree(ctx);
> +     return ret;
>  }
>  
>  static void etnaviv_postclose(struct drm_device *dev, struct drm_file *file)
> @@ -85,6 +96,8 @@ static void etnaviv_postclose(struct drm_device *dev, 
> struct drm_file *file)
>                       drm_sched_entity_destroy(&ctx->sched_entity[i]);
>       }
>  
> +     etnaviv_iommu_context_put(ctx->mmu);
> +
>       kfree(ctx);
>  }
>  
> @@ -116,12 +129,29 @@ static int etnaviv_mm_show(struct drm_device *dev, 
> struct seq_file *m)
>  static int etnaviv_mmu_show(struct etnaviv_gpu *gpu, struct seq_file *m)
>  {
>       struct drm_printer p = drm_seq_file_printer(m);
> +     struct etnaviv_iommu_context *mmu_context;
>  
>       seq_printf(m, "Active Objects (%s):\n", dev_name(gpu->dev));
>  
> -     mutex_lock(&gpu->mmu_context->lock);
> -     drm_mm_print(&gpu->mmu_context->mm, &p);
> -     mutex_unlock(&gpu->mmu_context->lock);
> +     /*
> +      * Lock the GPU to avoid a MMU context switch just now and elevate
> +      * the refcount of the current context to avoid it disappearing from
> +      * under our feet.
> +      */
> +     mutex_lock(&gpu->lock);
> +     mmu_context = gpu->mmu_context;
> +     if (mmu_context)
> +             etnaviv_iommu_context_get(mmu_context);
> +     mutex_unlock(&gpu->lock);
> +
> +     if (!mmu_context)
> +             return 0;
> +
> +     mutex_lock(&mmu_context->lock);
> +     drm_mm_print(&mmu_context->mm, &p);
> +     mutex_unlock(&mmu_context->lock);
> +
> +     etnaviv_iommu_context_put(mmu_context);
>  
>       return 0;
>  }
> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.h 
> b/drivers/gpu/drm/etnaviv/etnaviv_drv.h
> index 5f8db08f1c17..a488cfdb6bbf 100644
> --- a/drivers/gpu/drm/etnaviv/etnaviv_drv.h
> +++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.h
> @@ -25,10 +25,7 @@ struct etnaviv_gem_submit;
>  struct etnaviv_iommu_global;
>  
>  struct etnaviv_file_private {
> -     /*
> -      * When per-context address spaces are supported we'd keep track of
> -      * the context's page-tables here.
> -      */
> +     struct etnaviv_iommu_context    *mmu;
>       struct drm_sched_entity         sched_entity[ETNA_MAX_PIPES];
>  };
>  
> @@ -75,6 +72,7 @@ u16 etnaviv_buffer_config_pta(struct etnaviv_gpu *gpu, 
> unsigned short id);
>  void etnaviv_buffer_end(struct etnaviv_gpu *gpu);
>  void etnaviv_sync_point_queue(struct etnaviv_gpu *gpu, unsigned int event);
>  void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, u32 exec_state,
> +     struct etnaviv_iommu_context *mmu,
>       unsigned int event, struct etnaviv_cmdbuf *cmdbuf);
>  void etnaviv_validate_init(void);
>  bool etnaviv_cmd_validate_one(struct etnaviv_gpu *gpu,
> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_dump.c 
> b/drivers/gpu/drm/etnaviv/etnaviv_dump.c
> index 7e6791517693..698db540972c 100644
> --- a/drivers/gpu/drm/etnaviv/etnaviv_dump.c
> +++ b/drivers/gpu/drm/etnaviv/etnaviv_dump.c
> @@ -173,12 +173,12 @@ void etnaviv_core_dump(struct etnaviv_gem_submit 
> *submit)
>       etnaviv_core_dump_mem(&iter, ETDUMP_BUF_RING, gpu->buffer.vaddr,
>                             gpu->buffer.size,
>                             etnaviv_cmdbuf_get_va(&gpu->buffer,
> -                                                 &gpu->cmdbuf_mapping));
> +                                     &gpu->mmu_context->cmdbuf_mapping));
>  
>       etnaviv_core_dump_mem(&iter, ETDUMP_BUF_CMD,
>                             submit->cmdbuf.vaddr, submit->cmdbuf.size,
>                             etnaviv_cmdbuf_get_va(&submit->cmdbuf,
> -                                                 &gpu->cmdbuf_mapping));
> +                                     &gpu->mmu_context->cmdbuf_mapping));
>  
>       /* Reserve space for the bomap */
>       if (n_bomap_pages) {
> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c 
> b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
> index 04c8170f76cd..e79f6ef3659a 100644
> --- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c
> +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
> @@ -248,8 +248,7 @@ void etnaviv_gem_mapping_unreference(struct 
> etnaviv_vram_mapping *mapping)
>  }
>  
>  struct etnaviv_vram_mapping *etnaviv_gem_mapping_get(
> -     struct drm_gem_object *obj, struct etnaviv_gpu *gpu,
> -     struct etnaviv_iommu_context *mmu_context)
> +     struct drm_gem_object *obj, struct etnaviv_iommu_context *mmu_context)
>  {
>       struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj);
>       struct etnaviv_vram_mapping *mapping;
> @@ -308,8 +307,8 @@ struct etnaviv_vram_mapping *etnaviv_gem_mapping_get(
>       mapping->context = mmu_context;
>       mapping->use = 1;
>  
> -     ret = etnaviv_iommu_map_gem(mmu_context, etnaviv_obj, gpu->memory_base,
> -                                 mapping);
> +     ret = etnaviv_iommu_map_gem(mmu_context, etnaviv_obj,
> +                                 mmu_context->global->memory_base, mapping);
>       if (ret < 0)
>               kfree(mapping);
>       else
> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.h 
> b/drivers/gpu/drm/etnaviv/etnaviv_gem.h
> index f342560b5938..1e11659a8842 100644
> --- a/drivers/gpu/drm/etnaviv/etnaviv_gem.h
> +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.h
> @@ -93,6 +93,7 @@ struct etnaviv_gem_submit {
>       struct kref refcount;
>       struct etnaviv_file_private *ctx;
>       struct etnaviv_gpu *gpu;
> +     struct etnaviv_iommu_context *mmu_context, *prev_mmu_context;
>       struct dma_fence *out_fence, *in_fence;
>       int out_fence_id;
>       struct list_head node; /* GPU active submit list */
> @@ -119,8 +120,7 @@ struct page **etnaviv_gem_get_pages(struct 
> etnaviv_gem_object *obj);
>  void etnaviv_gem_put_pages(struct etnaviv_gem_object *obj);
>  
>  struct etnaviv_vram_mapping *etnaviv_gem_mapping_get(
> -     struct drm_gem_object *obj, struct etnaviv_gpu *gpu,
> -     struct etnaviv_iommu_context *mmu_context);
> +     struct drm_gem_object *obj, struct etnaviv_iommu_context *mmu_context);
>  void etnaviv_gem_mapping_unreference(struct etnaviv_vram_mapping *mapping);
>  
>  #endif /* __ETNAVIV_GEM_H__ */
> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c 
> b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
> index 7929d258daf8..16e7d371a7ef 100644
> --- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
> +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
> @@ -224,8 +224,7 @@ static int submit_pin_objects(struct etnaviv_gem_submit 
> *submit)
>               struct etnaviv_vram_mapping *mapping;
>  
>               mapping = etnaviv_gem_mapping_get(&etnaviv_obj->base,
> -                                               submit->gpu,
> -                                               submit->gpu->mmu_context);
> +                                               submit->mmu_context);
>               if (IS_ERR(mapping)) {
>                       ret = PTR_ERR(mapping);
>                       break;
> @@ -362,6 +361,12 @@ static void submit_cleanup(struct kref *kref)
>       if (submit->cmdbuf.suballoc)
>               etnaviv_cmdbuf_free(&submit->cmdbuf);
>  
> +     if (submit->mmu_context)
> +             etnaviv_iommu_context_put(submit->mmu_context);
> +
> +     if (submit->prev_mmu_context)
> +             etnaviv_iommu_context_put(submit->prev_mmu_context);
> +
>       for (i = 0; i < submit->nr_bos; i++) {
>               struct etnaviv_gem_object *etnaviv_obj = submit->bos[i].obj;
>  
> @@ -503,6 +508,8 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void 
> *data,
>               goto err_submit_objects;
>  
>       submit->ctx = file->driver_priv;
> +     etnaviv_iommu_context_get(submit->ctx->mmu);
> +     submit->mmu_context = submit->ctx->mmu;
>       submit->exec_state = args->exec_state;
>       submit->flags = args->flags;
>  
> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c 
> b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
> index 3af72a4127b0..d8a83ebfce47 100644
> --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
> +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
> @@ -602,7 +602,8 @@ void etnaviv_gpu_start_fe(struct etnaviv_gpu *gpu, u32 
> address, u16 prefetch)
>  
>  static void etnaviv_gpu_start_fe_idleloop(struct etnaviv_gpu *gpu)
>  {
> -     u32 address = etnaviv_cmdbuf_get_va(&gpu->buffer, &gpu->cmdbuf_mapping);
> +     u32 address = etnaviv_cmdbuf_get_va(&gpu->buffer,
> +                             &gpu->mmu_context->cmdbuf_mapping);
>       u16 prefetch;
>  
>       /* setup the MMU */
> @@ -693,8 +694,6 @@ static void etnaviv_gpu_hw_init(struct etnaviv_gpu *gpu)
>       etnaviv_gpu_setup_pulse_eater(gpu);
>  
>       gpu_write(gpu, VIVS_HI_INTR_ENBL, ~0U);
> -
> -     etnaviv_gpu_start_fe_idleloop(gpu);
>  }
>  
>  int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
> @@ -724,28 +723,6 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
>               goto fail;
>       }
>  
> -     /*
> -      * Set the GPU linear window to be at the end of the DMA window, where
> -      * the CMA area is likely to reside. This ensures that we are able to
> -      * map the command buffers while having the linear window overlap as
> -      * much RAM as possible, so we can optimize mappings for other buffers.
> -      *
> -      * For 3D cores only do this if MC2.0 is present, as with MC1.0 it leads
> -      * to different views of the memory on the individual engines.
> -      */
> -     if (!(gpu->identity.features & chipFeatures_PIPE_3D) ||
> -         (gpu->identity.minor_features0 & chipMinorFeatures0_MC20)) {
> -             u32 dma_mask = (u32)dma_get_required_mask(gpu->dev);
> -             if (dma_mask < PHYS_OFFSET + SZ_2G)
> -                     gpu->memory_base = PHYS_OFFSET;
> -             else
> -                     gpu->memory_base = dma_mask - SZ_2G + 1;
> -     } else if (PHYS_OFFSET >= SZ_2G) {
> -             dev_info(gpu->dev, "Need to move linear window on MC1.0, 
> disabling TS\n");
> -             gpu->memory_base = PHYS_OFFSET;
> -             gpu->identity.features &= ~chipFeatures_FAST_CLEAR;
> -     }
> -
>       /*
>        * On cores with security features supported, we claim control over the
>        * security states.
> @@ -764,20 +741,26 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
>       if (ret)
>               goto fail;
>  
> -     gpu->mmu_context = etnaviv_iommu_context_init(priv->mmu_global);
> -     if (IS_ERR(gpu->mmu_context)) {
> -             dev_err(gpu->dev, "Failed to instantiate GPU IOMMU\n");
> -             ret = PTR_ERR(gpu->mmu_context);
> -             goto iommu_global_fini;
> -     }
> -
> -     ret = etnaviv_cmdbuf_suballoc_map(priv->cmdbuf_suballoc,
> -                                       gpu->mmu_context,
> -                                       &gpu->cmdbuf_mapping,
> -                                       gpu->memory_base);
> -     if (ret) {
> -             dev_err(gpu->dev, "failed to map cmdbuf suballoc\n");
> -             goto destroy_iommu;
> +     /*
> +      * Set the GPU linear window to be at the end of the DMA window, where
> +      * the CMA area is likely to reside. This ensures that we are able to
> +      * map the command buffers while having the linear window overlap as
> +      * much RAM as possible, so we can optimize mappings for other buffers.
> +      *
> +      * For 3D cores only do this if MC2.0 is present, as with MC1.0 it leads
> +      * to different views of the memory on the individual engines.
> +      */
> +     if (!(gpu->identity.features & chipFeatures_PIPE_3D) ||
> +         (gpu->identity.minor_features0 & chipMinorFeatures0_MC20)) {
> +             u32 dma_mask = (u32)dma_get_required_mask(gpu->dev);
> +             if (dma_mask < PHYS_OFFSET + SZ_2G)
> +                     priv->mmu_global->memory_base = PHYS_OFFSET;
> +             else
> +                     priv->mmu_global->memory_base = dma_mask - SZ_2G + 1;
> +     } else if (PHYS_OFFSET >= SZ_2G) {
> +             dev_info(gpu->dev, "Need to move linear window on MC1.0, 
> disabling TS\n");
> +             priv->mmu_global->memory_base = PHYS_OFFSET;
> +             gpu->identity.features &= ~chipFeatures_FAST_CLEAR;
>       }
>  
>       /* Create buffer: */
> @@ -785,15 +768,7 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
>                                 PAGE_SIZE);
>       if (ret) {
>               dev_err(gpu->dev, "could not create command buffer\n");
> -             goto unmap_suballoc;
> -     }
> -
> -     if (!(gpu->identity.minor_features1 & chipMinorFeatures1_MMU_VERSION) &&
> -         etnaviv_cmdbuf_get_va(&gpu->buffer, &gpu->cmdbuf_mapping) > 
> 0x80000000) {
> -             ret = -EINVAL;
> -             dev_err(gpu->dev,
> -                     "command buffer outside valid memory window\n");
> -             goto free_buffer;
> +             goto fail;
>       }
>  
>       /* Setup event management */
> @@ -816,14 +791,6 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
>  
>       return 0;
>  
> -free_buffer:
> -     etnaviv_cmdbuf_free(&gpu->buffer);
> -unmap_suballoc:
> -     etnaviv_cmdbuf_suballoc_unmap(gpu->mmu_context, &gpu->cmdbuf_mapping);
> -destroy_iommu:
> -     etnaviv_iommu_context_put(gpu->mmu_context);
> -iommu_global_fini:
> -     etnaviv_iommu_global_fini(gpu);
>  fail:
>       pm_runtime_mark_last_busy(gpu->dev);
>       pm_runtime_put_autosuspend(gpu->dev);
> @@ -1017,6 +984,7 @@ void etnaviv_gpu_recover_hang(struct etnaviv_gpu *gpu)
>  
>       etnaviv_gpu_hw_init(gpu);
>       gpu->exec_state = -1;
> +     gpu->mmu_context = NULL;
>  
>       mutex_unlock(&gpu->lock);
>       pm_runtime_mark_last_busy(gpu->dev);
> @@ -1323,6 +1291,15 @@ struct dma_fence *etnaviv_gpu_submit(struct 
> etnaviv_gem_submit *submit)
>               goto out_unlock;
>       }
>  
> +     if (!gpu->mmu_context) {
> +             etnaviv_iommu_context_get(submit->mmu_context);
> +             gpu->mmu_context = submit->mmu_context;
> +             etnaviv_gpu_start_fe_idleloop(gpu);
> +     } else {
> +             etnaviv_iommu_context_get(gpu->mmu_context);
> +             submit->prev_mmu_context = gpu->mmu_context;
> +     }
> +
>       if (submit->nr_pmrs) {
>               gpu->event[event[1]].sync_point = 
> &sync_point_perfmon_sample_pre;
>               kref_get(&submit->refcount);
> @@ -1332,8 +1309,8 @@ struct dma_fence *etnaviv_gpu_submit(struct 
> etnaviv_gem_submit *submit)
>  
>       gpu->event[event[0]].fence = gpu_fence;
>       submit->cmdbuf.user_size = submit->cmdbuf.size - 8;
> -     etnaviv_buffer_queue(gpu, submit->exec_state, event[0],
> -                          &submit->cmdbuf);
> +     etnaviv_buffer_queue(gpu, submit->exec_state, submit->mmu_context,
> +                          event[0], &submit->cmdbuf);
>  
>       if (submit->nr_pmrs) {
>               gpu->event[event[2]].sync_point = 
> &sync_point_perfmon_sample_post;
> @@ -1535,7 +1512,7 @@ int etnaviv_gpu_wait_idle(struct etnaviv_gpu *gpu, 
> unsigned int timeout_ms)
>  
>  static int etnaviv_gpu_hw_suspend(struct etnaviv_gpu *gpu)
>  {
> -     if (gpu->initialized) {
> +     if (gpu->initialized && gpu->mmu_context) {
>               /* Replace the last WAIT with END */
>               mutex_lock(&gpu->lock);
>               etnaviv_buffer_end(gpu);
> @@ -1547,8 +1524,13 @@ static int etnaviv_gpu_hw_suspend(struct etnaviv_gpu 
> *gpu)
>                * we fail, just warn and continue.
>                */
>               etnaviv_gpu_wait_idle(gpu, 100);
> +
> +             etnaviv_iommu_context_put(gpu->mmu_context);
> +             gpu->mmu_context = NULL;
>       }
>  
> +     gpu->exec_state = -1;
> +
>       return etnaviv_gpu_clk_disable(gpu);
>  }
>  
> @@ -1564,8 +1546,6 @@ static int etnaviv_gpu_hw_resume(struct etnaviv_gpu 
> *gpu)
>       etnaviv_gpu_update_clock(gpu);
>       etnaviv_gpu_hw_init(gpu);
>  
> -     gpu->exec_state = -1;
> -
>       mutex_unlock(&gpu->lock);
>  
>       return 0;
> @@ -1696,9 +1676,6 @@ static void etnaviv_gpu_unbind(struct device *dev, 
> struct device *master,
>  
>       if (gpu->initialized) {
>               etnaviv_cmdbuf_free(&gpu->buffer);
> -             etnaviv_cmdbuf_suballoc_unmap(gpu->mmu_context,
> -                                           &gpu->cmdbuf_mapping);
> -             etnaviv_iommu_context_put(gpu->mmu_context);
>               etnaviv_iommu_global_fini(gpu);
>               gpu->initialized = false;
>       }
> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h 
> b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
> index c0bd6018d53b..8f9bd4edc96a 100644
> --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
> +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
> @@ -103,13 +103,9 @@ struct etnaviv_gpu {
>       bool initialized;
>  
>       /* 'ring'-buffer: */
> -     struct etnaviv_vram_mapping cmdbuf_mapping;
>       struct etnaviv_cmdbuf buffer;
>       int exec_state;
>  
> -     /* bus base address of memory  */
> -     u32 memory_base;
> -
>       /* event management: */
>       DECLARE_BITMAP(event_bitmap, ETNA_NR_EVENTS);
>       struct etnaviv_event event[ETNA_NR_EVENTS];
> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_iommu.c 
> b/drivers/gpu/drm/etnaviv/etnaviv_iommu.c
> index a2f1ff151822..aac8dbf3ea56 100644
> --- a/drivers/gpu/drm/etnaviv/etnaviv_iommu.c
> +++ b/drivers/gpu/drm/etnaviv/etnaviv_iommu.c
> @@ -93,11 +93,11 @@ static void etnaviv_iommuv1_restore(struct etnaviv_gpu 
> *gpu,
>       u32 pgtable;
>  
>       /* set base addresses */
> -     gpu_write(gpu, VIVS_MC_MEMORY_BASE_ADDR_RA, gpu->memory_base);
> -     gpu_write(gpu, VIVS_MC_MEMORY_BASE_ADDR_FE, gpu->memory_base);
> -     gpu_write(gpu, VIVS_MC_MEMORY_BASE_ADDR_TX, gpu->memory_base);
> -     gpu_write(gpu, VIVS_MC_MEMORY_BASE_ADDR_PEZ, gpu->memory_base);
> -     gpu_write(gpu, VIVS_MC_MEMORY_BASE_ADDR_PE, gpu->memory_base);
> +     gpu_write(gpu, VIVS_MC_MEMORY_BASE_ADDR_RA, 
> context->global->memory_base);
> +     gpu_write(gpu, VIVS_MC_MEMORY_BASE_ADDR_FE, 
> context->global->memory_base);
> +     gpu_write(gpu, VIVS_MC_MEMORY_BASE_ADDR_TX, 
> context->global->memory_base);
> +     gpu_write(gpu, VIVS_MC_MEMORY_BASE_ADDR_PEZ, 
> context->global->memory_base);
> +     gpu_write(gpu, VIVS_MC_MEMORY_BASE_ADDR_PE, 
> context->global->memory_base);
>  
>       /* set page table address in MC */
>       pgtable = (u32)v1_context->pgtable_dma;
> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c 
> b/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c
> index 5ca2077c148d..043111a1d60c 100644
> --- a/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c
> +++ b/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c
> @@ -206,7 +206,7 @@ static void etnaviv_iommuv2_restore_sec(struct 
> etnaviv_gpu *gpu,
>                 VIVS_MMUv2_SAFE_ADDRESS_CONFIG_SEC_SAFE_ADDR_HIGH(
>                 upper_32_bits(context->global->bad_page_dma)));
>  
> -     context->global->v2.pta_cpu[0] = v2_context->mtlb_dma |
> +     context->global->v2.pta_cpu[v2_context->id] = v2_context->mtlb_dma |
>                                        VIVS_MMUv2_CONFIGURATION_MODE_MODE4_K;
>  
>       /* trigger a PTA load through the FE */
> @@ -218,6 +218,19 @@ static void etnaviv_iommuv2_restore_sec(struct 
> etnaviv_gpu *gpu,
>       gpu_write(gpu, VIVS_MMUv2_SEC_CONTROL, VIVS_MMUv2_SEC_CONTROL_ENABLE);
>  }
>  
> +u32 etnaviv_iommuv2_get_mtlb_addr(struct etnaviv_iommu_context *context)
> +{
> +     struct etnaviv_iommuv2_context *v2_context = to_v2_context(context);
> +
> +     return v2_context->mtlb_dma;
> +}
> +
> +unsigned short etnaviv_iommuv2_get_pta_id(struct etnaviv_iommu_context 
> *context)
> +{
> +     struct etnaviv_iommuv2_context *v2_context = to_v2_context(context);
> +
> +     return v2_context->id;
> +}
>  static void etnaviv_iommuv2_restore(struct etnaviv_gpu *gpu,
>                                   struct etnaviv_iommu_context *context)
>  {
> @@ -272,6 +285,8 @@ etnaviv_iommuv2_context_alloc(struct etnaviv_iommu_global 
> *global)
>       memset32(v2_context->mtlb_cpu, MMUv2_PTE_EXCEPTION,
>                MMUv2_MAX_STLB_ENTRIES);
>  
> +     global->v2.pta_cpu[v2_context->id] = v2_context->mtlb_dma;
> +
>       context = &v2_context->base;
>       context->global = global;
>       kref_init(&context->refcount);
> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_mmu.c 
> b/drivers/gpu/drm/etnaviv/etnaviv_mmu.c
> index 2f64eef773ed..82822e30bf30 100644
> --- a/drivers/gpu/drm/etnaviv/etnaviv_mmu.c
> +++ b/drivers/gpu/drm/etnaviv/etnaviv_mmu.c
> @@ -290,6 +290,8 @@ static void etnaviv_iommu_context_free(struct kref *kref)
>       struct etnaviv_iommu_context *context =
>               container_of(kref, struct etnaviv_iommu_context, refcount);
>  
> +     etnaviv_cmdbuf_suballoc_unmap(context, &context->cmdbuf_mapping);
> +
>       context->global->ops->free(context);
>  }
>  void etnaviv_iommu_context_put(struct etnaviv_iommu_context *context)
> @@ -298,12 +300,28 @@ void etnaviv_iommu_context_put(struct etnaviv_iommu_context *context)
>  }
>  
>  struct etnaviv_iommu_context *
> -etnaviv_iommu_context_init(struct etnaviv_iommu_global *global)
> +etnaviv_iommu_context_init(struct etnaviv_iommu_global *global,
> +                        struct etnaviv_cmdbuf_suballoc *suballoc)
>  {
> +     struct etnaviv_iommu_context *ctx;
> +     int ret;
> +
>       if (global->version == ETNAVIV_IOMMU_V1)
> -             return etnaviv_iommuv1_context_alloc(global);
> +             ctx = etnaviv_iommuv1_context_alloc(global);
>       else
> -             return etnaviv_iommuv2_context_alloc(global);
> +             ctx = etnaviv_iommuv2_context_alloc(global);
> +
> +     if (!ctx)
> +             return NULL;
> +
> +     ret = etnaviv_cmdbuf_suballoc_map(suballoc, ctx, &ctx->cmdbuf_mapping,
> +                                       global->memory_base);
> +     if (ret) {
> +             global->ops->free(ctx);
> +             return NULL;
> +     }
> +
> +     return ctx;
>  }
>  
>  void etnaviv_iommu_restore(struct etnaviv_gpu *gpu,
> @@ -319,6 +337,12 @@ int etnaviv_iommu_get_suballoc_va(struct etnaviv_iommu_context *context,
>  {
>       mutex_lock(&context->lock);
>  
> +     if (mapping->use > 0) {
> +             mapping->use++;
> +             mutex_unlock(&context->lock);
> +             return 0;
> +     }
> +
>       /*
>        * For MMUv1 we don't add the suballoc region to the pagetables, as
>        * those GPUs can only work with cmdbufs accessed through the linear
> @@ -340,7 +364,6 @@ int etnaviv_iommu_get_suballoc_va(struct etnaviv_iommu_context *context,
>               mapping->iova = node->start;
>               ret = etnaviv_context_map(context, node->start, paddr, size,
>                                         ETNAVIV_PROT_READ);
> -
>               if (ret < 0) {
>                       drm_mm_remove_node(node);
>                       mutex_unlock(&context->lock);
> @@ -363,15 +386,14 @@ void etnaviv_iommu_put_suballoc_va(struct etnaviv_iommu_context *context,
>  {
>       struct drm_mm_node *node = &mapping->vram_node;
>  
> -     if (!mapping->use)
> -             return;
> -
> -     mapping->use = 0;
> +     mutex_lock(&context->lock);
> +     mapping->use--;
>  
> -     if (context->global->version == ETNAVIV_IOMMU_V1)
> +     if (mapping->use > 0 || context->global->version == ETNAVIV_IOMMU_V1) {
> +             mutex_unlock(&context->lock);
>               return;
> +     }
>  
> -     mutex_lock(&context->lock);
>       etnaviv_context_unmap(context, node->start, node->size);
>       drm_mm_remove_node(node);
>       mutex_unlock(&context->lock);
> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_mmu.h b/drivers/gpu/drm/etnaviv/etnaviv_mmu.h
> index 4438d66db6ab..c01491a6c4d8 100644
> --- a/drivers/gpu/drm/etnaviv/etnaviv_mmu.h
> +++ b/drivers/gpu/drm/etnaviv/etnaviv_mmu.h
> @@ -47,6 +47,8 @@ struct etnaviv_iommu_global {
>       void *bad_page_cpu;
>       dma_addr_t bad_page_dma;
>  
> +     u32 memory_base;
> +
>       /*
>        * This union holds members needed by either MMUv1 or MMUv2, which
>        * can not exist at the same time.
> @@ -74,6 +76,9 @@ struct etnaviv_iommu_context {
>       struct list_head mappings;
>       struct drm_mm mm;
>       unsigned int flush_seq;
> +
> +     /* Not part of the context, but needs to have the same lifetime */
> +     struct etnaviv_vram_mapping cmdbuf_mapping;
>  };
>  
>  int etnaviv_iommu_global_init(struct etnaviv_gpu *gpu);
> @@ -98,7 +103,8 @@ size_t etnaviv_iommu_dump_size(struct etnaviv_iommu_context *ctx);
>  void etnaviv_iommu_dump(struct etnaviv_iommu_context *ctx, void *buf);
>  
>  struct etnaviv_iommu_context *
> -etnaviv_iommu_context_init(struct etnaviv_iommu_global *global);
> +etnaviv_iommu_context_init(struct etnaviv_iommu_global *global,
> +                        struct etnaviv_cmdbuf_suballoc *suballoc);
>  static inline void etnaviv_iommu_context_get(struct etnaviv_iommu_context *ctx)
>  {
>       kref_get(&ctx->refcount);
> @@ -112,4 +118,7 @@ etnaviv_iommuv1_context_alloc(struct etnaviv_iommu_global *global);
>  struct etnaviv_iommu_context *
>  etnaviv_iommuv2_context_alloc(struct etnaviv_iommu_global *global);
>  
> +u32 etnaviv_iommuv2_get_mtlb_addr(struct etnaviv_iommu_context *context);
> +unsigned short etnaviv_iommuv2_get_pta_id(struct etnaviv_iommu_context *context);
> +
>  #endif /* __ETNAVIV_MMU_H__ */
> -- 
> 2.20.1
> 
> _______________________________________________
> etnaviv mailing list
> etna...@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/etnaviv
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

Reply via email to