On Mon, 27 Mar 2017 16:10:25 -0700
Eric Anholt wrote:
> Until now, we've had to limit Raspberry Pi to 256MB of CMA memory to
> keep from triggering the hardware addressing bug between of the tile
> binner of the tile alloc memory (where the top 4 bits come from the
> tile state data array's address).
>
> To work around that and allow more memory to be reserved for graphics,
> allocate a single BO to store tile state data arrays and tile
> alloc/overflow memory while the GPU is active, and make sure that that
> one BO doesn't happen to cross a 256MB boundary. With that in place,
> we can allocate textures and shaders anywhere in system memory (still
> contiguous, of course).
It's hard to review something I don't quite understand, but I didn't
spot any problem and the code seems to follow what the commit message
says: make sure the memory used by the tile binner (still have to look
at what a tile binner is :-)) does not cross a 256MB.
So, not sure my review has a real value here, but
Reviewed-by: Boris Brezillon
>
> Signed-off-by: Eric Anholt
> ---
> drivers/gpu/drm/vc4/vc4_drv.h | 28 +--
> drivers/gpu/drm/vc4/vc4_gem.c | 12 ++-
> drivers/gpu/drm/vc4/vc4_irq.c | 61 +++
> drivers/gpu/drm/vc4/vc4_render_cl.c | 3 +-
> drivers/gpu/drm/vc4/vc4_v3d.c | 150
>
> drivers/gpu/drm/vc4/vc4_validate.c | 54 ++---
> 6 files changed, 234 insertions(+), 74 deletions(-)
>
> diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h
> index dffce6293d87..5d9532163cbe 100644
> --- a/drivers/gpu/drm/vc4/vc4_drv.h
> +++ b/drivers/gpu/drm/vc4/vc4_drv.h
> @@ -95,12 +95,23 @@ struct vc4_dev {
>*/
> struct list_head seqno_cb_list;
>
> - /* The binner overflow memory that's currently set up in
> - * BPOA/BPOS registers. When overflow occurs and a new one is
> - * allocated, the previous one will be moved to
> - * vc4->current_exec's free list.
> + /* The memory used for storing binner tile alloc, tile state,
> + * and overflow memory allocations. This is freed when V3D
> + * powers down.
>*/
> - struct vc4_bo *overflow_mem;
> + struct vc4_bo *bin_bo;
> +
> + /* Size of blocks allocated within bin_bo. */
> + uint32_t bin_alloc_size;
> +
> + /* Bitmask of the bin_alloc_size chunks in bin_bo that are
> + * used.
> + */
> + uint32_t bin_alloc_used;
> +
> + /* Bitmask of the current bin_alloc used for overflow memory. */
> + uint32_t bin_alloc_overflow;
> +
> struct work_struct overflow_mem_work;
>
> int power_refcount;
> @@ -293,8 +304,12 @@ struct vc4_exec_info {
> bool found_increment_semaphore_packet;
> bool found_flush;
> uint8_t bin_tiles_x, bin_tiles_y;
> - struct drm_gem_cma_object *tile_bo;
> + /* Physical address of the start of the tile alloc array
> + * (where each tile's binned CL will start)
> + */
> uint32_t tile_alloc_offset;
> + /* Bitmask of which binner slots are freed when this job completes. */
> + uint32_t bin_slots;
>
> /**
>* Computed addresses pointing into exec_bo where we start the
> @@ -522,6 +537,7 @@ void vc4_plane_async_set_fb(struct drm_plane *plane,
> extern struct platform_driver vc4_v3d_driver;
> int vc4_v3d_debugfs_ident(struct seq_file *m, void *unused);
> int vc4_v3d_debugfs_regs(struct seq_file *m, void *unused);
> +int vc4_v3d_get_bin_slot(struct vc4_dev *vc4);
>
> /* vc4_validate.c */
> int
> diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c
> index e9c381c42139..3ecd1ba7af75 100644
> --- a/drivers/gpu/drm/vc4/vc4_gem.c
> +++ b/drivers/gpu/drm/vc4/vc4_gem.c
> @@ -705,6 +705,7 @@ static void
> vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec)
> {
> struct vc4_dev *vc4 = to_vc4_dev(dev);
> + unsigned long irqflags;
> unsigned i;
>
> if (exec->bo) {
> @@ -720,6 +721,11 @@ vc4_complete_exec(struct drm_device *dev, struct
> vc4_exec_info *exec)
> drm_gem_object_unreference_unlocked(&bo->base.base);
> }
>
> + /* Free up the allocation of any bin slots we used. */
> + spin_lock_irqsave(&vc4->job_lock, irqflags);
> + vc4->bin_alloc_used &= ~exec->bin_slots;
> + spin_unlock_irqrestore(&vc4->job_lock, irqflags);
> +
> mutex_lock(&vc4->power_lock);
> if (--vc4->power_refcount == 0) {
> pm_runtime_mark_last_busy(&vc4->v3d->pdev->dev);
> @@ -968,9 +974,9 @@ vc4_gem_destroy(struct drm_device *dev)
> /* V3D should already have disabled its interrupt and cleared
>* the overflow allocation registers. Now free the object.
>*/
> - if (vc4->overflow_mem) {
> -
> drm_gem_object_unreference_unlocked(&vc4->overflow_mem->base.base);
> - vc4->overflow_mem = NULL;
> + if (vc4->bin_bo) {
> + drm_gem_object_put_unloc