On Fri, Aug 07, 2015 at 10:45:21AM +0100, Michel Thierry wrote:
> Gen8+ supports 48-bit virtual addresses, but some objects must always be
> allocated inside the 32-bit address range.
> 
> In specific, any resource used with flat/heapless (0x00000000-0xfffff000)
> General State Heap (GSH) or Instruction State Heap (ISH) must be in a
> 32-bit range, because the General State Offset and Instruction State Offset
> are limited to 32-bits.
> 
> The i915 driver has been modified to provide a flag to set when the 4GB
> limit is not necessary in a given bo (EXEC_OBJECT_SUPPORTS_48B_ADDRESS).
> 48-bit range will only be used when explicitly requested.
> 
> Calls to the new drm_intel_bo_emit_reloc_48bit function will have this flag
> set automatically, while calls to drm_intel_bo_emit_reloc will clear it.
> 
> v2: Make set/clear functions nops on pre-gen8 platforms, and use them
>     internally in emit_reloc functions (Ben)
>     s/48BADDRESS/48B_ADDRESS/ (Dave)
> v3: Keep set/clear functions internal, no-one needs to use them directly.
> 
> References: 
> http://lists.freedesktop.org/archives/intel-gfx/2015-July/072612.html
> Cc: Ben Widawsky <b...@bwidawsk.net>
> Cc: Dave Gordon <david.s.gor...@intel.com>
> Signed-off-by: Michel Thierry <michel.thie...@intel.com>
> ---
>  include/drm/i915_drm.h    |  3 ++-
>  intel/intel_bufmgr.c      | 16 ++++++++++++++
>  intel/intel_bufmgr.h      |  6 +++++-
>  intel/intel_bufmgr_gem.c  | 54 
> +++++++++++++++++++++++++++++++++++++++++++----
>  intel/intel_bufmgr_priv.h | 11 ++++++++++
>  5 files changed, 84 insertions(+), 6 deletions(-)
> 
> diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
> index ded43b1..426b25c 100644
> --- a/include/drm/i915_drm.h
> +++ b/include/drm/i915_drm.h
> @@ -680,7 +680,8 @@ struct drm_i915_gem_exec_object2 {
>  #define EXEC_OBJECT_NEEDS_FENCE (1<<0)
>  #define EXEC_OBJECT_NEEDS_GTT        (1<<1)
>  #define EXEC_OBJECT_WRITE    (1<<2)
> -#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_WRITE<<1)
> +#define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3)
> +#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_SUPPORTS_48B_ADDRESS<<1)
>       __u64 flags;
>  
>       __u64 rsvd1;
> diff --git a/intel/intel_bufmgr.c b/intel/intel_bufmgr.c
> index 14ea9f9..0bd5191 100644
> --- a/intel/intel_bufmgr.c
> +++ b/intel/intel_bufmgr.c
> @@ -202,6 +202,22 @@ drm_intel_bo_emit_reloc(drm_intel_bo *bo, uint32_t 
> offset,
>                       drm_intel_bo *target_bo, uint32_t target_offset,
>                       uint32_t read_domains, uint32_t write_domain)
>  {
> +     if (bo->bufmgr->bo_clear_supports_48b_address)
> +             bo->bufmgr->bo_clear_supports_48b_address(target_bo);

This is always true - you assign func to this func pointer unconditionally.
Also - why not return some meaningful value if the user does not have
enable_ppgtt=3 set? You can get that from I915_PARAM_HAS_ALIASING_PPGTT right
now. Check for gen >= 8 seems rather inadequate, and even then you're not
returning anything useful to the caller.

> +
> +     return bo->bufmgr->bo_emit_reloc(bo, offset,
> +                                      target_bo, target_offset,
> +                                      read_domains, write_domain);
> +}
> +

Using emit_reloc to set a BO flag seems confusing and can be error prone,
emit_reloc can be called many times and caller needs to be careful and call the
right function for each reloc.

> +int
> +drm_intel_bo_emit_reloc_48bit(drm_intel_bo *bo, uint32_t offset,
> +                     drm_intel_bo *target_bo, uint32_t target_offset,
> +                     uint32_t read_domains, uint32_t write_domain)
> +{
> +     if (bo->bufmgr->bo_set_supports_48b_address)
> +             bo->bufmgr->bo_set_supports_48b_address(target_bo);

Same situation as with clear_supports_48b_address.

> +
>       return bo->bufmgr->bo_emit_reloc(bo, offset,
>                                        target_bo, target_offset,
>                                        read_domains, write_domain);
> diff --git a/intel/intel_bufmgr.h b/intel/intel_bufmgr.h
> index 285919e..8f91ffe 100644
> --- a/intel/intel_bufmgr.h
> +++ b/intel/intel_bufmgr.h
> @@ -87,7 +87,8 @@ struct _drm_intel_bo {
>       /**
>        * Last seen card virtual address (offset from the beginning of the
>        * aperture) for the object.  This should be used to fill relocation
> -      * entries when calling drm_intel_bo_emit_reloc()
> +      * entries when calling drm_intel_bo_emit_reloc() or
> +      * drm_intel_bo_emit_reloc_48bit()
>        */
>       uint64_t offset64;
>  };
> @@ -147,6 +148,9 @@ int drm_intel_bufmgr_check_aperture_space(drm_intel_bo ** 
> bo_array, int count);
>  int drm_intel_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
>                           drm_intel_bo *target_bo, uint32_t target_offset,
>                           uint32_t read_domains, uint32_t write_domain);
> +int drm_intel_bo_emit_reloc_48bit(drm_intel_bo *bo, uint32_t offset,
> +                               drm_intel_bo *target_bo, uint32_t 
> target_offset,
> +                               uint32_t read_domains, uint32_t write_domain);
>  int drm_intel_bo_emit_reloc_fence(drm_intel_bo *bo, uint32_t offset,
>                                 drm_intel_bo *target_bo,
>                                 uint32_t target_offset,
> diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c
> index 41de396..713ed4e 100644
> --- a/intel/intel_bufmgr_gem.c
> +++ b/intel/intel_bufmgr_gem.c
> @@ -140,6 +140,7 @@ typedef struct _drm_intel_bufmgr_gem {
>  } drm_intel_bufmgr_gem;
>  
>  #define DRM_INTEL_RELOC_FENCE (1<<0)
> +#define DRM_INTEL_RELOC_SUPPORTS_48B_ADDRESS (2<<0)

Why are you duplicating information about support for 48B in both emit reloc
function argument and struct bo_gem?
  
>  typedef struct _drm_intel_reloc_target_info {
>       drm_intel_bo *bo;
> @@ -237,6 +238,14 @@ struct _drm_intel_bo_gem {
>       bool is_userptr;
>  
>       /**
> +      * Boolean of whether this buffer can be in the whole 48-bit address.
> +      *
> +      * By default, buffers will be keep in a 32-bit range, unless this
> +      * flag is explicitly set.
> +      */
> +     bool supports_48b_address;
> +
> +     /**
>        * Size in bytes of this buffer and its relocation descendents.
>        *
>        * Used to avoid costly tree walking in
> @@ -463,13 +472,18 @@ drm_intel_add_validate_buffer(drm_intel_bo *bo)
>  }
>  
>  static void
> -drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence)
> +drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence,
> +                            int supports_48b_address)
>  {
>       drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
>       drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
>       int index;
>  
>       if (bo_gem->validate_index != -1) {
> +             if (supports_48b_address) {
> +                     bufmgr_gem->exec2_objects[bo_gem->validate_index].flags 
> |=
> +                             EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
> +             }
>               if (need_fence)
>                       bufmgr_gem->exec2_objects[bo_gem->validate_index].flags 
> |=
>                               EXEC_OBJECT_NEEDS_FENCE;
> @@ -508,6 +522,10 @@ drm_intel_add_validate_buffer2(drm_intel_bo *bo, int 
> need_fence)
>               bufmgr_gem->exec2_objects[index].flags |=
>                       EXEC_OBJECT_NEEDS_FENCE;
>       }
> +     if (supports_48b_address) {
> +             bufmgr_gem->exec2_objects[index].flags |=
> +                     EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
> +     }
>       bufmgr_gem->exec_count++;
>  }
>  
> @@ -1925,11 +1943,33 @@ do_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
>       else
>               bo_gem->reloc_target_info[bo_gem->reloc_count].flags = 0;
>  
> +     if (target_bo_gem->supports_48b_address)
> +             bo_gem->reloc_target_info[bo_gem->reloc_count].flags |=
> +                     DRM_INTEL_RELOC_SUPPORTS_48B_ADDRESS;
> +
>       bo_gem->reloc_count++;
>  
>       return 0;
>  }
>  
> +static void drm_intel_gem_bo_set_supports_48b_address(drm_intel_bo *bo)
> +{
> +     drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
> +     drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
> +
> +     if (bufmgr_gem->gen >= 8)
> +             bo_gem->supports_48b_address = 1;
> +}
> +
> +static void drm_intel_gem_bo_clear_supports_48b_address(drm_intel_bo *bo)
> +{
> +     drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
> +     drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
> +
> +     if (bufmgr_gem->gen >= 8)
> +             bo_gem->supports_48b_address = 0;
> +}
> +
>  static int
>  drm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
>                           drm_intel_bo *target_bo, uint32_t target_offset,
> @@ -2043,7 +2083,7 @@ drm_intel_gem_bo_process_reloc2(drm_intel_bo *bo)
>  
>       for (i = 0; i < bo_gem->reloc_count; i++) {
>               drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo;
> -             int need_fence;
> +             int need_fence, supports_48b_addr;
>  
>               if (target_bo == bo)
>                       continue;
> @@ -2056,8 +2096,12 @@ drm_intel_gem_bo_process_reloc2(drm_intel_bo *bo)
>               need_fence = (bo_gem->reloc_target_info[i].flags &
>                             DRM_INTEL_RELOC_FENCE);
>  
> +             supports_48b_addr = (bo_gem->reloc_target_info[i].flags &
> +                                 DRM_INTEL_RELOC_SUPPORTS_48B_ADDRESS);
> +
>               /* Add the target to the validate list */
> -             drm_intel_add_validate_buffer2(target_bo, need_fence);
> +             drm_intel_add_validate_buffer2(target_bo, need_fence,
> +                                            supports_48b_addr);
>       }
>  }
>  
> @@ -2217,7 +2261,7 @@ do_exec2(drm_intel_bo *bo, int used, drm_intel_context 
> *ctx,
>       /* Add the batch buffer to the validation list.  There are no 
> relocations
>        * pointing to it.
>        */
> -     drm_intel_add_validate_buffer2(bo, 0);
> +     drm_intel_add_validate_buffer2(bo, 0, 0);
>  
>       memclear(execbuf);
>       execbuf.buffers_ptr = (uintptr_t)bufmgr_gem->exec2_objects;
> @@ -3299,6 +3343,8 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size)
>       bufmgr_gem->bufmgr.bo_wait_rendering = drm_intel_gem_bo_wait_rendering;
>       bufmgr_gem->bufmgr.bo_emit_reloc = drm_intel_gem_bo_emit_reloc;
>       bufmgr_gem->bufmgr.bo_emit_reloc_fence = 
> drm_intel_gem_bo_emit_reloc_fence;
> +     bufmgr_gem->bufmgr.bo_set_supports_48b_address = 
> drm_intel_gem_bo_set_supports_48b_address;
> +     bufmgr_gem->bufmgr.bo_clear_supports_48b_address = 
> drm_intel_gem_bo_clear_supports_48b_address;
>       bufmgr_gem->bufmgr.bo_pin = drm_intel_gem_bo_pin;
>       bufmgr_gem->bufmgr.bo_unpin = drm_intel_gem_bo_unpin;
>       bufmgr_gem->bufmgr.bo_get_tiling = drm_intel_gem_bo_get_tiling;
> diff --git a/intel/intel_bufmgr_priv.h b/intel/intel_bufmgr_priv.h
> index 59ebd18..774fa02 100644
> --- a/intel/intel_bufmgr_priv.h
> +++ b/intel/intel_bufmgr_priv.h
> @@ -152,6 +152,17 @@ struct _drm_intel_bufmgr {
>       void (*destroy) (drm_intel_bufmgr *bufmgr);
>  
>       /**
> +      * Set/Clear 48-bit address support flag in a given bo.
> +      *
> +      * Any resource used with flat/heapless (0x00000000-0xfffff000)
> +      * General State Heap (GSH) or Intructions State Heap (ISH) must
> +      * be in a 32-bit range. 48-bit range will only be used when explicitly
> +      * requested.
> +      */
> +     void (*bo_set_supports_48b_address) (drm_intel_bo *bo);
> +     void (*bo_clear_supports_48b_address) (drm_intel_bo *bo);
> +
> +     /**
>        * Add relocation entry in reloc_buf, which will be updated with the
>        * target buffer's real offset on on command submission.
>        *
> -- 
> 2.5.0

You also haven't updated the dump_validation_list with proper format specifier
(addresses are truncated otherwise).

I really don't like hiding set_supports_48b_address/clear_supports_48b_address
in emit reloc - making this explicit would be a better approach.
Of course this is just my opinion, but let me post a bit different version of
this API - just as an RFC so you can see how it would look with explicit set on
bo and without adding another emit_reloc variant.

-- 
Michał Winiarski
Intel VPG
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to