On 8/7/2015 11:56 AM, Michał Winiarski wrote:
> On Fri, Aug 07, 2015 at 10:45:21AM +0100, Michel Thierry wrote:
>> Gen8+ supports 48-bit virtual addresses, but some objects must always be
>> allocated inside the 32-bit address range.
>>
>> In specific, any resource used with flat/heapless (0x00000000-0xfffff000)
>> General State Heap (GSH) or Instruction State Heap (ISH) must be in a
>> 32-bit range, because the General State Offset and Instruction State Offset
>> are limited to 32-bits.
>>
>> The i915 driver has been modified to provide a flag to set when the 4GB
>> limit is not necessary in a given bo (EXEC_OBJECT_SUPPORTS_48B_ADDRESS).
>> 48-bit range will only be used when explicitly requested.
>>
>> Calls to the new drm_intel_bo_emit_reloc_48bit function will have this flag
>> set automatically, while calls to drm_intel_bo_emit_reloc will clear it.
>>
>> v2: Make set/clear functions nops on pre-gen8 platforms, and use them
>>      internally in emit_reloc functions (Ben)
>>      s/48BADDRESS/48B_ADDRESS/ (Dave)
>> v3: Keep set/clear functions internal, no-one needs to use them directly.
>>
>> References: 
>> http://lists.freedesktop.org/archives/intel-gfx/2015-July/072612.html
>> Cc: Ben Widawsky <ben at bwidawsk.net>
>> Cc: Dave Gordon <david.s.gordon at intel.com>
>> Signed-off-by: Michel Thierry <michel.thierry at intel.com>
>> ---
>>   include/drm/i915_drm.h    |  3 ++-
>>   intel/intel_bufmgr.c      | 16 ++++++++++++++
>>   intel/intel_bufmgr.h      |  6 +++++-
>>   intel/intel_bufmgr_gem.c  | 54 
>> +++++++++++++++++++++++++++++++++++++++++++----
>>   intel/intel_bufmgr_priv.h | 11 ++++++++++
>>   5 files changed, 84 insertions(+), 6 deletions(-)
>>
>> diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
>> index ded43b1..426b25c 100644
>> --- a/include/drm/i915_drm.h
>> +++ b/include/drm/i915_drm.h
>> @@ -680,7 +680,8 @@ struct drm_i915_gem_exec_object2 {
>>   #define EXEC_OBJECT_NEEDS_FENCE (1<<0)
>>   #define EXEC_OBJECT_NEEDS_GTT      (1<<1)
>>   #define EXEC_OBJECT_WRITE  (1<<2)
>> -#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_WRITE<<1)
>> +#define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3)
>> +#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_SUPPORTS_48B_ADDRESS<<1)
>>      __u64 flags;
>>
>>      __u64 rsvd1;
>> diff --git a/intel/intel_bufmgr.c b/intel/intel_bufmgr.c
>> index 14ea9f9..0bd5191 100644
>> --- a/intel/intel_bufmgr.c
>> +++ b/intel/intel_bufmgr.c
>> @@ -202,6 +202,22 @@ drm_intel_bo_emit_reloc(drm_intel_bo *bo, uint32_t 
>> offset,
>>                      drm_intel_bo *target_bo, uint32_t target_offset,
>>                      uint32_t read_domains, uint32_t write_domain)
>>   {
>> +    if (bo->bufmgr->bo_clear_supports_48b_address)
>> +            bo->bufmgr->bo_clear_supports_48b_address(target_bo);
>
> This is always true - you assign func to this func pointer unconditionally.
> Also - why not return some meaningful value if the user does not have
> enable_ppgtt=3 set? You can get that from I915_PARAM_HAS_ALIASING_PPGTT right
> now. Check for gen >= 8 seems rather inadequate, and even then you're not
> returning anything useful to the caller.
>
>> +
>> +    return bo->bufmgr->bo_emit_reloc(bo, offset,
>> +                                     target_bo, target_offset,
>> +                                     read_domains, write_domain);
>> +}
>> +
>
> Using emit_reloc to set a BO flag seems confusing and can be error prone,
> emit_reloc can be called many times and caller needs to be careful and call 
> the
> right function for each reloc.
>
>> +int
>> +drm_intel_bo_emit_reloc_48bit(drm_intel_bo *bo, uint32_t offset,
>> +                    drm_intel_bo *target_bo, uint32_t target_offset,
>> +                    uint32_t read_domains, uint32_t write_domain)
>> +{
>> +    if (bo->bufmgr->bo_set_supports_48b_address)
>> +            bo->bufmgr->bo_set_supports_48b_address(target_bo);
>
> Same situation as with clear_supports_48b_address.
>
>> +
>>      return bo->bufmgr->bo_emit_reloc(bo, offset,
>>                                       target_bo, target_offset,
>>                                       read_domains, write_domain);
>> diff --git a/intel/intel_bufmgr.h b/intel/intel_bufmgr.h
>> index 285919e..8f91ffe 100644
>> --- a/intel/intel_bufmgr.h
>> +++ b/intel/intel_bufmgr.h
>> @@ -87,7 +87,8 @@ struct _drm_intel_bo {
>>      /**
>>       * Last seen card virtual address (offset from the beginning of the
>>       * aperture) for the object.  This should be used to fill relocation
>> -     * entries when calling drm_intel_bo_emit_reloc()
>> +     * entries when calling drm_intel_bo_emit_reloc() or
>> +     * drm_intel_bo_emit_reloc_48bit()
>>       */
>>      uint64_t offset64;
>>   };
>> @@ -147,6 +148,9 @@ int drm_intel_bufmgr_check_aperture_space(drm_intel_bo 
>> ** bo_array, int count);
>>   int drm_intel_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
>>                          drm_intel_bo *target_bo, uint32_t target_offset,
>>                          uint32_t read_domains, uint32_t write_domain);
>> +int drm_intel_bo_emit_reloc_48bit(drm_intel_bo *bo, uint32_t offset,
>> +                              drm_intel_bo *target_bo, uint32_t 
>> target_offset,
>> +                              uint32_t read_domains, uint32_t write_domain);
>>   int drm_intel_bo_emit_reloc_fence(drm_intel_bo *bo, uint32_t offset,
>>                                drm_intel_bo *target_bo,
>>                                uint32_t target_offset,
>> diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c
>> index 41de396..713ed4e 100644
>> --- a/intel/intel_bufmgr_gem.c
>> +++ b/intel/intel_bufmgr_gem.c
>> @@ -140,6 +140,7 @@ typedef struct _drm_intel_bufmgr_gem {
>>   } drm_intel_bufmgr_gem;
>>
>>   #define DRM_INTEL_RELOC_FENCE (1<<0)
>> +#define DRM_INTEL_RELOC_SUPPORTS_48B_ADDRESS (2<<0)
>
> Why are you duplicating information about support for 48B in both emit reloc
> function argument and struct bo_gem?
>
>>   typedef struct _drm_intel_reloc_target_info {
>>      drm_intel_bo *bo;
>> @@ -237,6 +238,14 @@ struct _drm_intel_bo_gem {
>>      bool is_userptr;
>>
>>      /**
>> +     * Boolean of whether this buffer can be in the whole 48-bit address.
>> +     *
>> +     * By default, buffers will be keep in a 32-bit range, unless this
>> +     * flag is explicitly set.
>> +     */
>> +    bool supports_48b_address;
>> +
>> +    /**
>>       * Size in bytes of this buffer and its relocation descendents.
>>       *
>>       * Used to avoid costly tree walking in
>> @@ -463,13 +472,18 @@ drm_intel_add_validate_buffer(drm_intel_bo *bo)
>>   }
>>
>>   static void
>> -drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence)
>> +drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence,
>> +                           int supports_48b_address)
>>   {
>>      drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
>>      drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
>>      int index;
>>
>>      if (bo_gem->validate_index != -1) {
>> +            if (supports_48b_address) {
>> +                    bufmgr_gem->exec2_objects[bo_gem->validate_index].flags 
>> |=
>> +                            EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
>> +            }
>>              if (need_fence)
>>                      bufmgr_gem->exec2_objects[bo_gem->validate_index].flags 
>> |=
>>                              EXEC_OBJECT_NEEDS_FENCE;
>> @@ -508,6 +522,10 @@ drm_intel_add_validate_buffer2(drm_intel_bo *bo, int 
>> need_fence)
>>              bufmgr_gem->exec2_objects[index].flags |=
>>                      EXEC_OBJECT_NEEDS_FENCE;
>>      }
>> +    if (supports_48b_address) {
>> +            bufmgr_gem->exec2_objects[index].flags |=
>> +                    EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
>> +    }
>>      bufmgr_gem->exec_count++;
>>   }
>>
>> @@ -1925,11 +1943,33 @@ do_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
>>      else
>>              bo_gem->reloc_target_info[bo_gem->reloc_count].flags = 0;
>>
>> +    if (target_bo_gem->supports_48b_address)
>> +            bo_gem->reloc_target_info[bo_gem->reloc_count].flags |=
>> +                    DRM_INTEL_RELOC_SUPPORTS_48B_ADDRESS;
>> +
>>      bo_gem->reloc_count++;
>>
>>      return 0;
>>   }
>>
>> +static void drm_intel_gem_bo_set_supports_48b_address(drm_intel_bo *bo)
>> +{
>> +    drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
>> +    drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
>> +
>> +    if (bufmgr_gem->gen >= 8)
>> +            bo_gem->supports_48b_address = 1;
>> +}
>> +
>> +static void drm_intel_gem_bo_clear_supports_48b_address(drm_intel_bo *bo)
>> +{
>> +    drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
>> +    drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
>> +
>> +    if (bufmgr_gem->gen >= 8)
>> +            bo_gem->supports_48b_address = 0;
>> +}
>> +
>>   static int
>>   drm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
>>                          drm_intel_bo *target_bo, uint32_t target_offset,
>> @@ -2043,7 +2083,7 @@ drm_intel_gem_bo_process_reloc2(drm_intel_bo *bo)
>>
>>      for (i = 0; i < bo_gem->reloc_count; i++) {
>>              drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo;
>> -            int need_fence;
>> +            int need_fence, supports_48b_addr;
>>
>>              if (target_bo == bo)
>>                      continue;
>> @@ -2056,8 +2096,12 @@ drm_intel_gem_bo_process_reloc2(drm_intel_bo *bo)
>>              need_fence = (bo_gem->reloc_target_info[i].flags &
>>                            DRM_INTEL_RELOC_FENCE);
>>
>> +            supports_48b_addr = (bo_gem->reloc_target_info[i].flags &
>> +                                DRM_INTEL_RELOC_SUPPORTS_48B_ADDRESS);
>> +
>>              /* Add the target to the validate list */
>> -            drm_intel_add_validate_buffer2(target_bo, need_fence);
>> +            drm_intel_add_validate_buffer2(target_bo, need_fence,
>> +                                           supports_48b_addr);
>>      }
>>   }
>>
>> @@ -2217,7 +2261,7 @@ do_exec2(drm_intel_bo *bo, int used, drm_intel_context 
>> *ctx,
>>      /* Add the batch buffer to the validation list.  There are no 
>> relocations
>>       * pointing to it.
>>       */
>> -    drm_intel_add_validate_buffer2(bo, 0);
>> +    drm_intel_add_validate_buffer2(bo, 0, 0);
>>
>>      memclear(execbuf);
>>      execbuf.buffers_ptr = (uintptr_t)bufmgr_gem->exec2_objects;
>> @@ -3299,6 +3343,8 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size)
>>      bufmgr_gem->bufmgr.bo_wait_rendering = drm_intel_gem_bo_wait_rendering;
>>      bufmgr_gem->bufmgr.bo_emit_reloc = drm_intel_gem_bo_emit_reloc;
>>      bufmgr_gem->bufmgr.bo_emit_reloc_fence = 
>> drm_intel_gem_bo_emit_reloc_fence;
>> +    bufmgr_gem->bufmgr.bo_set_supports_48b_address = 
>> drm_intel_gem_bo_set_supports_48b_address;
>> +    bufmgr_gem->bufmgr.bo_clear_supports_48b_address = 
>> drm_intel_gem_bo_clear_supports_48b_address;
>>      bufmgr_gem->bufmgr.bo_pin = drm_intel_gem_bo_pin;
>>      bufmgr_gem->bufmgr.bo_unpin = drm_intel_gem_bo_unpin;
>>      bufmgr_gem->bufmgr.bo_get_tiling = drm_intel_gem_bo_get_tiling;
>> diff --git a/intel/intel_bufmgr_priv.h b/intel/intel_bufmgr_priv.h
>> index 59ebd18..774fa02 100644
>> --- a/intel/intel_bufmgr_priv.h
>> +++ b/intel/intel_bufmgr_priv.h
>> @@ -152,6 +152,17 @@ struct _drm_intel_bufmgr {
>>      void (*destroy) (drm_intel_bufmgr *bufmgr);
>>
>>      /**
>> +     * Set/Clear 48-bit address support flag in a given bo.
>> +     *
>> +     * Any resource used with flat/heapless (0x00000000-0xfffff000)
>> +     * General State Heap (GSH) or Intructions State Heap (ISH) must
>> +     * be in a 32-bit range. 48-bit range will only be used when explicitly
>> +     * requested.
>> +     */
>> +    void (*bo_set_supports_48b_address) (drm_intel_bo *bo);
>> +    void (*bo_clear_supports_48b_address) (drm_intel_bo *bo);
>> +
>> +    /**
>>       * Add relocation entry in reloc_buf, which will be updated with the
>>       * target buffer's real offset on on command submission.
>>       *
>> --
>> 2.5.0
>
> You also haven't updated the dump_validation_list with proper format specifier
> (addresses are truncated otherwise).
>
> I really don't like hiding set_supports_48b_address/clear_supports_48b_address
> in emit reloc - making this explicit would be a better approach.
> Of course this is just my opinion, but let me post a bit different version of
> this API - just as an RFC so you can see how it would look with explicit set 
> on
> bo and without adding another emit_reloc variant.
>

Hi Michał,

Ben suggested having the set/clear in emit reloc as this is the only 
place mesa cares about these wa.

But I see your point, this will be used not only by mesa, so we should 
have something that is good for all the other projects.

-Michel

Reply via email to