Re: [Mesa-dev] [PATCH 4/4] winsys/amdgpu: fix VDPAU interop by having one amdgpu_winsys_bo per BO

2018-07-18 Thread Leo Liu

The series are

Acked-by: Leo Liu 


On 07/16/2018 06:03 PM, Leo Liu wrote:



On 2018-07-16 04:01 PM, Marek Olšák wrote:

From: Marek Olšák 

Dependencies between rings are inserted correctly if a buffer is
represented by only one unique amdgpu_winsys_bo instance.
Use a hash table keyed by amdgpu_bo_handle to have exactly one
amdgpu_winsys_bo per amdgpu_bo_handle.

The series are:
Tested-by: Leo Liu 



v2: return offset and stride properly
---
  src/gallium/winsys/amdgpu/drm/amdgpu_bo.c | 36 ---
  src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c |  5 +++
  src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h |  5 +++
  3 files changed, 41 insertions(+), 5 deletions(-)

diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c 
b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c

index d9192c209e2..80563d3df98 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
@@ -21,20 +21,21 @@
   * USE OR OTHER DEALINGS IN THE SOFTWARE.
   *
   * The above copyright notice and this permission notice (including 
the
   * next paragraph) shall be included in all copies or substantial 
portions

   * of the Software.
   */
    #include "amdgpu_cs.h"
    #include "util/os_time.h"
+#include "util/u_hash_table.h"
  #include "state_tracker/drm_driver.h"
  #include 
  #include 
  #include 
  #include 
    #ifndef AMDGPU_GEM_CREATE_VM_ALWAYS_VALID
  #define AMDGPU_GEM_CREATE_VM_ALWAYS_VALID (1 << 6)
  #endif
  @@ -172,20 +173,24 @@ void amdgpu_bo_destroy(struct pb_buffer *_buf)
   assert(bo->bo && "must not be called for slab entries");
   if (ws->debug_all_bos) {
    simple_mtx_lock(>global_bo_list_lock);
    LIST_DEL(>u.real.global_list_item);
    ws->num_buffers--;
    simple_mtx_unlock(>global_bo_list_lock);
 }
  +   simple_mtx_lock(>bo_export_table_lock);
+   util_hash_table_remove(ws->bo_export_table, bo->bo);
+   simple_mtx_unlock(>bo_export_table_lock);
+
 amdgpu_bo_va_op(bo->bo, 0, bo->base.size, bo->va, 0, 
AMDGPU_VA_OP_UNMAP);

 amdgpu_va_range_free(bo->u.real.va_handle);
 amdgpu_bo_free(bo->bo);
   amdgpu_bo_remove_fences(bo);
   if (bo->initial_domain & RADEON_DOMAIN_VRAM)
    ws->allocated_vram -= align64(bo->base.size, 
ws->info.gart_page_size);

 else if (bo->initial_domain & RADEON_DOMAIN_GTT)
    ws->allocated_gtt -= align64(bo->base.size, 
ws->info.gart_page_size);
@@ -1278,24 +1283,41 @@ static struct pb_buffer 
*amdgpu_bo_from_handle(struct radeon_winsys *rws,

 case WINSYS_HANDLE_TYPE_SHARED:
    type = amdgpu_bo_handle_type_gem_flink_name;
    break;
 case WINSYS_HANDLE_TYPE_FD:
    type = amdgpu_bo_handle_type_dma_buf_fd;
    break;
 default:
    return NULL;
 }
  +   if (stride)
+  *stride = whandle->stride;
+   if (offset)
+  *offset = whandle->offset;
+
 r = amdgpu_bo_import(ws->dev, type, whandle->handle, );
 if (r)
    return NULL;
  +   simple_mtx_lock(>bo_export_table_lock);
+   bo = util_hash_table_get(ws->bo_export_table, result.buf_handle);
+
+   /* If the amdgpu_winsys_bo instance already exists, bump the 
reference

+    * counter and return it.
+    */
+   if (bo) {
+  p_atomic_inc(>base.reference.count);
+  simple_mtx_unlock(>bo_export_table_lock);
+  return >base;
+   }
+
 /* Get initial domains. */
 r = amdgpu_bo_query_info(result.buf_handle, );
 if (r)
    goto error;
   r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
   result.alloc_size, 1 << 20, 0, , 
_handle,

   AMDGPU_VA_RANGE_HIGH);
 if (r)
    goto error;
@@ -1319,49 +1341,49 @@ static struct pb_buffer 
*amdgpu_bo_from_handle(struct radeon_winsys *rws,

 bo->bo = result.buf_handle;
 bo->base.size = result.alloc_size;
 bo->base.vtbl = _winsys_bo_vtbl;
 bo->ws = ws;
 bo->va = va;
 bo->u.real.va_handle = va_handle;
 bo->initial_domain = initial;
 bo->unique_id = __sync_fetch_and_add(>next_bo_unique_id, 1);
 bo->is_shared = true;
  -   if (stride)
-  *stride = whandle->stride;
-   if (offset)
-  *offset = whandle->offset;
-
 if (bo->initial_domain & RADEON_DOMAIN_VRAM)
    ws->allocated_vram += align64(bo->base.size, 
ws->info.gart_page_size);

 else if (bo->initial_domain & RADEON_DOMAIN_GTT)
    ws->allocated_gtt += align64(bo->base.size, 
ws->info.gart_page_size);

   amdgpu_add_buffer_to_global_list(bo);
  +   util_hash_table_set(ws->bo_export_table, bo->bo, bo);
+   simple_mtx_unlock(>bo_export_table_lock);
+
 return >base;
    error:
+   simple_mtx_unlock(>bo_export_table_lock);
 if (bo)
    FREE(bo);
 if (va_handle)
    amdgpu_va_range_free(va_handle);
 amdgpu_bo_free(result.buf_handle);
 return NULL;
  }
    static bool amdgpu_bo_get_handle(struct pb_buffer *buffer,
   unsigned stride, unsigned offset,
   

Re: [Mesa-dev] [PATCH 4/4] winsys/amdgpu: fix VDPAU interop by having one amdgpu_winsys_bo per BO

2018-07-16 Thread Leo Liu



On 2018-07-16 04:01 PM, Marek Olšák wrote:

From: Marek Olšák 

Dependencies between rings are inserted correctly if a buffer is
represented by only one unique amdgpu_winsys_bo instance.
Use a hash table keyed by amdgpu_bo_handle to have exactly one
amdgpu_winsys_bo per amdgpu_bo_handle.

The series are:
Tested-by: Leo Liu 



v2: return offset and stride properly
---
  src/gallium/winsys/amdgpu/drm/amdgpu_bo.c | 36 ---
  src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c |  5 +++
  src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h |  5 +++
  3 files changed, 41 insertions(+), 5 deletions(-)

diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c 
b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
index d9192c209e2..80563d3df98 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
@@ -21,20 +21,21 @@
   * USE OR OTHER DEALINGS IN THE SOFTWARE.
   *
   * The above copyright notice and this permission notice (including the
   * next paragraph) shall be included in all copies or substantial portions
   * of the Software.
   */
  
  #include "amdgpu_cs.h"
  
  #include "util/os_time.h"

+#include "util/u_hash_table.h"
  #include "state_tracker/drm_driver.h"
  #include 
  #include 
  #include 
  #include 
  
  #ifndef AMDGPU_GEM_CREATE_VM_ALWAYS_VALID

  #define AMDGPU_GEM_CREATE_VM_ALWAYS_VALID (1 << 6)
  #endif
  
@@ -172,20 +173,24 @@ void amdgpu_bo_destroy(struct pb_buffer *_buf)
  
 assert(bo->bo && "must not be called for slab entries");
  
 if (ws->debug_all_bos) {

simple_mtx_lock(>global_bo_list_lock);
LIST_DEL(>u.real.global_list_item);
ws->num_buffers--;
simple_mtx_unlock(>global_bo_list_lock);
 }
  
+   simple_mtx_lock(>bo_export_table_lock);

+   util_hash_table_remove(ws->bo_export_table, bo->bo);
+   simple_mtx_unlock(>bo_export_table_lock);
+
 amdgpu_bo_va_op(bo->bo, 0, bo->base.size, bo->va, 0, AMDGPU_VA_OP_UNMAP);
 amdgpu_va_range_free(bo->u.real.va_handle);
 amdgpu_bo_free(bo->bo);
  
 amdgpu_bo_remove_fences(bo);
  
 if (bo->initial_domain & RADEON_DOMAIN_VRAM)

ws->allocated_vram -= align64(bo->base.size, ws->info.gart_page_size);
 else if (bo->initial_domain & RADEON_DOMAIN_GTT)
ws->allocated_gtt -= align64(bo->base.size, ws->info.gart_page_size);
@@ -1278,24 +1283,41 @@ static struct pb_buffer *amdgpu_bo_from_handle(struct 
radeon_winsys *rws,
 case WINSYS_HANDLE_TYPE_SHARED:
type = amdgpu_bo_handle_type_gem_flink_name;
break;
 case WINSYS_HANDLE_TYPE_FD:
type = amdgpu_bo_handle_type_dma_buf_fd;
break;
 default:
return NULL;
 }
  
+   if (stride)

+  *stride = whandle->stride;
+   if (offset)
+  *offset = whandle->offset;
+
 r = amdgpu_bo_import(ws->dev, type, whandle->handle, );
 if (r)
return NULL;
  
+   simple_mtx_lock(>bo_export_table_lock);

+   bo = util_hash_table_get(ws->bo_export_table, result.buf_handle);
+
+   /* If the amdgpu_winsys_bo instance already exists, bump the reference
+* counter and return it.
+*/
+   if (bo) {
+  p_atomic_inc(>base.reference.count);
+  simple_mtx_unlock(>bo_export_table_lock);
+  return >base;
+   }
+
 /* Get initial domains. */
 r = amdgpu_bo_query_info(result.buf_handle, );
 if (r)
goto error;
  
 r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,

   result.alloc_size, 1 << 20, 0, , _handle,
 AMDGPU_VA_RANGE_HIGH);
 if (r)
goto error;
@@ -1319,49 +1341,49 @@ static struct pb_buffer *amdgpu_bo_from_handle(struct 
radeon_winsys *rws,
 bo->bo = result.buf_handle;
 bo->base.size = result.alloc_size;
 bo->base.vtbl = _winsys_bo_vtbl;
 bo->ws = ws;
 bo->va = va;
 bo->u.real.va_handle = va_handle;
 bo->initial_domain = initial;
 bo->unique_id = __sync_fetch_and_add(>next_bo_unique_id, 1);
 bo->is_shared = true;
  
-   if (stride)

-  *stride = whandle->stride;
-   if (offset)
-  *offset = whandle->offset;
-
 if (bo->initial_domain & RADEON_DOMAIN_VRAM)
ws->allocated_vram += align64(bo->base.size, ws->info.gart_page_size);
 else if (bo->initial_domain & RADEON_DOMAIN_GTT)
ws->allocated_gtt += align64(bo->base.size, ws->info.gart_page_size);
  
 amdgpu_add_buffer_to_global_list(bo);
  
+   util_hash_table_set(ws->bo_export_table, bo->bo, bo);

+   simple_mtx_unlock(>bo_export_table_lock);
+
 return >base;
  
  error:

+   simple_mtx_unlock(>bo_export_table_lock);
 if (bo)
FREE(bo);
 if (va_handle)
amdgpu_va_range_free(va_handle);
 amdgpu_bo_free(result.buf_handle);
 return NULL;
  }
  
  static bool amdgpu_bo_get_handle(struct pb_buffer *buffer,

   unsigned stride, unsigned offset,
   unsigned slice_size,