Re: [PATCH 1/3] drm/ttm: Provide struct ttm_global for referencing TTM global state

2018-10-18 Thread Huang Rui
On Fri, Oct 19, 2018 at 12:27:50AM +0800, Thomas Zimmermann wrote:
> The new struct ttm_global provides drivers with TTM's global memory and
> BO in a unified way. Initialization and release is handled internally.
> 
> The functionality provided by struct ttm_global is currently re-implemented
> by a dozen individual DRM drivers using struct drm_global. The implementation
> of struct ttm_global is also built on top of drm_global, so it can co-exist
> with the existing drivers. Once all TTM-based drivers have been converted to
> struct ttm_global, the implementation of struct drm_global can be made
> private to TTM.
> 
> Signed-off-by: Thomas Zimmermann 
> ---
>  drivers/gpu/drm/ttm/Makefile |  2 +-
>  drivers/gpu/drm/ttm/ttm_global.c | 98 
>  include/drm/drm_global.h |  8 +++
>  include/drm/ttm/ttm_global.h | 79 +
>  4 files changed, 186 insertions(+), 1 deletion(-)
>  create mode 100644 drivers/gpu/drm/ttm/ttm_global.c
>  create mode 100644 include/drm/ttm/ttm_global.h
> 
> diff --git a/drivers/gpu/drm/ttm/Makefile b/drivers/gpu/drm/ttm/Makefile
> index 01fc670ce7a2..b7272b26e9f3 100644
> --- a/drivers/gpu/drm/ttm/Makefile
> +++ b/drivers/gpu/drm/ttm/Makefile
> @@ -5,7 +5,7 @@
>  ttm-y := ttm_memory.o ttm_tt.o ttm_bo.o \
>   ttm_bo_util.o ttm_bo_vm.o ttm_module.o \
>   ttm_execbuf_util.o ttm_page_alloc.o ttm_bo_manager.o \
> - ttm_page_alloc_dma.o
> + ttm_page_alloc_dma.o ttm_global.o
>  ttm-$(CONFIG_AGP) += ttm_agp_backend.o
>  
>  obj-$(CONFIG_DRM_TTM) += ttm.o
> diff --git a/drivers/gpu/drm/ttm/ttm_global.c 
> b/drivers/gpu/drm/ttm/ttm_global.c
> new file mode 100644
> index ..ca9da0a46147
> --- /dev/null
> +++ b/drivers/gpu/drm/ttm/ttm_global.c
> @@ -0,0 +1,98 @@
> +/* SPDX-License-Identifier: GPL-2.0 OR MIT */
> +/**
> + *
> + * Copyright 2008-2009 VMware, Inc., Palo Alto, CA., USA
> + * All Rights Reserved.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the
> + * "Software"), to deal in the Software without restriction, including
> + * without limitation the rights to use, copy, modify, merge, publish,
> + * distribute, sub license, and/or sell copies of the Software, and to
> + * permit persons to whom the Software is furnished to do so, subject to
> + * the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the
> + * next paragraph) shall be included in all copies or substantial portions
> + * of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
> + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY 
> CLAIM,
> + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
> + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
> + * USE OR OTHER DEALINGS IN THE SOFTWARE.
> + *
> + **/
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +static int ttm_global_init_mem(struct drm_global_reference *ref)
> +{
> + BUG_ON(!ref->object);
> + return ttm_mem_global_init(ref->object);
> +}
> +
> +static void ttm_global_release_mem(struct drm_global_reference *ref)
> +{
> + BUG_ON(!ref->object);
> + ttm_mem_global_release(ref->object);
> +}
> +
> +static int ttm_global_init_bo(struct drm_global_reference *ref)
> +{
> + struct ttm_global *glob =
> + container_of(ref, struct ttm_global, bo_ref);
> + BUG_ON(!ref->object);
> + BUG_ON(!glob->mem_ref.object);
> + return ttm_bo_global_init(ref->object, glob->mem_ref.object);
> +}
> +
> +static void ttm_global_release_bo(struct drm_global_reference *ref)
> +{
> + BUG_ON(!ref->object);
> + ttm_bo_global_release(ref->object);
> +}
> +
> +int ttm_global_init(struct ttm_global *glob)
> +{
> + int ret;
> +

It would be better to add a check here to make sure glob is not NULL:

	if (!glob)
		return -EINVAL;

Otherwise, this looks good to me.

Thanks,
Ray

> + glob->mem_ref.global_type = DRM_GLOBAL_TTM_MEM;
> + glob->mem_ref.size = sizeof(struct ttm_mem_global);
> + glob->bo_ref.object = NULL;
> + glob->mem_ref.init = &ttm_global_init_mem;
> + glob->mem_ref.release = &ttm_global_release_mem;
> + ret = drm_global_item_ref(&glob->mem_ref);
> + if (ret)
> + return ret;
> +
> + glob->bo_ref.global_type = DRM_GLOBAL_TTM_BO;
> + glob->bo_ref.size = sizeof(struct ttm_bo_global);
> + glob->bo_ref.object = NULL;
> + glob->bo_ref.init = &ttm_global_init_bo;
> + glob->bo_ref.release = &ttm_global_release_bo;
> + ret = 
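
For illustration, here is a minimal sketch of how ttm_global_init() would look
with the NULL check suggested above folded in (only the beginning of the
function is shown; the remainder stays as in the posted patch):

	int ttm_global_init(struct ttm_global *glob)
	{
		int ret;

		/* reject a NULL pointer up front, as suggested in the review */
		if (!glob)
			return -EINVAL;

		glob->mem_ref.global_type = DRM_GLOBAL_TTM_MEM;
		glob->mem_ref.size = sizeof(struct ttm_mem_global);
		glob->mem_ref.init = &ttm_global_init_mem;
		glob->mem_ref.release = &ttm_global_release_mem;
		ret = drm_global_item_ref(&glob->mem_ref);
		if (ret)
			return ret;
		/* ... bo_ref setup and drm_global_item_ref(&glob->bo_ref) as posted ... */
	}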

Re: [PATCH 3/3] drm/radeon: Replace TTM initialization/release with ttm_global

2018-10-18 Thread Huang Rui
On Fri, Oct 19, 2018 at 12:27:52AM +0800, Thomas Zimmermann wrote:
> Unified initialization and release of the global TTM state is provided
> by struct ttm_global and its interfaces.
> 
> Signed-off-by: Thomas Zimmermann 

Reviewed-by: Huang Rui 

> ---
>  drivers/gpu/drm/radeon/radeon.h |  4 +--
>  drivers/gpu/drm/radeon/radeon_ttm.c | 40 -
>  2 files changed, 7 insertions(+), 37 deletions(-)
> 
> diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
> index 1a6f6edb3515..554c0421b779 100644
> --- a/drivers/gpu/drm/radeon/radeon.h
> +++ b/drivers/gpu/drm/radeon/radeon.h
> @@ -73,6 +73,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  #include 
>  
> @@ -448,8 +449,7 @@ struct radeon_surface_reg {
>   * TTM.
>   */
>  struct radeon_mman {
> - struct ttm_bo_global_refbo_global_ref;
> - struct drm_global_reference mem_global_ref;
> + struct ttm_global   glob;
>   struct ttm_bo_devicebdev;
>   boolmem_global_referenced;
>   boolinitialized;
> diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c 
> b/drivers/gpu/drm/radeon/radeon_ttm.c
> index dac4ec5a120b..363860e82892 100644
> --- a/drivers/gpu/drm/radeon/radeon_ttm.c
> +++ b/drivers/gpu/drm/radeon/radeon_ttm.c
> @@ -64,45 +64,16 @@ static struct radeon_device *radeon_get_rdev(struct 
> ttm_bo_device *bdev)
>  /*
>   * Global memory.
>   */
> -static int radeon_ttm_mem_global_init(struct drm_global_reference *ref)
> -{
> - return ttm_mem_global_init(ref->object);
> -}
> -
> -static void radeon_ttm_mem_global_release(struct drm_global_reference *ref)
> -{
> - ttm_mem_global_release(ref->object);
> -}
>  
>  static int radeon_ttm_global_init(struct radeon_device *rdev)
>  {
> - struct drm_global_reference *global_ref;
>   int r;
>  
>   rdev->mman.mem_global_referenced = false;
> - global_ref = &rdev->mman.mem_global_ref;
> - global_ref->global_type = DRM_GLOBAL_TTM_MEM;
> - global_ref->size = sizeof(struct ttm_mem_global);
> - global_ref->init = &radeon_ttm_mem_global_init;
> - global_ref->release = &radeon_ttm_mem_global_release;
> - r = drm_global_item_ref(global_ref);
> - if (r != 0) {
> - DRM_ERROR("Failed setting up TTM memory accounting "
> -   "subsystem.\n");
> - return r;
> - }
>  
> - rdev->mman.bo_global_ref.mem_glob =
> - rdev->mman.mem_global_ref.object;
> - global_ref = &rdev->mman.bo_global_ref.ref;
> - global_ref->global_type = DRM_GLOBAL_TTM_BO;
> - global_ref->size = sizeof(struct ttm_bo_global);
> - global_ref->init = &ttm_bo_global_ref_init;
> - global_ref->release = &ttm_bo_global_ref_release;
> - r = drm_global_item_ref(global_ref);
> - if (r != 0) {
> - DRM_ERROR("Failed setting up TTM BO subsystem.\n");
> - drm_global_item_unref(&rdev->mman.mem_global_ref);
> + r = ttm_global_init(&rdev->mman.glob);
> + if (r) {
> + DRM_ERROR("Failed setting up TTM subsystem.\n");
>   return r;
>   }
>  
> @@ -113,8 +84,7 @@ static int radeon_ttm_global_init(struct radeon_device 
> *rdev)
>  static void radeon_ttm_global_fini(struct radeon_device *rdev)
>  {
>   if (rdev->mman.mem_global_referenced) {
> - drm_global_item_unref(&rdev->mman.bo_global_ref.ref);
> - drm_global_item_unref(&rdev->mman.mem_global_ref);
> + ttm_global_release(&rdev->mman.glob);
>   rdev->mman.mem_global_referenced = false;
>   }
>  }
> @@ -853,7 +823,7 @@ int radeon_ttm_init(struct radeon_device *rdev)
>   }
>   /* No others user of address space so set it to 0 */
>   r = ttm_bo_device_init(&rdev->mman.bdev,
> -rdev->mman.bo_global_ref.ref.object,
> +ttm_global_get_bo_global(&rdev->mman.glob),
>  &radeon_bo_driver,
>  rdev->ddev->anon_inode->i_mapping,
>  DRM_FILE_PAGE_OFFSET,
> -- 
> 2.19.1
> 
___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


Re: [PATCH 2/3] drm/amdgpu: Replace TTM initialization/release with ttm_global

2018-10-18 Thread Huang Rui
On Fri, Oct 19, 2018 at 12:27:51AM +0800, Thomas Zimmermann wrote:
> Unified initialization and release of the global TTM state is provided
> by struct ttm_global and its interfaces.
> 
> Signed-off-by: Thomas Zimmermann 

Reviewed-by: Huang Rui 

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 63 ++---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h |  4 +-
>  2 files changed, 7 insertions(+), 60 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 3a6802846698..70b0e8c77bb4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -65,33 +65,6 @@ static void amdgpu_ttm_debugfs_fini(struct amdgpu_device 
> *adev);
>   * Global memory.
>   */
>  
> -/**
> - * amdgpu_ttm_mem_global_init - Initialize and acquire reference to
> - * memory object
> - *
> - * @ref: Object for initialization.
> - *
> - * This is called by drm_global_item_ref() when an object is being
> - * initialized.
> - */
> -static int amdgpu_ttm_mem_global_init(struct drm_global_reference *ref)
> -{
> - return ttm_mem_global_init(ref->object);
> -}
> -
> -/**
> - * amdgpu_ttm_mem_global_release - Drop reference to a memory object
> - *
> - * @ref: Object being removed
> - *
> - * This is called by drm_global_item_unref() when an object is being
> - * released.
> - */
> -static void amdgpu_ttm_mem_global_release(struct drm_global_reference *ref)
> -{
> - ttm_mem_global_release(ref->object);
> -}
> -
>  /**
>   * amdgpu_ttm_global_init - Initialize global TTM memory reference 
> structures.
>   *
> @@ -102,35 +75,15 @@ static void amdgpu_ttm_mem_global_release(struct 
> drm_global_reference *ref)
>   */
>  static int amdgpu_ttm_global_init(struct amdgpu_device *adev)
>  {
> - struct drm_global_reference *global_ref;
>   int r;
>  
>   /* ensure reference is false in case init fails */
>   adev->mman.mem_global_referenced = false;
>  
> - global_ref = &adev->mman.mem_global_ref;
> - global_ref->global_type = DRM_GLOBAL_TTM_MEM;
> - global_ref->size = sizeof(struct ttm_mem_global);
> - global_ref->init = &amdgpu_ttm_mem_global_init;
> - global_ref->release = &amdgpu_ttm_mem_global_release;
> - r = drm_global_item_ref(global_ref);
> + r = ttm_global_init(&adev->mman.glob);
>   if (r) {
> - DRM_ERROR("Failed setting up TTM memory accounting "
> -   "subsystem.\n");
> - goto error_mem;
> - }
> -
> - adev->mman.bo_global_ref.mem_glob =
> - adev->mman.mem_global_ref.object;
> - global_ref = &adev->mman.bo_global_ref.ref;
> - global_ref->global_type = DRM_GLOBAL_TTM_BO;
> - global_ref->size = sizeof(struct ttm_bo_global);
> - global_ref->init = &ttm_bo_global_ref_init;
> - global_ref->release = &ttm_bo_global_ref_release;
> - r = drm_global_item_ref(global_ref);
> - if (r) {
> - DRM_ERROR("Failed setting up TTM BO subsystem.\n");
> - goto error_bo;
> + DRM_ERROR("Failed setting up TTM subsystem.\n");
> + return r;
>   }
>  
>   mutex_init(&adev->mman.gtt_window_lock);
> @@ -138,19 +91,13 @@ static int amdgpu_ttm_global_init(struct amdgpu_device 
> *adev)
>   adev->mman.mem_global_referenced = true;
>  
>   return 0;
> -
> -error_bo:
> - drm_global_item_unref(&adev->mman.mem_global_ref);
> -error_mem:
> - return r;
>  }
>  
>  static void amdgpu_ttm_global_fini(struct amdgpu_device *adev)
>  {
>   if (adev->mman.mem_global_referenced) {
>   mutex_destroy(&adev->mman.gtt_window_lock);
> - drm_global_item_unref(&adev->mman.bo_global_ref.ref);
> - drm_global_item_unref(&adev->mman.mem_global_ref);
> + ttm_global_release(&adev->mman.glob);
>   adev->mman.mem_global_referenced = false;
>   }
>  }
> @@ -1765,7 +1712,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
>   }
>   /* No others user of address space so set it to 0 */
>   r = ttm_bo_device_init(&adev->mman.bdev,
> -adev->mman.bo_global_ref.ref.object,
> +ttm_global_get_bo_global(&adev->mman.glob),
>  &amdgpu_bo_driver,
>  adev->ddev->anon_inode->i_mapping,
>  DRM_FILE_PAGE_OFFSET,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> index fe8f276e9811..c3a7fe3ead3

Re: [PATCH][drm-next] drm/amdgpu/powerplay: fix missing break in switch statements

2018-10-09 Thread Huang Rui
On Mon, Oct 08, 2018 at 05:22:28PM +0100, Colin King wrote:
> From: Colin Ian King 
> 
> There are several switch statements that are missing break statements.
> Add missing breaks to handle any fall-through corner cases.
> 
> Detected by CoverityScan, CID#1457175 ("Missing break in switch")
> 
> Fixes: 18aafc59b106 ("drm/amd/powerplay: implement fw related smu interface 
> for iceland.")
> Signed-off-by: Colin Ian King 

Acked-by: Huang Rui 

> ---
>  drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c  | 2 ++
>  drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c| 2 ++
>  drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c | 2 ++
>  drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c   | 2 ++
>  drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.c   | 2 ++
>  5 files changed, 10 insertions(+)
> 
> diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c 
> b/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c
> index 18643e06bc6f..669bd0c2a16c 100644
> --- a/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c
> +++ b/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c
> @@ -2269,11 +2269,13 @@ static uint32_t ci_get_offsetof(uint32_t type, 
> uint32_t member)
>   case DRAM_LOG_BUFF_SIZE:
>   return offsetof(SMU7_SoftRegisters, DRAM_LOG_BUFF_SIZE);
>   }
> + break;
>   case SMU_Discrete_DpmTable:
>   switch (member) {
>   case LowSclkInterruptThreshold:
>   return offsetof(SMU7_Discrete_DpmTable, 
> LowSclkInterruptT);
>   }
> + break;
>   }
>   pr_debug("can't get the offset of type %x member %x\n", type, member);
>   return 0;
> diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c 
> b/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c
> index ec14798e87b6..bddd6d09f887 100644
> --- a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c
> +++ b/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c
> @@ -2331,6 +2331,7 @@ static uint32_t fiji_get_offsetof(uint32_t type, 
> uint32_t member)
>   case DRAM_LOG_BUFF_SIZE:
>   return offsetof(SMU73_SoftRegisters, 
> DRAM_LOG_BUFF_SIZE);
>   }
> + break;
>   case SMU_Discrete_DpmTable:
>   switch (member) {
>   case UvdBootLevel:
> @@ -2340,6 +2341,7 @@ static uint32_t fiji_get_offsetof(uint32_t type, 
> uint32_t member)
>   case LowSclkInterruptThreshold:
>   return offsetof(SMU73_Discrete_DpmTable, 
> LowSclkInterruptThreshold);
>   }
> + break;
>   }
>   pr_warn("can't get the offset of type %x member %x\n", type, member);
>   return 0;
> diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c 
> b/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c
> index 73aa368a454e..2d4c7f167b88 100644
> --- a/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c
> +++ b/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c
> @@ -2237,11 +2237,13 @@ static uint32_t iceland_get_offsetof(uint32_t type, 
> uint32_t member)
>   case DRAM_LOG_BUFF_SIZE:
>   return offsetof(SMU71_SoftRegisters, 
> DRAM_LOG_BUFF_SIZE);
>   }
> + break;
>   case SMU_Discrete_DpmTable:
>   switch (member) {
>   case LowSclkInterruptThreshold:
>   return offsetof(SMU71_Discrete_DpmTable, 
> LowSclkInterruptThreshold);
>   }
> + break;
>   }
>   pr_warn("can't get the offset of type %x member %x\n", type, member);
>   return 0;
> diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c 
> b/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c
> index ae8378ed32ee..a2ba5b012866 100644
> --- a/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c
> +++ b/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c
> @@ -2619,6 +2619,7 @@ static uint32_t tonga_get_offsetof(uint32_t type, 
> uint32_t member)
>   case DRAM_LOG_BUFF_SIZE:
>   return offsetof(SMU72_SoftRegisters, 
> DRAM_LOG_BUFF_SIZE);
>   }
> + break;
>   case SMU_Discrete_DpmTable:
>   switch (member) {
>   case UvdBootLevel:
> @@ -2628,6 +2629,7 @@ static uint32_t tonga_get_offsetof(uint32_t type, 
> uint32_t member)
>   case LowSclkInterruptThreshold:
>   return offsetof(SMU72_Discrete_DpmTable, 
> LowSclkInterruptThreshold);
>   }
> + break;
>   }
>   pr_warn("
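
Schematically, the bug pattern being fixed (based on the ci_smumgr.c hunk
above) is a nested switch whose outer case lacks a break, so a 'type' that
matches the first outer case but none of the inner 'member' values falls
through into the next outer case and can return the wrong offset:

	switch (type) {
	case SMU_SoftRegisters:
		switch (member) {
		case DRAM_LOG_BUFF_SIZE:
			return offsetof(SMU7_SoftRegisters, DRAM_LOG_BUFF_SIZE);
		}
		break;	/* the break added by the patch */
	case SMU_Discrete_DpmTable:
		switch (member) {
		case LowSclkInterruptThreshold:
			return offsetof(SMU7_Discrete_DpmTable, LowSclkInterruptT);
		}
		break;
	}
	pr_debug("can't get the offset of type %x member %x\n", type, member);
	return 0;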

Re: [PATCH] list: introduce list_bulk_move_tail helper

2018-09-17 Thread Huang Rui
On Mon, Sep 17, 2018 at 02:08:34PM +0200, Christian König wrote:
> Move all entries between @first and including @last before @head.
> 
> This is useful for LRU lists where a whole block of entries should be
> moved to the end of the list.
> 
> Used as a band aid in TTM, but better placed in the common list headers.
> 
> Signed-off-by: Christian König 
> ---

For TTM change, patch is Reviewed-by: Huang Rui 

For the new list_bulk_move_tail helper in include/linux/list.h, may we have
your comments from LKML?

Thanks,
Ray

>  drivers/gpu/drm/ttm/ttm_bo.c | 25 +
>  include/linux/list.h | 23 +++
>  2 files changed, 28 insertions(+), 20 deletions(-)
> 
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index b2a33bf1ef10..26b889f86670 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -247,20 +247,6 @@ void ttm_bo_move_to_lru_tail(struct ttm_buffer_object 
> *bo,
>  }
>  EXPORT_SYMBOL(ttm_bo_move_to_lru_tail);
>  
> -static void ttm_list_move_bulk_tail(struct list_head *list,
> - struct list_head *first,
> - struct list_head *last)
> -{
> - first->prev->next = last->next;
> - last->next->prev = first->prev;
> -
> - list->prev->next = first;
> - first->prev = list->prev;
> -
> - last->next = list;
> - list->prev = last;
> -}
> -
>  void ttm_bo_bulk_move_lru_tail(struct ttm_lru_bulk_move *bulk)
>  {
>   unsigned i;
> @@ -276,8 +262,8 @@ void ttm_bo_bulk_move_lru_tail(struct ttm_lru_bulk_move 
> *bulk)
>   reservation_object_assert_held(pos->last->resv);
>  
>   man = &pos->first->bdev->man[TTM_PL_TT];
> - ttm_list_move_bulk_tail(&man->lru[i], &pos->first->lru,
> - &pos->last->lru);
> + list_bulk_move_tail(&man->lru[i], &pos->first->lru,
> + &pos->last->lru);
>   }
>  
>   for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> @@ -291,8 +277,8 @@ void ttm_bo_bulk_move_lru_tail(struct ttm_lru_bulk_move 
> *bulk)
>   reservation_object_assert_held(pos->last->resv);
>  
>   man = &pos->first->bdev->man[TTM_PL_VRAM];
> - ttm_list_move_bulk_tail(&man->lru[i], &pos->first->lru,
> - &pos->last->lru);
> + list_bulk_move_tail(&man->lru[i], &pos->first->lru,
> + &pos->last->lru);
>   }
>  
>   for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> @@ -306,8 +292,7 @@ void ttm_bo_bulk_move_lru_tail(struct ttm_lru_bulk_move 
> *bulk)
>   reservation_object_assert_held(pos->last->resv);
>  
>   lru = &pos->first->bdev->glob->swap_lru[i];
> - ttm_list_move_bulk_tail(lru, &pos->first->swap,
> - &pos->last->swap);
> + list_bulk_move_tail(lru, &pos->first->swap, &pos->last->swap);
>   }
>  }
>  EXPORT_SYMBOL(ttm_bo_bulk_move_lru_tail);
> diff --git a/include/linux/list.h b/include/linux/list.h
> index de04cc5ed536..edb7628e46ed 100644
> --- a/include/linux/list.h
> +++ b/include/linux/list.h
> @@ -183,6 +183,29 @@ static inline void list_move_tail(struct list_head *list,
>   list_add_tail(list, head);
>  }
>  
> +/**
> + * list_bulk_move_tail - move a subsection of a list to its tail
> + * @head: the head that will follow our entry
> + * @first: first entry to move
> + * @last: last entry to move, can be the same as first
> + *
> + * Move all entries between @first and including @last before @head.
> + * All three entries must belong to the same linked list.
> + */
> +static inline void list_bulk_move_tail(struct list_head *head,
> +struct list_head *first,
> +struct list_head *last)
> +{
> + first->prev->next = last->next;
> + last->next->prev = first->prev;
> +
> + head->prev->next = first;
> + first->prev = head->prev;
> +
> + last->next = head;
> + head->prev = last;
> +}
> +
>  /**
>   * list_is_last - tests whether @list is the last entry in list @head
>   * @list: the entry to test
> -- 
> 2.14.1
> 
> ___
> amd-gfx mailing list
> amd-...@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel
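
As a usage sketch (not from the patch), moving a block of nodes to the tail of
a list with the new helper looks like this, assuming n1..n5 are already linked
on 'head' in that order:

	LIST_HEAD(head);
	struct list_head n1, n2, n3, n4, n5;

	/* before: head -> n1 -> n2 -> n3 -> n4 -> n5 */
	list_bulk_move_tail(&head, &n2, &n4);
	/* after:  head -> n1 -> n5 -> n2 -> n3 -> n4 */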


Re: [PATCH 1/2] list: introduce list_bulk_move_tail helper

2018-09-14 Thread Huang Rui
On Thu, Sep 13, 2018 at 01:22:07PM +0200, Christian König wrote:
> Move all entries between @first and including @last before @head.
> 
> This is useful for LRU lists where a whole block of entries should be
> moved to the end of a list.
> 
> Signed-off-by: Christian König 

The bulk move helper is useful for the TTM driver to improve LRU moving
efficiency. Please go ahead with my R-b.

Series are Reviewed-and-Tested-by: Huang Rui 

> ---
>  include/linux/list.h | 23 +++
>  1 file changed, 23 insertions(+)
> 
> diff --git a/include/linux/list.h b/include/linux/list.h
> index de04cc5ed536..edb7628e46ed 100644
> --- a/include/linux/list.h
> +++ b/include/linux/list.h
> @@ -183,6 +183,29 @@ static inline void list_move_tail(struct list_head *list,
>   list_add_tail(list, head);
>  }
>  
> +/**
> + * list_bulk_move_tail - move a subsection of a list to its tail
> + * @head: the head that will follow our entry
> + * @first: first entry to move
> + * @last: last entry to move, can be the same as first
> + *
> + * Move all entries between @first and including @last before @head.
> + * All three entries must belong to the same linked list.
> + */
> +static inline void list_bulk_move_tail(struct list_head *head,
> +struct list_head *first,
> +struct list_head *last)
> +{
> + first->prev->next = last->next;
> + last->next->prev = first->prev;
> +
> + head->prev->next = first;
> + first->prev = head->prev;
> +
> + last->next = head;
> + head->prev = last;
> +}
> +
>  /**
>   * list_is_last - tests whether @list is the last entry in list @head
>   * @list: the entry to test
> -- 
> 2.14.1
> 
> ___
> amd-gfx mailing list
> amd-...@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


Re: [PATCH] drm/amdgpu/powerplay: Fix missing break in switch statement

2019-03-03 Thread Huang Rui
On Fri, Mar 01, 2019 at 03:29:43PM -0600, Gustavo A. R. Silva wrote:
> Add missing break statement in order to prevent the code from falling
> through to case SMU_Discrete_DpmTable.
> 
> This bug was found thanks to the ongoing efforts to enable
> -Wimplicit-fallthrough.
> 
> Fixes: 34a564eaf528 ("drm/amd/powerplay: implement fw image related smum 
> interface for Polaris.")
> Cc: sta...@vger.kernel.org
> Signed-off-by: Gustavo A. R. Silva 

Reviewed-by: Huang Rui 

> ---
>  drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c 
> b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c
> index 52abca065764..222fb79d319e 100644
> --- a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c
> +++ b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c
> @@ -2330,6 +2330,7 @@ static uint32_t polaris10_get_offsetof(uint32_t type, 
> uint32_t member)
>   case DRAM_LOG_BUFF_SIZE:
>   return offsetof(SMU74_SoftRegisters, 
> DRAM_LOG_BUFF_SIZE);
>   }
> + break;
>   case SMU_Discrete_DpmTable:
>   switch (member) {
>   case UvdBootLevel:
> -- 
> 2.21.0
> 
> ___
> amd-gfx mailing list
> amd-...@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

Re: [RFC PATCH 0/6] do not store GPU address in TTM

2020-02-13 Thread Huang Rui
On Thu, Feb 13, 2020 at 01:01:57PM +0100, Nirmoy Das wrote:
> With this patch series I am trying to remove GPU address dependency in
> TTM and moving GPU address calculation to individual drm drivers.
> This is required[1] to continue the work started by Brian Welty to create
> struct drm_mem_region which can be leverage by DRM cgroup controller to 
> manage memory
> limits.
> 
> 
> I have only manage to test amdgpu driver as I only have GPU for that.
> I might be doing something really stupid while calculeting gpu offset for
> some of the drivers so please be patient and let me know how can I improve
> that.
> 
> [1] 
> [1] https://www.mail-archive.com/dri-devel@lists.freedesktop.org/msg272238.html

Looks good to me as well for the amd part.

Acked-by: Huang Rui 

> 
> Nirmoy Das (6):
>   drm/amdgpu: move ttm bo->offset to amdgpu_bo
>   drm/radeon: don't use ttm bo->offset
>   drm/vmwgfx: don't use ttm bo->offset
>   drm/nouveau: don't use ttm bo->offset
>   drm/qxl: don't use ttm bo->offset
>   drm/ttm: do not keep GPU dependent addresses
> 
>  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c  | 22 ++--
>  drivers/gpu/drm/amd/amdgpu/amdgpu_object.h  |  1 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 29 -
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h |  1 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c |  4 +--
>  drivers/gpu/drm/nouveau/dispnv04/crtc.c |  6 ++---
>  drivers/gpu/drm/nouveau/dispnv04/disp.c |  2 +-
>  drivers/gpu/drm/nouveau/dispnv04/overlay.c  |  6 ++---
>  drivers/gpu/drm/nouveau/dispnv50/base507c.c |  2 +-
>  drivers/gpu/drm/nouveau/dispnv50/core507d.c |  2 +-
>  drivers/gpu/drm/nouveau/dispnv50/ovly507e.c |  2 +-
>  drivers/gpu/drm/nouveau/dispnv50/wndw.c |  2 +-
>  drivers/gpu/drm/nouveau/dispnv50/wndwc37e.c |  2 +-
>  drivers/gpu/drm/nouveau/nouveau_abi16.c |  8 +++---
>  drivers/gpu/drm/nouveau/nouveau_bo.c|  1 +
>  drivers/gpu/drm/nouveau/nouveau_bo.h|  3 +++
>  drivers/gpu/drm/nouveau/nouveau_chan.c  |  2 +-
>  drivers/gpu/drm/nouveau/nouveau_fbcon.c |  2 +-
>  drivers/gpu/drm/nouveau/nouveau_gem.c   | 10 +++
>  drivers/gpu/drm/qxl/qxl_drv.h   |  6 ++---
>  drivers/gpu/drm/qxl/qxl_kms.c   |  3 +++
>  drivers/gpu/drm/qxl/qxl_object.h|  5 
>  drivers/gpu/drm/qxl/qxl_ttm.c   |  9 ---
>  drivers/gpu/drm/radeon/radeon.h |  1 +
>  drivers/gpu/drm/radeon/radeon_object.h  | 16 +++-
>  drivers/gpu/drm/radeon/radeon_ttm.c |  4 +--
>  drivers/gpu/drm/ttm/ttm_bo.c|  7 -
>  drivers/gpu/drm/vmwgfx/vmwgfx_bo.c  |  4 +--
>  drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c |  2 +-
>  drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c|  2 +-
>  drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c  |  2 --
>  include/drm/ttm/ttm_bo_api.h|  2 --
>  include/drm/ttm/ttm_bo_driver.h |  1 -
>  33 files changed, 99 insertions(+), 72 deletions(-)
> 
> --
> 2.25.0
> 
> ___
> amd-gfx mailing list
> amd-...@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel
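
For context, TTM's cached bo->offset is the memory manager's GPU base plus the
buffer's page offset (bo->mem.start << PAGE_SHIFT). Once TTM no longer stores
it, each driver can compute the address on demand; the helper below is only an
illustrative sketch with made-up names, not code from the series:

	/* hypothetical driver-side helper, assuming the driver knows the GPU
	 * start address (domain_base) of the VRAM/GTT aperture the BO lives in */
	static u64 drv_bo_gpu_offset(struct ttm_buffer_object *bo, u64 domain_base)
	{
		return domain_base + ((u64)bo->mem.start << PAGE_SHIFT);
	}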


Re: [PATCH] drm/ttm: nuke invalidate_caches callback

2020-01-13 Thread Huang Rui
On Mon, Jan 13, 2020 at 10:45:25PM +0800, Christian König wrote:
> Ping? Just a trivial cleanup.
> 
> Am 10.01.20 um 16:09 schrieb Christian König:
> > Another completely unused feature.
> >
> > Signed-off-by: Christian König 

Reviewed-by: Huang Rui 

> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c|  6 --
> >   drivers/gpu/drm/nouveau/nouveau_bo.c   |  8 
> >   drivers/gpu/drm/qxl/qxl_ttm.c  |  6 --
> >   drivers/gpu/drm/radeon/radeon_ttm.c|  6 --
> >   drivers/gpu/drm/ttm/ttm_bo.c   |  9 +
> >   drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c |  6 --
> >   include/drm/ttm/ttm_bo_driver.h| 15 ---
> >   7 files changed, 1 insertion(+), 55 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> > index 445de594c214..7c4b1cbd9a50 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> > @@ -68,11 +68,6 @@ static int amdgpu_map_buffer(struct ttm_buffer_object 
> > *bo,
> >   static int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev);
> >   static void amdgpu_ttm_debugfs_fini(struct amdgpu_device *adev);
> >   
> > -static int amdgpu_invalidate_caches(struct ttm_bo_device *bdev, uint32_t 
> > flags)
> > -{
> > -   return 0;
> > -}
> > -
> >   /**
> >* amdgpu_init_mem_type - Initialize a memory manager for a specific type 
> > of
> >* memory request.
> > @@ -1637,7 +1632,6 @@ static struct ttm_bo_driver amdgpu_bo_driver = {
> > .ttm_tt_create = &amdgpu_ttm_tt_create,
> > .ttm_tt_populate = &amdgpu_ttm_tt_populate,
> > .ttm_tt_unpopulate = &amdgpu_ttm_tt_unpopulate,
> > -   .invalidate_caches = &amdgpu_invalidate_caches,
> > .init_mem_type = &amdgpu_init_mem_type,
> > .eviction_valuable = amdgpu_ttm_bo_eviction_valuable,
> > .evict_flags = &amdgpu_evict_flags,
> > diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c 
> > b/drivers/gpu/drm/nouveau/nouveau_bo.c
> > index f8015e0318d7..81668104595f 100644
> > --- a/drivers/gpu/drm/nouveau/nouveau_bo.c
> > +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
> > @@ -646,13 +646,6 @@ nouveau_ttm_tt_create(struct ttm_buffer_object *bo, 
> > uint32_t page_flags)
> > return nouveau_sgdma_create_ttm(bo, page_flags);
> >   }
> >   
> > -static int
> > -nouveau_bo_invalidate_caches(struct ttm_bo_device *bdev, uint32_t flags)
> > -{
> > -   /* We'll do this from user space. */
> > -   return 0;
> > -}
> > -
> >   static int
> >   nouveau_bo_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
> >  struct ttm_mem_type_manager *man)
> > @@ -1696,7 +1689,6 @@ struct ttm_bo_driver nouveau_bo_driver = {
> > .ttm_tt_create = &nouveau_ttm_tt_create,
> > .ttm_tt_populate = &nouveau_ttm_tt_populate,
> > .ttm_tt_unpopulate = &nouveau_ttm_tt_unpopulate,
> > -   .invalidate_caches = nouveau_bo_invalidate_caches,
> > .init_mem_type = nouveau_bo_init_mem_type,
> > .eviction_valuable = ttm_bo_eviction_valuable,
> > .evict_flags = nouveau_bo_evict_flags,
> > diff --git a/drivers/gpu/drm/qxl/qxl_ttm.c b/drivers/gpu/drm/qxl/qxl_ttm.c
> > index 16a5e903533d..62a5e424971b 100644
> > --- a/drivers/gpu/drm/qxl/qxl_ttm.c
> > +++ b/drivers/gpu/drm/qxl/qxl_ttm.c
> > @@ -48,11 +48,6 @@ static struct qxl_device *qxl_get_qdev(struct 
> > ttm_bo_device *bdev)
> > return qdev;
> >   }
> >   
> > -static int qxl_invalidate_caches(struct ttm_bo_device *bdev, uint32_t 
> > flags)
> > -{
> > -   return 0;
> > -}
> > -
> >   static int qxl_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
> >  struct ttm_mem_type_manager *man)
> >   {
> > @@ -256,7 +251,6 @@ static void qxl_bo_move_notify(struct ttm_buffer_object 
> > *bo,
> >   
> >   static struct ttm_bo_driver qxl_bo_driver = {
> > .ttm_tt_create = &qxl_ttm_tt_create,
> > -   .invalidate_caches = &qxl_invalidate_caches,
> > .init_mem_type = &qxl_init_mem_type,
> > .eviction_valuable = ttm_bo_eviction_valuable,
> > .evict_flags = &qxl_evict_flags,
> > diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c 
> > b/drivers/gpu/drm/radeon/radeon_ttm.c
> > index f4af67035673..40282bf0adbe 100644
> > --- a/drivers/gpu/drm/radeon/radeon_ttm.c
> > +++ b/drivers/gpu/drm/radeon/radeon_ttm.c
> > @@ -66,11 +66,6 @@ static struct radeon_device *radeon_get_rdev(struct 
> > ttm_bo_de

Re: [PATCH] drm/radeon: remove three set but not used variable

2019-12-27 Thread Huang Rui
On Thu, Dec 26, 2019 at 08:07:50PM +0800, yu kuai wrote:
> Fixes gcc '-Wunused-but-set-variable' warning:
> 
> drivers/gpu/drm/radeon/radeon_atombios.c: In function
> ‘radeon_get_atom_connector_info_from_object_table’:
> drivers/gpu/drm/radeon/radeon_atombios.c:651:26: warning: variable
> ‘grph_obj_num’ set but not used [-Wunused-but-set-variable]
> drivers/gpu/drm/radeon/radeon_atombios.c:651:13: warning: variable
> ‘grph_obj_id’ set but not used [-Wunused-but-set-variable]
> drivers/gpu/drm/radeon/radeon_atombios.c:573:37: warning: variable
> ‘con_obj_type’ set but not used [-Wunused-but-set-variable]
> 
> They are never used, and so can be removed.
> 
> Signed-off-by: yu kuai 

Thanks!

Acked-by: Huang Rui 

> ---
>  drivers/gpu/drm/radeon/radeon_atombios.c | 15 ++-
>  1 file changed, 2 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c 
> b/drivers/gpu/drm/radeon/radeon_atombios.c
> index 072e6daedf7a..848ef68d9086 100644
> --- a/drivers/gpu/drm/radeon/radeon_atombios.c
> +++ b/drivers/gpu/drm/radeon/radeon_atombios.c
> @@ -570,7 +570,7 @@ bool 
> radeon_get_atom_connector_info_from_object_table(struct drm_device *dev)
>   path_size += le16_to_cpu(path->usSize);
>  
>   if (device_support & le16_to_cpu(path->usDeviceTag)) {
> - uint8_t con_obj_id, con_obj_num, con_obj_type;
> + uint8_t con_obj_id, con_obj_num;
>  
>   con_obj_id =
>   (le16_to_cpu(path->usConnObjectId) & OBJECT_ID_MASK)
> @@ -578,9 +578,6 @@ bool 
> radeon_get_atom_connector_info_from_object_table(struct drm_device *dev)
>   con_obj_num =
>   (le16_to_cpu(path->usConnObjectId) & ENUM_ID_MASK)
>   >> ENUM_ID_SHIFT;
> - con_obj_type =
> - (le16_to_cpu(path->usConnObjectId) &
> -  OBJECT_TYPE_MASK) >> OBJECT_TYPE_SHIFT;
>  
>   /* TODO CV support */
>   if (le16_to_cpu(path->usDeviceTag) ==
> @@ -648,15 +645,7 @@ bool 
> radeon_get_atom_connector_info_from_object_table(struct drm_device *dev)
>   router.ddc_valid = false;
>   router.cd_valid = false;
>   for (j = 0; j < ((le16_to_cpu(path->usSize) - 8) / 2); 
> j++) {
> - uint8_t grph_obj_id, grph_obj_num, 
> grph_obj_type;
> -
> - grph_obj_id =
> - (le16_to_cpu(path->usGraphicObjIds[j]) &
> -  OBJECT_ID_MASK) >> OBJECT_ID_SHIFT;
> - grph_obj_num =
> - (le16_to_cpu(path->usGraphicObjIds[j]) &
> -  ENUM_ID_MASK) >> ENUM_ID_SHIFT;
> - grph_obj_type =
> + uint8_t grph_obj_type =
>   (le16_to_cpu(path->usGraphicObjIds[j]) &
>OBJECT_TYPE_MASK) >> OBJECT_TYPE_SHIFT;
>  
> -- 
> 2.17.2
> 
> ___
> amd-gfx mailing list
> amd-...@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


Re: [PATCH] drm/ttm: Use scnprintf() for avoiding potential buffer overflow

2020-03-11 Thread Huang Rui
On Wed, Mar 11, 2020 at 03:34:52PM +0800, Takashi Iwai wrote:
> Since snprintf() returns the would-be-output size instead of the
> actual output size, the succeeding calls may go beyond the given
> buffer limit.  Fix it by replacing with scnprintf().
> 
> Signed-off-by: Takashi Iwai 

Reviewed-by: Huang Rui 

> ---
>  drivers/gpu/drm/ttm/ttm_page_alloc_dma.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c 
> b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
> index bf876faea592..faefaaef7909 100644
> --- a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
> +++ b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
> @@ -604,7 +604,7 @@ static struct dma_pool *ttm_dma_pool_init(struct device 
> *dev, gfp_t flags,
>   p = pool->name;
>   for (i = 0; i < ARRAY_SIZE(t); i++) {
>   if (type & t[i]) {
> - p += snprintf(p, sizeof(pool->name) - (p - pool->name),
> + p += scnprintf(p, sizeof(pool->name) - (p - pool->name),
> "%s", n[i]);
>   }
>   }
> -- 
> 2.16.4
> 
___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel
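
To illustrate the difference the patch relies on (a schematic example, not
taken from the patch): snprintf() returns the length the formatted string
would have had, while scnprintf() returns what was actually written, so only
the latter is safe for advancing a pointer into a fixed-size buffer:

	char buf[4];
	int n1 = snprintf(buf, sizeof(buf), "abcdef");   /* n1 == 6, buf = "abc" */
	int n2 = scnprintf(buf, sizeof(buf), "abcdef");  /* n2 == 3, buf = "abc" */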


Re: [PATCH] drm/ttm: fix false positive assert

2020-03-09 Thread Huang Rui
On Mon, Mar 09, 2020 at 07:49:15PM +0800, Christian König wrote:
> Pierre-eric, just a gentle ping on this? Could I get a tested-by?
> 
> Ray can you ack or even review this?
> 
> Thanks,
> Christian.
> 
> Am 06.03.20 um 13:41 schrieb Christian König:
> > The assert sometimes incorrectly triggers when pinned BOs are destroyed.
> >
> > Signed-off-by: Christian König 

Reviewed-by: Huang Rui 

> > ---
> >   drivers/gpu/drm/ttm/ttm_bo.c | 5 ++---
> >   1 file changed, 2 insertions(+), 3 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> > index 2445e2bd6267..ca5a8d01ff1f 100644
> > --- a/drivers/gpu/drm/ttm/ttm_bo.c
> > +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> > @@ -151,8 +151,6 @@ static void ttm_bo_add_mem_to_lru(struct 
> > ttm_buffer_object *bo,
> > struct ttm_bo_device *bdev = bo->bdev;
> > struct ttm_mem_type_manager *man;
> >   
> > -   dma_resv_assert_held(bo->base.resv);
> > -
> > if (!list_empty(>lru))
> > return;
> >   
> > @@ -611,7 +609,8 @@ static void ttm_bo_release(struct kref *kref)
> >  */
> > if (bo->mem.placement & TTM_PL_FLAG_NO_EVICT) {
> > bo->mem.placement &= ~TTM_PL_FLAG_NO_EVICT;
> > -   ttm_bo_move_to_lru_tail(bo, NULL);
> > +   ttm_bo_del_from_lru(bo);
> > +   ttm_bo_add_mem_to_lru(bo, &bo->mem);
> > }
> >   
> > kref_init(>kref);
> 
___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[PATCH v2] drm/ttm: clean up ttm_trace_dma_map/ttm_trace_dma_unmap (v2)

2020-04-07 Thread Huang Rui
ttm_trace_dma_map/ttm_trace_dma_unmap is never used anymore.

v2: remove the file completely

Signed-off-by: Huang Rui 
---
 include/drm/ttm/ttm_debug.h | 31 ---
 1 file changed, 31 deletions(-)
 delete mode 100644 include/drm/ttm/ttm_debug.h

diff --git a/include/drm/ttm/ttm_debug.h b/include/drm/ttm/ttm_debug.h
deleted file mode 100644
index b5e460f..000
--- a/include/drm/ttm/ttm_debug.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/**
- *
- * Copyright (c) 2017 Advanced Micro Devices, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **/
-/*
- * Authors: Tom St Denis 
- */
-extern void ttm_trace_dma_map(struct device *dev, struct ttm_dma_tt *tt);
-extern void ttm_trace_dma_unmap(struct device *dev, struct ttm_dma_tt *tt);
-- 
2.7.4

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[PATCH 1/2] drm/ttm: clean up ttm_trace_dma_map/ttm_trace_dma_unmap

2020-04-07 Thread Huang Rui
ttm_trace_dma_map/ttm_trace_dma_unmap is never used anymore. Move the pr_fmt
prefix into this header.

Signed-off-by: Huang Rui 
---
 include/drm/ttm/ttm_debug.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/drm/ttm/ttm_debug.h b/include/drm/ttm/ttm_debug.h
index b5e460f..bd7cf37 100644
--- a/include/drm/ttm/ttm_debug.h
+++ b/include/drm/ttm/ttm_debug.h
@@ -27,5 +27,5 @@
 /*
  * Authors: Tom St Denis 
  */
-extern void ttm_trace_dma_map(struct device *dev, struct ttm_dma_tt *tt);
-extern void ttm_trace_dma_unmap(struct device *dev, struct ttm_dma_tt *tt);
+
+#define pr_fmt(fmt) "[TTM] " fmt
-- 
2.7.4

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[PATCH 2/2] drm/ttm: abstract all ttm prefix into ttm_debug header

2020-04-07 Thread Huang Rui
Use the debug header instead of the pr_fmt macro at the start of the files.

Signed-off-by: Huang Rui 
---
 drivers/gpu/drm/ttm/ttm_agp_backend.c| 3 +--
 drivers/gpu/drm/ttm/ttm_bo.c | 3 +--
 drivers/gpu/drm/ttm/ttm_bo_vm.c  | 3 +--
 drivers/gpu/drm/ttm/ttm_memory.c | 3 +--
 drivers/gpu/drm/ttm/ttm_page_alloc.c | 3 +--
 drivers/gpu/drm/ttm/ttm_page_alloc_dma.c | 3 +--
 drivers/gpu/drm/ttm/ttm_tt.c | 3 +--
 drivers/gpu/drm/vmwgfx/ttm_object.c  | 3 +--
 8 files changed, 8 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_agp_backend.c 
b/drivers/gpu/drm/ttm/ttm_agp_backend.c
index 6050dc8..53fa96f 100644
--- a/drivers/gpu/drm/ttm/ttm_agp_backend.c
+++ b/drivers/gpu/drm/ttm/ttm_agp_backend.c
@@ -30,8 +30,7 @@
  *  Keith Packard.
  */
 
-#define pr_fmt(fmt) "[TTM] " fmt
-
+#include 
 #include 
 #include 
 #include 
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index ca5a8d0..469a3f1 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -29,8 +29,7 @@
  * Authors: Thomas Hellstrom 
  */
 
-#define pr_fmt(fmt) "[TTM] " fmt
-
+#include 
 #include 
 #include 
 #include 
diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index eebb4c0..fa5e237 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -29,8 +29,7 @@
  * Authors: Thomas Hellstrom 
  */
 
-#define pr_fmt(fmt) "[TTM] " fmt
-
+#include 
 #include 
 #include 
 #include 
diff --git a/drivers/gpu/drm/ttm/ttm_memory.c b/drivers/gpu/drm/ttm/ttm_memory.c
index acd63b7..f51d70f 100644
--- a/drivers/gpu/drm/ttm/ttm_memory.c
+++ b/drivers/gpu/drm/ttm/ttm_memory.c
@@ -26,8 +26,7 @@
  *
  **/
 
-#define pr_fmt(fmt) "[TTM] " fmt
-
+#include 
 #include 
 #include 
 #include 
diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c 
b/drivers/gpu/drm/ttm/ttm_page_alloc.c
index b40a467..4363420 100644
--- a/drivers/gpu/drm/ttm/ttm_page_alloc.c
+++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c
@@ -31,8 +31,7 @@
  * - doesn't track currently in use pages
  */
 
-#define pr_fmt(fmt) "[TTM] " fmt
-
+#include 
 #include 
 #include 
 #include 
diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c 
b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
index bf876fa..0017d6d 100644
--- a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
+++ b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
@@ -33,8 +33,7 @@
  *   when freed).
  */
 
-#define pr_fmt(fmt) "[TTM] " fmt
-
+#include 
 #include 
 #include 
 #include  /* for seq_printf */
diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index 2ec448e..4fa8a51 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -29,8 +29,7 @@
  * Authors: Thomas Hellstrom 
  */
 
-#define pr_fmt(fmt) "[TTM] " fmt
-
+#include 
 #include 
 #include 
 #include 
diff --git a/drivers/gpu/drm/vmwgfx/ttm_object.c 
b/drivers/gpu/drm/vmwgfx/ttm_object.c
index 1607778..cd77370 100644
--- a/drivers/gpu/drm/vmwgfx/ttm_object.c
+++ b/drivers/gpu/drm/vmwgfx/ttm_object.c
@@ -57,8 +57,7 @@
  * for fast lookup of ref objects given a base object.
  */
 
-#define pr_fmt(fmt) "[TTM] " fmt
-
+#include 
 #include 
 #include 
 #include 
-- 
2.7.4

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


Re: [PATCH 1/2] drm/ttm: clean up ttm_trace_dma_map/ttm_trace_dma_unmap

2020-04-07 Thread Huang Rui
On Tue, Apr 07, 2020 at 03:06:01PM +0800, Koenig, Christian wrote:
> Am 07.04.20 um 08:44 schrieb Huang Rui:
> > ttm_trace_dma_map/ttm_trace_dma_unmap is never used anymore. Move the pr_fmt
> > prefix into this header.
> >
> > Signed-off-by: Huang Rui 
> > ---
> >   include/drm/ttm/ttm_debug.h | 4 ++--
> >   1 file changed, 2 insertions(+), 2 deletions(-)
> >
> > diff --git a/include/drm/ttm/ttm_debug.h b/include/drm/ttm/ttm_debug.h
> > index b5e460f..bd7cf37 100644
> > --- a/include/drm/ttm/ttm_debug.h
> > +++ b/include/drm/ttm/ttm_debug.h
> > @@ -27,5 +27,5 @@
> >   /*
> >* Authors: Tom St Denis 
> >*/
> > -extern void ttm_trace_dma_map(struct device *dev, struct ttm_dma_tt *tt);
> > -extern void ttm_trace_dma_unmap(struct device *dev, struct ttm_dma_tt *tt);
> 
> I would just completely remove the file since it isn't used any more.
> 
> > +
> > +#define pr_fmt(fmt) "[TTM] " fmt
> 
> Oh, that is most likely not a good idea. The pr_fmt define should be set 
> for each file individually or otherwise we could accidentally include 
> the file in a driver.
> 

OK. I will modify the patch to remove the header completely.

Thanks,
Ray

> Regards,
> Christian.
___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel
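
For reference, pr_fmt works by being defined before the printk headers are
pulled in; every pr_*() call in that translation unit is then prefixed. A
minimal sketch (message text is illustrative only):

	#define pr_fmt(fmt) "[TTM] " fmt

	#include <linux/printk.h>

	pr_err("page pool allocation failed\n");
	/* logs: "[TTM] page pool allocation failed" */

Defining it in a shared header, as patch 1/2 proposed, would silently re-prefix
messages in any driver that happened to include that header, which is why the
per-file define was kept and the header removed instead.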


Re: [PATCH] drm/ttm: clean up the interface which is not used

2020-03-25 Thread Huang Rui
On Thu, Mar 26, 2020 at 01:30:31AM +0800, Christian König wrote:
> Am 25.03.20 um 18:27 schrieb Alex Deucher:
> > On Wed, Mar 25, 2020 at 1:20 PM Christian König
> >  wrote:
> >> Am 25.03.20 um 16:34 schrieb Huang Rui:
> >>> invalidate_caches is actually not used, so clean it up.
> >>>
> >>> Signed-off-by: Huang Rui 
> >> Already had the same patch around for a while, looks like I've just
> >> forgot to commit it.
> >>
> >> Reviewed-by: Christian König 
> >>
> > Is it already in drm-misc and just hasn't made it into
> > amd-staging-drm-next yet?  I can try and rebase next week if so.
> 
> Ah! Yeah there it is:
> 
> > commit 5e791166d377c539db0f889e7793204912c374da
> > Author: Christian König 
> > Date:   Fri Jan 10 16:09:54 2020 +0100
> >
> >     drm/ttm: nuke invalidate_caches callback
> >
> >     Another completely unused feature.
> >
> >     Signed-off-by: Christian König 
> >     Reviewed-by: Huang Rui 
> >     Link: 
> > https://patchwork.freedesktop.org/patch/348265/
> 
> Looks like we haven't merged that into amd-staging-drm-next yet.

I also forgot to look at this patch before. :-)
OK, let's use your original patch.

Thanks,
Ray

> 
> Christian.
> 
> >
> > Alex
> >
> >
> >>> ---
> >>>drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c|  6 --
> >>>drivers/gpu/drm/nouveau/nouveau_bo.c   |  7 ---
> >>>drivers/gpu/drm/qxl/qxl_ttm.c  |  6 --
> >>>drivers/gpu/drm/radeon/radeon_ttm.c|  6 --
> >>>drivers/gpu/drm/ttm/ttm_bo.c   |  8 +---
> >>>drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c |  6 --
> >>>include/drm/ttm/ttm_bo_driver.h| 13 -
> >>>7 files changed, 1 insertion(+), 51 deletions(-)
> >>>
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
> >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> >>> index cd2bde6..b397148 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> >>> @@ -62,11 +62,6 @@
> >>>
> >>>#define AMDGPU_TTM_VRAM_MAX_DW_READ (size_t)128
> >>>
> >>> -static int amdgpu_invalidate_caches(struct ttm_bo_device *bdev, uint32_t 
> >>> flags)
> >>> -{
> >>> - return 0;
> >>> -}
> >>> -
> >>>/**
> >>> * amdgpu_init_mem_type - Initialize a memory manager for a specific 
> >>> type of
> >>> * memory request.
> >>> @@ -1746,7 +1741,6 @@ static struct ttm_bo_driver amdgpu_bo_driver = {
> >>>.ttm_tt_create = &amdgpu_ttm_tt_create,
> >>>.ttm_tt_populate = &amdgpu_ttm_tt_populate,
> >>>.ttm_tt_unpopulate = &amdgpu_ttm_tt_unpopulate,
> >>> - .invalidate_caches = &amdgpu_invalidate_caches,
> >>>.init_mem_type = &amdgpu_init_mem_type,
> >>>.eviction_valuable = amdgpu_ttm_bo_eviction_valuable,
> >>>.evict_flags = &amdgpu_evict_flags,
> >>> diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c 
> >>> b/drivers/gpu/drm/nouveau/nouveau_bo.c
> >>> index 1b62ccc..7dd94e6 100644
> >>> --- a/drivers/gpu/drm/nouveau/nouveau_bo.c
> >>> +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
> >>> @@ -647,13 +647,6 @@ nouveau_ttm_tt_create(struct ttm_buffer_object *bo, 
> >>> uint32_t page_flags)
> >>>}
> >>>
> >>>static int
> >>> -nouveau_bo_invalidate_caches(struct ttm_bo_device *bdev, uint32_t flags)
> >>> -{
> >>> - /* We'll do this from user space. */
> >>> - return 0;
> >>> -}
> >>> -
> >>> -static int
> >>>nouveau_bo_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
> >>> struct ttm_mem_type_manager *man)
> >>>{
> >>> diff --git a/drivers/gpu/drm/qxl/qxl_ttm.c b/drivers/gpu/drm/qxl/qxl_ttm.c
> >>> index 16a5e90..62a5e42 100644
> >>> --- a/drivers/gpu/drm/qxl/qxl_ttm.c
> >>> +++ b/drivers/gpu/drm/qxl/qxl_ttm.c
> >>> @@ -48,11 +48,6 @@ static s

[PATCH] drm/ttm: clean up the interface which is not used

2020-03-25 Thread Huang Rui
invalidate_caches is actually not used, so clean it up.

Signed-off-by: Huang Rui 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c|  6 --
 drivers/gpu/drm/nouveau/nouveau_bo.c   |  7 ---
 drivers/gpu/drm/qxl/qxl_ttm.c  |  6 --
 drivers/gpu/drm/radeon/radeon_ttm.c|  6 --
 drivers/gpu/drm/ttm/ttm_bo.c   |  8 +---
 drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c |  6 --
 include/drm/ttm/ttm_bo_driver.h| 13 -
 7 files changed, 1 insertion(+), 51 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index cd2bde6..b397148 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -62,11 +62,6 @@
 
 #define AMDGPU_TTM_VRAM_MAX_DW_READ(size_t)128
 
-static int amdgpu_invalidate_caches(struct ttm_bo_device *bdev, uint32_t flags)
-{
-   return 0;
-}
-
 /**
  * amdgpu_init_mem_type - Initialize a memory manager for a specific type of
  * memory request.
@@ -1746,7 +1741,6 @@ static struct ttm_bo_driver amdgpu_bo_driver = {
.ttm_tt_create = &amdgpu_ttm_tt_create,
.ttm_tt_populate = &amdgpu_ttm_tt_populate,
.ttm_tt_unpopulate = &amdgpu_ttm_tt_unpopulate,
-   .invalidate_caches = &amdgpu_invalidate_caches,
.init_mem_type = &amdgpu_init_mem_type,
.eviction_valuable = amdgpu_ttm_bo_eviction_valuable,
.evict_flags = &amdgpu_evict_flags,
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c 
b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 1b62ccc..7dd94e6 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -647,13 +647,6 @@ nouveau_ttm_tt_create(struct ttm_buffer_object *bo, 
uint32_t page_flags)
 }
 
 static int
-nouveau_bo_invalidate_caches(struct ttm_bo_device *bdev, uint32_t flags)
-{
-   /* We'll do this from user space. */
-   return 0;
-}
-
-static int
 nouveau_bo_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
 struct ttm_mem_type_manager *man)
 {
diff --git a/drivers/gpu/drm/qxl/qxl_ttm.c b/drivers/gpu/drm/qxl/qxl_ttm.c
index 16a5e90..62a5e42 100644
--- a/drivers/gpu/drm/qxl/qxl_ttm.c
+++ b/drivers/gpu/drm/qxl/qxl_ttm.c
@@ -48,11 +48,6 @@ static struct qxl_device *qxl_get_qdev(struct ttm_bo_device 
*bdev)
return qdev;
 }
 
-static int qxl_invalidate_caches(struct ttm_bo_device *bdev, uint32_t flags)
-{
-   return 0;
-}
-
 static int qxl_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
 struct ttm_mem_type_manager *man)
 {
@@ -256,7 +251,6 @@ static void qxl_bo_move_notify(struct ttm_buffer_object *bo,
 
 static struct ttm_bo_driver qxl_bo_driver = {
.ttm_tt_create = &qxl_ttm_tt_create,
-   .invalidate_caches = &qxl_invalidate_caches,
.init_mem_type = &qxl_init_mem_type,
.eviction_valuable = ttm_bo_eviction_valuable,
.evict_flags = &qxl_evict_flags,
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c 
b/drivers/gpu/drm/radeon/radeon_ttm.c
index f4af6703..40282bf 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -66,11 +66,6 @@ static struct radeon_device *radeon_get_rdev(struct 
ttm_bo_device *bdev)
return rdev;
 }
 
-static int radeon_invalidate_caches(struct ttm_bo_device *bdev, uint32_t flags)
-{
-   return 0;
-}
-
 static int radeon_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
struct ttm_mem_type_manager *man)
 {
@@ -774,7 +769,6 @@ static struct ttm_bo_driver radeon_bo_driver = {
.ttm_tt_create = &radeon_ttm_tt_create,
.ttm_tt_populate = &radeon_ttm_tt_populate,
.ttm_tt_unpopulate = &radeon_ttm_tt_unpopulate,
-   .invalidate_caches = &radeon_invalidate_caches,
.init_mem_type = &radeon_init_mem_type,
.eviction_valuable = ttm_bo_eviction_valuable,
.evict_flags = &radeon_evict_flags,
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 2445e2b..fd09bbb 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -343,14 +343,8 @@ static int ttm_bo_handle_move_mem(struct ttm_buffer_object 
*bo,
}
 
 moved:
-   if (bo->evicted) {
-   if (bdev->driver->invalidate_caches) {
-   ret = bdev->driver->invalidate_caches(bdev, 
bo->mem.placement);
-   if (ret)
-   pr_err("Can not flush read caches\n");
-   }
+   if (bo->evicted)
bo->evicted = false;
-   }
 
if (bo->mem.mm_node)
bo->offset = (bo->mem.start << PAGE_SHIFT) +
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
index d8ea3dd..3f3b2c7 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
@@ -736,11 +736,6 @@ static struct ttm_tt *vmw_ttm_tt_create(struct 
ttm_buffer_

Re: [PATCH v2 -next] drm/amdkfd: Fix -Wunused-const-variable warning

2020-09-10 Thread Huang Rui
On Thu, Sep 10, 2020 at 03:50:06PM +0800, YueHaibing wrote:
> If KFD_SUPPORT_IOMMU_V2 is not set, gcc warns:
> 
> drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_device.c:121:37: warning: 
> ‘raven_device_info’ defined but not used [-Wunused-const-variable=]
>  static const struct kfd_device_info raven_device_info = {
>  ^~~~~~~~~
> 
> As Huang Rui suggested, Raven already has the fallback path,
> so it should be out of IOMMU v2 flag.
> 
> Suggested-by: Huang Rui 
> Signed-off-by: YueHaibing 

Acked-by: Huang Rui 

> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_device.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> index 0e71a0543f98..e3fc6ed7b79c 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> @@ -503,8 +503,8 @@ static const struct kfd_device_info 
> *kfd_supported_devices[][2] = {
>  #ifdef KFD_SUPPORT_IOMMU_V2
>   [CHIP_KAVERI] = {_device_info, NULL},
>   [CHIP_CARRIZO] = {_device_info, NULL},
> - [CHIP_RAVEN] = {_device_info, NULL},
>  #endif
> + [CHIP_RAVEN] = {_device_info, NULL},
>   [CHIP_HAWAII] = {_device_info, NULL},
>   [CHIP_TONGA] = {_device_info, NULL},
>   [CHIP_FIJI] = {_device_info, _vf_device_info},
> -- 
> 2.17.1
> 
> 
___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


Re: [PATCH -next] drm/amdkfd: Fix -Wunused-const-variable warning

2020-09-10 Thread Huang Rui
On Thu, Sep 10, 2020 at 10:55:32AM +0800, YueHaibing wrote:
> If KFD_SUPPORT_IOMMU_V2 is not set, gcc warns:
> 
> drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_device.c:121:37: warning: 
> ‘raven_device_info’ defined but not used [-Wunused-const-variable=]
>  static const struct kfd_device_info raven_device_info = {
>  ^
> 
> Move it to ifdef block.
> 
> Signed-off-by: YueHaibing 
> ---

Raven already has the fallback path, so it should be outside the IOMMU v2 flag.

You may want to move raven_device_info out of the IOMMU v2 flag in
kfd_supported_devices[][2] as well.

Thanks,
Ray

>  drivers/gpu/drm/amd/amdkfd/kfd_device.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> index 0e71a0543f98..cae4df259e26 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> @@ -116,7 +116,6 @@ static const struct kfd_device_info carrizo_device_info = 
> {
>   .num_xgmi_sdma_engines = 0,
>   .num_sdma_queues_per_engine = 2,
>  };
> -#endif
>  
>  static const struct kfd_device_info raven_device_info = {
>   .asic_family = CHIP_RAVEN,
> @@ -135,6 +134,7 @@ static const struct kfd_device_info raven_device_info = {
>   .num_xgmi_sdma_engines = 0,
>   .num_sdma_queues_per_engine = 2,
>  };
> +#endif
>  
>  static const struct kfd_device_info hawaii_device_info = {
>   .asic_family = CHIP_HAWAII,
> -- 
> 2.17.1
> 
> 


Re: [PATCH 01/11] drm/ttm: add ttm_bo_pin()/ttm_bo_unpin() v2

2020-09-22 Thread Huang Rui
On Tue, Sep 22, 2020 at 09:31:58PM +0800, Christian König wrote:
> As an alternative to the placement flag add a
> pin count to the ttm buffer object.
> 
> v2: add dma_resv_assert_held() calls
> 
> Signed-off-by: Christian König 

Series looks good to me as well.

Reviewed-by: Huang Rui 

Only one comment:

We can modify the TOPDOWN offset as 21 since the NO_EVICT is removed.

 #define TTM_PL_FLAG_UNCACHED    (1 << 17)
 #define TTM_PL_FLAG_WC          (1 << 18)
 #define TTM_PL_FLAG_CONTIGUOUS  (1 << 19)
-#define TTM_PL_FLAG_NO_EVICT    (1 << 21)
 #define TTM_PL_FLAG_TOPDOWN     (1 << 22)

Thanks,
Ray
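
For reference, a minimal driver-side sketch of the new helpers (only
ttm_bo_pin() comes from the quoted patch; the surrounding reserve/unreserve
calls and error handling are assumptions for illustration):

	/* sketch only: keep a BO resident instead of relying on TTM_PL_FLAG_NO_EVICT */
	static int demo_pin_bo(struct ttm_buffer_object *bo)
	{
		int ret;

		ret = ttm_bo_reserve(bo, true, false, NULL);
		if (ret)
			return ret;
		ttm_bo_pin(bo);		/* requires bo->base.resv to be held */
		ttm_bo_unreserve(bo);
		return 0;
	}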

> ---
>  drivers/gpu/drm/ttm/ttm_bo.c  |  9 ++---
>  drivers/gpu/drm/ttm/ttm_bo_util.c |  2 +-
>  include/drm/ttm/ttm_bo_api.h  | 26 ++
>  3 files changed, 33 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index 70b3bee27850..b82b49d43942 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -115,7 +115,7 @@ static void ttm_bo_add_mem_to_lru(struct 
> ttm_buffer_object *bo,
>   struct ttm_bo_device *bdev = bo->bdev;
>   struct ttm_resource_manager *man;
>  
> - if (!list_empty(&bo->lru))
> + if (!list_empty(&bo->lru) || bo->pin_count)
>   return;
>  
>   if (mem->placement & TTM_PL_FLAG_NO_EVICT)
> @@ -165,7 +165,8 @@ void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo,
>   ttm_bo_del_from_lru(bo);
>   ttm_bo_add_mem_to_lru(bo, &bo->mem);
>  
> - if (bulk && !(bo->mem.placement & TTM_PL_FLAG_NO_EVICT)) {
> + if (bulk && !(bo->mem.placement & TTM_PL_FLAG_NO_EVICT) &&
> + !bo->pin_count) {
>   switch (bo->mem.mem_type) {
>   case TTM_PL_TT:
>   ttm_bo_bulk_move_set_pos(&bulk->tt[bo->priority], bo);
> @@ -544,8 +545,9 @@ static void ttm_bo_release(struct kref *kref)
>* shrinkers, now that they are queued for
>* destruction.
>*/
> - if (bo->mem.placement & TTM_PL_FLAG_NO_EVICT) {
> + if (bo->mem.placement & TTM_PL_FLAG_NO_EVICT || bo->pin_count) {
>   bo->mem.placement &= ~TTM_PL_FLAG_NO_EVICT;
> + bo->pin_count = 0;
>   ttm_bo_del_from_lru(bo);
>   ttm_bo_add_mem_to_lru(bo, &bo->mem);
>   }
> @@ -1172,6 +1174,7 @@ int ttm_bo_init_reserved(struct ttm_bo_device *bdev,
>   bo->moving = NULL;
>   bo->mem.placement = TTM_PL_FLAG_CACHED;
>   bo->acc_size = acc_size;
> + bo->pin_count = 0;
>   bo->sg = sg;
>   if (resv) {
>   bo->base.resv = resv;
> diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c 
> b/drivers/gpu/drm/ttm/ttm_bo_util.c
> index fb2a25f8408f..1968df9743fc 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo_util.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
> @@ -352,7 +352,6 @@ static int ttm_buffer_object_transfer(struct 
> ttm_buffer_object *bo,
>   return -ENOMEM;
>  
>   fbo->base = *bo;
> - fbo->base.mem.placement |= TTM_PL_FLAG_NO_EVICT;
>  
>   ttm_bo_get(bo);
>   fbo->bo = bo;
> @@ -372,6 +371,7 @@ static int ttm_buffer_object_transfer(struct 
> ttm_buffer_object *bo,
>   kref_init(>base.kref);
>   fbo->base.destroy = &ttm_transfered_destroy;
>   fbo->base.acc_size = 0;
> + fbo->base.pin_count = 1;
>   if (bo->type != ttm_bo_type_sg)
>   fbo->base.base.resv = &fbo->base.base._resv;
>  
> diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
> index 0f7cd21d6d74..33aca60870e2 100644
> --- a/include/drm/ttm/ttm_bo_api.h
> +++ b/include/drm/ttm/ttm_bo_api.h
> @@ -157,6 +157,7 @@ struct ttm_buffer_object {
>  
>   struct dma_fence *moving;
>   unsigned priority;
> + unsigned pin_count;
>  
>   /**
>* Special members that are protected by the reserve lock
> @@ -606,6 +607,31 @@ static inline bool 
> ttm_bo_uses_embedded_gem_object(struct ttm_buffer_object *bo)
>   return bo->base.dev != NULL;
>  }
>  
> +/**
> + * ttm_bo_pin - Pin the buffer object.
> + * @bo: The buffer object to pin
> + *
> + * Make sure the buffer is not evicted any more during memory pressure.
> + */
> +static inline void ttm_bo_pin(struct ttm_buffer_object *bo)
> +{
> + dma_resv_assert_held(bo->base.resv);
> + ++bo->pin_count;
> +}
> +
> +/**
> + * ttm_bo_unpin - Unpin the buffer object.
> + * @bo: The buffer object to

Re: [PATCH] drm/ttm: update kernel-doc line comments

2020-09-18 Thread Huang Rui
On Fri, Sep 18, 2020 at 05:52:58PM +0800, Tian Tao wrote:
> Update kernel-doc line comments to fix warnings reported by make W=1.
> 
> drivers/gpu/drm/ttm/ttm_memory.c:271: warning: Function parameter or
> member 'glob' not described in 'ttm_shrink'
> drivers/gpu/drm/ttm/ttm_memory.c:271: warning: Function parameter or
> member 'from_wq' not described in 'ttm_shrink'
> drivers/gpu/drm/ttm/ttm_memory.c:271: warning: Function parameter or
> member 'extra' not described in 'ttm_shrink'
> drivers/gpu/drm/ttm/ttm_memory.c:271: warning: Function parameter or
> member 'ctx' not described in 'ttm_shrink'
> 
> Signed-off-by: Tian Tao 

Acked-by: Huang Rui 

> ---
>  drivers/gpu/drm/ttm/ttm_memory.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/ttm/ttm_memory.c 
> b/drivers/gpu/drm/ttm/ttm_memory.c
> index acd63b7..0b51773 100644
> --- a/drivers/gpu/drm/ttm/ttm_memory.c
> +++ b/drivers/gpu/drm/ttm/ttm_memory.c
> @@ -259,7 +259,7 @@ static bool ttm_zones_above_swap_target(struct 
> ttm_mem_global *glob,
>   return false;
>  }
>  
> -/**
> +/*
>   * At this point we only support a single shrink callback.
>   * Extend this if needed, perhaps using a linked list of callbacks.
>   * Note that this function is reentrant:
> -- 
> 2.7.4
> 


Re: [PATCH] drm/ttm: Warn on pinning without holding a reference

2020-10-28 Thread Huang Rui
On Wed, Oct 28, 2020 at 07:31:20PM +0800, Daniel Vetter wrote:
> Not technically a problem for ttm, but very likely a driver bug and
> pretty big time confusing for reviewing code.
> 
> So warn about it, both at cleanup time (so we catch these for sure)
> and at pin/unpin time (so we know who's the culprit).
> 
> Signed-off-by: Daniel Vetter 

Reviewed-by: Huang Rui 
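
A minimal sketch of the pattern the new warnings are meant to enforce (an
assumed usage pattern for illustration, not taken from the patch): keep a
reference on the BO for the whole pin window.

	/* sketch only: pin only while holding a reference */
	static int demo_pin_with_ref(struct ttm_buffer_object *bo)
	{
		int ret;

		ttm_bo_get(bo);				/* take a reference first */
		ret = ttm_bo_reserve(bo, true, false, NULL);
		if (ret) {
			ttm_bo_put(bo);
			return ret;
		}
		ttm_bo_pin(bo);				/* kref is non-zero here, no warning */
		ttm_bo_unreserve(bo);
		return 0;
	}

The unpin side mirrors this: unpin under the reservation, then drop the
reference with ttm_bo_put() last.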

> Cc: Christian Koenig 
> Cc: Huang Rui 
> ---
>  drivers/gpu/drm/ttm/ttm_bo.c | 2 +-
>  include/drm/ttm/ttm_bo_api.h | 2 ++
>  2 files changed, 3 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index f51b5e20fa17..a011072ab61d 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -509,7 +509,7 @@ static void ttm_bo_release(struct kref *kref)
>* shrinkers, now that they are queued for
>* destruction.
>*/
> - if (bo->pin_count) {
> + if (WARN_ON(bo->pin_count)) {
>   bo->pin_count = 0;
>   ttm_bo_del_from_lru(bo);
>   ttm_bo_add_mem_to_lru(bo, &bo->mem);
> diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
> index 37102e45e496..b45aee23d7d0 100644
> --- a/include/drm/ttm/ttm_bo_api.h
> +++ b/include/drm/ttm/ttm_bo_api.h
> @@ -571,6 +571,7 @@ static inline bool ttm_bo_uses_embedded_gem_object(struct 
> ttm_buffer_object *bo)
>  static inline void ttm_bo_pin(struct ttm_buffer_object *bo)
>  {
>   dma_resv_assert_held(bo->base.resv);
> + WARN_ON_ONCE(!kref_read(&bo->kref));
>   ++bo->pin_count;
>  }
>  
> @@ -584,6 +585,7 @@ static inline void ttm_bo_unpin(struct ttm_buffer_object 
> *bo)
>  {
>   dma_resv_assert_held(bo->base.resv);
>   WARN_ON_ONCE(!bo->pin_count);
> + WARN_ON_ONCE(!kref_read(&bo->kref));
>   --bo->pin_count;
>  }
>  
> -- 
> 2.28.0
> 


Re: [PATCH 1/9] drm/ttm: new TT backend allocation pool v2

2020-10-27 Thread Huang Rui
On Mon, Oct 26, 2020 at 06:41:09PM +0100, Christian König wrote:
> This replaces the spaghetti code in the two existing page pools.
> 
> First of all depending on the allocation size it is between 3 (1GiB) and
> 5 (1MiB) times faster than the old implementation.
> 
> It makes better use of buddy pages to allow for larger physical contiguous
> allocations which should result in better TLB utilization at least for
> amdgpu.
> 
> Instead of a completely braindead approach of filling the pool with one
> CPU while another one is trying to shrink it we only give back freed
> pages.
> 
> This also results in much less locking contention and a trylock free MM
> shrinker callback, so we can guarantee that pages are given back to the
> system when needed.
> 
> Downside of this is that it takes longer for many small allocations until
> the pool is filled up. We could address this, but I couldn't find a use
> case where this actually matters. We also don't bother freeing large
> chunks of pages any more since the CPU overhead in that path isn't really
> that important.
> 
> The sysfs files are replaced with a single module parameter, allowing
> users to override how many pages should be globally pooled in TTM. This
> unfortunately breaks the UAPI slightly, but as far as we know nobody ever
> depended on this.
> 
> Zeroing memory coming from the pool was handled inconsistently. The
> alloc_pages() based pool was zeroing it, the dma_alloc_attr() based one
> wasn't. For now the new implementation isn't zeroing pages from the pool
> either and only sets the __GFP_ZERO flag when necessary.
> 
> The implementation has only 768 lines of code compared to the over 2600
> of the old one, and also allows for saving quite a bunch of code in the
> drivers since we don't need specialized handling there any more based on
> kernel config.
> 
> Additional to all of that there was a neat bug with IOMMU, coherent DMA
> mappings and huge pages which is now fixed in the new code as well.
> 
> v2: make ttm_pool_apply_caching static as reported by the kernel bot, add
> some more checks
> 
> Signed-off-by: Christian König 

Just verified them on my Renoir platform; the page faults are gone.
Thanks!

Series are Tested-by: Huang Rui 
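
For orientation, a rough sketch of the driver-visible flow with the new pool
(function names from the quoted series; the exact signatures are assumed
here and the ttm_tt/ctx setup around the calls is not shown by the patch):

	/* sketch only: per-device pool lifecycle around a single ttm_tt */
	static int demo_pool_cycle(struct device *dev, struct ttm_tt *tt,
				   struct ttm_operation_ctx *ctx)
	{
		struct ttm_pool pool;
		int ret;

		ttm_pool_init(&pool, dev, false, false);	/* no dma_alloc, no dma32 */
		ret = ttm_pool_alloc(&pool, tt, ctx);		/* fills tt->pages */
		if (!ret)
			ttm_pool_free(&pool, tt);		/* freed pages refill the pool */
		ttm_pool_fini(&pool);
		return ret;
	}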

> ---
>  drivers/gpu/drm/ttm/Makefile |   2 +-
>  drivers/gpu/drm/ttm/ttm_memory.c |   3 +
>  drivers/gpu/drm/ttm/ttm_pool.c   | 668 +++
>  include/drm/ttm/ttm_caching.h|   2 +
>  include/drm/ttm/ttm_pool.h   |  90 +
>  5 files changed, 764 insertions(+), 1 deletion(-)
>  create mode 100644 drivers/gpu/drm/ttm/ttm_pool.c
>  create mode 100644 include/drm/ttm/ttm_pool.h
> 
> diff --git a/drivers/gpu/drm/ttm/Makefile b/drivers/gpu/drm/ttm/Makefile
> index 90c0da88cc98..0096bacbcf32 100644
> --- a/drivers/gpu/drm/ttm/Makefile
> +++ b/drivers/gpu/drm/ttm/Makefile
> @@ -5,7 +5,7 @@
>  ttm-y := ttm_memory.o ttm_tt.o ttm_bo.o \
>   ttm_bo_util.o ttm_bo_vm.o ttm_module.o \
>   ttm_execbuf_util.o ttm_page_alloc.o ttm_range_manager.o \
> - ttm_resource.o
> + ttm_resource.o ttm_pool.o
>  ttm-$(CONFIG_AGP) += ttm_agp_backend.o
>  ttm-$(CONFIG_DRM_TTM_DMA_PAGE_POOL) += ttm_page_alloc_dma.o
>  
> diff --git a/drivers/gpu/drm/ttm/ttm_memory.c 
> b/drivers/gpu/drm/ttm/ttm_memory.c
> index 69cf622e79e5..3012d0388c51 100644
> --- a/drivers/gpu/drm/ttm/ttm_memory.c
> +++ b/drivers/gpu/drm/ttm/ttm_memory.c
> @@ -38,6 +38,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  #define TTM_MEMORY_ALLOC_RETRIES 4
>  
> @@ -453,6 +454,7 @@ int ttm_mem_global_init(struct ttm_mem_global *glob)
>   }
>   ttm_page_alloc_init(glob, glob->zone_kernel->max_mem/(2*PAGE_SIZE));
>   ttm_dma_page_alloc_init(glob, glob->zone_kernel->max_mem/(2*PAGE_SIZE));
> + ttm_pool_mgr_init(glob->zone_kernel->max_mem/(2*PAGE_SIZE));
>   return 0;
>  out_no_zone:
>   ttm_mem_global_release(glob);
> @@ -467,6 +469,7 @@ void ttm_mem_global_release(struct ttm_mem_global *glob)
>   /* let the page allocator first stop the shrink work. */
>   ttm_page_alloc_fini();
>   ttm_dma_page_alloc_fini();
> + ttm_pool_mgr_fini();
>  
>   flush_workqueue(glob->swap_queue);
>   destroy_workqueue(glob->swap_queue);
> diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
> new file mode 100644
> index ..d25712e3ad3b
> --- /dev/null
> +++ b/drivers/gpu/drm/ttm/ttm_pool.c
> @@ -0,0 +1,668 @@
> +// SPDX-License-Identifier: GPL-2.0 OR MIT
> +/*
> + * Copyright 2020 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy

Re: drm/ttm: new TT backend allocation pool

2020-10-26 Thread Huang Rui
On Sun, Oct 25, 2020 at 11:40:47PM +0800, Christian König wrote:
> This replaces the spaghetti code in the two existing page pools.
> 
> First of all depending on the allocation size it is between 3 (1GiB) and
> 5 (1MiB) times faster than the old implementation.
> 
> It makes better use of buddy pages to allow for larger physical contiguous
> allocations which should result in better TLB utilization at least for amdgpu.
> 
> Instead of a completely braindead approach of filling the pool with one CPU
> while another one is trying to shrink it we only give back freed pages.
> 
> This also results in much less locking contention and a trylock free MM
> shrinker callback, so we can guarantee that pages are given back to the system
> when needed.
> 
> Downside of this is that it takes longer for many small allocations until the
> pool is filled up. We could address this, but I couldn't find a use case
> where this actually matters. And we don't bother freeing large chunks of pages
> any more.
> 
> The sysfs files are replaced with a single module parameter, allowing users to
> override how many pages should be globally pooled in TTM. This unfortunately
> breaks the UAPI slightly, but as far as we know nobody ever depended on this.
>
> Zeroing memory coming from the pool was handled inconsistently. The
> alloc_pages() based pool was zeroing it, the dma_alloc_attr() based one 
> wasn't.
> The new implementation isn't zeroing pages from the pool either and only sets
> the __GFP_ZERO flag when necessary.
> 
> The implementation has only 753 lines of code compared to the over 2600 of the
> old one, and also allows for saving quite a bunch of code in the drivers since
> we don't need specialized handling there any more based on kernel config.
>   
> Additional to all of that there was a neat bug with IOMMU, coherent DMA
> mappings and huge pages which is now fixed in the new code as well.

Let me give your patch set a test on my Renoir and the new Vangogh platform
with IOMMU enabled. (I actually encountered many page faults in the legacy
ttm_page_alloc_dma implementation with swiotlb=1.)

Thanks,
Ray


Re: [PATCH] drm: amdgpu: pm: Mark vangogh_clk_dpm_is_enabled() as static

2021-01-12 Thread Huang Rui
On Wed, Jan 13, 2021 at 03:57:22AM +0800, Souptick Joarder wrote:
> kernel test robot throws below warnings ->
> 
> drivers/gpu/drm/amd/amdgpu/../pm/swsmu/smu11/vangogh_ppt.c:594:6:
> warning: no previous prototype for 'vangogh_clk_dpm_is_enabled'
> [-Wmissing-prototypes]
> drivers/gpu/drm/amd/amdgpu/../pm/swsmu/smu11/vangogh_ppt.c:594:6:
> warning: no previous prototype for function 'vangogh_clk_dpm_is_enabled'
> [-Wmissing-prototypes]
> 
> Mark vangogh_clk_dpm_is_enabled() as static.
> 
> Reported-by: kernel test robot 
> Signed-off-by: Souptick Joarder 
> ---
>  drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c 
> b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
> index 75ddcad..3ffe56e 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
> @@ -610,7 +610,7 @@ static int vangogh_get_profiling_clk_mask(struct 
> smu_context *smu,
>   return 0;
>  }
>  
> -bool vangogh_clk_dpm_is_enabled(struct smu_context *smu,
> +static bool vangogh_clk_dpm_is_enabled(struct smu_context *smu,
>   enum smu_clk_type clk_type)

Ah, I have another patch which will use this function in another file.

Thanks,
Ray

>  {
>   enum smu_feature_mask feature_id = 0;
> -- 
> 1.9.1
> 


Re: [PATCH] drm/ttm: make the pool shrinker lock a mutex

2021-01-12 Thread Huang Rui
On Tue, Jan 12, 2021 at 06:49:18PM +0800, Christian König wrote:
> Ping? Ray can I get an acked-by? It's an important bug fix.
> 
> Thanks,
> Christian.
> 
> Am 11.01.21 um 14:57 schrieb Christian König:
> > set_pages_wb() might sleep and so we can't do this in an atomic context.
> >
> > Signed-off-by: Christian König 
> > Reported-by: Mikhail Gavrilov 
> > Fixes: d099fc8f540a ("drm/ttm: new TT backend allocation pool v3")

Sorry, missed this mail yesterday.

Patch looks good to me.

Reviewed-by: Huang Rui 
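
The constraint behind the change, as a short sketch (illustrative only, not
the actual call chain in the patch): freeing a write-combined/uncached page
ends up in set_pages_wb(), which may sleep, so that section cannot run under
a spinlock but is fine under a mutex.

	/* sketch only */
	static DEFINE_MUTEX(demo_lock);

	static void demo_free_wc_page(struct page *p, unsigned int order)
	{
		mutex_lock(&demo_lock);		/* mutex sections may sleep */
		set_pages_wb(p, 1 << order);	/* may sleep - a bug under spin_lock() */
		mutex_unlock(&demo_lock);
	}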

> > ---
> >   drivers/gpu/drm/ttm/ttm_pool.c | 20 ++--
> >   1 file changed, 10 insertions(+), 10 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
> > index a00b7ab9c14c..6a6eeba423d1 100644
> > --- a/drivers/gpu/drm/ttm/ttm_pool.c
> > +++ b/drivers/gpu/drm/ttm/ttm_pool.c
> > @@ -66,7 +66,7 @@ static struct ttm_pool_type global_uncached[MAX_ORDER];
> >   static struct ttm_pool_type global_dma32_write_combined[MAX_ORDER];
> >   static struct ttm_pool_type global_dma32_uncached[MAX_ORDER];
> >   
> > -static spinlock_t shrinker_lock;
> > +static struct mutex shrinker_lock;
> >   static struct list_head shrinker_list;
> >   static struct shrinker mm_shrinker;
> >   
> > @@ -249,9 +249,9 @@ static void ttm_pool_type_init(struct ttm_pool_type 
> > *pt, struct ttm_pool *pool,
> > spin_lock_init(&pt->lock);
> > INIT_LIST_HEAD(&pt->pages);
> >   
> > -   spin_lock(&shrinker_lock);
> > +   mutex_lock(&shrinker_lock);
> > list_add_tail(&pt->shrinker_list, &shrinker_list);
> > -   spin_unlock(&shrinker_lock);
> > +   mutex_unlock(&shrinker_lock);
> >   }
> >   
> >   /* Remove a pool_type from the global shrinker list and free all pages */
> > @@ -259,9 +259,9 @@ static void ttm_pool_type_fini(struct ttm_pool_type *pt)
> >   {
> > struct page *p, *tmp;
> >   
> > -   spin_lock(&shrinker_lock);
> > +   mutex_lock(&shrinker_lock);
> > list_del(&pt->shrinker_list);
> > -   spin_unlock(&shrinker_lock);
> > +   mutex_unlock(&shrinker_lock);
> >   
> > list_for_each_entry_safe(p, tmp, &pt->pages, lru)
> > ttm_pool_free_page(pt->pool, pt->caching, pt->order, p);
> > @@ -302,7 +302,7 @@ static unsigned int ttm_pool_shrink(void)
> > unsigned int num_freed;
> > struct page *p;
> >   
> > -   spin_lock(&shrinker_lock);
> > +   mutex_lock(&shrinker_lock);
> > pt = list_first_entry(&shrinker_list, typeof(*pt), shrinker_list);
> >   
> > p = ttm_pool_type_take(pt);
> > @@ -314,7 +314,7 @@ static unsigned int ttm_pool_shrink(void)
> > }
> >   
> > list_move_tail(&pt->shrinker_list, &shrinker_list);
> > -   spin_unlock(&shrinker_lock);
> > +   mutex_unlock(&shrinker_lock);
> >   
> > return num_freed;
> >   }
> > @@ -564,7 +564,7 @@ int ttm_pool_debugfs(struct ttm_pool *pool, struct 
> > seq_file *m)
> >   {
> > unsigned int i;
> >   
> > -   spin_lock(&shrinker_lock);
> > +   mutex_lock(&shrinker_lock);
> >   
> > seq_puts(m, "\t ");
> > for (i = 0; i < MAX_ORDER; ++i)
> > @@ -600,7 +600,7 @@ int ttm_pool_debugfs(struct ttm_pool *pool, struct 
> > seq_file *m)
> > seq_printf(m, "\ntotal\t: %8lu of %8lu\n",
> >atomic_long_read(&allocated_pages), page_pool_size);
> >   
> > -   spin_unlock(&shrinker_lock);
> > +   mutex_unlock(&shrinker_lock);
> >   
> > return 0;
> >   }
> > @@ -644,7 +644,7 @@ int ttm_pool_mgr_init(unsigned long num_pages)
> > if (!page_pool_size)
> > page_pool_size = num_pages;
> >   
> > -   spin_lock_init(&shrinker_lock);
> > +   mutex_init(&shrinker_lock);
> > INIT_LIST_HEAD(&shrinker_list);
> >   
> > for (i = 0; i < MAX_ORDER; ++i) {
> 


Re: [PATCH] drm/ttm: cleanup BO size handling

2020-12-08 Thread Huang Rui
On Tue, Dec 08, 2020 at 12:33:00AM +0800, Christian König wrote:
> Based on an idea from Dave, but cleaned up a bit.
> 
> We had multiple fields for essentially the same thing.
> 
> Now bo->size is the original size of the BO in arbitrary
> units, usually bytes.
> 
> bo->mem.num_pages is the size in number of pages in the
> resource domain of bo->mem.mem_type.
> 
> Signed-off-by: Christian König 

Reviewed-by: Huang Rui 
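
A small sketch of how the two remaining fields are expected to relate for a
page-granular resource domain (field names are taken from the quoted commit
message; the helper itself is only an illustration):

	/* sketch only: bo->mem.num_pages for a BO whose size is given in bytes */
	static inline unsigned long demo_size_to_num_pages(size_t size)
	{
		return PAGE_ALIGN(size) >> PAGE_SHIFT;
	}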

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c   |  2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c   |  2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c|  2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_object.h|  4 +--
>  drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h |  2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c   |  6 ++--
>  drivers/gpu/drm/amd/amdgpu/mes_v10_1.c|  2 +-
>  drivers/gpu/drm/nouveau/nouveau_bo.c  | 10 +++---
>  drivers/gpu/drm/nouveau/nouveau_display.c |  8 ++---
>  drivers/gpu/drm/nouveau/nouveau_prime.c   |  4 +--
>  drivers/gpu/drm/nouveau/nv17_fence.c  |  2 +-
>  drivers/gpu/drm/nouveau/nv50_fence.c  |  2 +-
>  drivers/gpu/drm/qxl/qxl_object.h  |  2 +-
>  drivers/gpu/drm/radeon/radeon_cs.c|  3 +-
>  drivers/gpu/drm/radeon/radeon_object.c| 13 
>  drivers/gpu/drm/radeon/radeon_object.h|  4 +--
>  drivers/gpu/drm/radeon/radeon_prime.c |  4 +--
>  drivers/gpu/drm/radeon/radeon_trace.h |  2 +-
>  drivers/gpu/drm/radeon/radeon_ttm.c   |  2 +-
>  drivers/gpu/drm/ttm/ttm_bo.c  | 33 ++-
>  drivers/gpu/drm/ttm/ttm_bo_util.c | 11 +++
>  drivers/gpu/drm/ttm/ttm_bo_vm.c   |  6 ++--
>  drivers/gpu/drm/ttm/ttm_tt.c  |  2 +-
>  drivers/gpu/drm/vmwgfx/vmwgfx_blit.c  |  4 +--
>  drivers/gpu/drm/vmwgfx/vmwgfx_bo.c|  6 ++--
>  drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c   |  2 +-
>  drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c   |  4 +--
>  drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c |  5 ++-
>  drivers/gpu/drm/vmwgfx/vmwgfx_kms.c   |  2 +-
>  drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c|  8 ++---
>  drivers/gpu/drm/vmwgfx/vmwgfx_resource.c  |  2 +-
>  drivers/gpu/drm/vmwgfx/vmwgfx_shader.c|  3 +-
>  drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c  |  4 +--
>  drivers/gpu/drm/vmwgfx/vmwgfx_surface.c   |  7 ++--
>  include/drm/ttm/ttm_bo_api.h  |  6 ++--
>  include/drm/ttm/ttm_resource.h|  1 -
>  36 files changed, 82 insertions(+), 100 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
> index e5919efca870..c4c93f19d273 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
> @@ -269,7 +269,7 @@ static struct sg_table *amdgpu_dma_buf_map(struct 
> dma_buf_attachment *attach,
>   case TTM_PL_TT:
>   sgt = drm_prime_pages_to_sg(obj->dev,
>   bo->tbo.ttm->pages,
> - bo->tbo.num_pages);
> + bo->tbo.ttm->num_pages);
>   if (IS_ERR(sgt))
>   return sgt;
>  
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> index 056cb87d09ea..52bcd1b5582f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> @@ -121,7 +121,7 @@ uint64_t amdgpu_gmc_agp_addr(struct ttm_buffer_object *bo)
>  {
>   struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
>  
> - if (bo->num_pages != 1 || bo->ttm->caching == ttm_cached)
> + if (bo->ttm->num_pages != 1 || bo->ttm->caching == ttm_cached)
>   return AMDGPU_BO_INVALID_OFFSET;
>  
>   if (bo->ttm->dma_address[0] + PAGE_SIZE >= adev->gmc.agp_size)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> index c6c9723d3d8a..381ecc4788d5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> @@ -787,7 +787,7 @@ int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr)
>   if (r < 0)
>   return r;
>  
> - r = ttm_bo_kmap(&bo->tbo, 0, bo->tbo.num_pages, &bo->kmap);
> + r = ttm_bo_kmap(&bo->tbo, 0, bo->tbo.mem.num_pages, &bo->kmap);
>   if (r)
>   return r;
>  
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> index ed47

Re: [PATCH] drm/ttm: fix DMA32 handling in the global page pool

2020-11-19 Thread Huang Rui
On Thu, Nov 19, 2020 at 05:21:51PM +0800, Christian König wrote:
> Ping, can I get an rb or at least Acked-by for this?
> 
> Thanks in advance,
> Christian.
> 
> Am 17.11.20 um 16:53 schrieb Christian König:
> > When we have mixed DMA32 and non DMA32 device in one system
> > it could otherwise happen that the DMA32 device gets pages
> > it can't work with.
> >
> > Signed-off-by: Christian König 

Looks good to me.

Reviewed-by: Huang Rui 

> > ---
> >   drivers/gpu/drm/ttm/ttm_pool.c | 22 ++
> >   1 file changed, 22 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
> > index 1b96780b4989..5455b2044759 100644
> > --- a/drivers/gpu/drm/ttm/ttm_pool.c
> > +++ b/drivers/gpu/drm/ttm/ttm_pool.c
> > @@ -63,6 +63,9 @@ static atomic_long_t allocated_pages;
> >   static struct ttm_pool_type global_write_combined[MAX_ORDER];
> >   static struct ttm_pool_type global_uncached[MAX_ORDER];
> >   
> > +static struct ttm_pool_type global_dma32_write_combined[MAX_ORDER];
> > +static struct ttm_pool_type global_dma32_uncached[MAX_ORDER];
> > +
> >   static spinlock_t shrinker_lock;
> >   static struct list_head shrinker_list;
> >   static struct shrinker mm_shrinker;
> > @@ -290,8 +293,14 @@ static struct ttm_pool_type 
> > *ttm_pool_select_type(struct ttm_pool *pool,
> >   #ifdef CONFIG_X86
> > switch (caching) {
> > case ttm_write_combined:
> > +   if (pool->use_dma32)
> > +   return &global_dma32_write_combined[order];
> > +
> > return &global_write_combined[order];
> > case ttm_uncached:
> > +   if (pool->use_dma32)
> > +   return &global_dma32_uncached[order];
> > +
> > return &global_uncached[order];
> > default:
> > break;
> > @@ -570,6 +579,11 @@ int ttm_pool_debugfs(struct ttm_pool *pool, struct 
> > seq_file *m)
> > seq_puts(m, "uc\t:");
> > ttm_pool_debugfs_orders(global_uncached, m);
> >   
> > +   seq_puts(m, "wc 32\t:");
> > +   ttm_pool_debugfs_orders(global_dma32_write_combined, m);
> > +   seq_puts(m, "uc 32\t:");
> > +   ttm_pool_debugfs_orders(global_dma32_uncached, m);
> > +
> > for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) {
> > seq_puts(m, "DMA ");
> > switch (i) {
> > @@ -640,6 +654,11 @@ int ttm_pool_mgr_init(unsigned long num_pages)
> > ttm_pool_type_init(&global_write_combined[i], NULL,
> >ttm_write_combined, i);
> > ttm_pool_type_init(&global_uncached[i], NULL, ttm_uncached, i);
> > +
> > +   ttm_pool_type_init(&global_dma32_write_combined[i], NULL,
> > +  ttm_write_combined, i);
> > +   ttm_pool_type_init(&global_dma32_uncached[i], NULL,
> > +  ttm_uncached, i);
> > }
> >   
> > mm_shrinker.count_objects = ttm_pool_shrinker_count;
> > @@ -660,6 +679,9 @@ void ttm_pool_mgr_fini(void)
> > for (i = 0; i < MAX_ORDER; ++i) {
> > ttm_pool_type_fini(&global_write_combined[i]);
> > ttm_pool_type_fini(&global_uncached[i]);
> > +
> > +   ttm_pool_type_fini(&global_dma32_write_combined[i]);
> > +   ttm_pool_type_fini(&global_dma32_uncached[i]);
> > }
> >   
> > unregister_shrinker(_shrinker);
> 


Re: 5.11-rc1 TTM list corruption

2021-01-04 Thread Huang Rui
On Mon, Jan 04, 2021 at 06:58:02PM +0800, Borislav Petkov wrote:
> On Fri, Jan 01, 2021 at 03:34:28PM +0100, Christian K?nig wrote:
> > Going to double check the code, but can you reproduce this issue
> > reliable?
> 
> Lemme find a test box which can trigger it too - the splat happened
> on my workstation and I'd like to avoid debugging there for obvious
> reasons.

Hi Boris, Christian,

I am reproducing this issue as well. Are you using a Raven board?

Thanks,
Ray

> 
> Thx.
> 
> -- 
> Regards/Gruss,
> Boris.
> 
> https://people.kernel.org/tglx/notes-about-netiquette


Re: 5.11-rc1 TTM list corruption

2021-01-05 Thread Huang Rui
On Tue, Jan 05, 2021 at 06:31:38PM +0800, Borislav Petkov wrote:
> Hi,
> 
> On Tue, Jan 05, 2021 at 12:12:13PM +0800, Huang Rui wrote:
> > I am reproducing this issue as well, are you using a Raven board?
> 
> I have no clue what Raven is. The workstation I triggered it once on, has:
> 
> [7.563968] [drm] radeon kernel modesetting enabled.
> [7.581417] [drm] initializing kernel modesetting (CAICOS 0x1002:0x6779 
> 0x174B:0xE164 0x00).

Ah, this ASIC is a bit old and still uses the radeon driver, so we didn't
reproduce it with the amdgpu driver. I don't have such an old ASIC at hand.
May we know whether this issue can also be reproduced on post-SI hardware,
which uses the amdgpu module (not sure whether you have a recent APU or GPU)?

Thanks,
Ray

> [7.609217] [drm] Detected VRAM RAM=2048M, BAR=256M
> [7.614031] [drm] RAM width 64bits DDR
> [7.639665] [drm] radeon: 2048M of VRAM memory ready
> [7.644557] [drm] radeon: 1024M of GTT memory ready.
> [7.649451] [drm] Loading CAICOS Microcode
> [7.653548] [drm] Internal thermal controller without fan control
> [7.661221] [drm] radeon: dpm initialized
> [7.665227] [drm] GART: num cpu pages 262144, num gpu pages 262144
> [7.671821] [drm] enabling PCIE gen 2 link speeds, disable with 
> radeon.pcie_gen2=0
> [7.703858] [drm] PCIE GART of 1024M enabled (table at 0x00162000).
> [7.749689] [drm] radeon: irq initialized.
> [7.769826] [drm] ring test on 0 succeeded in 1 usecs
> [7.774797] [drm] ring test on 3 succeeded in 3 usecs
> [7.955500] [drm] ring test on 5 succeeded in 1 usecs
> [7.960468] [drm] UVD initialized successfully.
> [7.965047] [drm] ib test on ring 0 succeeded in 0 usecs
> [7.970316] [drm] ib test on ring 3 succeeded in 0 usecs
> [8.626877] [drm] ib test on ring 5 succeeded
> [8.631376] [drm] Radeon Display Connectors
> [8.635496] [drm] Connector 0:
> [8.638503] [drm]   HDMI-A-1
> [8.641339] [drm]   HPD2
> [8.643835] [drm]   DDC: 0x6440 0x6440 0x6444 0x6444 0x6448 0x6448 0x644c 
> 0x644c
> [8.651102] [drm]   Encoders:
> [8.654022] [drm] DFP1: INTERNAL_UNIPHY1
> [8.658224] [drm] Connector 1:
> [8.661232] [drm]   DVI-D-1
> [8.663982] [drm]   HPD4
> [8.666479] [drm]   DDC: 0x6460 0x6460 0x6464 0x6464 0x6468 0x6468 0x646c 
> 0x646c
> [8.673745] [drm]   Encoders:
> [8.676665] [drm] DFP2: INTERNAL_UNIPHY
> [8.680782] [drm] Connector 2:
> [8.683789] [drm]   VGA-1
> [8.686369] [drm]   DDC: 0x6430 0x6430 0x6434 0x6434 0x6438 0x6438 0x643c 
> 0x643c
> [8.693636] [drm]   Encoders:
> [8.696555] [drm] CRT1: INTERNAL_KLDSCP_DAC1
> [8.788923] [drm] fb mappable at 0xE0363000
> [8.793036] [drm] vram apper at 0xE000
> [8.797064] [drm] size 9216000
> [8.800071] [drm] fb depth is 24
> [8.803249] [drm]pitch is 7680
> [8.807106] fbcon: radeondrmfb (fb0) is primary device
> [8.918927] radeon :1d:00.0: [drm] fb0: radeondrmfb frame buffer device
> [8.938461] [drm] Initialized radeon 2.50.0 20080528 for :1d:00.0 on 
> minor 0
> 
> HTH.
> 
> -- 
> Regards/Gruss,
> Boris.
> 
> https://people.kernel.org/tglx/notes-about-netiquette


Re: 5.11-rc1 TTM list corruption

2021-01-05 Thread Huang Rui
On Tue, Jan 05, 2021 at 07:43:51PM +0800, Borislav Petkov wrote:
> On Tue, Jan 05, 2021 at 07:08:52PM +0800, Huang Rui wrote:
> > Ah, this asic is a bit old and still use radeon driver. So we didn't
> > reproduce it on amdgpu driver. I don't have such the old asic in my hand.
> > May we know whether this issue can be duplicated after SI which is used
> > amdgpu module (not sure whether you have recent APU or GPU)?
> 
> The latest I have (I think it is the latest) is:
> 
> [1.826102] [drm] initializing kernel modesetting (RENOIR 0x1002:0x1636 
> 0x17AA:0x5099 0xD1).
> 
> and so far that hasn't triggered it. Which makes sense because that
> thing uses amdgpu:
> 
> [1.810260] [drm] amdgpu kernel modesetting enabled.

Yes! Renoir is recent enough to use the amdgpu kernel module. :-)
Please let us know if you still encounter the issue.

Thanks,
Ray


Re: [PATCH] drm/ttm: device naming cleanup

2021-01-22 Thread Huang Rui
On Thu, Jan 21, 2021 at 07:58:18PM +0800, Christian König wrote:
> Rename ttm_bo_device to ttm_device.
> Rename ttm_bo_driver to ttm_device_funcs.
> Rename ttm_bo_global to ttm_global.
> 
> Move global and device related functions to ttm_device.[ch].
> 
> No functional change.
> 
> Signed-off-by: Christian König 

Acked-by: Huang Rui 

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h   |   2 +-
>  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c  |   6 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c  |   2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c   |  26 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h   |   2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c|   8 +-
>  drivers/gpu/drm/drm_gem_vram_helper.c |  14 +-
>  drivers/gpu/drm/nouveau/nouveau_bo.c  |  20 +-
>  drivers/gpu/drm/nouveau/nouveau_bo.h  |   2 +-
>  drivers/gpu/drm/nouveau/nouveau_drv.h |   2 +-
>  drivers/gpu/drm/nouveau/nouveau_sgdma.c   |   6 +-
>  drivers/gpu/drm/nouveau/nouveau_ttm.c |  10 +-
>  drivers/gpu/drm/nouveau/nouveau_ttm.h |   8 +-
>  drivers/gpu/drm/qxl/qxl_drv.h |   4 +-
>  drivers/gpu/drm/qxl/qxl_release.c |   6 +-
>  drivers/gpu/drm/qxl/qxl_ttm.c |  19 +-
>  drivers/gpu/drm/radeon/radeon.h   |   6 +-
>  drivers/gpu/drm/radeon/radeon_object.c|   2 +-
>  drivers/gpu/drm/radeon/radeon_ttm.c   |  38 +-
>  drivers/gpu/drm/ttm/Makefile  |   2 +-
>  drivers/gpu/drm/ttm/ttm_agp_backend.c |   2 +-
>  drivers/gpu/drm/ttm/ttm_bo.c  | 256 +++---
>  drivers/gpu/drm/ttm/ttm_bo_util.c |  24 +-
>  drivers/gpu/drm/ttm/ttm_bo_vm.c   |  24 +-
>  drivers/gpu/drm/ttm/ttm_device.c  | 195 +++
>  drivers/gpu/drm/ttm/ttm_execbuf_util.c|   8 +-
>  drivers/gpu/drm/ttm/ttm_range_manager.c   |   4 +-
>  drivers/gpu/drm/ttm/ttm_resource.c|   4 +-
>  drivers/gpu/drm/ttm/ttm_tt.c  |  26 +-
>  drivers/gpu/drm/vmwgfx/vmwgfx_blit.c  |   4 +-
>  drivers/gpu/drm/vmwgfx/vmwgfx_bo.c|   4 +-
>  drivers/gpu/drm/vmwgfx/vmwgfx_drv.c   |  16 +-
>  drivers/gpu/drm/vmwgfx/vmwgfx_drv.h   |   4 +-
>  drivers/gpu/drm/vmwgfx/vmwgfx_resource.c  |   2 +-
>  drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c|  14 +-
>  include/drm/drm_gem_vram_helper.h |   6 +-
>  include/drm/ttm/ttm_bo_api.h  |  35 +-
>  include/drm/ttm/ttm_bo_driver.h   | 328 +-
>  include/drm/ttm/ttm_device.h  | 319 +
>  include/drm/ttm/ttm_resource.h|   4 +-
>  include/drm/ttm/ttm_tt.h  |  10 +-
>  41 files changed, 759 insertions(+), 715 deletions(-)
>  create mode 100644 drivers/gpu/drm/ttm/ttm_device.c
>  create mode 100644 include/drm/ttm/ttm_device.h
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index f77443cd9c17..ab4ac3b2651e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -1053,7 +1053,7 @@ static inline struct drm_device *adev_to_drm(struct 
> amdgpu_device *adev)
>   return &adev->ddev;
>  }
>  
> -static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device 
> *bdev)
> +static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_device *bdev)
>  {
>   return container_of(bdev, struct amdgpu_device, mman.bdev);
>  }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
> index 3107b9575929..5af464933976 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
> @@ -40,13 +40,13 @@ static atomic_t fence_seq = ATOMIC_INIT(0);
>   * All the BOs in a process share an eviction fence. When process X wants
>   * to map VRAM memory but TTM can't find enough space, TTM will attempt to
>   * evict BOs from its LRU list. TTM checks if the BO is valuable to evict
> - * by calling ttm_bo_driver->eviction_valuable().
> + * by calling ttm_device_funcs->eviction_valuable().
>   *
> - * ttm_bo_driver->eviction_valuable() - will return false if the BO belongs
> + * ttm_device_funcs->eviction_valuable() - will return false if the BO 
> belongs
>   *  to process X. Otherwise, it will return true to indicate BO can be
>   *  evicted by TTM.
>   *
> - * If ttm_bo_driver->eviction_valuable returns true, then TTM will continue
> + * If ttm_device_funcs->eviction_valuable returns true, then TTM will 
> continue
>   * the evcition process for that 

Re: [PATCH][next] drm/amdgpu: Fix masking binary not operator on two mask operations

2021-01-24 Thread Huang Rui
On Fri, Jan 22, 2021 at 11:00:22PM +0800, Colin King wrote:
> From: Colin Ian King 
> 
> Currently the ! operator is incorrectly being used to flip bits on
> mask values. Fix this by using the bit-wise ~ operator instead.
> 
> Addresses-Coverity: ("Logical vs. bitwise operator")
> Fixes: 3c9a7b7d6e75 ("drm/amdgpu: update mmhub mgcg for mmhub_v2_3")
> Signed-off-by: Colin Ian King 

Thanks.

Reviewed-by: Huang Rui 
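
A short illustration of why the '!' variant is a bug (generic example, not
the driver code): logical NOT collapses any non-zero mask to 0, so "data &= !mask"
clears every bit, while bitwise NOT only clears the bits covered by the mask.

	/* sketch only */
	u32 mask = 0x30;
	u32 a = 0xff, b = 0xff;

	a &= !mask;	/* !0x30 == 0        -> a == 0x00, all bits lost        */
	b &= ~mask;	/* ~0x30 == ...ffcf  -> b == 0xcf, only mask bits lost  */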

> ---
>  drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c 
> b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c
> index 1961745e89c7..ab9be5ad5a5f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c
> @@ -531,12 +531,12 @@ mmhub_v2_3_update_medium_grain_light_sleep(struct 
> amdgpu_device *adev,
>  
>   if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS)) {
>   data &= ~MM_ATC_L2_CGTT_CLK_CTRL__MGLS_OVERRIDE_MASK;
> - data1 &= !(DAGB0_WR_CGTT_CLK_CTRL__LS_OVERRIDE_MASK |
> + data1 &= ~(DAGB0_WR_CGTT_CLK_CTRL__LS_OVERRIDE_MASK |
>   DAGB0_WR_CGTT_CLK_CTRL__LS_OVERRIDE_WRITE_MASK |
>   DAGB0_WR_CGTT_CLK_CTRL__LS_OVERRIDE_READ_MASK |
>   DAGB0_WR_CGTT_CLK_CTRL__LS_OVERRIDE_RETURN_MASK |
>   DAGB0_WR_CGTT_CLK_CTRL__LS_OVERRIDE_REGISTER_MASK);
> - data2 &= !(DAGB0_RD_CGTT_CLK_CTRL__LS_OVERRIDE_MASK |
> + data2 &= ~(DAGB0_RD_CGTT_CLK_CTRL__LS_OVERRIDE_MASK |
>   DAGB0_RD_CGTT_CLK_CTRL__LS_OVERRIDE_WRITE_MASK |
>   DAGB0_RD_CGTT_CLK_CTRL__LS_OVERRIDE_READ_MASK |
>   DAGB0_RD_CGTT_CLK_CTRL__LS_OVERRIDE_RETURN_MASK |
> -- 
> 2.29.2
> 


Re: [PATCH] drm: amdgpu: pm: Mark vangogh_clk_dpm_is_enabled() as static

2021-01-12 Thread Huang Rui
On Wed, Jan 13, 2021 at 10:13:02AM +0800, Alex Deucher wrote:
> On Tue, Jan 12, 2021 at 8:19 PM Huang Rui  wrote:
> >
> > On Wed, Jan 13, 2021 at 03:57:22AM +0800, Souptick Joarder wrote:
> > > kernel test robot throws below warnings ->
> > >
> > > drivers/gpu/drm/amd/amdgpu/../pm/swsmu/smu11/vangogh_ppt.c:594:6:
> > > warning: no previous prototype for 'vangogh_clk_dpm_is_enabled'
> > > [-Wmissing-prototypes]
> > > drivers/gpu/drm/amd/amdgpu/../pm/swsmu/smu11/vangogh_ppt.c:594:6:
> > > warning: no previous prototype for function 'vangogh_clk_dpm_is_enabled'
> > > [-Wmissing-prototypes]
> > >
> > > Mark vangogh_clk_dpm_is_enabled() as static.
> > >
> > > Reported-by: kernel test robot 
> > > Signed-off-by: Souptick Joarder 
> > > ---
> > >  drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c | 2 +-
> > >  1 file changed, 1 insertion(+), 1 deletion(-)
> > >
> > > diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c 
> > > b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
> > > index 75ddcad..3ffe56e 100644
> > > --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
> > > +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
> > > @@ -610,7 +610,7 @@ static int vangogh_get_profiling_clk_mask(struct 
> > > smu_context *smu,
> > >   return 0;
> > >  }
> > >
> > > -bool vangogh_clk_dpm_is_enabled(struct smu_context *smu,
> > > +static bool vangogh_clk_dpm_is_enabled(struct smu_context *smu,
> > >   enum smu_clk_type clk_type)
> >
> > Ah, I have another patch which will use this function in another file.
> >
> 
> I can drop it if you plan to land those patches soon.

Thanks Alex. Yes, I will upload them after verifying them on the new firmware
today.

Thanks,
Ray

> 
> Alex
> 
> 
> > Thanks,
> > Ray
> >
> > >  {
> > >   enum smu_feature_mask feature_id = 0;
> > > --
> > > 1.9.1
> > >


Re: [PATCH 5.11 regression fix] drm/ttm: fix combining __GFP_HIGHMEM and __GFP_DMA32 flag for DMA32 pools

2021-01-12 Thread Huang Rui
On Wed, Jan 13, 2021 at 02:32:49AM +0800, Hans de Goede wrote:
> GFP_TRANSHUGE_LIGHT includes __GFP_HIGHMEM and combining
> __GFP_HIGHMEM with __GFP_DMA32 is not allowed.
> 
> So we must not set add GFP_TRANSHUGE_LIGHT to the gfp_flags when
> allocating pages from a dma32 pool.
> 
> This fixes the following oops when using a driver which uses DMA32
> pools such as the vboxvideo driver:
> 
> [  419.852194] [ cut here ]
> [  419.852200] kernel BUG at include/linux/gfp.h:457!
> [  419.852208] invalid opcode:  [#4] SMP PTI
> [  419.852212] CPU: 0 PID: 1522 Comm: Xorg Tainted: G  D   
> 5.11.0-rc2+ #187
> [  419.852214] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS 
> VirtualBox 12/01/2006
> [  419.852216] RIP: 0010:__alloc_pages_nodemask+0x31a/0x3d0
> [  419.85] Code: 00 00 8b 05 a8 3b 93 01 85 c0 0f 85 03 fe ff ff 89 e8 44 
> 89 fa c1 e8 03 80 ca 80 83 e0 03 83 f8 01 44 0f 44 fa e9 e9 fd ff ff <0f> 0b 
> 0f 0b e9 79 fd ff ff 31 c0 e9 88 fd ff ff e8 41 ad fb ff 48
> [  419.852224] RSP: :b1164096bc60 EFLAGS: 00010247
> [  419.852227] RAX:  RBX:  RCX: 
> e8e8
> [  419.852229] RDX:  RSI: 0006 RDI: 
> 00192dc6
> [  419.852230] RBP: 00192dc6 R08:  R09: 
> 
> [  419.852232] R10: 0017 R11: 7ff303d0a000 R12: 
> 0009
> [  419.852233] R13: 0009 R14: 8be4cafe0880 R15: 
> 8be5c26fe000
> [  419.852235] FS:  7ff3046e0f00() GS:8be5dbc0() 
> knlGS:
> [  419.852237] CS:  0010 DS:  ES:  CR0: 80050033
> [  419.852239] CR2: 7ff303d0a000 CR3: 0afd8004 CR4: 
> 000706f0
> [  419.852243] DR0:  DR1:  DR2: 
> 
> [  419.852244] DR3:  DR6: fffe0ff0 DR7: 
> 0400
> [  419.852246] Call Trace:
> [  419.852252]  ttm_pool_alloc+0x2e8/0x5f0 [ttm]
> [  419.852261]  ttm_tt_populate+0x9f/0xe0 [ttm]
> [  419.852267]  ttm_bo_vm_fault_reserved+0x236/0x6e0 [ttm]
> [  419.852274]  ttm_bo_vm_fault+0x4a/0xe0 [ttm]
> [  419.852279]  __do_fault+0x37/0x110
> [  419.852283]  handle_mm_fault+0x1493/0x1990
> [  419.852288]  do_user_addr_fault+0x1c7/0x4c0
> [  419.852292]  exc_page_fault+0x67/0x250
> [  419.852295]  ? asm_exc_page_fault+0x8/0x30
> [  419.852299]  asm_exc_page_fault+0x1e/0x30
> [  419.852301] RIP: 0033:0x7ff304f3cdf8
> [  419.852304] Code: 83 c0 04 83 fa 03 7e ea a8 0f 75 ee 83 fa 7f 7e e1 83 c2 
> 80 89 d6 c1 ee 07 8d 4e 01 48 c1 e1 07 48 01 c1 0f 1f 80 00 00 00 00 <0f> 29 
> 00 48 83 e8 80 0f 29 40 90 0f 29 40 a0 0f 29 40 b0 0f 29 40
> [  419.852306] RSP: 002b:7ffec360e7d8 EFLAGS: 00010206
> [  419.852308] RAX: 7ff303d0a000 RBX: 02e2 RCX: 
> 7ff303d0b300
> [  419.852309] RDX: 12c0 RSI: 0025 RDI: 
> 
> [  419.852311] RBP: 1340 R08:  R09: 
> 
> [  419.852312] R10: 7ff303d0a000 R11: 1340 R12: 
> 7ff303d0a000
> [  419.852313] R13: 556665f1eb30 R14:  R15: 
> 
> [  419.852318] Modules linked in: xt_CHECKSUM xt_MASQUERADE xt_conntrack 
> ipt_REJECT nf_nat_tftp nf_conntrack_tftp tun bridge stp llc nft_objref 
> nf_conntrack_netbios_ns nf_conntrack_broadcast nft_fib_inet nft_fib_ipv4 
> nft_fib_ipv6 nft_fib nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 nft_reject 
> nft_ct nft_chain_nat rfkill ip6table_nat ip6table_mangle ip6table_raw 
> ip6table_security iptable_nat nf_nat nf_conntrack nf_defrag_ipv6 
> nf_defrag_ipv4 iptable_mangle iptable_raw iptable_security ip_set nf_tables 
> vboxsf nfnetlink ip6table_filter ip6_tables iptable_filter sunrpc vfat fat 
> intel_rapl_msr joydev intel_rapl_common intel_powerclamp crct10dif_pclmul 
> crc32_pclmul ghash_clmulni_intel snd_intel8x0 rapl snd_ac97_codec ac97_bus 
> snd_seq snd_seq_device snd_pcm pcspkr snd_timer snd soundcore i2c_piix4 
> vboxguest ip_tables vboxvideo drm_vram_helper drm_kms_helper cec 
> drm_ttm_helper ttm crc32c_intel serio_raw e1000 drm drm_privacy_screen_helper 
> ata_generic pata_acpi video fuse
> [  419.852375] ---[ end trace 511e5346897d9526 ]---
> 
> Note in case of the vboxvideo driver the DMA32 pool is allocated through
> drm_vram_helper_alloc_mm() which is also used by the bochs and
> hisilicon/hibmc drivers.
> 
> Cc: Christian König 
> Fixes: d099fc8f540a ("drm/ttm: new TT backend allocation pool v3")
> Signed-off-by: Hans de Goede 

Thanks.

Reviewed-by: Huang Rui 
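
The rule being enforced, as a short sketch (the shape of the fix is
paraphrased, this is not the actual patch hunk): GFP_TRANSHUGE_LIGHT contains
__GFP_HIGHMEM, and __GFP_HIGHMEM combined with __GFP_DMA32 trips the BUG in
include/linux/gfp.h, so the transhuge flags may only be added for non-DMA32
pools.

	/* sketch only */
	static gfp_t demo_gfp_flags(bool use_dma32, unsigned int order)
	{
		gfp_t gfp = GFP_USER;

		if (use_dma32)
			gfp |= GFP_DMA32;	/* must not be mixed with __GFP_HIGHMEM */
		else if (order)
			gfp |= GFP_TRANSHUGE_LIGHT | __GFP_NORETRY | __GFP_KSWAPD_RECLAIM;
		return gfp;
	}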

> ---
>  drivers/gpu/drm/ttm/ttm_pool.c | 5 +++--
>  1 file changed, 3 insertion

Re: [PATCH] drm: amdgpu: pm: Mark vangogh_clk_dpm_is_enabled() as static

2021-01-12 Thread Huang Rui
On Wed, Jan 13, 2021 at 10:21:26AM +0800, Huang Rui wrote:
> On Wed, Jan 13, 2021 at 10:13:02AM +0800, Alex Deucher wrote:
> > On Tue, Jan 12, 2021 at 8:19 PM Huang Rui  wrote:
> > >
> > > On Wed, Jan 13, 2021 at 03:57:22AM +0800, Souptick Joarder wrote:
> > > > kernel test robot throws below warnings ->
> > > >
> > > > drivers/gpu/drm/amd/amdgpu/../pm/swsmu/smu11/vangogh_ppt.c:594:6:
> > > > warning: no previous prototype for 'vangogh_clk_dpm_is_enabled'
> > > > [-Wmissing-prototypes]
> > > > drivers/gpu/drm/amd/amdgpu/../pm/swsmu/smu11/vangogh_ppt.c:594:6:
> > > > warning: no previous prototype for function 'vangogh_clk_dpm_is_enabled'
> > > > [-Wmissing-prototypes]
> > > >
> > > > Mark vangogh_clk_dpm_is_enabled() as static.
> > > >
> > > > Reported-by: kernel test robot 
> > > > Signed-off-by: Souptick Joarder 
> > > > ---
> > > >  drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c | 2 +-
> > > >  1 file changed, 1 insertion(+), 1 deletion(-)
> > > >
> > > > diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c 
> > > > b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
> > > > index 75ddcad..3ffe56e 100644
> > > > --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
> > > > +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
> > > > @@ -610,7 +610,7 @@ static int vangogh_get_profiling_clk_mask(struct 
> > > > smu_context *smu,
> > > >   return 0;
> > > >  }
> > > >
> > > > -bool vangogh_clk_dpm_is_enabled(struct smu_context *smu,
> > > > +static bool vangogh_clk_dpm_is_enabled(struct smu_context *smu,
> > > >   enum smu_clk_type clk_type)
> > >
> > > Ah, I have another patch which will use this function in another file.
> > >
> > 
> > I can drop it if you plan to land those patches soon.
> 
> Thanks Alex. Yes, I will upload them after verify them on the new firmware
> today.

Sorry Alex, I misread the function name as "cclk_dpm".
This patch is good, please go ahead and apply it.

Reviewed-by: Huang Rui 

Thanks,
Ray

> 
> Thanks,
> Ray
> 
> > 
> > Alex
> > 
> > 
> > > Thanks,
> > > Ray
> > >
> > > >  {
> > > >   enum smu_feature_mask feature_id = 0;
> > > > --
> > > > 1.9.1
> > > >


Re: [PATCH 1/2] drm/radeon: stop re-init the TTM page pool

2021-01-05 Thread Huang Rui
On Wed, Jan 06, 2021 at 02:23:08AM +0800, Christian König wrote:
> Drivers are not supposed to init the page pool directly any more.
> 
> Signed-off-by: Christian König 

Series are Reviewed-by: Huang Rui 

> ---
>  drivers/gpu/drm/radeon/radeon_ttm.c | 3 ---
>  1 file changed, 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c 
> b/drivers/gpu/drm/radeon/radeon_ttm.c
> index d4328ff57757..35b715f82ed8 100644
> --- a/drivers/gpu/drm/radeon/radeon_ttm.c
> +++ b/drivers/gpu/drm/radeon/radeon_ttm.c
> @@ -729,9 +729,6 @@ int radeon_ttm_init(struct radeon_device *rdev)
>   }
>   rdev->mman.initialized = true;
>  
> - ttm_pool_init(&rdev->mman.bdev.pool, rdev->dev, rdev->need_swiotlb,
> -   dma_addressing_limited(&rdev->pdev->dev));
> -
>   r = radeon_ttm_init_vram(rdev);
>   if (r) {
>   DRM_ERROR("Failed initializing VRAM heap.\n");
> -- 
> 2.25.1
> 


Re: [PATCH] drm/nouveau: init the base GEM fields for internal BOs

2021-06-10 Thread Huang Rui
On Thu, Jun 10, 2021 at 04:08:40PM +0800, Koenig, Christian wrote:
> 
> 
> Am 09.06.21 um 19:45 schrieb Mikko Perttunen:
> > On 6/9/21 8:29 PM, Christian König wrote:
> >> TTMs buffer objects are based on GEM objects for quite a while
> >> and rely on initializing those fields before initializing the TTM BO.
> >>
> >> Noveau now doesn't init the GEM object for internally allocated BOs,
> >
> > Nouveau
> >
> >> so make sure that we at least initialize some necessary fields.
> >>
> >> Signed-off-by: Christian König 
> >> ---
> >>   drivers/gpu/drm/nouveau/nouveau_bo.c | 6 ++
> >>   1 file changed, 6 insertions(+)
> >>
> >> diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c 
> >> b/drivers/gpu/drm/nouveau/nouveau_bo.c
> >> index 520b1ea9d16c..085023624fb0 100644
> >> --- a/drivers/gpu/drm/nouveau/nouveau_bo.c
> >> +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
> >> @@ -149,6 +149,8 @@ nouveau_bo_del_ttm(struct ttm_buffer_object *bo)
> >>    */
> >>   if (bo->base.dev)
> >>   drm_gem_object_release(&bo->base);
> >> +    else
> >> +    dma_resv_fini(&bo->base._resv);
> >>     kfree(nvbo);
> >>   }
> >> @@ -330,6 +332,10 @@ nouveau_bo_new(struct nouveau_cli *cli, u64 
> >> size, int align,
> >>   if (IS_ERR(nvbo))
> >>   return PTR_ERR(nvbo);
> >>   +    nvbo->bo.base.size = size;
> >> +    dma_resv_init(&nvbo->bo.base._resv);
> >> +    drm_vma_node_reset(&nvbo->bo.base.vma_node);
> >> +
> >>   ret = nouveau_bo_init(nvbo, size, align, domain, sg, robj);
> >>   if (ret)
> >>   return ret;
> >>
> >
> > That works, thanks for the fix!
> >
> > Tested-by: Mikko Perttunen 

Reviewed-by: Huang Rui 

> 
> Thanks. Can anybody give me an rb that I can push this to drm-misc-next 
> before the weekend?
> 
> Regards,
> Christian.
> 
> >
> > Mikko
> 


Re: [PATCH] drm/ttm: Explain why ttm_bo_add_move_fence uses a shared slot

2021-05-19 Thread Huang Rui
On Wed, May 19, 2021 at 04:24:09PM +0800, Daniel Vetter wrote:
> Motivated because I got confused and Christian confirmed why this
> works. I think this is non-obvious enough that it merits a slightly
> longer comment.
> 
> Cc: Christian König 
> Cc: Christian Koenig 
> Cc: Huang Rui 
> Cc: Thomas Hellström 
> Signed-off-by: Daniel Vetter 

Acked-by: Huang Rui 

> ---
>  drivers/gpu/drm/ttm/ttm_bo.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index ca1b098b6a56..51a94fd63bd7 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -682,7 +682,9 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
>  }
>  
>  /*
> - * Add the last move fence to the BO and reserve a new shared slot.
> + * Add the last move fence to the BO and reserve a new shared slot. We only 
> use
> + * a shared slot to avoid unecessary sync and rely on the subsequent bo move 
> to
> + * either stall or use an exclusive fence respectively set bo->moving.
>   */
>  static int ttm_bo_add_move_fence(struct ttm_buffer_object *bo,
>struct ttm_resource_manager *man,
> -- 
> 2.31.0
> 
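
For context, a minimal sketch of the pattern the new comment describes, using the dma_resv API of that kernel generation: the move fence goes into a shared reservation slot rather than the exclusive one, so the BO is not synced against work it does not actually conflict with. The helper name below is hypothetical; this is not ttm_bo_add_move_fence() itself.

#include <linux/dma-resv.h>
#include <linux/dma-fence.h>

/* Caller holds the reservation lock. Reserve one shared slot and add the
 * manager's move fence there; the exclusive slot is left for the subsequent
 * buffer move (or bo->moving) to handle real conflicts. */
static int example_add_move_fence(struct dma_resv *resv,
				  struct dma_fence *move_fence)
{
	int ret;

	ret = dma_resv_reserve_shared(resv, 1);
	if (ret)
		return ret;

	dma_resv_add_shared_fence(resv, move_fence);
	return 0;
}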


Re: your mail

2021-04-07 Thread Huang Rui
On Wed, Apr 07, 2021 at 09:27:46AM +0800, songqiang wrote:

Please add the description in the commit message and subject.

Thanks,
Ray

> Signed-off-by: songqiang 
> ---
>  drivers/gpu/drm/ttm/ttm_page_alloc.c | 18 ++
>  1 file changed, 14 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c 
> b/drivers/gpu/drm/ttm/ttm_page_alloc.c
> index 14660f723f71..f3698f0ad4d7 100644
> --- a/drivers/gpu/drm/ttm/ttm_page_alloc.c
> +++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c
> @@ -736,8 +736,16 @@ static void ttm_put_pages(struct page **pages, unsigned 
> npages, int flags,
>   if (++p != pages[i + j])
>   break;
>  
> - if (j == HPAGE_PMD_NR)
> + if (j == HPAGE_PMD_NR) {
>   order = HPAGE_PMD_ORDER;
> + for (j = 1; j < HPAGE_PMD_NR; ++j)
> + page_ref_dec(pages[i+j]);
> + }
>   }
>  #endif
>  
> @@ -868,10 +876,12 @@ static int ttm_get_pages(struct page **pages, unsigned 
> npages, int flags,
>   p = alloc_pages(huge_flags, HPAGE_PMD_ORDER);
>   if (!p)
>   break;
> -
> - for (j = 0; j < HPAGE_PMD_NR; ++j)
> + for (j = 0; j < HPAGE_PMD_NR; ++j) {
>   pages[i++] = p++;
> -
> + if (j > 0)
> + page_ref_inc(pages[i-1]);
> + }
>   npages -= HPAGE_PMD_NR;
>   }
>   }
> 
> 
> 


Re: [PATCH V2] drm: amd: pm: Mundane typo fixes in the file amdgpu_pm.c

2021-03-14 Thread Huang Rui
On Mon, Mar 15, 2021 at 11:21:36AM +0800, Bhaskar Chowdhury wrote:
> 
> s/"an minimum"/"a minimum"/
> s/"an maxmum"/"a maximum"/
> 
> Signed-off-by: Bhaskar Chowdhury 

Reviewed-by: Huang Rui 

> ---
>  Changes from V1:
>   Randy's suggestion to adjust the subject line text
>   And missed out a spell too,which now included
> 
>  drivers/gpu/drm/amd/pm/amdgpu_pm.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c 
> b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
> index 5fa65f191a37..308249ae1a22 100644
> --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
> +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
> @@ -3315,9 +3315,9 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct 
> device *dev,
>   *
>   * - pwm1_max: pulse width modulation fan control maximum level (255)
>   *
> - * - fan1_min: an minimum value Unit: revolution/min (RPM)
> + * - fan1_min: a minimum value Unit: revolution/min (RPM)
>   *
> - * - fan1_max: an maxmum value Unit: revolution/max (RPM)
> + * - fan1_max: a maximum value Unit: revolution/max (RPM)
>   *
>   * - fan1_input: fan speed in RPM
>   *
> --
> 2.30.2
> 


Re: [PATCH 2/3] drm/ttm: remove swap LRU v3

2021-03-18 Thread Huang Rui
On Thu, Mar 18, 2021 at 08:47:18PM +0800, Christian König wrote:
> Instead evict round robin from each device's SYSTEM and TT domain.
> 
> v2: reorder num_pages access reported by Dan's script
> v3: fix rebase fallout, num_pages should be 32bit
> 
> Signed-off-by: Christian König 

Reviewed-by: Huang Rui 

> ---
>  drivers/gpu/drm/ttm/ttm_bo.c| 29 --
>  drivers/gpu/drm/ttm/ttm_bo_util.c   |  1 -
>  drivers/gpu/drm/ttm/ttm_device.c| 60 +
>  drivers/gpu/drm/vmwgfx/vmwgfx_drv.c |  2 +-
>  include/drm/ttm/ttm_bo_api.h|  1 -
>  include/drm/ttm/ttm_bo_driver.h |  1 -
>  include/drm/ttm/ttm_device.h|  7 +---
>  7 files changed, 48 insertions(+), 53 deletions(-)
> 
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index 66e00b404ec3..3673157527ff 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -73,7 +73,6 @@ static void ttm_bo_del_from_lru(struct ttm_buffer_object 
> *bo)
>  {
>   struct ttm_device *bdev = bo->bdev;
>  
> - list_del_init(>swap);
>   list_del_init(>lru);
>  
>   if (bdev->funcs->del_from_lru_notify)
> @@ -105,16 +104,6 @@ void ttm_bo_move_to_lru_tail(struct ttm_buffer_object 
> *bo,
>  
>   man = ttm_manager_type(bdev, mem->mem_type);
>   list_move_tail(>lru, >lru[bo->priority]);
> - if (man->use_tt && bo->ttm &&
> - !(bo->ttm->page_flags & (TTM_PAGE_FLAG_SG |
> -  TTM_PAGE_FLAG_SWAPPED))) {
> - struct list_head *swap;
> -
> - swap = _glob.swap_lru[bo->priority];
> - list_move_tail(>swap, swap);
> - } else {
> - list_del_init(>swap);
> - }
>  
>   if (bdev->funcs->del_from_lru_notify)
>   bdev->funcs->del_from_lru_notify(bo);
> @@ -129,9 +118,6 @@ void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo,
>   ttm_bo_bulk_move_set_pos(>vram[bo->priority], bo);
>   break;
>   }
> - if (bo->ttm && !(bo->ttm->page_flags &
> -  (TTM_PAGE_FLAG_SG | TTM_PAGE_FLAG_SWAPPED)))
> - ttm_bo_bulk_move_set_pos(>swap[bo->priority], bo);
>   }
>  }
>  EXPORT_SYMBOL(ttm_bo_move_to_lru_tail);
> @@ -169,20 +155,6 @@ void ttm_bo_bulk_move_lru_tail(struct ttm_lru_bulk_move 
> *bulk)
>   list_bulk_move_tail(>lru[i], >first->lru,
>   >last->lru);
>   }
> -
> - for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> - struct ttm_lru_bulk_move_pos *pos = >swap[i];
> - struct list_head *lru;
> -
> - if (!pos->first)
> - continue;
> -
> - dma_resv_assert_held(pos->first->base.resv);
> - dma_resv_assert_held(pos->last->base.resv);
> -
> - lru = _glob.swap_lru[i];
> - list_bulk_move_tail(lru, >first->swap, >last->swap);
> - }
>  }
>  EXPORT_SYMBOL(ttm_bo_bulk_move_lru_tail);
>  
> @@ -1065,7 +1037,6 @@ int ttm_bo_init_reserved(struct ttm_device *bdev,
>   kref_init(>kref);
>   INIT_LIST_HEAD(>lru);
>   INIT_LIST_HEAD(>ddestroy);
> - INIT_LIST_HEAD(>swap);
>   bo->bdev = bdev;
>   bo->type = type;
>   bo->mem.mem_type = TTM_PL_SYSTEM;
> diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c 
> b/drivers/gpu/drm/ttm/ttm_bo_util.c
> index 031e5819fec4..a2a17c84ceb3 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo_util.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
> @@ -303,7 +303,6 @@ static int ttm_buffer_object_transfer(struct 
> ttm_buffer_object *bo,
>   atomic_inc(_glob.bo_count);
>   INIT_LIST_HEAD(>base.ddestroy);
>   INIT_LIST_HEAD(>base.lru);
> - INIT_LIST_HEAD(>base.swap);
>   fbo->base.moving = NULL;
>   drm_vma_node_reset(>base.base.vma_node);
>  
> diff --git a/drivers/gpu/drm/ttm/ttm_device.c 
> b/drivers/gpu/drm/ttm/ttm_device.c
> index b1424189fdfb..2096a0fd9c35 100644
> --- a/drivers/gpu/drm/ttm/ttm_device.c
> +++ b/drivers/gpu/drm/ttm/ttm_device.c
> @@ -67,7 +67,6 @@ static int ttm_global_init(void)
>   unsigned long num_pages;
>   struct sysinfo si;
>   int ret = 0;
> - unsigned i;
>  
>   mutex_lock(_global_mutex);
>   if (++ttm_glob_use_count > 1)
> @@ -90,8 +89,6 @@ static int ttm_global_init(void)
>   goto out;
>   }
>  
> - for (i

Re: [PATCH 3/3] drm/ttm: switch to per device LRU lock

2021-03-18 Thread Huang Rui
On Thu, Mar 18, 2021 at 08:47:19PM +0800, Christian König wrote:
> Instead of having a global lock.
> 
> Signed-off-by: Christian König 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c |  8 ++---
>  drivers/gpu/drm/qxl/qxl_release.c  |  5 +--
>  drivers/gpu/drm/ttm/ttm_bo.c   | 49 --
>  drivers/gpu/drm/ttm/ttm_device.c   | 12 +++
>  drivers/gpu/drm/ttm/ttm_execbuf_util.c |  8 ++---
>  drivers/gpu/drm/ttm/ttm_resource.c |  9 +++--
>  include/drm/ttm/ttm_bo_driver.h|  4 +--
>  include/drm/ttm/ttm_device.h   |  4 +--
>  8 files changed, 43 insertions(+), 56 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 9d19078246c8..ae18c0e32347 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -638,15 +638,15 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device 
> *adev,
>   struct amdgpu_vm_bo_base *bo_base;
>  
>   if (vm->bulk_moveable) {
> - spin_lock(_glob.lru_lock);
> + spin_lock(>mman.bdev.lru_lock);

Could you please explain why you want to switch to a per-device lock instead of the global lock?

Thanks,
Ray

>   ttm_bo_bulk_move_lru_tail(>lru_bulk_move);
> - spin_unlock(_glob.lru_lock);
> + spin_unlock(>mman.bdev.lru_lock);
>   return;
>   }
>  
>   memset(>lru_bulk_move, 0, sizeof(vm->lru_bulk_move));
>  
> - spin_lock(_glob.lru_lock);
> + spin_lock(>mman.bdev.lru_lock);
>   list_for_each_entry(bo_base, >idle, vm_status) {
>   struct amdgpu_bo *bo = bo_base->bo;
>  
> @@ -660,7 +660,7 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device 
> *adev,
>   >shadow->tbo.mem,
>   >lru_bulk_move);
>   }
> - spin_unlock(_glob.lru_lock);
> + spin_unlock(>mman.bdev.lru_lock);
>  
>   vm->bulk_moveable = true;
>  }
> diff --git a/drivers/gpu/drm/qxl/qxl_release.c 
> b/drivers/gpu/drm/qxl/qxl_release.c
> index f5845c96d414..b19f2f00b215 100644
> --- a/drivers/gpu/drm/qxl/qxl_release.c
> +++ b/drivers/gpu/drm/qxl/qxl_release.c
> @@ -426,16 +426,13 @@ void qxl_release_fence_buffer_objects(struct 
> qxl_release *release)
>  release->id | 0xf000, release->base.seqno);
>   trace_dma_fence_emit(>base);
>  
> - spin_lock(_glob.lru_lock);
> -
>   list_for_each_entry(entry, >bos, head) {
>   bo = entry->bo;
>  
>   dma_resv_add_shared_fence(bo->base.resv, >base);
> - ttm_bo_move_to_lru_tail(bo, >mem, NULL);
> + ttm_bo_move_to_lru_tail_unlocked(bo);
>   dma_resv_unlock(bo->base.resv);
>   }
> - spin_unlock(_glob.lru_lock);
>   ww_acquire_fini(>ticket);
>  }
>  
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index 3673157527ff..2d2ac532987e 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -243,9 +243,9 @@ static int ttm_bo_individualize_resv(struct 
> ttm_buffer_object *bo)
>* reference it any more. The only tricky case is the trylock on
>* the resv object while holding the lru_lock.
>*/
> - spin_lock(_glob.lru_lock);
> + spin_lock(>bdev->lru_lock);
>   bo->base.resv = >base._resv;
> - spin_unlock(_glob.lru_lock);
> + spin_unlock(>bdev->lru_lock);
>   }
>  
>   return r;
> @@ -304,7 +304,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object 
> *bo,
>  
>   if (unlock_resv)
>   dma_resv_unlock(bo->base.resv);
> - spin_unlock(_glob.lru_lock);
> + spin_unlock(>bdev->lru_lock);
>  
>   lret = dma_resv_wait_timeout_rcu(resv, true, interruptible,
>30 * HZ);
> @@ -314,7 +314,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object 
> *bo,
>   else if (lret == 0)
>   return -EBUSY;
>  
> - spin_lock(_glob.lru_lock);
> + spin_lock(>bdev->lru_lock);
>   if (unlock_resv && !dma_resv_trylock(bo->base.resv)) {
>   /*
>* We raced, and lost, someone else holds the 
> reservation now,
> @@ -324,7 +324,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object 
> *bo,
>* delayed destruction would succeed, so just return 
> success
>* here.
>*/
> - spin_unlock(_glob.lru_lock);
> + spin_unlock(>bdev->lru_lock);
>   return 0;
>   }
>   ret = 0;
> @@ -333,13 +333,13 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object 
> *bo,
>   if (ret || unlikely(list_empty(>ddestroy))) {
>  

Re: [PATCH 1/3] drm/ttm: move swapout logic around v2

2021-03-18 Thread Huang Rui
On Thu, Mar 18, 2021 at 08:47:17PM +0800, Christian König wrote:
> Move the iteration of the global lru into the new function
> ttm_global_swapout() and use that instead in drivers.
> 
> v2: consistently return int
> 
> Signed-off-by: Christian König 

Reviewed-by: Huang Rui 

> ---
>  drivers/gpu/drm/ttm/ttm_bo.c| 57 -
>  drivers/gpu/drm/ttm/ttm_device.c| 29 +++
>  drivers/gpu/drm/ttm/ttm_tt.c|  2 +-
>  drivers/gpu/drm/vmwgfx/ttm_memory.c |  3 +-
>  drivers/gpu/drm/vmwgfx/vmwgfx_drv.c |  2 +-
>  include/drm/ttm/ttm_bo_api.h|  3 +-
>  include/drm/ttm/ttm_device.h|  2 +
>  7 files changed, 53 insertions(+), 45 deletions(-)
> 
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index 3c23e863a3f0..66e00b404ec3 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -1193,56 +1193,35 @@ int ttm_bo_wait(struct ttm_buffer_object *bo,
>  }
>  EXPORT_SYMBOL(ttm_bo_wait);
>  
> -/*
> - * A buffer object shrink method that tries to swap out the first
> - * buffer object on the bo_global::swap_lru list.
> - */
> -int ttm_bo_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags)
> +int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx 
> *ctx,
> +gfp_t gfp_flags)
>  {
>   struct ttm_global *glob = _glob;
> - struct ttm_buffer_object *bo;
> - int ret = -EBUSY;
>   bool locked;
> - unsigned i;
> -
> - spin_lock(>lru_lock);
> - for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> - list_for_each_entry(bo, >swap_lru[i], swap) {
> - if (!ttm_bo_evict_swapout_allowable(bo, ctx, ,
> - NULL))
> - continue;
> -
> - if (!ttm_bo_get_unless_zero(bo)) {
> - if (locked)
> - dma_resv_unlock(bo->base.resv);
> - continue;
> - }
> + int ret;
>  
> - ret = 0;
> - break;
> - }
> - if (!ret)
> - break;
> - }
> + if (!ttm_bo_evict_swapout_allowable(bo, ctx, , NULL))
> + return -EBUSY;
>  
> - if (ret) {
> - spin_unlock(>lru_lock);
> - return ret;
> + if (!ttm_bo_get_unless_zero(bo)) {
> + if (locked)
> + dma_resv_unlock(bo->base.resv);
> + return -EBUSY;
>   }
>  
>   if (bo->deleted) {
> - ret = ttm_bo_cleanup_refs(bo, false, false, locked);
> + ttm_bo_cleanup_refs(bo, false, false, locked);
>   ttm_bo_put(bo);
> - return ret;
> + return 0;
>   }
>  
>   ttm_bo_del_from_lru(bo);
> + /* TODO: Cleanup the locking */
>   spin_unlock(>lru_lock);
>  
> - /**
> + /*
>* Move to system cached
>*/
> -
>   if (bo->mem.mem_type != TTM_PL_SYSTEM) {
>   struct ttm_operation_ctx ctx = { false, false };
>   struct ttm_resource evict_mem;
> @@ -1262,29 +1241,26 @@ int ttm_bo_swapout(struct ttm_operation_ctx *ctx, 
> gfp_t gfp_flags)
>   }
>   }
>  
> - /**
> + /*
>* Make sure BO is idle.
>*/
> -
>   ret = ttm_bo_wait(bo, false, false);
>   if (unlikely(ret != 0))
>   goto out;
>  
>   ttm_bo_unmap_virtual(bo);
>  
> - /**
> + /*
>* Swap out. Buffer will be swapped in again as soon as
>* anyone tries to access a ttm page.
>*/
> -
>   if (bo->bdev->funcs->swap_notify)
>   bo->bdev->funcs->swap_notify(bo);
>  
>   ret = ttm_tt_swapout(bo->bdev, bo->ttm, gfp_flags);
>  out:
>  
> - /**
> -  *
> + /*
>* Unreserve without putting on LRU to avoid swapping out an
>* already swapped buffer.
>*/
> @@ -1293,7 +1269,6 @@ int ttm_bo_swapout(struct ttm_operation_ctx *ctx, gfp_t 
> gfp_flags)
>   ttm_bo_put(bo);
>   return ret;
>  }
> -EXPORT_SYMBOL(ttm_bo_swapout);
>  
>  void ttm_bo_tt_destroy(struct ttm_buffer_object *bo)
>  {
> diff --git a/drivers/gpu/drm/ttm/ttm_device.c 
> b/drivers/gpu/drm/ttm/ttm_device.c
> index 95e1b7b1f2e6..b1424189fdfb 100644
> --- a/drivers/gpu/drm/ttm/ttm_device.c
> +++ b/drivers/gpu/drm/ttm/ttm_device.c
> @@ -102,6 +102,35 @@ static int ttm_global_init(void)
>  
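
For context, a hedged sketch of the driver-facing side of this change: instead of walking a swap LRU itself, a driver's shrink path simply calls the new helper in a loop while it reports progress (a positive return value is assumed to mean pages were swapped out). The function below is illustrative, not the vmwgfx code.

#include <linux/gfp.h>
#include <drm/ttm/ttm_bo_api.h>
#include <drm/ttm/ttm_device.h>

static void example_shrink_until_idle(void)
{
	struct ttm_operation_ctx ctx = {
		.interruptible = false,
		.no_wait_gpu = false,
	};

	/* keep swapping out one BO at a time while TTM makes progress */
	while (ttm_global_swapout(&ctx, GFP_KERNEL) > 0)
		;
}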

Re: [PATCH 3/3] drm/ttm: switch to per device LRU lock

2021-03-22 Thread Huang Rui
On Fri, Mar 19, 2021 at 08:10:21PM +0800, Christian König wrote:
> Am 19.03.21 um 05:32 schrieb Huang Rui:
> > On Thu, Mar 18, 2021 at 08:47:19PM +0800, Christian König wrote:
> >> Instead of having a global lock.
> >>
> >> Signed-off-by: Christian König 
> >> ---
> >>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c |  8 ++---
> >>   drivers/gpu/drm/qxl/qxl_release.c  |  5 +--
> >>   drivers/gpu/drm/ttm/ttm_bo.c   | 49 --
> >>   drivers/gpu/drm/ttm/ttm_device.c   | 12 +++
> >>   drivers/gpu/drm/ttm/ttm_execbuf_util.c |  8 ++---
> >>   drivers/gpu/drm/ttm/ttm_resource.c |  9 +++--
> >>   include/drm/ttm/ttm_bo_driver.h|  4 +--
> >>   include/drm/ttm/ttm_device.h   |  4 +--
> >>   8 files changed, 43 insertions(+), 56 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
> >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> >> index 9d19078246c8..ae18c0e32347 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> >> @@ -638,15 +638,15 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device 
> >> *adev,
> >>struct amdgpu_vm_bo_base *bo_base;
> >>   
> >>if (vm->bulk_moveable) {
> >> -  spin_lock(_glob.lru_lock);
> >> +  spin_lock(>mman.bdev.lru_lock);
> > Could you please explain why you want to switch to a per-device lock instead of the global lock?
> 
> Potentially less contention.
> 
> But you are right mentioning this in the commit message is a good idea.
> 

Thanks.

Patch is Acked-by: Huang Rui 

> Regards,
> Christian.
> 
> >
> > Thanks,
> > Ray
> >
> >>ttm_bo_bulk_move_lru_tail(>lru_bulk_move);
> >> -  spin_unlock(_glob.lru_lock);
> >> +  spin_unlock(>mman.bdev.lru_lock);
> >>return;
> >>}
> >>   
> >>memset(>lru_bulk_move, 0, sizeof(vm->lru_bulk_move));
> >>   
> >> -  spin_lock(_glob.lru_lock);
> >> +  spin_lock(>mman.bdev.lru_lock);
> >>list_for_each_entry(bo_base, >idle, vm_status) {
> >>struct amdgpu_bo *bo = bo_base->bo;
> >>   
> >> @@ -660,7 +660,7 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device 
> >> *adev,
> >>>shadow->tbo.mem,
> >>>lru_bulk_move);
> >>}
> >> -  spin_unlock(_glob.lru_lock);
> >> +  spin_unlock(>mman.bdev.lru_lock);
> >>   
> >>vm->bulk_moveable = true;
> >>   }
> >> diff --git a/drivers/gpu/drm/qxl/qxl_release.c 
> >> b/drivers/gpu/drm/qxl/qxl_release.c
> >> index f5845c96d414..b19f2f00b215 100644
> >> --- a/drivers/gpu/drm/qxl/qxl_release.c
> >> +++ b/drivers/gpu/drm/qxl/qxl_release.c
> >> @@ -426,16 +426,13 @@ void qxl_release_fence_buffer_objects(struct 
> >> qxl_release *release)
> >>   release->id | 0xf000, release->base.seqno);
> >>trace_dma_fence_emit(>base);
> >>   
> >> -  spin_lock(_glob.lru_lock);
> >> -
> >>list_for_each_entry(entry, >bos, head) {
> >>bo = entry->bo;
> >>   
> >>dma_resv_add_shared_fence(bo->base.resv, >base);
> >> -  ttm_bo_move_to_lru_tail(bo, >mem, NULL);
> >> +  ttm_bo_move_to_lru_tail_unlocked(bo);
> >>dma_resv_unlock(bo->base.resv);
> >>}
> >> -  spin_unlock(_glob.lru_lock);
> >>ww_acquire_fini(>ticket);
> >>   }
> >>   
> >> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> >> index 3673157527ff..2d2ac532987e 100644
> >> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> >> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> >> @@ -243,9 +243,9 @@ static int ttm_bo_individualize_resv(struct 
> >> ttm_buffer_object *bo)
> >> * reference it any more. The only tricky case is the trylock on
> >> * the resv object while holding the lru_lock.
> >> */
> >> -  spin_lock(_glob.lru_lock);
> >> +  spin_lock(>bdev->lru_lock);
> >>bo->base.resv = >base._resv;
> >> -  spin_unlock(_glob.lru_lock);
> >> +  spin_unlock(>bdev->lru_lock);
> >>}
> >>   
&g

Re: [PATCH 2/2] drm/ttm: optimize the pool shrinker a bit v2

2021-04-15 Thread Huang Rui
On Thu, Apr 15, 2021 at 07:56:24PM +0800, Christian König wrote:
> Switch back to using a spinlock again by moving the IOMMU unmap outside
> of the locked region.
> 
> v2: Add a comment explaining why we need sync_shrinkers().
> 
> Signed-off-by: Christian König 

The series looks good to me as well.

Acked-by: Huang Rui 

> ---
>  drivers/gpu/drm/ttm/ttm_pool.c | 44 +-
>  1 file changed, 22 insertions(+), 22 deletions(-)
> 
> diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
> index cb38b1a17b09..955836d569cc 100644
> --- a/drivers/gpu/drm/ttm/ttm_pool.c
> +++ b/drivers/gpu/drm/ttm/ttm_pool.c
> @@ -70,7 +70,7 @@ static struct ttm_pool_type global_uncached[MAX_ORDER];
>  static struct ttm_pool_type global_dma32_write_combined[MAX_ORDER];
>  static struct ttm_pool_type global_dma32_uncached[MAX_ORDER];
>  
> -static struct mutex shrinker_lock;
> +static spinlock_t shrinker_lock;
>  static struct list_head shrinker_list;
>  static struct shrinker mm_shrinker;
>  
> @@ -263,9 +263,9 @@ static void ttm_pool_type_init(struct ttm_pool_type *pt, 
> struct ttm_pool *pool,
>   spin_lock_init(>lock);
>   INIT_LIST_HEAD(>pages);
>  
> - mutex_lock(_lock);
> + spin_lock(_lock);
>   list_add_tail(>shrinker_list, _list);
> - mutex_unlock(_lock);
> + spin_unlock(_lock);
>  }
>  
>  /* Remove a pool_type from the global shrinker list and free all pages */
> @@ -273,9 +273,9 @@ static void ttm_pool_type_fini(struct ttm_pool_type *pt)
>  {
>   struct page *p;
>  
> - mutex_lock(_lock);
> + spin_lock(_lock);
>   list_del(>shrinker_list);
> - mutex_unlock(_lock);
> + spin_unlock(_lock);
>  
>   while ((p = ttm_pool_type_take(pt)))
>   ttm_pool_free_page(pt->pool, pt->caching, pt->order, p);
> @@ -313,24 +313,19 @@ static struct ttm_pool_type 
> *ttm_pool_select_type(struct ttm_pool *pool,
>  static unsigned int ttm_pool_shrink(void)
>  {
>   struct ttm_pool_type *pt;
> - unsigned int num_freed;
>   struct page *p;
>  
> - mutex_lock(_lock);
> + spin_lock(_lock);
>   pt = list_first_entry(_list, typeof(*pt), shrinker_list);
> + list_move_tail(>shrinker_list, _list);
> + spin_unlock(_lock);
>  
>   p = ttm_pool_type_take(pt);
> - if (p) {
> - ttm_pool_free_page(pt->pool, pt->caching, pt->order, p);
> - num_freed = 1 << pt->order;
> - } else {
> - num_freed = 0;
> - }
> -
> - list_move_tail(>shrinker_list, _list);
> - mutex_unlock(_lock);
> + if (!p)
> + return 0;
>  
> - return num_freed;
> + ttm_pool_free_page(pt->pool, pt->caching, pt->order, p);
> + return 1 << pt->order;
>  }
>  
>  /* Return the allocation order based for a page */
> @@ -530,6 +525,11 @@ void ttm_pool_fini(struct ttm_pool *pool)
>   for (j = 0; j < MAX_ORDER; ++j)
>   ttm_pool_type_fini(>caching[i].orders[j]);
>   }
> +
> + /* We removed the pool types from the LRU, but we need to also make sure
> +  * that no shrinker is concurrently freeing pages from the pool.
> +  */
> + sync_shrinkers();
>  }
>  
>  /* As long as pages are available make sure to release at least one */
> @@ -604,7 +604,7 @@ static int ttm_pool_debugfs_globals_show(struct seq_file 
> *m, void *data)
>  {
>   ttm_pool_debugfs_header(m);
>  
> - mutex_lock(_lock);
> + spin_lock(_lock);
>   seq_puts(m, "wc\t:");
>   ttm_pool_debugfs_orders(global_write_combined, m);
>   seq_puts(m, "uc\t:");
> @@ -613,7 +613,7 @@ static int ttm_pool_debugfs_globals_show(struct seq_file 
> *m, void *data)
>   ttm_pool_debugfs_orders(global_dma32_write_combined, m);
>   seq_puts(m, "uc 32\t:");
>   ttm_pool_debugfs_orders(global_dma32_uncached, m);
> - mutex_unlock(_lock);
> + spin_unlock(_lock);
>  
>   ttm_pool_debugfs_footer(m);
>  
> @@ -640,7 +640,7 @@ int ttm_pool_debugfs(struct ttm_pool *pool, struct 
> seq_file *m)
>  
>   ttm_pool_debugfs_header(m);
>  
> - mutex_lock(_lock);
> + spin_lock(_lock);
>   for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) {
>   seq_puts(m, "DMA ");
>   switch (i) {
> @@ -656,7 +656,7 @@ int ttm_pool_debugfs(struct ttm_pool *pool, struct 
> seq_file *m)
>   }
>   ttm_pool_debugfs_orders(pool->caching[i].orders, m);
>   }
> - mutex_unlock(_lock);
> + spin
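
For reference, the locking pattern the patch applies, reduced to a hedged generic sketch with hypothetical types and helpers (this is not the TTM pool code): only the list manipulation happens under the spinlock, and the expensive release work, the IOMMU unmap in TTM's case, runs after the lock has been dropped.

#include <linux/spinlock.h>
#include <linux/list.h>

struct example_entry {
	struct list_head node;
};

static DEFINE_SPINLOCK(example_lock);
static LIST_HEAD(example_list);

/* hypothetical slow release path (unmap, page freeing, ...) */
void example_expensive_free(struct example_entry *e);

static void example_shrink_one(void)
{
	struct example_entry *e;

	spin_lock(&example_lock);
	e = list_first_entry_or_null(&example_list, struct example_entry, node);
	if (e)
		list_del(&e->node);
	spin_unlock(&example_lock);

	if (e)
		example_expensive_free(e);	/* runs outside the lock */
}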

Re: [PATCH] drm/ttm: remove special handling for non GEM drivers

2021-04-19 Thread Huang Rui
On Mon, Apr 19, 2021 at 05:28:53PM +0800, Christian König wrote:
> vmwgfx is the only driver actually using this. Move the handling into
> the driver instead.
> 
> Signed-off-by: Christian König 

Acked-by: Huang Rui 

> ---
>  drivers/gpu/drm/ttm/ttm_bo.c   | 11 ---
>  drivers/gpu/drm/vmwgfx/vmwgfx_bo.c | 10 ++
>  include/drm/ttm/ttm_bo_api.h   | 19 ---
>  3 files changed, 10 insertions(+), 30 deletions(-)
> 
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index 80831df0ef61..38183e227116 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -460,8 +460,6 @@ static void ttm_bo_release(struct kref *kref)
>  
>   atomic_dec(_glob.bo_count);
>   dma_fence_put(bo->moving);
> - if (!ttm_bo_uses_embedded_gem_object(bo))
> - dma_resv_fini(>base._resv);
>   bo->destroy(bo);
>  }
>  
> @@ -1056,15 +1054,6 @@ int ttm_bo_init_reserved(struct ttm_device *bdev,
>   } else {
>   bo->base.resv = >base._resv;
>   }
> - if (!ttm_bo_uses_embedded_gem_object(bo)) {
> - /*
> -  * bo.base is not initialized, so we have to setup the
> -  * struct elements we want use regardless.
> -  */
> - bo->base.size = size;
> - dma_resv_init(>base._resv);
> - drm_vma_node_reset(>base.vma_node);
> - }
>   atomic_inc(_glob.bo_count);
>  
>   /*
> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c 
> b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
> index 50e529a01677..587314d57991 100644
> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
> @@ -460,6 +460,7 @@ void vmw_bo_bo_free(struct ttm_buffer_object *bo)
>   WARN_ON(vmw_bo->dirty);
>   WARN_ON(!RB_EMPTY_ROOT(_bo->res_tree));
>   vmw_bo_unmap(vmw_bo);
> + dma_resv_fini(>base._resv);
>   kfree(vmw_bo);
>  }
>  
> @@ -512,6 +513,11 @@ int vmw_bo_create_kernel(struct vmw_private *dev_priv, 
> unsigned long size,
>   if (unlikely(ret))
>   goto error_free;
>  
> +
> + bo->base.size = size;
> + dma_resv_init(>base._resv);
> + drm_vma_node_reset(>base.vma_node);
> +
>   ret = ttm_bo_init_reserved(_priv->bdev, bo, size,
>  ttm_bo_type_device, placement, 0,
>  , NULL, NULL, NULL);
> @@ -570,6 +576,10 @@ int vmw_bo_init(struct vmw_private *dev_priv,
>   if (unlikely(ret))
>   return ret;
>  
> + vmw_bo->base.base.size = size;
> + dma_resv_init(_bo->base.base._resv);
> + drm_vma_node_reset(_bo->base.base.vma_node);
> +
>   ret = ttm_bo_init_reserved(bdev, _bo->base, size,
>  ttm_bo_type_device, placement,
>  0, , NULL, NULL, bo_free);
> diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
> index 3587f660e8f4..e88da481a976 100644
> --- a/include/drm/ttm/ttm_bo_api.h
> +++ b/include/drm/ttm/ttm_bo_api.h
> @@ -562,25 +562,6 @@ ssize_t ttm_bo_io(struct ttm_device *bdev, struct file 
> *filp,
>  int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx 
> *ctx,
>  gfp_t gfp_flags);
>  
> -/**
> - * ttm_bo_uses_embedded_gem_object - check if the given bo uses the
> - * embedded drm_gem_object.
> - *
> - * Most ttm drivers are using gem too, so the embedded
> - * ttm_buffer_object.base will be initialized by the driver (before
> - * calling ttm_bo_init).  It is also possible to use ttm without gem
> - * though (vmwgfx does that).
> - *
> - * This helper will figure whenever a given ttm bo is a gem object too
> - * or not.
> - *
> - * @bo: The bo to check.
> - */
> -static inline bool ttm_bo_uses_embedded_gem_object(struct ttm_buffer_object 
> *bo)
> -{
> - return bo->base.dev != NULL;
> -}
> -
>  /**
>   * ttm_bo_pin - Pin the buffer object.
>   * @bo: The buffer object to pin
> -- 
> 2.25.1
> 


Re: [PATCH 1/1] drm: ttm: Don't bail from ttm_global_init if debugfs_create_dir fails

2021-08-16 Thread Huang Rui
On Wed, Aug 11, 2021 at 03:59:06AM +0800, Dan Moulding wrote:
> In 69de4421bb4c ("drm/ttm: Initialize debugfs from
> ttm_global_init()"), ttm_global_init was changed so that if creation
> of the debugfs global root directory fails, ttm_global_init will bail
> out early and return an error, leading to initialization failure of
> DRM drivers. However, not every system will be using debugfs. On such
> a system, debugfs directory creation can be expected to fail, but DRM
> drivers must still be usable. This changes it so that if creation of
> TTM's debugfs root directory fails, then no biggie: keep calm and
> carry on.
> 
> Fixes: 69de4421bb4c ("drm/ttm: Initialize debugfs from ttm_global_init()")
> Signed-off-by: Dan Moulding 

It looks OK to me.

Reviewed-by: Huang Rui 

Thanks,
Ray

> ---
>  drivers/gpu/drm/ttm/ttm_device.c | 2 --
>  1 file changed, 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/ttm/ttm_device.c 
> b/drivers/gpu/drm/ttm/ttm_device.c
> index 74e3b460132b..2df59b3c2ea1 100644
> --- a/drivers/gpu/drm/ttm/ttm_device.c
> +++ b/drivers/gpu/drm/ttm/ttm_device.c
> @@ -78,9 +78,7 @@ static int ttm_global_init(void)
>  
>   ttm_debugfs_root = debugfs_create_dir("ttm", NULL);
>   if (IS_ERR(ttm_debugfs_root)) {
> - ret = PTR_ERR(ttm_debugfs_root);
>   ttm_debugfs_root = NULL;
> - goto out;
>   }
>  
>   /* Limit the number of pages in the pool to about 50% of the total
> -- 
> 2.31.1
> 


Re: [PATCH 0/1] Fix DRM driver initialization failure in kernel v5.14

2021-08-16 Thread Huang Rui
On Sat, Aug 14, 2021 at 12:50:14AM +0800, Dan Moulding wrote:
> Just a friendly reminder that this fix for a regression needs
> review. It should be a quick review.
> 
> It would probably be good to ensure this gets in before the final 5.14
> release, otherwise this is going to be a very visible regression for
> anyone that uses DRM and does not use debugfs.
> 

I just took a look at your patch, and it looks OK to me. Alex/Christian, could you
help apply this fix if you have no concerns?

Best Regards,
Ray


Re: [PATCH 2/5] drm/ttm: add busy and idle placement flags

2021-09-02 Thread Huang Rui
On Tue, Aug 31, 2021 at 07:21:07PM +0800, Christian König wrote:
> More flexible than the busy placements.
> 
> Signed-off-by: Christian König 

Patch 2 -> 5 are Acked-by: Huang Rui 

> ---
>  drivers/gpu/drm/ttm/ttm_bo.c| 8 +++-
>  include/drm/ttm/ttm_placement.h | 6 ++
>  2 files changed, 13 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index 0a3127436f61..c7034040c67f 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -834,6 +834,9 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo,
>   const struct ttm_place *place = >placement[i];
>   struct ttm_resource_manager *man;
>  
> + if (place->flags & TTM_PL_FLAG_BUSY)
> + continue;
> +
>   man = ttm_manager_type(bdev, place->mem_type);
>   if (!man || !ttm_resource_manager_used(man))
>   continue;
> @@ -860,6 +863,9 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo,
>   const struct ttm_place *place = >busy_placement[i];
>   struct ttm_resource_manager *man;
>  
> + if (place->flags & TTM_PL_FLAG_IDLE)
> + continue;
> +
>   man = ttm_manager_type(bdev, place->mem_type);
>   if (!man || !ttm_resource_manager_used(man))
>   continue;
> @@ -869,7 +875,7 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo,
>   if (likely(!ret))
>   return 0;
>  
> - if (ret && ret != -EBUSY)
> + if (ret != -EBUSY)
>   goto error;
>   }
>  
> diff --git a/include/drm/ttm/ttm_placement.h b/include/drm/ttm/ttm_placement.h
> index 8995c9e4ec1b..63f7217354c0 100644
> --- a/include/drm/ttm/ttm_placement.h
> +++ b/include/drm/ttm/ttm_placement.h
> @@ -53,6 +53,12 @@
>  /* For multihop handling */
>  #define TTM_PL_FLAG_TEMPORARY   (1 << 2)
>  
> +/* Placement is only used when we are evicting */
> +#define TTM_PL_FLAG_BUSY (1 << 3)
> +
> +/* Placement is only used when we are not evicting */
> +#define TTM_PL_FLAG_IDLE (1 << 4)
> +
>  /**
>   * struct ttm_place
>   *
> -- 
> 2.25.1
> 
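
A hedged driver-side usage sketch for the two new flags; the placement table below is hypothetical and not taken from any driver. A placement marked TTM_PL_FLAG_IDLE is only considered while not evicting, and one marked TTM_PL_FLAG_BUSY only as an eviction fallback, mirroring the two loops in ttm_bo_mem_space() above.

#include <linux/kernel.h>
#include <drm/ttm/ttm_placement.h>

static const struct ttm_place example_places[] = {
	{
		/* preferred: only tried while we are not evicting */
		.mem_type = TTM_PL_VRAM,
		.flags    = TTM_PL_FLAG_IDLE,
	},
	{
		/* fallback: only tried when we are evicting */
		.mem_type = TTM_PL_TT,
		.flags    = TTM_PL_FLAG_BUSY,
	},
};

static const struct ttm_placement example_placement = {
	.num_placement      = ARRAY_SIZE(example_places),
	.placement          = example_places,
	.num_busy_placement = ARRAY_SIZE(example_places),
	.busy_placement     = example_places,
};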


Re: [PATCH 1/5] drm/ttm: cleanup ttm_resource_compat

2021-09-02 Thread Huang Rui
On Tue, Aug 31, 2021 at 07:21:06PM +0800, Christian König wrote:
> Move that function into the resource handling and remove an unused parameter.
> 
> Signed-off-by: Christian König 

Looks like this patch is separate from the others in this series.

new_flags won't be used anymore.

Reviewed-by: Huang Rui 

> ---
>  drivers/gpu/drm/ttm/ttm_bo.c   | 48 +
>  drivers/gpu/drm/ttm/ttm_resource.c | 49 ++
>  drivers/gpu/drm/vmwgfx/vmwgfx_bo.c | 15 -
>  include/drm/ttm/ttm_bo_api.h   | 12 
>  include/drm/ttm/ttm_resource.h |  3 ++
>  5 files changed, 59 insertions(+), 68 deletions(-)
> 
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index 3573f9e393be..0a3127436f61 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -924,57 +924,11 @@ static int ttm_bo_move_buffer(struct ttm_buffer_object 
> *bo,
>   return ret;
>  }
>  
> -static bool ttm_bo_places_compat(const struct ttm_place *places,
> -  unsigned num_placement,
> -  struct ttm_resource *mem,
> -  uint32_t *new_flags)
> -{
> - unsigned i;
> -
> - if (mem->placement & TTM_PL_FLAG_TEMPORARY)
> - return false;
> -
> - for (i = 0; i < num_placement; i++) {
> - const struct ttm_place *heap = [i];
> -
> - if ((mem->start < heap->fpfn ||
> -  (heap->lpfn != 0 && (mem->start + mem->num_pages) > 
> heap->lpfn)))
> - continue;
> -
> - *new_flags = heap->flags;
> - if ((mem->mem_type == heap->mem_type) &&
> - (!(*new_flags & TTM_PL_FLAG_CONTIGUOUS) ||
> -  (mem->placement & TTM_PL_FLAG_CONTIGUOUS)))
> - return true;
> - }
> - return false;
> -}
> -
> -bool ttm_bo_mem_compat(struct ttm_placement *placement,
> -struct ttm_resource *mem,
> -uint32_t *new_flags)
> -{
> - if (ttm_bo_places_compat(placement->placement, placement->num_placement,
> -  mem, new_flags))
> - return true;
> -
> - if ((placement->busy_placement != placement->placement ||
> -  placement->num_busy_placement > placement->num_placement) &&
> - ttm_bo_places_compat(placement->busy_placement,
> -  placement->num_busy_placement,
> -  mem, new_flags))
> - return true;
> -
> - return false;
> -}
> -EXPORT_SYMBOL(ttm_bo_mem_compat);
> -
>  int ttm_bo_validate(struct ttm_buffer_object *bo,
>   struct ttm_placement *placement,
>   struct ttm_operation_ctx *ctx)
>  {
>   int ret;
> - uint32_t new_flags;
>  
>   dma_resv_assert_held(bo->base.resv);
>  
> @@ -987,7 +941,7 @@ int ttm_bo_validate(struct ttm_buffer_object *bo,
>   /*
>* Check whether we need to move buffer.
>*/
> - if (!ttm_bo_mem_compat(placement, bo->resource, _flags)) {
> + if (!ttm_resource_compat(bo->resource, placement)) {
>   ret = ttm_bo_move_buffer(bo, placement, ctx);
>   if (ret)
>   return ret;
> diff --git a/drivers/gpu/drm/ttm/ttm_resource.c 
> b/drivers/gpu/drm/ttm/ttm_resource.c
> index 2431717376e7..035d71332d18 100644
> --- a/drivers/gpu/drm/ttm/ttm_resource.c
> +++ b/drivers/gpu/drm/ttm/ttm_resource.c
> @@ -67,6 +67,55 @@ void ttm_resource_free(struct ttm_buffer_object *bo, 
> struct ttm_resource **res)
>  }
>  EXPORT_SYMBOL(ttm_resource_free);
>  
> +static bool ttm_resource_places_compat(struct ttm_resource *res,
> +const struct ttm_place *places,
> +unsigned num_placement)
> +{
> + unsigned i;
> +
> + if (res->placement & TTM_PL_FLAG_TEMPORARY)
> + return false;
> +
> + for (i = 0; i < num_placement; i++) {
> + const struct ttm_place *heap = [i];
> +
> + if (res->start < heap->fpfn || (heap->lpfn &&
> + (res->start + res->num_pages) > heap->lpfn))
> + continue;
> +
> + if ((res->mem_type == heap->mem_type) &&
> + (!(heap->flags & TTM_PL_FLAG_CONTIGUOUS) ||
> +  (res->placement & TTM_PL_FLAG_CONTIGUOUS)))
> + return tr

[PATCH] drm/ttm: fix the type mismatch error on sparc64

2021-09-07 Thread Huang Rui
__fls() on sparc64 returns "int", but here it is expected to be "unsigned
long" as on x86. This causes build errors because the warning becomes
fatal when building with the sparc configuration. As suggested by Linus,
use min_t instead of min to force the type to "unsigned int".

Suggested-by: Linus Torvalds 
Signed-off-by: Huang Rui 
Cc: Christian König 
---
 drivers/gpu/drm/ttm/ttm_pool.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
index af1b41369626..c961a788b519 100644
--- a/drivers/gpu/drm/ttm/ttm_pool.c
+++ b/drivers/gpu/drm/ttm/ttm_pool.c
@@ -382,7 +382,8 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
else
gfp_flags |= GFP_HIGHUSER;
 
-   for (order = min(MAX_ORDER - 1UL, __fls(num_pages)); num_pages;
+   for (order = min_t(unsigned int, MAX_ORDER - 1, __fls(num_pages));
+num_pages;
 order = min_t(unsigned int, order, __fls(num_pages))) {
bool apply_caching = false;
struct ttm_pool_type *pt;
-- 
2.25.1
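
For reference, a tiny illustration of the difference the patch relies on (not the pool code itself): the kernel's min() type-checks its two arguments, so mixing the unsigned long constant with the int that __fls() returns on sparc64 produces a warning that -Werror turns into a build failure, while min_t() casts both sides to the named type first.

#include <linux/minmax.h>
#include <linux/bitops.h>
#include <linux/mmzone.h>	/* MAX_ORDER */

static unsigned int example_first_order(unsigned long num_pages)
{
	/* min(MAX_ORDER - 1UL, __fls(num_pages)) would trip the type check
	 * where __fls() returns int; min_t() forces both sides to the
	 * requested unsigned int. */
	return min_t(unsigned int, MAX_ORDER - 1, __fls(num_pages));
}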



Re: [PATCH 3/3] drm: fix the warning of string style for scheduler trace.

2021-12-19 Thread Huang Rui
A gentle reminder: may I know whether there are any comments on this patch?
It is just a minor warning fix.

Thanks,
Ray

On Mon, Dec 13, 2021 at 02:34:22PM +0800, Huang, Ray wrote:
> Use __string(), __assign_str() and __get_str() helpers in the TRACE_EVENT()
> instead of string definitions in gpu scheduler trace.
> 
> [  158.890890] [ cut here ]
> [  158.890899] fmt: 'entity=%p, id=%llu, fence=%p, ring=%s, job count:%u, hw 
> job count:%d
>' current_buffer: 'Xorg-1588[001] .   
> 149.391136: drm_sched_job: entity=76f0d517, id=1, 
> fence=8dd56028, ring='
> [  158.890910] WARNING: CPU: 6 PID: 1617 at kernel/trace/trace.c:3830 
> trace_check_vprintf+0x481/0x4a0
> 
> Signed-off-by: Huang Rui 
> ---
>  drivers/gpu/drm/scheduler/gpu_scheduler_trace.h | 17 +
>  1 file changed, 9 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h 
> b/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h
> index 877ce9b127f1..4e397790c195 100644
> --- a/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h
> +++ b/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h
> @@ -38,6 +38,7 @@ TRACE_EVENT(drm_sched_job,
>   TP_STRUCT__entry(
>__field(struct drm_sched_entity *, entity)
>__field(struct dma_fence *, fence)
> +  __string(name, sched_job->sched->name)
>__field(const char *, name)
>__field(uint64_t, id)
>__field(u32, job_count)
> @@ -48,14 +49,14 @@ TRACE_EVENT(drm_sched_job,
>  __entry->entity = entity;
>  __entry->id = sched_job->id;
>  __entry->fence = _job->s_fence->finished;
> -__entry->name = sched_job->sched->name;
> +__assign_str(name, sched_job->sched->name);
>  __entry->job_count = 
> spsc_queue_count(>job_queue);
>  __entry->hw_job_count = atomic_read(
>  _job->sched->hw_rq_count);
>  ),
>   TP_printk("entity=%p, id=%llu, fence=%p, ring=%s, job count:%u, hw 
> job count:%d",
> __entry->entity, __entry->id,
> -   __entry->fence, __entry->name,
> +   __entry->fence, __get_str(name),
> __entry->job_count, __entry->hw_job_count)
>  );
>  
> @@ -65,7 +66,7 @@ TRACE_EVENT(drm_run_job,
>   TP_STRUCT__entry(
>__field(struct drm_sched_entity *, entity)
>__field(struct dma_fence *, fence)
> -  __field(const char *, name)
> +  __string(name, sched_job->sched->name)
>__field(uint64_t, id)
>__field(u32, job_count)
>__field(int, hw_job_count)
> @@ -75,14 +76,14 @@ TRACE_EVENT(drm_run_job,
>  __entry->entity = entity;
>  __entry->id = sched_job->id;
>  __entry->fence = _job->s_fence->finished;
> -__entry->name = sched_job->sched->name;
> +__assign_str(name, sched_job->sched->name);
>  __entry->job_count = 
> spsc_queue_count(>job_queue);
>  __entry->hw_job_count = atomic_read(
>  _job->sched->hw_rq_count);
>  ),
>   TP_printk("entity=%p, id=%llu, fence=%p, ring=%s, job count:%u, hw 
> job count:%d",
> __entry->entity, __entry->id,
> -   __entry->fence, __entry->name,
> +   __entry->fence, __get_str(name),
> __entry->job_count, __entry->hw_job_count)
>  );
>  
> @@ -103,7 +104,7 @@ TRACE_EVENT(drm_sched_job_wait_dep,
>   TP_PROTO(struct drm_sched_job *sched_job, struct dma_fence *fence),
>   TP_ARGS(sched_job, fence),
>   TP_STRUCT__entry(
> -  __field(const char *,name)
> +  __string(name, sched_job->sched->name)
>__field(uint64_t, id)
>__field(struct dma_fence *, fence)
>__field(uint64_t, ctx)
> @@ -111,14 +112,14 @@ TRACE_EVENT(drm_sched_job_wait_dep,
>),
>  
&g

[PATCH 2/3] drm/amdgpu: fix the fence timeline null pointer

2021-12-12 Thread Huang Rui
Initialize the flags for the fence embedded in the job at dma_fence_init().
Otherwise, we take a wrong path in the amdgpu_fence_get_timeline_name
callback and trigger a null pointer panic once the trace event is
enabled.

[  156.131790] BUG: kernel NULL pointer dereference, address: 02a0
[  156.131804] #PF: supervisor read access in kernel mode
[  156.131811] #PF: error_code(0x) - not-present page
[  156.131817] PGD 0 P4D 0
[  156.131824] Oops:  [#1] PREEMPT SMP PTI
[  156.131832] CPU: 6 PID: 1404 Comm: sdma0 Tainted: G   OE 
5.16.0-rc1-custom #1
[  156.131842] Hardware name: Gigabyte Technology Co., Ltd. 
Z170XP-SLI/Z170XP-SLI-CF, BIOS F20 11/04/2016
[  156.131848] RIP: 0010:strlen+0x0/0x20
[  156.131859] Code: 89 c0 c3 0f 1f 80 00 00 00 00 48 01 fe eb 0f 0f b6 07 38 
d0 74 10 48 83 c7 01 84 c0 74 05 48 39 f7 75 ec 31 c0 c3 48 89 f8 c3 <80> 3f 00 
74 10 48 89 f8 48 83 c0 01 80 38 00 75 f7 48 29 f8 c3 31
[  156.131872] RSP: 0018:9bd0018dbcf8 EFLAGS: 00010206
[  156.131880] RAX: 02a0 RBX: 8d0305ef01b0 RCX: 000b
[  156.131888] RDX: 8d03772ab924 RSI: 8d0305ef01b0 RDI: 02a0
[  156.131895] RBP: 9bd0018dbd60 R08: 8d03002094d0 R09: 
[  156.131901] R10: 005e R11: 0065 R12: 8d03002094d0
[  156.131907] R13: 001f R14: 00070018 R15: 0007
[  156.131914] FS:  () GS:8d062ed8() 
knlGS:
[  156.131923] CS:  0010 DS:  ES:  CR0: 80050033
[  156.131929] CR2: 02a0 CR3: 1120a005 CR4: 003706e0
[  156.131937] DR0:  DR1:  DR2: 
[  156.131942] DR3:  DR6: fffe0ff0 DR7: 0400
[  156.131949] Call Trace:
[  156.131953]  
[  156.131957]  ? trace_event_raw_event_dma_fence+0xcc/0x200
[  156.131973]  ? ring_buffer_unlock_commit+0x23/0x130
[  156.131982]  dma_fence_init+0x92/0xb0
[  156.131993]  amdgpu_fence_emit+0x10d/0x2b0 [amdgpu]
[  156.132302]  amdgpu_ib_schedule+0x2f9/0x580 [amdgpu]
[  156.132586]  amdgpu_job_run+0xed/0x220 [amdgpu]

Signed-off-by: Huang Rui 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 17 +
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 3b7e86ea7167..e2aa71904278 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -76,7 +76,7 @@ void amdgpu_fence_slab_fini(void)
 /*
  * Cast helper
  */
-static const struct dma_fence_ops amdgpu_fence_ops;
+static struct dma_fence_ops amdgpu_fence_ops;
 static inline struct amdgpu_fence *to_amdgpu_fence(struct dma_fence *f)
 {
struct amdgpu_fence *__f = container_of(f, struct amdgpu_fence, base);
@@ -158,21 +158,22 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct 
dma_fence **f, struct amd
}
 
seq = ++ring->fence_drv.sync_seq;
-   if (job != NULL && job->job_run_counter) {
+   if (job && job->job_run_counter) {
/* reinit seq for resubmitted jobs */
fence->seqno = seq;
+   set_bit(AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT, >flags);
} else {
+   amdgpu_fence_ops.init_flags = 0;
+   if (job)
+   set_bit(AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT,
+   _fence_ops.init_flags);
+
dma_fence_init(fence, _fence_ops,
>fence_drv.lock,
adev->fence_context + ring->idx,
seq);
}
 
-   if (job != NULL) {
-   /* mark this fence has a parent job */
-   set_bit(AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT, >flags);
-   }
-
amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
   seq, flags | AMDGPU_FENCE_FLAG_INT);
pm_runtime_get_noresume(adev_to_drm(adev)->dev);
@@ -720,7 +721,7 @@ static void amdgpu_fence_release(struct dma_fence *f)
call_rcu(>rcu, amdgpu_fence_free);
 }
 
-static const struct dma_fence_ops amdgpu_fence_ops = {
+static struct dma_fence_ops amdgpu_fence_ops = {
.get_driver_name = amdgpu_fence_get_driver_name,
.get_timeline_name = amdgpu_fence_get_timeline_name,
.enable_signaling = amdgpu_fence_enable_signaling,
-- 
2.25.1
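
For context, a hedged sketch of the ordering problem this series addresses; everything below except dma_fence_init() and its tracepoint is hypothetical.

#include <linux/bitops.h>
#include <linux/dma-fence.h>

#define MY_FLAG_EMBEDDED	DMA_FENCE_FLAG_USER_BITS

static void my_emit(struct dma_fence *fence, const struct dma_fence_ops *ops,
		    spinlock_t *lock, u64 context, u64 seqno)
{
	/* trace_dma_fence_init() fires inside dma_fence_init() and may call
	 * ops->get_timeline_name() while fence->flags is still 0 ... */
	dma_fence_init(fence, ops, lock, context, seqno);

	/* ... so a flag set only here is invisible to that first callback,
	 * which is how the NULL dereference above is reached. */
	set_bit(MY_FLAG_EMBEDDED, &fence->flags);
}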



[PATCH 1/3] dma-buf: make the flags can be configured during dma fence init

2021-12-12 Thread Huang Rui
In some scenarios, the get_timeline_name callback uses the fence flags to
decide how to return the timeline string name. Once the
trace_dma_fence_init event is enabled, it calls the get_timeline_name
callback to dump the fence structure. However, at that moment the flags
are always 0, which can trigger issues in the get_timeline_name callback
implementations of different GPU drivers. So add a member that
initializes the flags in dma_fence_init().

Signed-off-by: Huang Rui 
---
 drivers/dma-buf/dma-fence.c | 2 +-
 include/linux/dma-fence.h   | 7 +++
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
index 066400ed8841..3e0622bf385f 100644
--- a/drivers/dma-buf/dma-fence.c
+++ b/drivers/dma-buf/dma-fence.c
@@ -952,7 +952,7 @@ dma_fence_init(struct dma_fence *fence, const struct 
dma_fence_ops *ops,
fence->lock = lock;
fence->context = context;
fence->seqno = seqno;
-   fence->flags = 0UL;
+   fence->flags = ops->init_flags;
fence->error = 0;
 
trace_dma_fence_init(fence);
diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
index 1ea691753bd3..f9270c5bc07a 100644
--- a/include/linux/dma-fence.h
+++ b/include/linux/dma-fence.h
@@ -131,6 +131,13 @@ struct dma_fence_ops {
 */
bool use_64bit_seqno;
 
+   /**
+* @init_flags:
+*
+* The initial value of fence flags (A mask of DMA_FENCE_FLAG_* 
defined).
+*/
+   unsigned long init_flags;
+
/**
 * @get_driver_name:
 *
-- 
2.25.1
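
A hedged usage sketch for the init_flags member proposed above; the driver-side names are hypothetical, only struct dma_fence_ops and dma_fence_init() are real. Note that the ops structure has to be writable for this, which is why patch 2/3 drops the const from amdgpu_fence_ops.

#include <linux/bits.h>
#include <linux/dma-fence.h>

#define MY_FLAG_EMBEDDED	DMA_FENCE_FLAG_USER_BITS

static const char *my_get_driver_name(struct dma_fence *f);
static const char *my_get_timeline_name(struct dma_fence *f);

static struct dma_fence_ops my_fence_ops = {
	.get_driver_name   = my_get_driver_name,
	.get_timeline_name = my_get_timeline_name,
};

static void my_emit_fence(struct dma_fence *fence, spinlock_t *lock,
			  u64 context, u64 seqno, bool embedded_in_job)
{
	/* choose the flags before dma_fence_init() so that the
	 * trace_dma_fence_init tracepoint already sees them when it
	 * calls get_timeline_name */
	my_fence_ops.init_flags = embedded_in_job ? BIT(MY_FLAG_EMBEDDED) : 0;

	dma_fence_init(fence, &my_fence_ops, lock, context, seqno);
}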



[PATCH 3/3] drm: fix the warning of string style for scheduler trace.

2021-12-12 Thread Huang Rui
Use __string(), __assign_str() and __get_str() helpers in the TRACE_EVENT()
instead of string definitions in gpu scheduler trace.

[  158.890890] [ cut here ]
[  158.890899] fmt: 'entity=%p, id=%llu, fence=%p, ring=%s, job count:%u, hw 
job count:%d
   ' current_buffer: 'Xorg-1588[001] .   
149.391136: drm_sched_job: entity=76f0d517, id=1, 
fence=8dd56028, ring='
[  158.890910] WARNING: CPU: 6 PID: 1617 at kernel/trace/trace.c:3830 
trace_check_vprintf+0x481/0x4a0

Signed-off-by: Huang Rui 
---
 drivers/gpu/drm/scheduler/gpu_scheduler_trace.h | 17 +
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h 
b/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h
index 877ce9b127f1..4e397790c195 100644
--- a/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h
+++ b/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h
@@ -38,6 +38,7 @@ TRACE_EVENT(drm_sched_job,
TP_STRUCT__entry(
 __field(struct drm_sched_entity *, entity)
 __field(struct dma_fence *, fence)
+__string(name, sched_job->sched->name)
 __field(const char *, name)
 __field(uint64_t, id)
 __field(u32, job_count)
@@ -48,14 +49,14 @@ TRACE_EVENT(drm_sched_job,
   __entry->entity = entity;
   __entry->id = sched_job->id;
   __entry->fence = _job->s_fence->finished;
-  __entry->name = sched_job->sched->name;
+  __assign_str(name, sched_job->sched->name);
   __entry->job_count = 
spsc_queue_count(>job_queue);
   __entry->hw_job_count = atomic_read(
   _job->sched->hw_rq_count);
   ),
TP_printk("entity=%p, id=%llu, fence=%p, ring=%s, job count:%u, hw 
job count:%d",
  __entry->entity, __entry->id,
- __entry->fence, __entry->name,
+ __entry->fence, __get_str(name),
  __entry->job_count, __entry->hw_job_count)
 );
 
@@ -65,7 +66,7 @@ TRACE_EVENT(drm_run_job,
TP_STRUCT__entry(
 __field(struct drm_sched_entity *, entity)
 __field(struct dma_fence *, fence)
-__field(const char *, name)
+__string(name, sched_job->sched->name)
 __field(uint64_t, id)
 __field(u32, job_count)
 __field(int, hw_job_count)
@@ -75,14 +76,14 @@ TRACE_EVENT(drm_run_job,
   __entry->entity = entity;
   __entry->id = sched_job->id;
   __entry->fence = _job->s_fence->finished;
-  __entry->name = sched_job->sched->name;
+  __assign_str(name, sched_job->sched->name);
   __entry->job_count = 
spsc_queue_count(>job_queue);
   __entry->hw_job_count = atomic_read(
   _job->sched->hw_rq_count);
   ),
TP_printk("entity=%p, id=%llu, fence=%p, ring=%s, job count:%u, hw 
job count:%d",
  __entry->entity, __entry->id,
- __entry->fence, __entry->name,
+ __entry->fence, __get_str(name),
  __entry->job_count, __entry->hw_job_count)
 );
 
@@ -103,7 +104,7 @@ TRACE_EVENT(drm_sched_job_wait_dep,
TP_PROTO(struct drm_sched_job *sched_job, struct dma_fence *fence),
TP_ARGS(sched_job, fence),
TP_STRUCT__entry(
-__field(const char *,name)
+__string(name, sched_job->sched->name)
 __field(uint64_t, id)
 __field(struct dma_fence *, fence)
 __field(uint64_t, ctx)
@@ -111,14 +112,14 @@ TRACE_EVENT(drm_sched_job_wait_dep,
 ),
 
TP_fast_assign(
-  __entry->name = sched_job->sched->name;
+  __assign_str(name, sched_job->sched->name);
   __entry->id = sched_job->id;
   __entry->fence = fence;
   __entry->ctx = fence->context;
   __entry->seqno = fence->seqno;
   ),
TP_printk("

[PATCH] drm/amdgpu: fix mismatch warning between the prototype and function name

2021-12-15 Thread Huang Rui
Fix the typo so that the kernel-doc comment matches the actual function name.

All warnings (new ones prefixed by >>):

>> drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c:631: warning: expecting
prototype for amdgpu_fence_clear_job_fences(). Prototype was for
amdgpu_fence_driver_clear_job_fences() instead

Reported-by: kernel test robot 
Signed-off-by: Huang Rui 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index db41d16838b9..9afd11ca2709 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -622,7 +622,7 @@ void amdgpu_fence_driver_hw_init(struct amdgpu_device *adev)
 }
 
 /**
- * amdgpu_fence_clear_job_fences - clear job embedded fences of ring
+ * amdgpu_fence_driver_clear_job_fences - clear job embedded fences of ring
  *
  * @ring: fence of the ring to be cleared
  *
-- 
2.25.1



Re: [PATCH v3] drm/amdgpu: introduce new amdgpu_fence object to indicate the job embedded fence

2021-12-15 Thread Huang Rui
On Wed, Dec 15, 2021 at 09:30:31PM +0800, kernel test robot wrote:
> Hi Huang,
> 
> I love your patch! Perhaps something to improve:
> 
> [auto build test WARNING on drm/drm-next]
> [also build test WARNING on drm-intel/for-linux-next drm-tip/drm-tip 
> v5.16-rc5 next-20211214]
> [If your patch is applied to the wrong git tree, kindly drop us a note.
> And when submitting patch, we suggest to use '--base' as documented in
> https://git-scm.com/docs/git-format-patch]
> 
> url:
> https://github.com/0day-ci/linux/commits/Huang-Rui/drm-amdgpu-introduce-new-amdgpu_fence-object-to-indicate-the-job-embedded-fence/20211215-143731
> base:   git://anongit.freedesktop.org/drm/drm drm-next
> config: x86_64-allyesconfig 
> (https://download.01.org/0day-ci/archive/20211215/202112152115.sqAqnvG7-lkp@intel.com/config)
> compiler: gcc-9 (Debian 9.3.0-22) 9.3.0
> reproduce (this is a W=1 build):
> # 
> https://github.com/0day-ci/linux/commit/a47becf231b123760625c45242e89f5e5b5b4915
> git remote add linux-review 
> https://github.com/0day-ci/linux
> git fetch --no-tags linux-review 
> Huang-Rui/drm-amdgpu-introduce-new-amdgpu_fence-object-to-indicate-the-job-embedded-fence/20211215-143731
> git checkout a47becf231b123760625c45242e89f5e5b5b4915
> # save the config file to linux build tree
> mkdir build_dir
> make W=1 O=build_dir ARCH=x86_64 SHELL=/bin/bash 
> drivers/gpu/drm/amd/amdgpu/
> 
> If you fix the issue, kindly add following tag as appropriate
> Reported-by: kernel test robot 
> 
> All warnings (new ones prefixed by >>):
> 
> >> drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c:631: warning: expecting 
> >> prototype for amdgpu_fence_clear_job_fences(). Prototype was for 
> >> amdgpu_fence_driver_clear_job_fences() instead
> 

Nice catch! Thank you. It's my typo and I will fix it.

Thanks,
Ray

> 
> vim +631 drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> 
>623
>624/**
>625 * amdgpu_fence_clear_job_fences - clear job embedded fences of 
> ring
>626 *
>627 * @ring: fence of the ring to be cleared
>628 *
>629 */
>630void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring 
> *ring)
>  > 631{
>632int i;
>633struct dma_fence *old, **ptr;
>634
>635for (i = 0; i <= ring->fence_drv.num_fences_mask; i++) {
>636ptr = >fence_drv.fences[i];
>637old = rcu_dereference_protected(*ptr, 1);
>638if (old && old->ops == _job_fence_ops)
>639RCU_INIT_POINTER(*ptr, NULL);
>640}
>641}
>642
> 
> ---
> 0-DAY CI Kernel Test Service, Intel Corporation

[PATCH v2] drm/amdgpu: introduce new amdgpu_fence object to indicate the job embedded fence

2021-12-14 Thread Huang Rui
The job-embedded fence doesn't initialize the flags at
dma_fence_init(). We then take a wrong path in the
amdgpu_fence_get_timeline_name callback and trigger a null pointer panic
once the trace event is enabled. So introduce a new amdgpu_fence
object to indicate the job-embedded fence.

[  156.131790] BUG: kernel NULL pointer dereference, address: 02a0
[  156.131804] #PF: supervisor read access in kernel mode
[  156.131811] #PF: error_code(0x) - not-present page
[  156.131817] PGD 0 P4D 0
[  156.131824] Oops:  [#1] PREEMPT SMP PTI
[  156.131832] CPU: 6 PID: 1404 Comm: sdma0 Tainted: G   OE 
5.16.0-rc1-custom #1
[  156.131842] Hardware name: Gigabyte Technology Co., Ltd. 
Z170XP-SLI/Z170XP-SLI-CF, BIOS F20 11/04/2016
[  156.131848] RIP: 0010:strlen+0x0/0x20
[  156.131859] Code: 89 c0 c3 0f 1f 80 00 00 00 00 48 01 fe eb 0f 0f b6 07 38 
d0 74 10 48 83 c7 01 84 c0 74 05 48 39 f7 75 ec 31 c0 c3 48 89 f8 c3 <80> 3f 00 
74 10 48 89 f8 48 83 c0 01 80 38 00 75 f7 48 29 f8 c3 31
[  156.131872] RSP: 0018:9bd0018dbcf8 EFLAGS: 00010206
[  156.131880] RAX: 02a0 RBX: 8d0305ef01b0 RCX: 000b
[  156.131888] RDX: 8d03772ab924 RSI: 8d0305ef01b0 RDI: 02a0
[  156.131895] RBP: 9bd0018dbd60 R08: 8d03002094d0 R09: 
[  156.131901] R10: 005e R11: 0065 R12: 8d03002094d0
[  156.131907] R13: 001f R14: 00070018 R15: 0007
[  156.131914] FS:  () GS:8d062ed8() 
knlGS:
[  156.131923] CS:  0010 DS:  ES:  CR0: 80050033
[  156.131929] CR2: 02a0 CR3: 1120a005 CR4: 003706e0
[  156.131937] DR0:  DR1:  DR2: 
[  156.131942] DR3:  DR6: fffe0ff0 DR7: 0400
[  156.131949] Call Trace:
[  156.131953]  
[  156.131957]  ? trace_event_raw_event_dma_fence+0xcc/0x200
[  156.131973]  ? ring_buffer_unlock_commit+0x23/0x130
[  156.131982]  dma_fence_init+0x92/0xb0
[  156.131993]  amdgpu_fence_emit+0x10d/0x2b0 [amdgpu]
[  156.132302]  amdgpu_ib_schedule+0x2f9/0x580 [amdgpu]
[  156.132586]  amdgpu_job_run+0xed/0x220 [amdgpu]

Signed-off-by: Huang Rui 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h|   1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |   3 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c  | 117 ++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h   |   3 -
 4 files changed, 80 insertions(+), 44 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 9f017663ac50..fcaf6e9703f9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -444,6 +444,7 @@ struct amdgpu_sa_bo {
 
 int amdgpu_fence_slab_init(void);
 void amdgpu_fence_slab_fini(void);
+bool is_job_embedded_fence(struct dma_fence *f);
 
 /*
  * IRQS.
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 5625f7736e37..444a19eb2248 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4483,9 +4483,8 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device 
*adev,
 
	ptr = &ring->fence_drv.fences[j];
	old = rcu_dereference_protected(*ptr, 1);
-	if (old && test_bit(AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT, &old->flags)) {
+	if (old && is_job_embedded_fence(old))
		RCU_INIT_POINTER(*ptr, NULL);
-	}
}
/* after all hw jobs are reset, hw fence is meaningless, so 
force_completion */
amdgpu_fence_driver_force_completion(ring);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 3b7e86ea7167..3a81249b5660 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -77,16 +77,28 @@ void amdgpu_fence_slab_fini(void)
  * Cast helper
  */
 static const struct dma_fence_ops amdgpu_fence_ops;
+static const struct dma_fence_ops amdgpu_job_fence_ops;
 static inline struct amdgpu_fence *to_amdgpu_fence(struct dma_fence *f)
 {
struct amdgpu_fence *__f = container_of(f, struct amdgpu_fence, base);
 
-   if (__f->base.ops == &amdgpu_fence_ops)
+   if (__f->base.ops == &amdgpu_fence_ops ||
+   __f->base.ops == &amdgpu_job_fence_ops)
return __f;
 
return NULL;
 }
 
+bool is_job_embedded_fence(struct dma_fence *f)
+{
+   struct amdgpu_fence *__f = container_of(f, struct amdgpu_fence, base);
+
+   if (__f->base.ops == &amdgpu_job_fence_ops)
+   return true;
+
+   return false;
+}
+
 /**
  * amdgpu_fence_write - write a fence value
  *
@@ -158,19 +170,18 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct 
dma_fence

[PATCH v3] drm/amdgpu: introduce new amdgpu_fence object to indicate the job embedded fence

2021-12-14 Thread Huang Rui
The job embedded fence doesn't initialize the flags at
dma_fence_init(). As a result, we take a wrong path in the
amdgpu_fence_get_timeline_name callback and trigger a NULL pointer panic
once the trace event is enabled. So introduce a new amdgpu_fence
object to indicate the job embedded fence.

[  156.131790] BUG: kernel NULL pointer dereference, address: 02a0
[  156.131804] #PF: supervisor read access in kernel mode
[  156.131811] #PF: error_code(0x) - not-present page
[  156.131817] PGD 0 P4D 0
[  156.131824] Oops:  [#1] PREEMPT SMP PTI
[  156.131832] CPU: 6 PID: 1404 Comm: sdma0 Tainted: G   OE 
5.16.0-rc1-custom #1
[  156.131842] Hardware name: Gigabyte Technology Co., Ltd. 
Z170XP-SLI/Z170XP-SLI-CF, BIOS F20 11/04/2016
[  156.131848] RIP: 0010:strlen+0x0/0x20
[  156.131859] Code: 89 c0 c3 0f 1f 80 00 00 00 00 48 01 fe eb 0f 0f b6 07 38 
d0 74 10 48 83 c7 01 84 c0 74 05 48 39 f7 75 ec 31 c0 c3 48 89 f8 c3 <80> 3f 00 
74 10 48 89 f8 48 83 c0 01 80 38 00 75 f7 48 29 f8 c3 31
[  156.131872] RSP: 0018:9bd0018dbcf8 EFLAGS: 00010206
[  156.131880] RAX: 02a0 RBX: 8d0305ef01b0 RCX: 000b
[  156.131888] RDX: 8d03772ab924 RSI: 8d0305ef01b0 RDI: 02a0
[  156.131895] RBP: 9bd0018dbd60 R08: 8d03002094d0 R09: 
[  156.131901] R10: 005e R11: 0065 R12: 8d03002094d0
[  156.131907] R13: 001f R14: 00070018 R15: 0007
[  156.131914] FS:  () GS:8d062ed8() 
knlGS:
[  156.131923] CS:  0010 DS:  ES:  CR0: 80050033
[  156.131929] CR2: 02a0 CR3: 1120a005 CR4: 003706e0
[  156.131937] DR0:  DR1:  DR2: 
[  156.131942] DR3:  DR6: fffe0ff0 DR7: 0400
[  156.131949] Call Trace:
[  156.131953]  
[  156.131957]  ? trace_event_raw_event_dma_fence+0xcc/0x200
[  156.131973]  ? ring_buffer_unlock_commit+0x23/0x130
[  156.131982]  dma_fence_init+0x92/0xb0
[  156.131993]  amdgpu_fence_emit+0x10d/0x2b0 [amdgpu]
[  156.132302]  amdgpu_ib_schedule+0x2f9/0x580 [amdgpu]
[  156.132586]  amdgpu_job_run+0xed/0x220 [amdgpu]

Signed-off-by: Huang Rui 
---

V1 -> V2: add another amdgpu_fence_ops for the job-embedded fence.
V2 -> V3: use amdgpu_fence_driver_clear_job_fences to abstract the job
fence clearing operation.

---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  11 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c  | 126 ++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h   |   4 +-
 3 files changed, 90 insertions(+), 51 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 5625f7736e37..fecf7a09e5a2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4456,7 +4456,7 @@ int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
 int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
 struct amdgpu_reset_context *reset_context)
 {
-   int i, j, r = 0;
+   int i, r = 0;
struct amdgpu_job *job = NULL;
bool need_full_reset =
test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
@@ -4478,15 +4478,8 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device 
*adev,
 
/*clear job fence from fence drv to avoid force_completion
 *leave NULL and vm flush fence in fence drv */
-   for (j = 0; j <= ring->fence_drv.num_fences_mask; j++) {
-   struct dma_fence *old, **ptr;
+   amdgpu_fence_driver_clear_job_fences(ring);
 
-   ptr = &ring->fence_drv.fences[j];
-   old = rcu_dereference_protected(*ptr, 1);
-   if (old && test_bit(AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT, &old->flags)) {
-   RCU_INIT_POINTER(*ptr, NULL);
-   }
-   }
/* after all hw jobs are reset, hw fence is meaningless, so 
force_completion */
amdgpu_fence_driver_force_completion(ring);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 3b7e86ea7167..db41d16838b9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -77,11 +77,13 @@ void amdgpu_fence_slab_fini(void)
  * Cast helper
  */
 static const struct dma_fence_ops amdgpu_fence_ops;
+static const struct dma_fence_ops amdgpu_job_fence_ops;
 static inline struct amdgpu_fence *to_amdgpu_fence(struct dma_fence *f)
 {
struct amdgpu_fence *__f = container_of(f, struct amdgpu_fence, base);
 
-   if (__f->base.ops == &amdgpu_fence_ops)
+   if (__f->base.ops == &amdgpu_fence_ops ||
+   __f->base.ops == &amdgpu_job_fence_ops)
r

Re: [PATCH 3/3] drm: fix the warnning of string style for scheduler trace.

2022-01-19 Thread Huang Rui
Ping~

It seems this patch was missed.

Thanks,
Ray

On Mon, Dec 13, 2021 at 02:34:22PM +0800, Huang, Ray wrote:
> Use __string(), __assign_str() and __get_str() helpers in the TRACE_EVENT()
> instead of string definitions in gpu scheduler trace.
> 
> [  158.890890] [ cut here ]
> [  158.890899] fmt: 'entity=%p, id=%llu, fence=%p, ring=%s, job count:%u, hw 
> job count:%d
>' current_buffer: 'Xorg-1588[001] .   
> 149.391136: drm_sched_job: entity=76f0d517, id=1, 
> fence=8dd56028, ring='
> [  158.890910] WARNING: CPU: 6 PID: 1617 at kernel/trace/trace.c:3830 
> trace_check_vprintf+0x481/0x4a0
> 
> Signed-off-by: Huang Rui 
> ---
>  drivers/gpu/drm/scheduler/gpu_scheduler_trace.h | 17 +
>  1 file changed, 9 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h 
> b/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h
> index 877ce9b127f1..4e397790c195 100644
> --- a/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h
> +++ b/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h
> @@ -38,6 +38,7 @@ TRACE_EVENT(drm_sched_job,
>   TP_STRUCT__entry(
>__field(struct drm_sched_entity *, entity)
>__field(struct dma_fence *, fence)
> +  __string(name, sched_job->sched->name)
>__field(const char *, name)
>__field(uint64_t, id)
>__field(u32, job_count)
> @@ -48,14 +49,14 @@ TRACE_EVENT(drm_sched_job,
>  __entry->entity = entity;
>  __entry->id = sched_job->id;
>  __entry->fence = &sched_job->s_fence->finished;
> -__entry->name = sched_job->sched->name;
> +__assign_str(name, sched_job->sched->name);
>  __entry->job_count = spsc_queue_count(&entity->job_queue);
>  __entry->hw_job_count = atomic_read(
>  &sched_job->sched->hw_rq_count);
>  ),
>   TP_printk("entity=%p, id=%llu, fence=%p, ring=%s, job count:%u, hw 
> job count:%d",
> __entry->entity, __entry->id,
> -   __entry->fence, __entry->name,
> +   __entry->fence, __get_str(name),
> __entry->job_count, __entry->hw_job_count)
>  );
>  
> @@ -65,7 +66,7 @@ TRACE_EVENT(drm_run_job,
>   TP_STRUCT__entry(
>__field(struct drm_sched_entity *, entity)
>__field(struct dma_fence *, fence)
> -  __field(const char *, name)
> +  __string(name, sched_job->sched->name)
>__field(uint64_t, id)
>__field(u32, job_count)
>__field(int, hw_job_count)
> @@ -75,14 +76,14 @@ TRACE_EVENT(drm_run_job,
>  __entry->entity = entity;
>  __entry->id = sched_job->id;
>  __entry->fence = &sched_job->s_fence->finished;
> -__entry->name = sched_job->sched->name;
> +__assign_str(name, sched_job->sched->name);
>  __entry->job_count = spsc_queue_count(&entity->job_queue);
>  __entry->hw_job_count = atomic_read(
>  &sched_job->sched->hw_rq_count);
>  ),
>   TP_printk("entity=%p, id=%llu, fence=%p, ring=%s, job count:%u, hw 
> job count:%d",
> __entry->entity, __entry->id,
> -   __entry->fence, __entry->name,
> +   __entry->fence, __get_str(name),
> __entry->job_count, __entry->hw_job_count)
>  );
>  
> @@ -103,7 +104,7 @@ TRACE_EVENT(drm_sched_job_wait_dep,
>   TP_PROTO(struct drm_sched_job *sched_job, struct dma_fence *fence),
>   TP_ARGS(sched_job, fence),
>   TP_STRUCT__entry(
> -  __field(const char *,name)
> +  __string(name, sched_job->sched->name)
>__field(uint64_t, id)
>__field(struct dma_fence *, fence)
>__field(uint64_t, ctx)
> @@ -111,14 +112,14 @@ TRACE_EVENT(drm_sched_job_wait_dep,
>),
>  
>   TP_fast_assign(
> - 

[PATCH] drm/virtio: add definition for venus capset

2023-09-15 Thread Huang Rui
This definition is used for qemu, and qemu imports this macro in the
headers to enable venus for virtio gpu. So we should add it even though
the kernel doesn't use it.

Signed-off-by: Huang Rui 
---

Hi all,

We would like to add a new definition for the venus capset; it will be used
for qemu. Please see details in the discussion below:

https://lore.kernel.org/qemu-devel/b82982aa-5b9e-481e-9491-b9313877b...@daynix.com/

Thanks,
Ray

 include/uapi/linux/virtio_gpu.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/uapi/linux/virtio_gpu.h b/include/uapi/linux/virtio_gpu.h
index f556fde07b76..0e21f3998108 100644
--- a/include/uapi/linux/virtio_gpu.h
+++ b/include/uapi/linux/virtio_gpu.h
@@ -309,6 +309,8 @@ struct virtio_gpu_cmd_submit {
 
 #define VIRTIO_GPU_CAPSET_VIRGL 1
 #define VIRTIO_GPU_CAPSET_VIRGL2 2
+/* 3 is reserved for gfxstream */
+#define VIRTIO_GPU_CAPSET_VENUS 4
 
 /* VIRTIO_GPU_CMD_GET_CAPSET_INFO */
 struct virtio_gpu_get_capset_info {
-- 
2.25.1
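
As a rough usage illustration (not part of the patch; the helper below is
hypothetical), a consumer such as qemu can simply branch on the capset id
once the definition exists in the header:

/* Hypothetical helper, for illustration only. */
static const char *virtio_gpu_capset_name(uint32_t capset_id)
{
	switch (capset_id) {
	case VIRTIO_GPU_CAPSET_VIRGL:
		return "virgl";
	case VIRTIO_GPU_CAPSET_VIRGL2:
		return "virgl2";
	case VIRTIO_GPU_CAPSET_VENUS:
		return "venus";
	default:
		return "unknown";
	}
}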



Re: [PATCH v2] drm/virtio: add definitions for gfxstream and venus capset

2023-10-10 Thread Huang Rui
On Tue, Oct 10, 2023 at 06:20:03PM +0800, Dmitry Osipenko wrote:
> Hi,
> 
> On 10/10/23 06:25, Huang Rui wrote:
> > These definitions are used for qemu, and qemu imports these macros in the
> > headers to enable gfxstream or venus for virtio gpu. So we should add them
> > even though the kernel doesn't use them.
> > 
> > Signed-off-by: Huang Rui 
> > ---
> > 
> > Changes V1 -> V2:
> > - Add all capsets including gfxstream and venus in kernel header (Dmitry 
> > Osipenko)
> > 
> > v1: https://lore.kernel.org/lkml/20230915105918.3763061-1-ray.hu...@amd.com/
> > 
> >  include/uapi/linux/virtio_gpu.h | 2 ++
> >  1 file changed, 2 insertions(+)
> > 
> > diff --git a/include/uapi/linux/virtio_gpu.h 
> > b/include/uapi/linux/virtio_gpu.h
> > index f556fde07b76..327792658bdc 100644
> > --- a/include/uapi/linux/virtio_gpu.h
> > +++ b/include/uapi/linux/virtio_gpu.h
> > @@ -309,6 +309,8 @@ struct virtio_gpu_cmd_submit {
> >  
> >  #define VIRTIO_GPU_CAPSET_VIRGL 1
> >  #define VIRTIO_GPU_CAPSET_VIRGL2 2
> > +#define VIRTIO_GPU_CAPSET_GFXSTREAM 3
> > +#define VIRTIO_GPU_CAPSET_VENUS 4
> >  
> >  /* VIRTIO_GPU_CMD_GET_CAPSET_INFO */
> >  struct virtio_gpu_get_capset_info {
> 
> By the "all" capsets, I meant to pick up all definitions from crosvm.
> There should be VIRTIO_GPU_CAPSET_DRM at minimum, could you please add it?
> 

Sure. Thanks for the reminder.

Best Regards,
Ray


[PATCH v3] drm/virtio: add new virtio gpu capset definitions

2023-10-10 Thread Huang Rui
These definitions are used for qemu, and qemu imports these macros in the
headers to enable gfxstream, venus, cross domain, and drm (native
context) for virtio gpu. So we should add them even though the kernel
doesn't use them.

Signed-off-by: Huang Rui 
Reviewed-by: Akihiko Odaki 
---

Changes V1 -> V2:
- Add all capsets including gfxstream and venus in kernel header (Dmitry 
Osipenko)

Changes V2 -> V3:
- Add missed capsets including cross domain and drm (native context)
  (Dmitry Osipenko)

v1: https://lore.kernel.org/lkml/20230915105918.3763061-1-ray.hu...@amd.com/
v2: https://lore.kernel.org/lkml/20231010032553.1138036-1-ray.hu...@amd.com/

 include/uapi/linux/virtio_gpu.h | 4 
 1 file changed, 4 insertions(+)

diff --git a/include/uapi/linux/virtio_gpu.h b/include/uapi/linux/virtio_gpu.h
index f556fde07b76..240911c8da31 100644
--- a/include/uapi/linux/virtio_gpu.h
+++ b/include/uapi/linux/virtio_gpu.h
@@ -309,6 +309,10 @@ struct virtio_gpu_cmd_submit {
 
 #define VIRTIO_GPU_CAPSET_VIRGL 1
 #define VIRTIO_GPU_CAPSET_VIRGL2 2
+#define VIRTIO_GPU_CAPSET_GFXSTREAM 3
+#define VIRTIO_GPU_CAPSET_VENUS 4
+#define VIRTIO_GPU_CAPSET_CROSS_DOMAIN 5
+#define VIRTIO_GPU_CAPSET_DRM 6
 
 /* VIRTIO_GPU_CMD_GET_CAPSET_INFO */
 struct virtio_gpu_get_capset_info {
-- 
2.25.1



Re: [PATCH v3] drm/virtio: add new virtio gpu capset definitions

2023-10-10 Thread Huang Rui
On Tue, Oct 10, 2023 at 11:52:14PM +0800, Dmitry Osipenko wrote:
> On 10/10/23 18:40, Dmitry Osipenko wrote:
> > On 10/10/23 16:57, Huang Rui wrote:
> >> These definitions are used for qemu, and qemu imports these macros in the
> >> headers to enable gfxstream, venus, cross domain, and drm (native
> >> context) for virtio gpu. So we should add them even though the kernel
> >> doesn't use them.
> >>
> >> Signed-off-by: Huang Rui 
> >> Reviewed-by: Akihiko Odaki 
> >> ---
> >>
> >> Changes V1 -> V2:
> >> - Add all capsets including gfxstream and venus in kernel header (Dmitry 
> >> Osipenko)
> >>
> >> Changes V2 -> V3:
> >> - Add missed capsets including cross domain and drm (native context)
> >>   (Dmitry Osipenko)
> >>
> >> v1: 
> >> https://lore.kernel.org/lkml/20230915105918.3763061-1-ray.hu...@amd.com/
> >> v2: 
> >> https://lore.kernel.org/lkml/20231010032553.1138036-1-ray.hu...@amd.com/
> >>
> >>  include/uapi/linux/virtio_gpu.h | 4 
> >>  1 file changed, 4 insertions(+)
> >>
> >> diff --git a/include/uapi/linux/virtio_gpu.h 
> >> b/include/uapi/linux/virtio_gpu.h
> >> index f556fde07b76..240911c8da31 100644
> >> --- a/include/uapi/linux/virtio_gpu.h
> >> +++ b/include/uapi/linux/virtio_gpu.h
> >> @@ -309,6 +309,10 @@ struct virtio_gpu_cmd_submit {
> >>  
> >>  #define VIRTIO_GPU_CAPSET_VIRGL 1
> >>  #define VIRTIO_GPU_CAPSET_VIRGL2 2
> >> +#define VIRTIO_GPU_CAPSET_GFXSTREAM 3
> > 
> > The GFXSTREAM capset isn't correct, it should be GFXSTREAM_VULKAN in
> > accordance to [1] and [2]. There are more capsets for GFXSTREAM.
> > 
> > [1]
> > https://github.com/google/crosvm/blob/main/rutabaga_gfx/src/rutabaga_utils.rs#L172
> > 
> > [2]
> > https://patchwork.kernel.org/project/qemu-devel/patch/20231006010835.444-7-gurchetansi...@chromium.org/
> 
> Though, maybe those are "rutabaga" capsets that not related to
> virtio-gpu because crosvm has another defs for virtio-gpu capsets [3].
> The DRM capset is oddly missing in [3] and code uses "rutabaga" capset
> for DRM and virtio-gpu.
> 
> [3]
> https://github.com/google/crosvm/blob/main/devices/src/virtio/gpu/protocol.rs#L416

Yes, [3] is the file that I referred to when adding these capset definitions.
And it's defined as gfxstream, not gfxstream_vulkan.

> 
> Gurchetan, could you please clarify which capsets definitions are
> related to virtio-gpu and gfxstream. The
> GFXSTREAM_VULKAN/GLES/MAGMA/COMPOSER or just the single GFXSTREAM?
> 

Gurchetan, may we have your insight?

Thanks,
Ray

> -- 
> Best regards,
> Dmitry
> 


[PATCH v2] drm/virtio: add definitions for gfxstream and venus capset

2023-10-09 Thread Huang Rui
These definitions are used for qemu, and qemu imports these macros in the
headers to enable gfxstream or venus for virtio gpu. So we should add them
even though the kernel doesn't use them.

Signed-off-by: Huang Rui 
---

Changes V1 -> V2:
- Add all capsets including gfxstream and venus in kernel header (Dmitry 
Osipenko)

v1: https://lore.kernel.org/lkml/20230915105918.3763061-1-ray.hu...@amd.com/

 include/uapi/linux/virtio_gpu.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/uapi/linux/virtio_gpu.h b/include/uapi/linux/virtio_gpu.h
index f556fde07b76..327792658bdc 100644
--- a/include/uapi/linux/virtio_gpu.h
+++ b/include/uapi/linux/virtio_gpu.h
@@ -309,6 +309,8 @@ struct virtio_gpu_cmd_submit {
 
 #define VIRTIO_GPU_CAPSET_VIRGL 1
 #define VIRTIO_GPU_CAPSET_VIRGL2 2
+#define VIRTIO_GPU_CAPSET_GFXSTREAM 3
+#define VIRTIO_GPU_CAPSET_VENUS 4
 
 /* VIRTIO_GPU_CMD_GET_CAPSET_INFO */
 struct virtio_gpu_get_capset_info {
-- 
2.25.1



[RFC PATCH 4/5] x86/xen: acpi registers gsi for xen pvh

2023-03-12 Thread Huang Rui
From: Chen Jiqian 

Add acpi_register_gsi_xen_pvh() to register the GSI for PVH mode.
In addition to calling acpi_register_gsi_ioapic(), it also sets up
a mapping between the GSI and the vector on the hypervisor side. That
way, when the dGPU raises an interrupt, the hypervisor can correctly
find which guest domain should process the interrupt by its vector.

Signed-off-by: Chen Jiqian 
Signed-off-by: Huang Rui 
---
 arch/x86/include/asm/apic.h  |  7 ++
 arch/x86/include/asm/xen/pci.h   |  5 
 arch/x86/kernel/acpi/boot.c  |  2 +-
 arch/x86/pci/xen.c   | 39 
 drivers/xen/events/events_base.c |  2 ++
 5 files changed, 54 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 3415321c8240..f3bc5de1f1d4 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -179,6 +179,8 @@ extern bool apic_needs_pit(void);
 
 extern void apic_send_IPI_allbutself(unsigned int vector);
 
+extern int acpi_register_gsi_ioapic(struct device *dev, u32 gsi,
+   int trigger, int polarity);
 #else /* !CONFIG_X86_LOCAL_APIC */
 static inline void lapic_shutdown(void) { }
 #define local_apic_timer_c2_ok 1
@@ -193,6 +195,11 @@ static inline void apic_intr_mode_init(void) { }
 static inline void lapic_assign_system_vectors(void) { }
 static inline void lapic_assign_legacy_vector(unsigned int i, bool r) { }
 static inline bool apic_needs_pit(void) { return true; }
+static inline int acpi_register_gsi_ioapic(struct device *dev, u32 gsi,
+   int trigger, int polarity)
+{
+   return (int)gsi;
+}
 #endif /* !CONFIG_X86_LOCAL_APIC */
 
 #ifdef CONFIG_X86_X2APIC
diff --git a/arch/x86/include/asm/xen/pci.h b/arch/x86/include/asm/xen/pci.h
index 9015b888edd6..aa8ded61fc2d 100644
--- a/arch/x86/include/asm/xen/pci.h
+++ b/arch/x86/include/asm/xen/pci.h
@@ -5,6 +5,7 @@
 #if defined(CONFIG_PCI_XEN)
 extern int __init pci_xen_init(void);
 extern int __init pci_xen_hvm_init(void);
+extern int __init pci_xen_pvh_init(void);
 #define pci_xen 1
 #else
 #define pci_xen 0
@@ -13,6 +14,10 @@ static inline int pci_xen_hvm_init(void)
 {
return -1;
 }
+static inline int pci_xen_pvh_init(void)
+{
+   return -1;
+}
 #endif
 #ifdef CONFIG_XEN_PV_DOM0
 int __init pci_xen_initial_domain(void);
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 907cc98b1938..25ec48dd897e 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -718,7 +718,7 @@ static int acpi_register_gsi_pic(struct device *dev, u32 
gsi,
 }
 
 #ifdef CONFIG_X86_LOCAL_APIC
-static int acpi_register_gsi_ioapic(struct device *dev, u32 gsi,
+int acpi_register_gsi_ioapic(struct device *dev, u32 gsi,
int trigger, int polarity)
 {
int irq = gsi;
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index b94f727251b6..43b8b6d7147b 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -114,6 +114,38 @@ static int acpi_register_gsi_xen_hvm(struct device *dev, 
u32 gsi,
 false /* no mapping of GSI to PIRQ */);
 }
 
+static int acpi_register_gsi_xen_pvh(struct device *dev, u32 gsi,
+   int trigger, int polarity)
+{
+   int irq;
+   int rc;
+   struct physdev_map_pirq map_irq;
+   struct physdev_setup_gsi setup_gsi;
+
+   irq = acpi_register_gsi_ioapic(dev, gsi, trigger, polarity);
+
+   map_irq.domid = DOMID_SELF;
+   map_irq.type = MAP_PIRQ_TYPE_GSI;
+   map_irq.index = gsi;
+   map_irq.pirq = gsi;
+
+   rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
+   if (rc)
+   printk(KERN_ERR "xen map GSI: %u failed %d\n", gsi, rc);
+
+   setup_gsi.gsi = gsi;
+   setup_gsi.triggering = (trigger == ACPI_EDGE_SENSITIVE ? 0 : 1);
+   setup_gsi.polarity = (polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
+
+   rc = HYPERVISOR_physdev_op(PHYSDEVOP_setup_gsi, &setup_gsi);
+   if (rc == -EEXIST)
+   printk(KERN_INFO "Already setup the GSI :%u\n", gsi);
+   else if (rc)
+   printk(KERN_ERR "Failed to setup GSI :%u, err_code:%d\n", gsi, 
rc);
+
+   return irq;
+}
+
 #ifdef CONFIG_XEN_PV_DOM0
 static int xen_register_gsi(u32 gsi, int triggering, int polarity)
 {
@@ -554,6 +586,13 @@ int __init pci_xen_hvm_init(void)
return 0;
 }
 
+int __init pci_xen_pvh_init(void)
+{
+   __acpi_register_gsi = acpi_register_gsi_xen_pvh;
+   __acpi_unregister_gsi = NULL;
+   return 0;
+}
+
 #ifdef CONFIG_XEN_PV_DOM0
 int __init pci_xen_initial_domain(void)
 {
diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index c443f04aaad7..48dff0ed9acd 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -2317,6 +2317,8 @@ void __init xen_init_IRQ(void)
xen_init_setup_upcall_vector();
   

[RFC PATCH 2/5] xen/grants: update initialization order of xen grant table

2023-03-12 Thread Huang Rui
The xen grant table is initialized before the PCI resources are parsed,
so xen_alloc_unpopulated_pages() ends up using a range from the PCI
window because Linux hasn't parsed the PCI information yet.

So modify the initialization order to make sure the real PCI resources
are parsed beforehand.

Signed-off-by: Huang Rui 
---
 arch/x86/xen/grant-table.c | 2 +-
 drivers/xen/grant-table.c  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/xen/grant-table.c b/arch/x86/xen/grant-table.c
index 1e681bf62561..64a04d1e70f5 100644
--- a/arch/x86/xen/grant-table.c
+++ b/arch/x86/xen/grant-table.c
@@ -165,5 +165,5 @@ static int __init xen_pvh_gnttab_setup(void)
 }
 /* Call it _before_ __gnttab_init as we need to initialize the
  * xen_auto_xlat_grant_frames first. */
-core_initcall(xen_pvh_gnttab_setup);
+fs_initcall_sync(xen_pvh_gnttab_setup);
 #endif
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index e1ec725c2819..6382112f3473 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -1680,4 +1680,4 @@ static int __gnttab_init(void)
 }
 /* Starts after core_initcall so that xen_pvh_gnttab_setup can be called
  * beforehand to initialize xen_auto_xlat_grant_frames. */
-core_initcall_sync(__gnttab_init);
+rootfs_initcall(__gnttab_init);
-- 
2.25.1
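
For readers who don't have the initcall levels memorized, a short reference
comment (ordering taken from include/linux/init.h; exactly where the PCI
resources get parsed is left hedged):

/*
 * Initcall ordering, lowest level runs first:
 *   core_initcall (1) -> core_initcall_sync (1s) -> postcore (2)
 *   -> arch (3) -> subsys (4) -> fs_initcall (5) -> fs_initcall_sync (5s)
 *   -> rootfs_initcall -> device_initcall (6) -> late_initcall (7)
 *
 * Moving xen_pvh_gnttab_setup to fs_initcall_sync and __gnttab_init to
 * rootfs_initcall keeps their relative order (gnttab_setup still runs
 * first) while delaying both until after the PCI resources should have
 * been parsed.
 */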



[RFC PATCH 1/5] x86/xen: disable swiotlb for xen pvh

2023-03-12 Thread Huang Rui
Xen PVH is the paravirtualized mode and takes advantage of hardware
virtualization support when possible. It will use the hardware IOMMU
support instead of xen-swiotlb, so disable swiotlb if the current domain
is Xen PVH.

Signed-off-by: Huang Rui 
---
 arch/x86/kernel/pci-dma.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 30bbe4abb5d6..f5c73dd18f2a 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -74,6 +74,12 @@ static inline void __init pci_swiotlb_detect(void)
 #ifdef CONFIG_SWIOTLB_XEN
 static void __init pci_xen_swiotlb_init(void)
 {
+   /* Xen PVH domain won't use swiotlb */
+   if (xen_pvh_domain()) {
+   x86_swiotlb_enable = false;
+   return;
+   }
+
if (!xen_initial_domain() && !x86_swiotlb_enable)
return;
x86_swiotlb_enable = true;
@@ -86,7 +92,7 @@ static void __init pci_xen_swiotlb_init(void)
 
 int pci_xen_swiotlb_init_late(void)
 {
-   if (dma_ops == &xen_swiotlb_dma_ops)
+   if (xen_pvh_domain() || dma_ops == &xen_swiotlb_dma_ops)
return 0;
 
/* we can work with the default swiotlb */
-- 
2.25.1



[RFC PATCH 3/5] drm/amdgpu: set passthrough mode for xen pvh/hvm

2023-03-12 Thread Huang Rui
There is a second-stage translation between the guest machine address
and the host machine address in Xen PVH/HVM. The PCI BAR addresses in the
Xen guest kernel are not translated at the second stage on Xen PVH/HVM,
so they are not the real physical addresses that the hardware expects,
so we need to set passthrough mode for Xen PVH/HVM as well.

Signed-off-by: Huang Rui 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index f2e2cbaa7fde..7b4369eba19d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -743,7 +743,8 @@ void amdgpu_detect_virtualization(struct amdgpu_device 
*adev)
 
if (!reg) {
/* passthrough mode exclus sriov mod */
-   if (is_virtual_machine() && !xen_initial_domain())
+   if (is_virtual_machine() &&
+   !(xen_initial_domain() && xen_pv_domain()))
adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE;
}
 
-- 
2.25.1



[RFC PATCH 0/5] Add Xen PVH dom0 support for GPU

2023-03-12 Thread Huang Rui
Hi all,

Currently, we are working to add VirtIO GPU and passthrough GPU support on
Xen. We expect to use HVM on domU and PVH on dom0. The x86 PVH dom0
support needs a few modifications on our APU platform. These functions
require support from multiple software components, including the kernel, xen,
qemu, mesa, and virglrenderer. Please see the patch series for Xen and QEMU below.

Xen: https://lists.xenproject.org/archives/html/xen-devel/2023-03/msg00714.html
QEMU: https://lists.nongnu.org/archive/html/qemu-devel/2023-03/msg03972.html

The kernel part mainly adds the PVH dom0 support:

1) Enable Xen PVH dom0 for AMDGPU

Please check patches 1 to 3, which enable Xen PVH dom0 for amdgpu. We
would like to use the hardware IOMMU instead of swiotlb for buffer copies;
PV dom0 only supports swiotlb.

There are still some workarounds in the kernel that we need to dig out, like the one below:
https://git.kernel.org/pub/scm/linux/kernel/git/rui/linux.git/commit/?h=upstream-fox-xen=9bee65dd3498dfc6aad283d22ff641198b5c91ed

2) Add PCIe Passthrough (GPU) on Xen PVH dom0

Please check patches 4 to 5; they implement the acpi_register_gsi_xen_pvh API
to register the GSI for the guest domU, and add a new privcmd to obtain the
GSI from the IRQ.

Below are screenshots of these functions; please take a look.

Passthrough GPU: 
https://drive.google.com/file/d/17onr5gvDK8KM_LniHTSQEI2hGJZlI09L/view?usp=share_link
Venus: 
https://drive.google.com/file/d/1_lPq6DMwHu1JQv7LUUVRx31dBj0HJYcL/view?usp=share_link
Zink: 
https://drive.google.com/file/d/1FxLmKu6X7uJOxx1ZzwOm1yA6IL5WMGzd/view?usp=share_link

Repositories
Kernel: 
https://git.kernel.org/pub/scm/linux/kernel/git/rui/linux.git/log/?h=upstream-fox-xen
Xen: https://gitlab.com/huangrui123/xen/-/commits/upstream-for-xen
QEMU: https://gitlab.com/huangrui123/qemu/-/commits/upstream-for-xen
Mesa: https://gitlab.freedesktop.org/rui/mesa/-/commits/upstream-for-xen
Virglrenderer: 
https://gitlab.freedesktop.org/rui/virglrenderer/-/commits/upstream-for-xen

We are writing the documentation on the xen wiki page, and will update it in
a future version.

Thanks,
Ray

Chen Jiqian (2):
  x86/xen: acpi registers gsi for xen pvh
  xen/privcmd: add IOCTL_PRIVCMD_GSI_FROM_IRQ

Huang Rui (3):
  x86/xen: disable swiotlb for xen pvh
  xen/grants: update initialization order of xen grant table
  drm/amdgpu: set passthrough mode for xen pvh/hvm

 arch/x86/include/asm/apic.h  |  7 
 arch/x86/include/asm/xen/pci.h   |  5 +++
 arch/x86/kernel/acpi/boot.c  |  2 +-
 arch/x86/kernel/pci-dma.c|  8 -
 arch/x86/pci/xen.c   | 43 
 arch/x86/xen/grant-table.c   |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c |  3 +-
 drivers/xen/events/events_base.c | 39 +
 drivers/xen/grant-table.c|  2 +-
 drivers/xen/privcmd.c| 20 +++
 include/uapi/xen/privcmd.h   |  7 
 include/xen/events.h |  5 +++
 12 files changed, 138 insertions(+), 5 deletions(-)

-- 
2.25.1



[RFC PATCH 5/5] xen/privcmd: add IOCTL_PRIVCMD_GSI_FROM_IRQ

2023-03-12 Thread Huang Rui
From: Chen Jiqian 

When the hypervisor gets an interrupt, it needs the interrupt's
GSI number instead of the IRQ number. The GSI number is unique
in xen, but the IRQ number is only unique within one domain.
So we need to record the relationship between IRQ and GSI when
dom0 initializes the PCI devices, and provide the ioctl
IOCTL_PRIVCMD_GSI_FROM_IRQ to translate an IRQ to a GSI. That
way, we can map the pirq successfully on the hypervisor side.
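
For illustration, a privileged user space caller could presumably use the
new ioctl roughly as below; the device path, headers and error handling are
assumptions and not part of this patch:

#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <xen/privcmd.h>	/* struct privcmd_gsi_from_irq, IOCTL_PRIVCMD_GSI_FROM_IRQ */

/* Hypothetical sketch: translate a dom0 IRQ number back to its GSI. */
static int gsi_from_irq(unsigned int irq)
{
	struct privcmd_gsi_from_irq arg = { .irq = irq };
	int fd = open("/dev/xen/privcmd", O_RDWR);
	int gsi = -1;

	if (fd < 0)
		return -1;
	if (ioctl(fd, IOCTL_PRIVCMD_GSI_FROM_IRQ, &arg) == 0)
		gsi = (int)arg.gsi;	/* -1 (as u32) when no PIRQ entry exists */
	close(fd);
	return gsi;
}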

Signed-off-by: Chen Jiqian 
Signed-off-by: Huang Rui 
---
 arch/x86/pci/xen.c   |  4 
 drivers/xen/events/events_base.c | 37 
 drivers/xen/privcmd.c| 20 +
 include/uapi/xen/privcmd.h   |  7 ++
 include/xen/events.h |  5 +
 5 files changed, 73 insertions(+)

diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index 43b8b6d7147b..3237961c7640 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -143,6 +143,10 @@ static int acpi_register_gsi_xen_pvh(struct device *dev, 
u32 gsi,
else if (rc)
printk(KERN_ERR "Failed to setup GSI :%u, err_code:%d\n", gsi, 
rc);
 
+   rc = xen_pvh_add_gsi_irq_map(gsi, irq);
+   if (rc == -EEXIST)
+   printk(KERN_INFO "Already map the GSI :%u and IRQ: %d\n", gsi, 
irq);
+
return irq;
 }
 
diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index 48dff0ed9acd..39a57fed2de3 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -967,6 +967,43 @@ int xen_irq_from_gsi(unsigned gsi)
 }
 EXPORT_SYMBOL_GPL(xen_irq_from_gsi);
 
+int xen_gsi_from_irq(unsigned irq)
+{
+   struct irq_info *info;
+
+   list_for_each_entry(info, &xen_irq_list_head, list) {
+   if (info->type != IRQT_PIRQ)
+   continue;
+
+   if (info->irq == irq)
+   return info->u.pirq.gsi;
+   }
+
+   return -1;
+}
+EXPORT_SYMBOL_GPL(xen_gsi_from_irq);
+
+int xen_pvh_add_gsi_irq_map(unsigned gsi, unsigned irq)
+{
+   int tmp_irq;
+   struct irq_info *info;
+
+   tmp_irq = xen_irq_from_gsi(gsi);
+   if (tmp_irq != -1)
+   return -EEXIST;
+
+   info = kzalloc(sizeof(*info), GFP_KERNEL);
+   if (info == NULL)
+   panic("Unable to allocate metadata for GSI%d\n", gsi);
+
+   info->type = IRQT_PIRQ;
+   info->irq = irq;
+   info->u.pirq.gsi = gsi;
+   list_add_tail(&info->list, &xen_irq_list_head);
+
+   return 0;
+}
+
 static void __unbind_from_irq(unsigned int irq)
 {
evtchn_port_t evtchn = evtchn_from_irq(irq);
diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
index e88e8f6f0a33..830e84451731 100644
--- a/drivers/xen/privcmd.c
+++ b/drivers/xen/privcmd.c
@@ -37,6 +37,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "privcmd.h"
 
@@ -833,6 +834,21 @@ static long privcmd_ioctl_mmap_resource(struct file *file,
return rc;
 }
 
+static long privcmd_ioctl_gsi_from_irq(struct file *file, void __user *udata)
+{
+   struct privcmd_gsi_from_irq kdata;
+
+   if (copy_from_user(&kdata, udata, sizeof(kdata)))
+   return -EFAULT;
+
+   kdata.gsi = xen_gsi_from_irq(kdata.irq);
+
+   if (copy_to_user(udata, &kdata, sizeof(kdata)))
+   return -EFAULT;
+
+   return 0;
+}
+
 static long privcmd_ioctl(struct file *file,
  unsigned int cmd, unsigned long data)
 {
@@ -868,6 +884,10 @@ static long privcmd_ioctl(struct file *file,
ret = privcmd_ioctl_mmap_resource(file, udata);
break;
 
+   case IOCTL_PRIVCMD_GSI_FROM_IRQ:
+   ret = privcmd_ioctl_gsi_from_irq(file, udata);
+   break;
+
default:
break;
}
diff --git a/include/uapi/xen/privcmd.h b/include/uapi/xen/privcmd.h
index d2029556083e..55fe748bbfd7 100644
--- a/include/uapi/xen/privcmd.h
+++ b/include/uapi/xen/privcmd.h
@@ -98,6 +98,11 @@ struct privcmd_mmap_resource {
__u64 addr;
 };
 
+struct privcmd_gsi_from_irq {
+   __u32 irq;
+   __u32 gsi;
+};
+
 /*
  * @cmd: IOCTL_PRIVCMD_HYPERCALL
  * @arg: &privcmd_hypercall_t
@@ -125,5 +130,7 @@ struct privcmd_mmap_resource {
_IOC(_IOC_NONE, 'P', 6, sizeof(domid_t))
 #define IOCTL_PRIVCMD_MMAP_RESOURCE\
_IOC(_IOC_NONE, 'P', 7, sizeof(struct privcmd_mmap_resource))
+#define IOCTL_PRIVCMD_GSI_FROM_IRQ \
+   _IOC(_IOC_NONE, 'P', 8, sizeof(struct privcmd_gsi_from_irq))
 
 #endif /* __LINUX_PUBLIC_PRIVCMD_H__ */
diff --git a/include/xen/events.h b/include/xen/events.h
index 344081e71584..8377d8dfaa71 100644
--- a/include/xen/events.h
+++ b/include/xen/events.h
@@ -133,6 +133,11 @@ int xen_pirq_from_irq(unsigned irq);
 /* Return the irq allocated to the gsi */
 int xen_irq_from_gsi(unsigned gsi);
 
+/* Return the gsi from irq */
+int xen_gsi_from_irq(unsigned irq);
+
+int xen_pvh

Re: [RFC PATCH 1/5] x86/xen: disable swiotlb for xen pvh

2023-03-14 Thread Huang Rui
On Wed, Mar 15, 2023 at 08:52:30AM +0800, Stefano Stabellini wrote:
> On Mon, 13 Mar 2023, Jan Beulich wrote:
> > On 12.03.2023 13:01, Huang Rui wrote:
> > > Xen PVH is the paravirtualized mode and takes advantage of hardware
> > > virtualization support when possible. It will using the hardware IOMMU
> > > support instead of xen-swiotlb, so disable swiotlb if current domain is
> > > Xen PVH.
> > 
> > But the kernel has no way (yet) to drive the IOMMU, so how can it get
> > away without resorting to swiotlb in certain cases (like I/O to an
> > address-restricted device)?
> 
> I think Ray meant that, thanks to the IOMMU setup by Xen, there is no
> need for swiotlb-xen in Dom0. Address translations are done by the IOMMU
> so we can use guest physical addresses instead of machine addresses for
> DMA. This is a similar case to Dom0 on ARM when the IOMMU is available
> (see include/xen/arm/swiotlb-xen.h:xen_swiotlb_detect, the corresponding
> case is XENFEAT_not_direct_mapped).

Hi Jan, sorry for the late reply. We are using the native kernel amdgpu and ttm
drivers on Dom0. amdgpu/ttm would like to use the IOMMU to allocate coherent
buffers for userptr, which maps user space memory for GPU access; however,
swiotlb doesn't support this. In other words, with swiotlb we can only
handle the buffer page by page.

Thanks,
Ray

> 
> Jurgen, what do you think? Would you rather make xen_swiotlb_detect
> common between ARM and x86?


Re: [PATCH] drm/virtio: add definition for venus capset

2023-12-17 Thread Huang Rui
On Sat, Dec 16, 2023 at 12:45:32AM +0800, Dmitry Osipenko wrote:
> On 11/19/23 06:46, Dmitry Osipenko wrote:
> > On 9/21/23 00:16, Dmitry Osipenko wrote:
> >> On 9/15/23 13:59, Huang Rui wrote:
> >>> This definition is used for qemu, and qemu imports this macro in the
> >>> headers to enable venus for virtio gpu. So we should add it even though
> >>> the kernel doesn't use it.
> >>>
> >>> Signed-off-by: Huang Rui 
> >>> ---
> >>>
> >>> Hi all,
> >>>
> >>> We would like to add a new definition for venus capset, it will be used 
> >>> for
> >>> qemu. Please see details on below discussion:
> >>>
> >>> https://lore.kernel.org/qemu-devel/b82982aa-5b9e-481e-9491-b9313877b...@daynix.com/
> >>>
> >>> Thanks,
> >>> Ray
> >>>
> >>>  include/uapi/linux/virtio_gpu.h | 2 ++
> >>>  1 file changed, 2 insertions(+)
> >>>
> >>> diff --git a/include/uapi/linux/virtio_gpu.h 
> >>> b/include/uapi/linux/virtio_gpu.h
> >>> index f556fde07b76..0e21f3998108 100644
> >>> --- a/include/uapi/linux/virtio_gpu.h
> >>> +++ b/include/uapi/linux/virtio_gpu.h
> >>> @@ -309,6 +309,8 @@ struct virtio_gpu_cmd_submit {
> >>>  
> >>>  #define VIRTIO_GPU_CAPSET_VIRGL 1
> >>>  #define VIRTIO_GPU_CAPSET_VIRGL2 2
> >>> +/* 3 is reserved for gfxstream */
> >>> +#define VIRTIO_GPU_CAPSET_VENUS 4
> >>
> >> Could you please add all other capsets, so we won't need to do it
> >> again in the future
> > 
> > I've opened request to update virtio-spec with the corrected/updated
> > capsets https://github.com/oasis-tcs/virtio-spec/issues/182. I'm
> > expecting that it will take some time until spec change will be merged
> > and now leaning to apply this v1 patch to not hold the Venus work.
> > 
> > Gerd, do you have objections? R-b/ack?
> 
> Applied the patch to misc-next with an edited commit message. Updating the
> spec is taking much time; it's not worth holding this change longer. We'll
> add the rest of the capsets later on. Thanks, Rui!
> 

Thank you, Dmitry! I will update the corresponding patch in qemu and send
the v6 qemu patches.

Best Regards,
Ray



Re: [PATCH V2] drm/ttm: increase ttm pre-fault value to PMD size

2024-06-04 Thread Huang Rui
On Tue, Jun 04, 2024 at 04:49:34PM +0800, Zhu, Lingshan wrote:
> ttm page fault handler ttm_bo_vm_fault_reserved() maps
> TTM_BO_VM_NUM_PREFAULT more pages beforehand
> due to the principle of locality.
> 
> However, on some platforms the page faults are more costly, so this
> patch intends to increase the number of TTM pre-faulted pages to reduce
> the number of page faults.
> 
> When multiple levels of page tables are supported, the new default
> value would be the PMD size, similar to a huge page.
> 
> Signed-off-by: Zhu Lingshan 
> Reported-and-tested-by: Li Jingxiang 

Acked-by: Huang Rui 

> ---
>  include/drm/ttm/ttm_bo.h | 4 
>  1 file changed, 4 insertions(+)
> 
> diff --git a/include/drm/ttm/ttm_bo.h b/include/drm/ttm/ttm_bo.h
> index 6ccf96c91f3a..ef0f52f56ebc 100644
> --- a/include/drm/ttm/ttm_bo.h
> +++ b/include/drm/ttm/ttm_bo.h
> @@ -39,7 +39,11 @@
>  #include "ttm_device.h"
>  
>  /* Default number of pre-faulted pages in the TTM fault handler */
> +#if CONFIG_PGTABLE_LEVELS > 2
> +#define TTM_BO_VM_NUM_PREFAULT (1 << (PMD_SHIFT - PAGE_SHIFT))
> +#else
>  #define TTM_BO_VM_NUM_PREFAULT 16
> +#endif
>  
>  struct iosys_map;
>  
> -- 
> 2.45.1
> 
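
To put a number on the change, a quick worked example (assuming x86-64 with
4 KiB pages, PAGE_SHIFT = 12, and 2 MiB PMDs, PMD_SHIFT = 21):

/*
 * TTM_BO_VM_NUM_PREFAULT = 1 << (PMD_SHIFT - PAGE_SHIFT)
 *                        = 1 << (21 - 12)
 *                        = 512 pages = 2 MiB per fault,
 * compared with the previous default of 16 pages (64 KiB).
 */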


Re: [PATCH] drm/ttm: increase ttm pre-fault value to PMD size

2024-05-29 Thread Huang Rui
On Thu, May 30, 2024 at 11:41:04AM +0800, Zhu, Lingshan wrote:
> ttm page fault handler ttm_bo_vm_fault_reserved() maps
> TTM_BO_VM_NUM_PREFAULT more pages beforehand
> due to the principle of locality.
> 
> However, on some platforms the page faults are more costly, so this
> patch intends to increase the number of TTM pre-faulted pages to reduce
> the number of page faults.
> 
> When multiple levels of page tables are supported, the new default
> value would be the PMD size, similar to a huge page.
> 
> Signed-off-by: Zhu Lingshan 

Thanks Lingshan.

I suggested adding a Reported-by from Jingxiang, like this:

Reported-by: Jingxiang Li 

Jingxiang, could you please test this patch? We expect to have a Tested-by. :-)

Thanks,
Ray

> ---
>  include/drm/ttm/ttm_bo.h | 6 +-
>  1 file changed, 5 insertions(+), 1 deletion(-)
> 
> diff --git a/include/drm/ttm/ttm_bo.h b/include/drm/ttm/ttm_bo.h
> index 6ccf96c91f3a..c20ef44002da 100644
> --- a/include/drm/ttm/ttm_bo.h
> +++ b/include/drm/ttm/ttm_bo.h
> @@ -39,7 +39,11 @@
>  #include "ttm_device.h"
>  
>  /* Default number of pre-faulted pages in the TTM fault handler */
> -#define TTM_BO_VM_NUM_PREFAULT 16
> +#ifdef CONFIG_PGTABLE_HAS_HUGE_LEAVES
> +#define TTM_BO_VM_NUM_PREFAULT (1 << (PMD_SHIFT - PAGE_SHIFT))
> +#else
> + #define TTM_BO_VM_NUM_PREFAULT 16
> +#endif
>  
>  struct iosys_map;
>  
> -- 
> 2.45.1
> 


<    1   2