[PATCH 1/2] drm/amdgpu/pm: correct the firmware flag address for SMU IP v13.0.4

2022-06-08 Thread Tim Huang
For SMU IP v13.0.4, the smnMP1_FIRMWARE_FLAGS address is different,
we need this to correct the reading address.

Signed-off-by: Tim Huang 
Reviewed-by: Yifan Zhang 
---
 drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h   |  1 +
 drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c | 12 ++--
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
index e3454a876cac..43de0a8d4bd9 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
@@ -43,6 +43,7 @@
 
 /* address block */
 #define smnMP1_FIRMWARE_FLAGS  0x3010024
+#define smnMP1_V13_0_4_FIRMWARE_FLAGS  0x3010028
 #define smnMP0_FW_INTF 0x30101c0
 #define smnMP1_PUB_CTRL0x3010b14
 
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
index f9c36d294448..fba0b87d01fb 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
@@ -273,8 +273,16 @@ int smu_v13_0_check_fw_status(struct smu_context *smu)
struct amdgpu_device *adev = smu->adev;
uint32_t mp1_fw_flags;
 
-   mp1_fw_flags = RREG32_PCIE(MP1_Public |
-  (smnMP1_FIRMWARE_FLAGS & 0x));
+   switch (adev->ip_versions[MP1_HWIP][0]) {
+   case IP_VERSION(13, 0, 4):
+   mp1_fw_flags = RREG32_PCIE(MP1_Public |
+  (smnMP1_V13_0_4_FIRMWARE_FLAGS & 
0x));
+   break;
+   default:
+   mp1_fw_flags = RREG32_PCIE(MP1_Public |
+  (smnMP1_FIRMWARE_FLAGS & 
0x));
+   break;
+   }
 
if ((mp1_fw_flags & MP1_FIRMWARE_FLAGS__INTERRUPTS_ENABLED_MASK) >>
MP1_FIRMWARE_FLAGS__INTERRUPTS_ENABLED__SHIFT)
-- 
2.25.1



[PATCH 2/2] drm/amdgpu/pm: remove the repeated EnableGfxImu message sending

2022-06-08 Thread Tim Huang
The EnableGfxImu message will be issued in the set_gfx_power_up_by_imu.

Signed-off-by: Tim Huang 
Reviewed-by: Yifan Zhang 
---
 .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c| 13 -
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c
index 196670345552..82d3718d8324 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c
@@ -219,15 +219,10 @@ static int smu_v13_0_4_system_features_control(struct 
smu_context *smu, bool en)
 {
struct amdgpu_device *adev = smu->adev;
int ret = 0;
-   /* SMU fw need this message to trigger IMU to complete the 
initialization */
-   if (en)
-   ret = smu_cmn_send_smc_msg(smu, SMU_MSG_EnableGfxImu, NULL);
-   else {
-   if (!adev->in_s0ix)
-   ret = smu_cmn_send_smc_msg(smu,
-  SMU_MSG_PrepareMp1ForUnload,
-  NULL);
-   }
+
+   if (!en && !adev->in_s0ix)
+   ret = smu_cmn_send_smc_msg(smu, SMU_MSG_PrepareMp1ForUnload, 
NULL);
+
return ret;
 }
 
-- 
2.25.1



Re: [PATCH 1/8] drm: execution context for GEM buffers v2

2022-06-08 Thread Felix Kuehling



On 2022-05-04 03:47, Christian König wrote:

This adds the infrastructure for an execution context for GEM buffers
which is similar to the existinc TTMs execbuf util and intended to replace
it in the long term.

The basic functionality is that we abstracts the necessary loop to lock
many different GEM buffers with automated deadlock and duplicate handling.

v2: drop xarray and use dynamic resized array instead, the locking
 overhead is unecessary and measureable.

Signed-off-by: Christian König 


I finally got around to catching up with this thread. See some questions 
and comments inline.




---
  Documentation/gpu/drm-mm.rst |  12 ++
  drivers/gpu/drm/Kconfig  |   7 +
  drivers/gpu/drm/Makefile |   2 +
  drivers/gpu/drm/drm_exec.c   | 295 +++
  include/drm/drm_exec.h   | 144 +
  5 files changed, 460 insertions(+)
  create mode 100644 drivers/gpu/drm/drm_exec.c
  create mode 100644 include/drm/drm_exec.h

diff --git a/Documentation/gpu/drm-mm.rst b/Documentation/gpu/drm-mm.rst
index f32ccce5722d..bf7dd2a78e9b 100644
--- a/Documentation/gpu/drm-mm.rst
+++ b/Documentation/gpu/drm-mm.rst
@@ -493,6 +493,18 @@ DRM Sync Objects
  .. kernel-doc:: drivers/gpu/drm/drm_syncobj.c
 :export:
  
+DRM Execution context

+=
+
+.. kernel-doc:: drivers/gpu/drm/drm_exec.c
+   :doc: Overview
+
+.. kernel-doc:: include/drm/drm_exec.h
+   :internal:
+
+.. kernel-doc:: drivers/gpu/drm/drm_exec.c
+   :export:
+
  GPU Scheduler
  =
  
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig

index e88c497fa010..1b35c10df263 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -179,6 +179,12 @@ config DRM_TTM
  GPU memory types. Will be enabled automatically if a device driver
  uses it.
  
+config DRM_EXEC

+   tristate
+   depends on DRM
+   help
+ Execution context for command submissions
+
  config DRM_BUDDY
tristate
depends on DRM
@@ -252,6 +258,7 @@ config DRM_AMDGPU
select DRM_SCHED
select DRM_TTM
select DRM_TTM_HELPER
+   select DRM_EXEC
select POWER_SUPPLY
select HWMON
select BACKLIGHT_CLASS_DEVICE
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index 15fe3163f822..ee8573b683f3 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -37,6 +37,8 @@ obj-$(CONFIG_DRM_PANEL_ORIENTATION_QUIRKS) += 
drm_panel_orientation_quirks.o
  #
  # Memory-management helpers
  #
+#
+obj-$(CONFIG_DRM_EXEC) += drm_exec.o
  
  obj-$(CONFIG_DRM_BUDDY) += drm_buddy.o
  
diff --git a/drivers/gpu/drm/drm_exec.c b/drivers/gpu/drm/drm_exec.c

new file mode 100644
index ..ed2106c22786
--- /dev/null
+++ b/drivers/gpu/drm/drm_exec.c
@@ -0,0 +1,295 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+
+#include 
+#include 
+#include 
+
+/**
+ * DOC: Overview
+ *
+ * This component mainly abstracts the retry loop necessary for locking
+ * multiple GEM objects while preparing hardware operations (e.g. command
+ * submissions, page table updates etc..).
+ *
+ * If a contention is detected while locking a GEM object the cleanup procedure
+ * unlocks all previously locked GEM objects and locks the contended one first
+ * before locking any further objects.
+ *
+ * After an object is locked fences slots can optionally be reserved on the
+ * dma_resv object inside the GEM object.
+ *
+ * A typical usage pattern should look like this::
+ *
+ * struct drm_gem_object *obj;
+ * struct drm_exec exec;
+ * unsigned long index;
+ * int ret;
+ *
+ * drm_exec_init(, true);
+ * drm_exec_while_not_all_locked() {
+ * ret = drm_exec_prepare_obj(, boA, 1);
+ * drm_exec_continue_on_contention();
+ * if (ret)
+ * goto error;
+ *
+ * ret = drm_exec_lock(, boB, 1);


Where is drm_exec_lock? It's not in this patch.



+ * drm_exec_continue_on_contention();
+ * if (ret)
+ * goto error;
+ * }
+ *
+ * drm_exec_for_each_locked_object(, index, obj) {
+ * dma_resv_add_fence(obj->resv, fence, DMA_RESV_USAGE_READ);
+ * ...
+ * }
+ * drm_exec_fini();
+ *
+ * See struct dma_exec for more details.
+ */
+
+/* Dummy value used to initially enter the retry loop */
+#define DRM_EXEC_DUMMY (void*)~0
+
+/* Initialize the drm_exec_objects container */
+static void drm_exec_objects_init(struct drm_exec_objects *container)
+{
+   container->objects = kmalloc(PAGE_SIZE, GFP_KERNEL);


Should this be kvmalloc? You're using kvrealloc and kvfree elsewhere.



+
+   /* If allocation here fails, just delay that till the first use */
+   container->max_objects = container->objects ?
+   PAGE_SIZE / sizeof(void *) : 0;
+   container->num_objects = 0;
+}
+
+/* Cleanup the drm_exec_objects container */
+static void 

Re: [PATCH 3/8] drm/amdkfd: switch over to using drm_exec

2022-06-08 Thread Felix Kuehling



On 2022-05-04 03:47, Christian König wrote:

Avoids quite a bit of logic and kmalloc overhead.

Signed-off-by: Christian König 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h|   5 +-
  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 303 +++---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c|  14 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h|   3 +
  drivers/gpu/drm/amd/amdkfd/kfd_svm.c  |  32 +-
  5 files changed, 152 insertions(+), 205 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 4cb14c2fe53f..3f3a994c68e2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -25,12 +25,12 @@
  #ifndef AMDGPU_AMDKFD_H_INCLUDED
  #define AMDGPU_AMDKFD_H_INCLUDED
  
+#include 

  #include 
  #include 
  #include 
  #include 
  #include 
-#include 
  #include "amdgpu_sync.h"
  #include "amdgpu_vm.h"
  
@@ -66,8 +66,7 @@ struct kgd_mem {

struct dma_buf *dmabuf;
struct list_head attachments;
/* protected by amdkfd_process_info.lock */
-   struct ttm_validate_buffer validate_list;
-   struct ttm_validate_buffer resv_list;
+   struct list_head validate_list;
uint32_t domain;
unsigned int mapped_to_gpu_memory;
uint64_t va;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 3dc5ab2764ff..64ac4f8f49be 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -25,6 +25,8 @@
  #include 
  #include 
  
+#include 

+
  #include "amdgpu_object.h"
  #include "amdgpu_gem.h"
  #include "amdgpu_vm.h"
@@ -770,28 +772,19 @@ static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem 
*mem,
struct amdkfd_process_info *process_info,
bool userptr)
  {
-   struct ttm_validate_buffer *entry = >validate_list;
-   struct amdgpu_bo *bo = mem->bo;
-
-   INIT_LIST_HEAD(>head);
-   entry->num_shared = 1;
-   entry->bo = >tbo;
-   mutex_lock(_info->lock);


You removed mutex_lock, but left mutex_unlock below. Other than that, 
this patch looks reasonable. But my eyes may have glazed over with this 
much churn.


Regards,
  Felix



if (userptr)
-   list_add_tail(>head, _info->userptr_valid_list);
+   list_add_tail(>validate_list,
+ _info->userptr_valid_list);
else
-   list_add_tail(>head, _info->kfd_bo_list);
+   list_add_tail(>validate_list, _info->kfd_bo_list);
mutex_unlock(_info->lock);
  }
  
  static void remove_kgd_mem_from_kfd_bo_list(struct kgd_mem *mem,

struct amdkfd_process_info *process_info)
  {
-   struct ttm_validate_buffer *bo_list_entry;
-
-   bo_list_entry = >validate_list;
mutex_lock(_info->lock);
-   list_del(_list_entry->head);
+   list_del(>validate_list);
mutex_unlock(_info->lock);
  }
  
@@ -875,13 +868,12 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr,

   * object can track VM updates.
   */
  struct bo_vm_reservation_context {
-   struct amdgpu_bo_list_entry kfd_bo; /* BO list entry for the KFD BO */
-   unsigned int n_vms; /* Number of VMs reserved   */
-   struct amdgpu_bo_list_entry *vm_pd; /* Array of VM BO list entries  */
-   struct ww_acquire_ctx ticket;   /* Reservation ticket   */
-   struct list_head list, duplicates;  /* BO lists */
-   struct amdgpu_sync *sync;   /* Pointer to sync object   */
-   bool reserved;  /* Whether BOs are reserved */
+   /* DRM execution context for the reservation */
+   struct drm_exec exec;
+   /* Number of VMs reserved */
+   unsigned int n_vms;
+   /* Pointer to sync object */
+   struct amdgpu_sync *sync;
  };
  
  enum bo_vm_match {

@@ -905,35 +897,24 @@ static int reserve_bo_and_vm(struct kgd_mem *mem,
  
  	WARN_ON(!vm);
  
-	ctx->reserved = false;

ctx->n_vms = 1;
ctx->sync = >sync;
-
-   INIT_LIST_HEAD(>list);
-   INIT_LIST_HEAD(>duplicates);
-
-   ctx->vm_pd = kcalloc(ctx->n_vms, sizeof(*ctx->vm_pd), GFP_KERNEL);
-   if (!ctx->vm_pd)
-   return -ENOMEM;
-
-   ctx->kfd_bo.priority = 0;
-   ctx->kfd_bo.tv.bo = >tbo;
-   ctx->kfd_bo.tv.num_shared = 1;
-   list_add(>kfd_bo.tv.head, >list);
-
-   amdgpu_vm_get_pd_bo(vm, >list, >vm_pd[0]);
-
-   ret = ttm_eu_reserve_buffers(>ticket, >list,
-false, >duplicates);
-   if (ret) {
-   pr_err("Failed to reserve buffers in ttm.\n");
-   kfree(ctx->vm_pd);
-   ctx->vm_pd = NULL;
-   return ret;
+   drm_exec_init(>exec, true);
+   

Re: [PATCH] drm/amdgpu: Unpin MMIO and DOORBELL BOs only after map count goes to zero

2022-06-08 Thread Felix Kuehling

On 2022-06-08 16:03, Errabolu, Ramesh wrote:

[AMD Official Use Only - General]

My response is inline.

Regards,
Ramesh

-Original Message-
From: Kuehling, Felix 
Sent: Thursday, June 9, 2022 1:10 AM
To: amd-gfx@lists.freedesktop.org; Errabolu, Ramesh 
Subject: Re: [PATCH] drm/amdgpu: Unpin MMIO and DOORBELL BOs only after map 
count goes to zero


On 2022-06-08 07:51, Ramesh Errabolu wrote:

In existing code MMIO and DOORBELL BOs are unpinned without ensuring
the condition that their map count has reached zero. Unpinning without
checking this constraint could lead to an error while BO is being
freed. The patch fixes this issue.

Signed-off-by: Ramesh Errabolu 
---
   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 15 +++
   1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index a1de900ba677..e5dc94b745b1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1832,13 +1832,6 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
   
   	mutex_lock(>lock);
   
-	/* Unpin MMIO/DOORBELL BO's that were pinned during allocation */

-   if (mem->alloc_flags &
-   (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
-KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) {
-   amdgpu_amdkfd_gpuvm_unpin_bo(mem->bo);
-   }
-
mapped_to_gpu_memory = mem->mapped_to_gpu_memory;
is_imported = mem->is_imported;
mutex_unlock(>lock);
@@ -1855,7 +1848,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
/* Make sure restore workers don't access the BO any more */
bo_list_entry = >validate_list;
mutex_lock(_info->lock);
-   list_del(_list_entry->head);
+   list_del_init(_list_entry->head);

Is this an unrelated fix? What is this needed for? I vaguely remember 
discussing this before, but can't remember the reason.

Ramesh: This fix is unrelated to P2P work. I brought this issue to attention 
while working on IOMMU support on DKMS branch. Basically a user could call 
free() before the map count goes to zero. The patch is trying fix that.


I get that, but I couldn't remember why I suggested list_del_init here. 
It has nothing to do with unpinning of BOs.


Now I recall that it had something to do with restarting the ioctl after 
it was interrupted by a signal. reserve_bo_and_cond_vms can fail with 
-ERESTARTSYS. In that case the ioctl is reentered. We need to make sure 
it doesn't crash the second time around. list_del will remove 
bo_list_entry from the list but leave the pointers dangling. The second 
time around it will probably cause corruption or an oops. Using 
list_del_init avoids that by initializing the prev and next pointers to 
NULL.


See one more little fix below.




Regards,
    Felix



mutex_unlock(_info->lock);
   
   	/* No more MMU notifiers */

@@ -1880,6 +1873,12 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
   
   	ret = unreserve_bo_and_vms(, false, false);


This unreserve_bo_and_vms call cannot fail because the wait parameter is 
false. If it did fail, the error handling would be broken. I'd add a 
WARN_ONCE to make that assumption explicit, and change the return at the 
end of this function to return 0. Basically, if we got this far, we are 
not turning back, and we should return success.


You could update the commit headline to be more general. Something like: 
Fix error handling in amdgpu_amdkfd_gpuvm_free_memory_of_gpu.


Regards,
  Felix


   
+	/* Unpin MMIO/DOORBELL BO's that were pinned during allocation */

+   if (mem->alloc_flags &
+   (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
+KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))
+   amdgpu_amdkfd_gpuvm_unpin_bo(mem->bo);
+
/* Free the sync object */
amdgpu_sync_free(>sync);
   


[pull] amdgpu, amdkfd drm-fixes-5.19

2022-06-08 Thread Alex Deucher
Hi Dave, Daniel,

Fixes for 5.19.

The following changes since commit bf23729c7a5f44f0e863666b9364a64741fd3241:

  Merge tag 'amd-drm-next-5.19-2022-05-26-2' of 
https://gitlab.freedesktop.org/agd5f/linux into drm-next (2022-06-01 10:56:11 
+1000)

are available in the Git repository at:

  https://gitlab.freedesktop.org/agd5f/linux.git 
tags/amd-drm-fixes-5.19-2022-06-08

for you to fetch changes up to 431d071286524bd4f9ba2e46b1be87b479220174:

  drm/amdgpu/mes: only invalid/prime icache when finish loading both pipe MES 
FWs. (2022-06-08 15:39:16 -0400)


amd-drm-fixes-5.19-2022-06-08:

amdgpu:
- DCN 3.1 golden settings fix
- eDP fixes
- DMCUB fixes
- GFX11 fixes and cleanups
- VCN fix for yellow carp
- GMC11 fixes
- RAS fixes
- GPUVM TLB flush fixes
- SMU13 fixes
- VCN3 AV1 regression fix
- VCN2 JPEG fix
- Other misc fixes

amdkfd:
- MMU notifier fix
- Support for more GC 10.3.x families
- Pinned BO handling fix
- Partial migration bug fix


Alex Deucher (1):
  drm/amdgpu: update VCN codec support for Yellow Carp

Alvin (1):
  drm/amd/display: Don't clear ref_dtbclk value

Aric Cyr (1):
  drm/amd/display: 3.2.187

Aurabindo Pillai (1):
  drm/amd/display: remove stale config guards

Candice Li (1):
  drm/amdgpu: Resolve RAS GFX error count issue after cold boot on Arcturus

Christian König (2):
  drm/amdgpu: fix limiting AV1 to the first instance on VCN3
  drm/amdgpu: always flush the TLB on gfx8

Evan Quan (2):
  drm/amd/pm: suppress compile warnings about possible unaligned accesses
  drm/amdgpu: suppress the compile warning about 64 bit type

Guchun Chen (1):
  Revert "drm/amdgpu: Ensure the DMA engine is deactivated during set ups"

Hung, Cruise (1):
  drm/amd/display: Fix DMUB outbox trace in S4 (#4465)

Ilya (1):
  drm/amd/display: Fix possible infinite loop in DP LT fallback

Jesse Zhang (1):
  drm/amdkfd:Fix fw version for 10.3.6

Jiapeng Chong (1):
  drm/amdgpu: make program_imu_rlc_ram static

Joseph Greathouse (1):
  drm/amdgpu: Add MODE register to wave debug info in gfx11

Lang Yu (1):
  drm/amdkfd: add pinned BOs to kfd_bo_list

Leung, Martin (1):
  drm/amd/display: revert Blank eDP on disable/enable drv

Mario Limonciello (1):
  drm/amdkfd: Add GC 10.3.6 and 10.3.7 KFD definitions

Mohammad Zafar Ziya (1):
  drm/amdgpu/jpeg2: Add jpeg vmid update under IB submit

Nicholas Kazlauskas (2):
  drm/amd/display: Pass the new context into disable OTG WA
  Revert "drm/amd/display: Pass the new context into disable OTG WA"

Philip Yang (3):
  drm/amdkfd: Use mmget_not_zero in MMU notifier
  drm/amdgpu: Update PDEs flush TLB if PTB/PDB moved
  drm/amdkfd: Fix partial migration bugs

Roman Li (1):
  drm/amdgpu: fix aper_base for APU

Sherry Wang (1):
  drm/amd/display: Read Golden Settings Table from VBIOS

Stanley.Yang (1):
  drm/amdgpu: fix ras supported check

Sunil Khatri (1):
  drm/amdgpu: enable tmz by default for GC 10.3.7

Yifan Zhang (1):
  drm/amdgpu/mes: only invalid/prime icache when finish loading both pipe 
MES FWs.

hengzhou (1):
  drm/amd/display: Wait DMCUB to idle state before reset.

sunliming (2):
  drm/amdgpu: fix a missing break in gfx_v11_0_handle_priv_fault
  drm/amdgpu: make gfx_v11_0_rlc_stop static

 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c   |  13 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c|   9 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c|   4 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c|  32 --
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c |  13 ++-
 drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c |   6 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c |   6 ++
 drivers/gpu/drm/amd/amdgpu/imu_v11_0.c |   2 +-
 drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c |   6 +-
 drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h |   1 +
 drivers/gpu/drm/amd/amdgpu/mes_v11_0.c |  36 ---
 drivers/gpu/drm/amd/amdgpu/nv.c|   1 +
 drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 109 +
 drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c  |  17 ++--
 drivers/gpu/drm/amd/amdkfd/kfd_crat.c  |   2 +
 drivers/gpu/drm/amd/amdkfd/kfd_device.c|  18 +++-
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c   |   6 +-
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c   |   5 +-
 .../amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c   |  11 ++-
 .../amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.h   |   2 +
 .../amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c |   7 +-
 .../amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c |   3 +-
 drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c   | 106 +---
 drivers/gpu/drm/amd/display/dc/dc.h|   5 +-
 

RE: [PATCH 2/4] drm/amdkfd: Pass MES/RS64 information to sysfs

2022-06-08 Thread Sider, Graham
[AMD Official Use Only - General]

I was originally using the MES scheduler version in Thunk to check whether to 
use userptr or non-paged memory for the queue allocation. Just today though 
I've changed this to always use non-paged so this isn't needed anymore. Maybe 
instead we can think about adding this to debugfs, I agree.

Thanks,
Graham 

-Original Message-
From: Kuehling, Felix  
Sent: Wednesday, June 8, 2022 3:31 PM
To: Sider, Graham ; amd-gfx@lists.freedesktop.org
Cc: Yang, Philip ; Joshi, Mukul 
Subject: Re: [PATCH 2/4] drm/amdkfd: Pass MES/RS64 information to sysfs

On 2022-06-07 18:50, Graham Sider wrote:
> Make MES/RS64 CP enablement and MES scheduler/MES KIQ versions 
> available through sysfs.
>
> Signed-off-by: Graham Sider 

Why do we need to expose this to user mode applications? They don't interact 
directly with the scheduler or the KIQ. I cannot think of any reasonable 
conditions in usermode that would need the scheduler or KIQ version number. It 
may make sense in debugfs, but not here.

Regards,
   Felix


> ---
>   drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 8 
>   1 file changed, 8 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> index 02efae4f5549..51c8a285baaf 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> @@ -571,6 +571,14 @@ static ssize_t node_show(struct kobject *kobj, struct 
> attribute *attr,
> dev->gpu->sdma_fw_version);
>   sysfs_show_64bit_prop(buffer, offs, "unique_id",
> dev->gpu->adev->unique_id);
> + sysfs_show_32bit_prop(buffer, offs, "mes_enabled",
> +   (uint32_t)dev->gpu->adev->enable_mes);
> + sysfs_show_32bit_prop(buffer, offs, "rs64_cp_enabled",
> +   
> (uint32_t)dev->gpu->adev->gfx.rs64_enable);
> + sysfs_show_32bit_prop(buffer, offs, "mes_sched_version",
> +   dev->gpu->adev->mes.sched_version);
> + sysfs_show_32bit_prop(buffer, offs, "mes_kiq_version",
> +   dev->gpu->adev->mes.kiq_version);
>   
>   }
>   


RE: [PATCH] drm/amdgpu: Unpin MMIO and DOORBELL BOs only after map count goes to zero

2022-06-08 Thread Errabolu, Ramesh
[AMD Official Use Only - General]

My response is inline.

Regards,
Ramesh

-Original Message-
From: Kuehling, Felix  
Sent: Thursday, June 9, 2022 1:10 AM
To: amd-gfx@lists.freedesktop.org; Errabolu, Ramesh 
Subject: Re: [PATCH] drm/amdgpu: Unpin MMIO and DOORBELL BOs only after map 
count goes to zero


On 2022-06-08 07:51, Ramesh Errabolu wrote:
> In existing code MMIO and DOORBELL BOs are unpinned without ensuring 
> the condition that their map count has reached zero. Unpinning without 
> checking this constraint could lead to an error while BO is being 
> freed. The patch fixes this issue.
>
> Signed-off-by: Ramesh Errabolu 
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 15 +++
>   1 file changed, 7 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index a1de900ba677..e5dc94b745b1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -1832,13 +1832,6 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
>   
>   mutex_lock(>lock);
>   
> - /* Unpin MMIO/DOORBELL BO's that were pinned during allocation */
> - if (mem->alloc_flags &
> - (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
> -  KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) {
> - amdgpu_amdkfd_gpuvm_unpin_bo(mem->bo);
> - }
> -
>   mapped_to_gpu_memory = mem->mapped_to_gpu_memory;
>   is_imported = mem->is_imported;
>   mutex_unlock(>lock);
> @@ -1855,7 +1848,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
>   /* Make sure restore workers don't access the BO any more */
>   bo_list_entry = >validate_list;
>   mutex_lock(_info->lock);
> - list_del(_list_entry->head);
> + list_del_init(_list_entry->head);

Is this an unrelated fix? What is this needed for? I vaguely remember 
discussing this before, but can't remember the reason.

Ramesh: This fix is unrelated to P2P work. I brought this issue to attention 
while working on IOMMU support on DKMS branch. Basically a user could call 
free() before the map count goes to zero. The patch is trying fix that.

Regards,
   Felix


>   mutex_unlock(_info->lock);
>   
>   /* No more MMU notifiers */
> @@ -1880,6 +1873,12 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
>   
>   ret = unreserve_bo_and_vms(, false, false);
>   
> + /* Unpin MMIO/DOORBELL BO's that were pinned during allocation */
> + if (mem->alloc_flags &
> + (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
> +  KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))
> + amdgpu_amdkfd_gpuvm_unpin_bo(mem->bo);
> +
>   /* Free the sync object */
>   amdgpu_sync_free(>sync);
>   


Re: [PATCH] drm/amdkfd: Remove field io_link_count from struct kfd_topology_device

2022-06-08 Thread Felix Kuehling

On 2022-06-08 07:51, Ramesh Errabolu wrote:

The field is redundant and does not serve any functional role

Signed-off-by: Ramesh Errabolu 


Reviewed-by: Felix Kuehling 



---
  drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 2 --
  drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 1 -
  drivers/gpu/drm/amd/amdkfd/kfd_topology.h | 1 -
  3 files changed, 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index cbfb32b3d235..a5409531a2fd 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -1040,7 +1040,6 @@ static int kfd_parse_subtype_iolink(struct 
crat_subtype_iolink *iolink,
props->rec_transfer_size =
iolink->recommended_transfer_size;
  
-			dev->io_link_count++;

dev->node_props.io_links_count++;
list_add_tail(>list, >io_link_props);
break;
@@ -1067,7 +1066,6 @@ static int kfd_parse_subtype_iolink(struct 
crat_subtype_iolink *iolink,
props2->node_from = id_to;
props2->node_to = id_from;
props2->kobj = NULL;
-   to_dev->io_link_count++;
to_dev->node_props.io_links_count++;
list_add_tail(>list, _dev->io_link_props);
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index 3e240b22ec91..304322ac39e6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -1819,7 +1819,6 @@ static void kfd_topology_update_io_links(int 
proximity_domain)
 */
if (iolink->node_to == proximity_domain) {
list_del(>list);
-   dev->io_link_count--;
dev->node_props.io_links_count--;
} else {
if (iolink->node_from > proximity_domain)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
index 2fb5664e1041..9f6c949186c1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
@@ -130,7 +130,6 @@ struct kfd_topology_device {
struct list_headmem_props;
uint32_tcache_count;
struct list_headcache_props;
-   uint32_tio_link_count;
struct list_headio_link_props;
struct list_headp2p_link_props;
struct list_headperf_props;


Re: [PATCH] drm/amdgpu: Unpin MMIO and DOORBELL BOs only after map count goes to zero

2022-06-08 Thread Felix Kuehling



On 2022-06-08 07:51, Ramesh Errabolu wrote:

In existing code MMIO and DOORBELL BOs are unpinned without ensuring the
condition that their map count has reached zero. Unpinning without checking
this constraint could lead to an error while BO is being freed. The patch
fixes this issue.

Signed-off-by: Ramesh Errabolu 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 15 +++
  1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index a1de900ba677..e5dc94b745b1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1832,13 +1832,6 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
  
  	mutex_lock(>lock);
  
-	/* Unpin MMIO/DOORBELL BO's that were pinned during allocation */

-   if (mem->alloc_flags &
-   (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
-KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) {
-   amdgpu_amdkfd_gpuvm_unpin_bo(mem->bo);
-   }
-
mapped_to_gpu_memory = mem->mapped_to_gpu_memory;
is_imported = mem->is_imported;
mutex_unlock(>lock);
@@ -1855,7 +1848,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
/* Make sure restore workers don't access the BO any more */
bo_list_entry = >validate_list;
mutex_lock(_info->lock);
-   list_del(_list_entry->head);
+   list_del_init(_list_entry->head);


Is this an unrelated fix? What is this needed for? I vaguely remember 
discussing this before, but can't remember the reason.


Regards,
  Felix



mutex_unlock(_info->lock);
  
  	/* No more MMU notifiers */

@@ -1880,6 +1873,12 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
  
  	ret = unreserve_bo_and_vms(, false, false);
  
+	/* Unpin MMIO/DOORBELL BO's that were pinned during allocation */

+   if (mem->alloc_flags &
+   (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
+KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))
+   amdgpu_amdkfd_gpuvm_unpin_bo(mem->bo);
+
/* Free the sync object */
amdgpu_sync_free(>sync);
  


Re: [PATCH 2/4] drm/amdkfd: Pass MES/RS64 information to sysfs

2022-06-08 Thread Felix Kuehling

On 2022-06-07 18:50, Graham Sider wrote:

Make MES/RS64 CP enablement and MES scheduler/MES KIQ versions available
through sysfs.

Signed-off-by: Graham Sider 


Why do we need to expose this to user mode applications? They don't 
interact directly with the scheduler or the KIQ. I cannot think of any 
reasonable conditions in usermode that would need the scheduler or KIQ 
version number. It may make sense in debugfs, but not here.


Regards,
  Felix



---
  drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 8 
  1 file changed, 8 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index 02efae4f5549..51c8a285baaf 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -571,6 +571,14 @@ static ssize_t node_show(struct kobject *kobj, struct 
attribute *attr,
  dev->gpu->sdma_fw_version);
sysfs_show_64bit_prop(buffer, offs, "unique_id",
  dev->gpu->adev->unique_id);
+   sysfs_show_32bit_prop(buffer, offs, "mes_enabled",
+ (uint32_t)dev->gpu->adev->enable_mes);
+   sysfs_show_32bit_prop(buffer, offs, "rs64_cp_enabled",
+ 
(uint32_t)dev->gpu->adev->gfx.rs64_enable);
+   sysfs_show_32bit_prop(buffer, offs, "mes_sched_version",
+ dev->gpu->adev->mes.sched_version);
+   sysfs_show_32bit_prop(buffer, offs, "mes_kiq_version",
+ dev->gpu->adev->mes.kiq_version);
  
  	}
  


RE: [PATCH 07/16] drm/amd/display: Cap OLED brightness per max frame-average luminance

2022-06-08 Thread Li, Roman
[AMD Official Use Only - General]

Hi Aaron,

Yes, the panel brightness should reach max after we send max-fall value as 
backlight current peak.

Thanks,
Roman

> -Original Message-
> From: Aaron Ma 
> Sent: Wednesday, June 8, 2022 6:02 AM
> To: Mahfooz, Hamza ; Li, Roman
> 
> Cc: Pillai, Aurabindo ; Lakha, Bhawanpreet
> ; Wentland, Harry
> ; Siqueira, Rodrigo
> ; Li, Sun peng (Leo) ;
> Gutierrez, Agustin ; amd-
> g...@lists.freedesktop.org; Zuo, Jerry ; Kotarac, Pavle
> ; Zhuo, Qingqing (Lillian)
> ; Chiu, Solomon ;
> Wang, Chao-kai (Stylon) ; Lin, Wayne
> 
> Subject: Re: [PATCH 07/16] drm/amd/display: Cap OLED brightness per max
> frame-average luminance
>
> Hi Roman:
>
> Can the panel achieve the max peak luminance if it is limited in frame-average
> luminance?
>
> Regards,
> Aaron


Re: [PATCH] drm/amd/display: Use pre-allocated temp struct for bounding box update

2022-06-08 Thread Aurabindo Pillai



On 2022-06-08 13:47, Leo wrote:



On 2022-06-08 13:25, Pillai, Aurabindo wrote:

[AMD Official Use Only - General]


Isnt it cleaner to revert the original patch which removed the temporary 
variable and then instead allocate the clock_limits array on heap, and later 
freed at the end of the function?

--


I don't quite recall the details, but in FPU context, we should not malloc 
since it can fault/sleep. More info here: 
https://yarchive.net/comp/linux/kernel_fp.html


It looks like we dont have access to bw_ctx and hence the VBA struct 
inside the callback to update bounding box. If we can add that, then we 
could replicate what we did within DML functions like 
dml32_ModeSupportAndSystemConfigurationFull() to reduce the stack usage.


See the struct define:


struct dml32_ModeSupportAndSystemConfigurationFull {
unsigned int dummy_integer_array[8][DC__NUM_DPP__MAX];
double dummy_double_array[2][DC__NUM_DPP__MAX];
DmlPipe SurfParameters[DC__NUM_DPP__MAX];
double dummy_single[5];
double dummy_single2[5];
SOCParametersList mSOCParameters;
unsigned int MaximumSwathWidthSupportLuma;
unsigned int MaximumSwathWidthSupportChroma;
};

struct dummy_vars {
struct 
DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation


DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation;
struct dml32_ModeSupportAndSystemConfigurationFull 
dml32_ModeSupportAndSystemConfigurationFull;

};

We define the dummy vars needed in the format of 
function_name.variable_name. dummy_vars is added to vba struct and this 
can be access from bw_ctx.



Or we could also add a similar dummy struct to the soc resources 
(dcn2_1_soc etc) and add the temporary variable there. This way, we have 
a consistent solution for reducing stack usage.



- Leo



Regards,
Jay
--
*From:* Li, Sun peng (Leo) 
*Sent:* Wednesday, June 8, 2022 12:48 PM
*To:* amd-gfx@lists.freedesktop.org 
*Cc:* Wentland, Harry ; Pillai, Aurabindo 
; Siqueira, Rodrigo ; Li, Sun peng 
(Leo) 
*Subject:* [PATCH] drm/amd/display: Use pre-allocated temp struct for bounding 
box update
  
From: Leo Li 


[Why]

There is a theoretical problem in prior patches for reducing the stack
size of *update_bw_bounding_box() functions.

By modifying the soc.clock_limits[n] struct directly, this can cause
unintended behavior as the for loop attempts to swap rows in
clock_limits[n]. A temporary struct is still required to make sure we
stay functinoally equivalent.

[How]

Add a temporary clock_limits table to the SOC struct, and use it when
swapping rows.

Signed-off-by: Leo Li 
---
  .../drm/amd/display/dc/dml/dcn20/dcn20_fpu.c  | 33 +-
  .../amd/display/dc/dml/dcn301/dcn301_fpu.c    | 36 ++-
  .../drm/amd/display/dc/dml/dcn31/dcn31_fpu.c  | 64 +++
  .../amd/display/dc/dml/display_mode_structs.h |  5 ++
  4 files changed, 82 insertions(+), 56 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
index c2fec0d85da4..e247b2270b1d 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
@@ -2015,9 +2015,8 @@ void dcn21_update_bw_bounding_box(struct dc *dc, struct 
clk_bw_params *bw_params
  
  ASSERT(clk_table->num_entries);

  /* Copy dcn2_1_soc.clock_limits to clock_limits to avoid copying over 
null states later */
-   for (i = 0; i < dcn2_1_soc.num_states + 1; i++) {
-   dcn2_1_soc.clock_limits[i] = dcn2_1_soc.clock_limits[i];
-   }
+   memcpy(_1_soc._clock_tmp, _1_soc.clock_limits,
+  sizeof(dcn2_1_soc.clock_limits));
  
  for (i = 0; i < clk_table->num_entries; i++) {

  /* loop backwards*/
@@ -2032,22 +2031,26 @@ void dcn21_update_bw_bounding_box(struct dc *dc, struct 
clk_bw_params *bw_params
  if (i == 1)
    

Re: [PATCH] drm/amd/display: Use pre-allocated temp struct for bounding box update

2022-06-08 Thread Harry Wentland



On 2022-06-08 12:48, sunpeng...@amd.com wrote:
> From: Leo Li 
> 
> [Why]
> 
> There is a theoretical problem in prior patches for reducing the stack
> size of *update_bw_bounding_box() functions.
> 
> By modifying the soc.clock_limits[n] struct directly, this can cause
> unintended behavior as the for loop attempts to swap rows in
> clock_limits[n]. A temporary struct is still required to make sure we
> stay functinoally equivalent.
> 
> [How]
> 
> Add a temporary clock_limits table to the SOC struct, and use it when
> swapping rows.
> 
> Signed-off-by: Leo Li 
> ---
>  .../drm/amd/display/dc/dml/dcn20/dcn20_fpu.c  | 33 +-
>  .../amd/display/dc/dml/dcn301/dcn301_fpu.c| 36 ++-
>  .../drm/amd/display/dc/dml/dcn31/dcn31_fpu.c  | 64 +++
>  .../amd/display/dc/dml/display_mode_structs.h |  5 ++
>  4 files changed, 82 insertions(+), 56 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c 
> b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
> index c2fec0d85da4..e247b2270b1d 100644
> --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
> +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
> @@ -2015,9 +2015,8 @@ void dcn21_update_bw_bounding_box(struct dc *dc, struct 
> clk_bw_params *bw_params
>  
>   ASSERT(clk_table->num_entries);
>   /* Copy dcn2_1_soc.clock_limits to clock_limits to avoid copying over 
> null states later */
> - for (i = 0; i < dcn2_1_soc.num_states + 1; i++) {
> - dcn2_1_soc.clock_limits[i] = dcn2_1_soc.clock_limits[i];
> - }

Hmm, this for loop didn't make sense. I gave my RB for the previous patch too 
quickly.

> + memcpy(_1_soc._clock_tmp, _1_soc.clock_limits,
> +sizeof(dcn2_1_soc.clock_limits));
>  
>   for (i = 0; i < clk_table->num_entries; i++) {
>   /* loop backwards*/
> @@ -2032,22 +2031,26 @@ void dcn21_update_bw_bounding_box(struct dc *dc, 
> struct clk_bw_params *bw_params
>   if (i == 1)
>   k++;
>  
> - dcn2_1_soc.clock_limits[k].state = k;
> - dcn2_1_soc.clock_limits[k].dcfclk_mhz = 
> clk_table->entries[i].dcfclk_mhz;
> - dcn2_1_soc.clock_limits[k].fabricclk_mhz = 
> clk_table->entries[i].fclk_mhz;
> - dcn2_1_soc.clock_limits[k].socclk_mhz = 
> clk_table->entries[i].socclk_mhz;
> - dcn2_1_soc.clock_limits[k].dram_speed_mts = 
> clk_table->entries[i].memclk_mhz * 2;
> + dcn2_1_soc._clock_tmp[k].state = k;
> + dcn2_1_soc._clock_tmp[k].dcfclk_mhz = 
> clk_table->entries[i].dcfclk_mhz;
> + dcn2_1_soc._clock_tmp[k].fabricclk_mhz = 
> clk_table->entries[i].fclk_mhz;
> + dcn2_1_soc._clock_tmp[k].socclk_mhz = 
> clk_table->entries[i].socclk_mhz;
> + dcn2_1_soc._clock_tmp[k].dram_speed_mts = 
> clk_table->entries[i].memclk_mhz * 2;
>  
> - dcn2_1_soc.clock_limits[k].dispclk_mhz = 
> dcn2_1_soc.clock_limits[closest_clk_lvl].dispclk_mhz;
> - dcn2_1_soc.clock_limits[k].dppclk_mhz = 
> dcn2_1_soc.clock_limits[closest_clk_lvl].dppclk_mhz;
> - dcn2_1_soc.clock_limits[k].dram_bw_per_chan_gbps = 
> dcn2_1_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps;
> - dcn2_1_soc.clock_limits[k].dscclk_mhz = 
> dcn2_1_soc.clock_limits[closest_clk_lvl].dscclk_mhz;
> - dcn2_1_soc.clock_limits[k].dtbclk_mhz = 
> dcn2_1_soc.clock_limits[closest_clk_lvl].dtbclk_mhz;
> - dcn2_1_soc.clock_limits[k].phyclk_d18_mhz = 
> dcn2_1_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz;
> - dcn2_1_soc.clock_limits[k].phyclk_mhz = 
> dcn2_1_soc.clock_limits[closest_clk_lvl].phyclk_mhz;
> + dcn2_1_soc._clock_tmp[k].dispclk_mhz = 
> dcn2_1_soc.clock_limits[closest_clk_lvl].dispclk_mhz;
> + dcn2_1_soc._clock_tmp[k].dppclk_mhz = 
> dcn2_1_soc.clock_limits[closest_clk_lvl].dppclk_mhz;
> + dcn2_1_soc._clock_tmp[k].dram_bw_per_chan_gbps = 
> dcn2_1_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps;
> + dcn2_1_soc._clock_tmp[k].dscclk_mhz = 
> dcn2_1_soc.clock_limits[closest_clk_lvl].dscclk_mhz;
> + dcn2_1_soc._clock_tmp[k].dtbclk_mhz = 
> dcn2_1_soc.clock_limits[closest_clk_lvl].dtbclk_mhz;
> + dcn2_1_soc._clock_tmp[k].phyclk_d18_mhz = 
> dcn2_1_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz;
> + dcn2_1_soc._clock_tmp[k].phyclk_mhz = 
> dcn2_1_soc.clock_limits[closest_clk_lvl].phyclk_mhz;
>  

I see why we need a tmp array and agree that we shouldn't allocate it inside DML
functions.

Reviewed-by: Harry Wentland 

Harry

>   k++;
>   }
> +
> + memcpy(_1_soc.clock_limits, _1_soc._clock_tmp,
> +sizeof(dcn2_1_soc.clock_limits));
> +
>   if (clk_table->num_entries) {
>   dcn2_1_soc.num_states = clk_table->num_entries + 1;
>   /* fill in min DF PState */
> diff --git 

Re: [PATCH] drm/amd/display: Use pre-allocated temp struct for bounding box update

2022-06-08 Thread Rodrigo Siqueira Jordao



On 2022-06-08 13:47, Leo wrote:



On 2022-06-08 13:25, Pillai, Aurabindo wrote:

[AMD Official Use Only - General]


Isnt it cleaner to revert the original patch which removed the temporary 
variable and then instead allocate the clock_limits array on heap, and later 
freed at the end of the function?

--


I don't quite recall the details, but in FPU context, we should not malloc 
since it can fault/sleep. More info here: 
https://yarchive.net/comp/linux/kernel_fp.html


Yeah... we cannot use malloc inside DML. I also think that we can just 
apply this patch on top of amd-staging-drm-next without reverting the 
old patches because we are unaware of any regression caused for those 
patches, afaik.


Thanks
Siqueira



- Leo



Regards,
Jay
--
*From:* Li, Sun peng (Leo) 
*Sent:* Wednesday, June 8, 2022 12:48 PM
*To:* amd-gfx@lists.freedesktop.org 
*Cc:* Wentland, Harry ; Pillai, Aurabindo 
; Siqueira, Rodrigo ; Li, Sun peng 
(Leo) 
*Subject:* [PATCH] drm/amd/display: Use pre-allocated temp struct for bounding 
box update
  
From: Leo Li 


[Why]

There is a theoretical problem in prior patches for reducing the stack
size of *update_bw_bounding_box() functions.

By modifying the soc.clock_limits[n] struct directly, this can cause
unintended behavior as the for loop attempts to swap rows in
clock_limits[n]. A temporary struct is still required to make sure we
stay functinoally equivalent.

[How]

Add a temporary clock_limits table to the SOC struct, and use it when
swapping rows.

Signed-off-by: Leo Li 
---
  .../drm/amd/display/dc/dml/dcn20/dcn20_fpu.c  | 33 +-
  .../amd/display/dc/dml/dcn301/dcn301_fpu.c    | 36 ++-
  .../drm/amd/display/dc/dml/dcn31/dcn31_fpu.c  | 64 +++
  .../amd/display/dc/dml/display_mode_structs.h |  5 ++
  4 files changed, 82 insertions(+), 56 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
index c2fec0d85da4..e247b2270b1d 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
@@ -2015,9 +2015,8 @@ void dcn21_update_bw_bounding_box(struct dc *dc, struct 
clk_bw_params *bw_params
  
  ASSERT(clk_table->num_entries);

  /* Copy dcn2_1_soc.clock_limits to clock_limits to avoid copying over 
null states later */
-   for (i = 0; i < dcn2_1_soc.num_states + 1; i++) {
-   dcn2_1_soc.clock_limits[i] = dcn2_1_soc.clock_limits[i];
-   }
+   memcpy(_1_soc._clock_tmp, _1_soc.clock_limits,
+  sizeof(dcn2_1_soc.clock_limits));
  
  for (i = 0; i < clk_table->num_entries; i++) {

  /* loop backwards*/
@@ -2032,22 +2031,26 @@ void dcn21_update_bw_bounding_box(struct dc *dc, struct 
clk_bw_params *bw_params
  if (i == 1)
  k++;
  
-   dcn2_1_soc.clock_limits[k].state = k;

-   dcn2_1_soc.clock_limits[k].dcfclk_mhz = 
clk_table->entries[i].dcfclk_mhz;
-   dcn2_1_soc.clock_limits[k].fabricclk_mhz = 
clk_table->entries[i].fclk_mhz;
-   dcn2_1_soc.clock_limits[k].socclk_mhz = 
clk_table->entries[i].socclk_mhz;
-   dcn2_1_soc.clock_limits[k].dram_speed_mts = 
clk_table->entries[i].memclk_mhz * 2;
+   dcn2_1_soc._clock_tmp[k].state = k;
+   dcn2_1_soc._clock_tmp[k].dcfclk_mhz = 
clk_table->entries[i].dcfclk_mhz;
+   dcn2_1_soc._clock_tmp[k].fabricclk_mhz = 
clk_table->entries[i].fclk_mhz;
+   dcn2_1_soc._clock_tmp[k].socclk_mhz = 
clk_table->entries[i].socclk_mhz;
+   dcn2_1_soc._clock_tmp[k].dram_speed_mts = 
clk_table->entries[i].memclk_mhz * 2;
  
-   dcn2_1_soc.clock_limits[k].dispclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dispclk_mhz;

-   dcn2_1_soc.clock_limits[k].dppclk_mhz = 
dcn2_1_soc.clock_limits[closest_clk_lvl].dppclk_mhz;
-   

Re: [PATCH] drm/amd/display: Use pre-allocated temp struct for bounding box update

2022-06-08 Thread Leo


On 2022-06-08 13:25, Pillai, Aurabindo wrote:
> [AMD Official Use Only - General]
> 
> 
> Isnt it cleaner to revert the original patch which removed the temporary 
> variable and then instead allocate the clock_limits array on heap, and later 
> freed at the end of the function?
> 
> --

I don't quite recall the details, but in FPU context, we should not malloc 
since it can fault/sleep. More info here: 
https://yarchive.net/comp/linux/kernel_fp.html
- Leo

> 
> Regards,
> Jay
> --
> *From:* Li, Sun peng (Leo) 
> *Sent:* Wednesday, June 8, 2022 12:48 PM
> *To:* amd-gfx@lists.freedesktop.org 
> *Cc:* Wentland, Harry ; Pillai, Aurabindo 
> ; Siqueira, Rodrigo ; Li, 
> Sun peng (Leo) 
> *Subject:* [PATCH] drm/amd/display: Use pre-allocated temp struct for 
> bounding box update
>  
> From: Leo Li 
> 
> [Why]
> 
> There is a theoretical problem in prior patches for reducing the stack
> size of *update_bw_bounding_box() functions.
> 
> By modifying the soc.clock_limits[n] struct directly, this can cause
> unintended behavior as the for loop attempts to swap rows in
> clock_limits[n]. A temporary struct is still required to make sure we
> stay functinoally equivalent.
> 
> [How]
> 
> Add a temporary clock_limits table to the SOC struct, and use it when
> swapping rows.
> 
> Signed-off-by: Leo Li 
> ---
>  .../drm/amd/display/dc/dml/dcn20/dcn20_fpu.c  | 33 +-
>  .../amd/display/dc/dml/dcn301/dcn301_fpu.c    | 36 ++-
>  .../drm/amd/display/dc/dml/dcn31/dcn31_fpu.c  | 64 +++
>  .../amd/display/dc/dml/display_mode_structs.h |  5 ++
>  4 files changed, 82 insertions(+), 56 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c 
> b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
> index c2fec0d85da4..e247b2270b1d 100644
> --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
> +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
> @@ -2015,9 +2015,8 @@ void dcn21_update_bw_bounding_box(struct dc *dc, struct 
> clk_bw_params *bw_params
>  
>  ASSERT(clk_table->num_entries);
>  /* Copy dcn2_1_soc.clock_limits to clock_limits to avoid copying 
> over null states later */
> -   for (i = 0; i < dcn2_1_soc.num_states + 1; i++) {
> -   dcn2_1_soc.clock_limits[i] = dcn2_1_soc.clock_limits[i];
> -   }
> +   memcpy(_1_soc._clock_tmp, _1_soc.clock_limits,
> +  sizeof(dcn2_1_soc.clock_limits));
>  
>  for (i = 0; i < clk_table->num_entries; i++) {
>  /* loop backwards*/
> @@ -2032,22 +2031,26 @@ void dcn21_update_bw_bounding_box(struct dc *dc, 
> struct clk_bw_params *bw_params
>  if (i == 1)
>  k++;
>  
> -   dcn2_1_soc.clock_limits[k].state = k;
> -   dcn2_1_soc.clock_limits[k].dcfclk_mhz = 
> clk_table->entries[i].dcfclk_mhz;
> -   dcn2_1_soc.clock_limits[k].fabricclk_mhz = 
> clk_table->entries[i].fclk_mhz;
> -   dcn2_1_soc.clock_limits[k].socclk_mhz = 
> clk_table->entries[i].socclk_mhz;
> -   dcn2_1_soc.clock_limits[k].dram_speed_mts = 
> clk_table->entries[i].memclk_mhz * 2;
> +   dcn2_1_soc._clock_tmp[k].state = k;
> +   dcn2_1_soc._clock_tmp[k].dcfclk_mhz = 
> clk_table->entries[i].dcfclk_mhz;
> +   dcn2_1_soc._clock_tmp[k].fabricclk_mhz = 
> clk_table->entries[i].fclk_mhz;
> +   dcn2_1_soc._clock_tmp[k].socclk_mhz = 
> clk_table->entries[i].socclk_mhz;
> +   dcn2_1_soc._clock_tmp[k].dram_speed_mts = 
> clk_table->entries[i].memclk_mhz * 2;
>  
> -   dcn2_1_soc.clock_limits[k].dispclk_mhz = 
> dcn2_1_soc.clock_limits[closest_clk_lvl].dispclk_mhz;
> -   dcn2_1_soc.clock_limits[k].dppclk_mhz = 
> dcn2_1_soc.clock_limits[closest_clk_lvl].dppclk_mhz;
> -   dcn2_1_soc.clock_limits[k].dram_bw_per_chan_gbps = 
> dcn2_1_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps;
> -   

Re: [PATCH] drm/amd/display: Use pre-allocated temp struct for bounding box update

2022-06-08 Thread Pillai, Aurabindo
[AMD Official Use Only - General]

Isnt it cleaner to revert the original patch which removed the temporary 
variable and then instead allocate the clock_limits array on heap, and later 
freed at the end of the function?

--

Regards,
Jay

From: Li, Sun peng (Leo) 
Sent: Wednesday, June 8, 2022 12:48 PM
To: amd-gfx@lists.freedesktop.org 
Cc: Wentland, Harry ; Pillai, Aurabindo 
; Siqueira, Rodrigo ; Li, 
Sun peng (Leo) 
Subject: [PATCH] drm/amd/display: Use pre-allocated temp struct for bounding 
box update

From: Leo Li 

[Why]

There is a theoretical problem in prior patches for reducing the stack
size of *update_bw_bounding_box() functions.

By modifying the soc.clock_limits[n] struct directly, this can cause
unintended behavior as the for loop attempts to swap rows in
clock_limits[n]. A temporary struct is still required to make sure we
stay functinoally equivalent.

[How]

Add a temporary clock_limits table to the SOC struct, and use it when
swapping rows.

Signed-off-by: Leo Li 
---
 .../drm/amd/display/dc/dml/dcn20/dcn20_fpu.c  | 33 +-
 .../amd/display/dc/dml/dcn301/dcn301_fpu.c| 36 ++-
 .../drm/amd/display/dc/dml/dcn31/dcn31_fpu.c  | 64 +++
 .../amd/display/dc/dml/display_mode_structs.h |  5 ++
 4 files changed, 82 insertions(+), 56 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
index c2fec0d85da4..e247b2270b1d 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
@@ -2015,9 +2015,8 @@ void dcn21_update_bw_bounding_box(struct dc *dc, struct 
clk_bw_params *bw_params

 ASSERT(clk_table->num_entries);
 /* Copy dcn2_1_soc.clock_limits to clock_limits to avoid copying over 
null states later */
-   for (i = 0; i < dcn2_1_soc.num_states + 1; i++) {
-   dcn2_1_soc.clock_limits[i] = dcn2_1_soc.clock_limits[i];
-   }
+   memcpy(_1_soc._clock_tmp, _1_soc.clock_limits,
+  sizeof(dcn2_1_soc.clock_limits));

 for (i = 0; i < clk_table->num_entries; i++) {
 /* loop backwards*/
@@ -2032,22 +2031,26 @@ void dcn21_update_bw_bounding_box(struct dc *dc, struct 
clk_bw_params *bw_params
 if (i == 1)
 k++;

-   dcn2_1_soc.clock_limits[k].state = k;
-   dcn2_1_soc.clock_limits[k].dcfclk_mhz = 
clk_table->entries[i].dcfclk_mhz;
-   dcn2_1_soc.clock_limits[k].fabricclk_mhz = 
clk_table->entries[i].fclk_mhz;
-   dcn2_1_soc.clock_limits[k].socclk_mhz = 
clk_table->entries[i].socclk_mhz;
-   dcn2_1_soc.clock_limits[k].dram_speed_mts = 
clk_table->entries[i].memclk_mhz * 2;
+   dcn2_1_soc._clock_tmp[k].state = k;
+   dcn2_1_soc._clock_tmp[k].dcfclk_mhz = 
clk_table->entries[i].dcfclk_mhz;
+   dcn2_1_soc._clock_tmp[k].fabricclk_mhz = 
clk_table->entries[i].fclk_mhz;
+   dcn2_1_soc._clock_tmp[k].socclk_mhz = 
clk_table->entries[i].socclk_mhz;
+   dcn2_1_soc._clock_tmp[k].dram_speed_mts = 
clk_table->entries[i].memclk_mhz * 2;

-   dcn2_1_soc.clock_limits[k].dispclk_mhz = 
dcn2_1_soc.clock_limits[closest_clk_lvl].dispclk_mhz;
-   dcn2_1_soc.clock_limits[k].dppclk_mhz = 
dcn2_1_soc.clock_limits[closest_clk_lvl].dppclk_mhz;
-   dcn2_1_soc.clock_limits[k].dram_bw_per_chan_gbps = 
dcn2_1_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps;
-   dcn2_1_soc.clock_limits[k].dscclk_mhz = 
dcn2_1_soc.clock_limits[closest_clk_lvl].dscclk_mhz;
-   dcn2_1_soc.clock_limits[k].dtbclk_mhz = 
dcn2_1_soc.clock_limits[closest_clk_lvl].dtbclk_mhz;
-   dcn2_1_soc.clock_limits[k].phyclk_d18_mhz = 
dcn2_1_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz;
-   dcn2_1_soc.clock_limits[k].phyclk_mhz = 
dcn2_1_soc.clock_limits[closest_clk_lvl].phyclk_mhz;
+   dcn2_1_soc._clock_tmp[k].dispclk_mhz = 
dcn2_1_soc.clock_limits[closest_clk_lvl].dispclk_mhz;
+   dcn2_1_soc._clock_tmp[k].dppclk_mhz = 
dcn2_1_soc.clock_limits[closest_clk_lvl].dppclk_mhz;
+   dcn2_1_soc._clock_tmp[k].dram_bw_per_chan_gbps = 
dcn2_1_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps;
+   dcn2_1_soc._clock_tmp[k].dscclk_mhz = 
dcn2_1_soc.clock_limits[closest_clk_lvl].dscclk_mhz;
+   dcn2_1_soc._clock_tmp[k].dtbclk_mhz = 
dcn2_1_soc.clock_limits[closest_clk_lvl].dtbclk_mhz;
+   dcn2_1_soc._clock_tmp[k].phyclk_d18_mhz = 
dcn2_1_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz;
+   dcn2_1_soc._clock_tmp[k].phyclk_mhz = 
dcn2_1_soc.clock_limits[closest_clk_lvl].phyclk_mhz;

 k++;
 }
+
+   memcpy(_1_soc.clock_limits, _1_soc._clock_tmp,
+  sizeof(dcn2_1_soc.clock_limits));
+
 if (clk_table->num_entries) {

[PATCH] drm/amd/display: Use pre-allocated temp struct for bounding box update

2022-06-08 Thread sunpeng.li
From: Leo Li 

[Why]

There is a theoretical problem in prior patches for reducing the stack
size of *update_bw_bounding_box() functions.

By modifying the soc.clock_limits[n] struct directly, this can cause
unintended behavior as the for loop attempts to swap rows in
clock_limits[n]. A temporary struct is still required to make sure we
stay functinoally equivalent.

[How]

Add a temporary clock_limits table to the SOC struct, and use it when
swapping rows.

Signed-off-by: Leo Li 
---
 .../drm/amd/display/dc/dml/dcn20/dcn20_fpu.c  | 33 +-
 .../amd/display/dc/dml/dcn301/dcn301_fpu.c| 36 ++-
 .../drm/amd/display/dc/dml/dcn31/dcn31_fpu.c  | 64 +++
 .../amd/display/dc/dml/display_mode_structs.h |  5 ++
 4 files changed, 82 insertions(+), 56 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
index c2fec0d85da4..e247b2270b1d 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
@@ -2015,9 +2015,8 @@ void dcn21_update_bw_bounding_box(struct dc *dc, struct 
clk_bw_params *bw_params
 
ASSERT(clk_table->num_entries);
/* Copy dcn2_1_soc.clock_limits to clock_limits to avoid copying over 
null states later */
-   for (i = 0; i < dcn2_1_soc.num_states + 1; i++) {
-   dcn2_1_soc.clock_limits[i] = dcn2_1_soc.clock_limits[i];
-   }
+   memcpy(_1_soc._clock_tmp, _1_soc.clock_limits,
+  sizeof(dcn2_1_soc.clock_limits));
 
for (i = 0; i < clk_table->num_entries; i++) {
/* loop backwards*/
@@ -2032,22 +2031,26 @@ void dcn21_update_bw_bounding_box(struct dc *dc, struct 
clk_bw_params *bw_params
if (i == 1)
k++;
 
-   dcn2_1_soc.clock_limits[k].state = k;
-   dcn2_1_soc.clock_limits[k].dcfclk_mhz = 
clk_table->entries[i].dcfclk_mhz;
-   dcn2_1_soc.clock_limits[k].fabricclk_mhz = 
clk_table->entries[i].fclk_mhz;
-   dcn2_1_soc.clock_limits[k].socclk_mhz = 
clk_table->entries[i].socclk_mhz;
-   dcn2_1_soc.clock_limits[k].dram_speed_mts = 
clk_table->entries[i].memclk_mhz * 2;
+   dcn2_1_soc._clock_tmp[k].state = k;
+   dcn2_1_soc._clock_tmp[k].dcfclk_mhz = 
clk_table->entries[i].dcfclk_mhz;
+   dcn2_1_soc._clock_tmp[k].fabricclk_mhz = 
clk_table->entries[i].fclk_mhz;
+   dcn2_1_soc._clock_tmp[k].socclk_mhz = 
clk_table->entries[i].socclk_mhz;
+   dcn2_1_soc._clock_tmp[k].dram_speed_mts = 
clk_table->entries[i].memclk_mhz * 2;
 
-   dcn2_1_soc.clock_limits[k].dispclk_mhz = 
dcn2_1_soc.clock_limits[closest_clk_lvl].dispclk_mhz;
-   dcn2_1_soc.clock_limits[k].dppclk_mhz = 
dcn2_1_soc.clock_limits[closest_clk_lvl].dppclk_mhz;
-   dcn2_1_soc.clock_limits[k].dram_bw_per_chan_gbps = 
dcn2_1_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps;
-   dcn2_1_soc.clock_limits[k].dscclk_mhz = 
dcn2_1_soc.clock_limits[closest_clk_lvl].dscclk_mhz;
-   dcn2_1_soc.clock_limits[k].dtbclk_mhz = 
dcn2_1_soc.clock_limits[closest_clk_lvl].dtbclk_mhz;
-   dcn2_1_soc.clock_limits[k].phyclk_d18_mhz = 
dcn2_1_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz;
-   dcn2_1_soc.clock_limits[k].phyclk_mhz = 
dcn2_1_soc.clock_limits[closest_clk_lvl].phyclk_mhz;
+   dcn2_1_soc._clock_tmp[k].dispclk_mhz = 
dcn2_1_soc.clock_limits[closest_clk_lvl].dispclk_mhz;
+   dcn2_1_soc._clock_tmp[k].dppclk_mhz = 
dcn2_1_soc.clock_limits[closest_clk_lvl].dppclk_mhz;
+   dcn2_1_soc._clock_tmp[k].dram_bw_per_chan_gbps = 
dcn2_1_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps;
+   dcn2_1_soc._clock_tmp[k].dscclk_mhz = 
dcn2_1_soc.clock_limits[closest_clk_lvl].dscclk_mhz;
+   dcn2_1_soc._clock_tmp[k].dtbclk_mhz = 
dcn2_1_soc.clock_limits[closest_clk_lvl].dtbclk_mhz;
+   dcn2_1_soc._clock_tmp[k].phyclk_d18_mhz = 
dcn2_1_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz;
+   dcn2_1_soc._clock_tmp[k].phyclk_mhz = 
dcn2_1_soc.clock_limits[closest_clk_lvl].phyclk_mhz;
 
k++;
}
+
+   memcpy(_1_soc.clock_limits, _1_soc._clock_tmp,
+  sizeof(dcn2_1_soc.clock_limits));
+
if (clk_table->num_entries) {
dcn2_1_soc.num_states = clk_table->num_entries + 1;
/* fill in min DF PState */
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c
index 62cf283d9f41..e4863f0bf0f6 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c
@@ -254,6 +254,9 @@ void dcn301_update_bw_bounding_box(struct dc *dc, struct 
clk_bw_params *bw_param
 
dc_assert_fp_enabled();
 
+   

Re: [RFC PATCH v2 00/27] DRM.debug on DYNAMIC_DEBUG, add trace events

2022-06-08 Thread Daniel Vetter
On Mon, Jun 06, 2022 at 08:59:36AM -0600, jim.cro...@gmail.com wrote:
> On Wed, May 25, 2022 at 9:02 AM Daniel Vetter  wrote:
> 
> > On Mon, May 16, 2022 at 04:56:13PM -0600, Jim Cromie wrote:
> > > DRM.debug API is 23 macros, issuing 10 exclusive categories of debug
> > > messages.  By rough count, they are used 5140 times in the kernel.
> > > These all call drm_dbg or drm_devdbg, which call drm_debug_enabled(),
> > > which checks bits in global __drm_debug.  Some of these are page-flips
> > > and vblanks, and get called often.
> > >
> > > DYNAMIC_DEBUG (with CONFIG_JUMP_LABEL) is built to avoid this kind of
> > > work, with NOOPd jump/callsites.
> > >
> > > This patchset is RFC because:
> > > - it touches 2.5 subsystems: dyndbg, drm, tracefs (new events)
> > > - dyndbg class support is built for drm, needs it for validation
> > > - new api, used by drm
> > > - big memory impact, with 5100 new pr-debug callsites.
> > > - drm class bikeshedding opportunities
> > > - others, names etc.
> >
> > Thanks a lot for keeping on pushing this!
> 
> 
> > >
> > > DYNAMIC_DEBUG:
> > >
> 
> 
> 
> > > RFC:
> > >
> > > dynamic_debug_register_classes() cannot act early enough to be in
> > > effect at module-load.  So this will not work as you'd reasonably
> > > expect:
> > >
> > >   modprobe test_dynamic_debug dyndbg='+pfm; class FOO +pfmlt'
> > >
> > > The 1st query:+pfm will be enabled during load, but in the 2nd query,
> > > "class FOO" will be unknown at load time.  Early class enablement
> > > would be nice.  DYNAMIC_DEBUG_CLASSES is a static initializer, which
> > > is certainly early enough, but Im missing a trick, suggestions?
> >
> > So maybe I'm just totally overloading this work here so feel free to
> > ignore or postpone, but: Could we do the dynamic_debug_register_classes()
> > automatically at module load as a new special section? And then throw in a
> > bit of kbuild so that in a given subsystem every driver gets the same
> > class names by default and everything would just work, without having to
> > sprinkle calls to dynamic_debug_register_classes() all over the place?
> >
> 
> This is now done; Ive added __dyndbg_classes section.
> load_module() now grabs it from the .ko
> and ddebug_add_module() attaches it to the module's ddebug_table record.
> for builtins, dynamic_debug_init feeds the builtin class-maps to
> ddebug_add_module
> 
> bash-5.1# modprobe test_dynamic_debug dyndbg="class FOO +p"
> [   88.374722] dyndbg: class[0]: nm:test_dynamic_debug base:20 len:7 ty:1
> [   88.375158] dyndbg:  0: EMERG
> [   88.375345] dyndbg:  1: DANGER
> [   88.375540] dyndbg:  2: ERROR
> [   88.375726] dyndbg:  3: WARNING
> [   88.375930] dyndbg:  4: NOTICE
> [   88.376130] dyndbg:  5: INFO
> [   88.376310] dyndbg:  6: DEBUG
> [   88.376499] dyndbg: class[1]: nm:test_dynamic_debug base:12 len:3 ty:1
> [   88.376903] dyndbg:  0: ONE
> [   88.377079] dyndbg:  1: TWO
> [   88.377253] dyndbg:  2: THREE
> [   88.377441] dyndbg: class[2]: nm:test_dynamic_debug base:8 len:3 ty:0
> [   88.377837] dyndbg:  0: bing
> [   88.378022] dyndbg:  1: bong
> [   88.378203] dyndbg:  2: boom
> [   88.378387] dyndbg: class[3]: nm:test_dynamic_debug base:4 len:3 ty:0
> [   88.378800] dyndbg:  0: Foo
> [   88.378986] dyndbg:  1: Bar
> [   88.379167] dyndbg:  2: Buzz
> [   88.379348] dyndbg: class[4]: nm:test_dynamic_debug base:0 len:3 ty:0
> [   88.379757] dyndbg:  0: FOO
> [   88.379938] dyndbg:  1: BAR
> [   88.380136] dyndbg:  2: BUZZ
> [   88.380410] dyndbg: module:test_dynamic_debug attached 5 classes
> [   88.380881] dyndbg:  24 debug prints in module test_dynamic_debug
> [   88.381315] dyndbg: module: test_dynamic_debug dyndbg="class FOO +p"
> [   88.381714] dyndbg: query 0: "class FOO +p" mod:test_dynamic_debug
> [   88.382109] dyndbg: split into words: "class" "FOO" "+p"
> [   88.382445] dyndbg: op='+'
> [   88.382616] dyndbg: flags=0x1
> [   88.382802] dyndbg: *flagsp=0x1 *maskp=0x
> [   88.383101] dyndbg: parsed: func="" file="" module="test_dynamic_debug"
> format="" lineno=0-0 class=FOO
> [   88.383740] dyndbg: applied: func="" file="" module="test_dynamic_debug"
> format="" lineno=0-0 class=FOO
> [   88.384324] dyndbg: processed 1 queries, with 2 matches, 0 errs
> bash-5.1#
> 
> so its working at module-load time.

Awesome!

> > For the entire class approach, did you spot another subsystem that could
> > benefit from this and maybe make a more solid case that this is something
> > good?
> >
> 
> I had been working on the premise that ~4k drm*dbg callsites was a good
> case.

Oh I'm happy with just drm, but occasionally we've done stuff in drm that
the wider kernel community found a bit silly. So bit more acks/validation
from outside the dri-devel echo chamber would be great, whatever form it
is.

> verbosity-levels - with x missing.
> 
> the next revision adds something, which "kinda works".
> But I think I'll rip it out, and do this simpler approach instead:
> 
> implement a verbose/levels  param & callback, which 

Re: [PATCH 07/16] drm/amd/display: Cap OLED brightness per max frame-average luminance

2022-06-08 Thread Aaron Ma

Hi Roman:

Can the panel achieve the max peak luminance
if it is limited in frame-average luminance?

Regards,
Aaron


Re: [PATCH] drm/amdgpu: fix limiting AV1 to the first instance on VCN3

2022-06-08 Thread Alex Deucher
I'll push this with the TLB fix.

Alex

On Wed, Jun 8, 2022 at 9:47 AM Christian König
 wrote:
>
> I need to look into this more deeply when I'm back from sick leave.
>
> Till then this workaround should be sufficient since VCN3 is the only
> callback which tries to adjust the instance.
>
> Regards,
> Christian.
>
> Am 07.06.22 um 22:22 schrieb Alex Deucher:
> > We'll need to implement the parse callbacks for vcn4 as well if we
> > haven't already.
> >
> > Alex
> >
> > On Tue, Jun 7, 2022 at 4:20 PM Dong, Ruijing  wrote:
> >> [AMD Official Use Only - General]
> >>
> >> I can see for VCN4, AV1 dec/enc also need to limit to the first instance.
> >>
> >> Thanks,
> >> Ruijing
> >>
> >> -Original Message-
> >> From: amd-gfx  On Behalf Of Alex 
> >> Deucher
> >> Sent: Friday, June 3, 2022 10:12 AM
> >> To: Christian König 
> >> Cc: Pelloux-Prayer, Pierre-Eric ; 
> >> amd-gfx list 
> >> Subject: Re: [PATCH] drm/amdgpu: fix limiting AV1 to the first instance on 
> >> VCN3
> >>
> >> Do the other uvd/vce/vcn ring parse functions need a similar fix?
> >>
> >> Alex
> >>
> >>
> >> On Fri, Jun 3, 2022 at 10:08 AM Alex Deucher  wrote:
> >>> On Fri, Jun 3, 2022 at 8:10 AM Christian König
> >>>  wrote:
>  Am 03.06.22 um 14:08 schrieb Pierre-Eric Pelloux-Prayer:
> > Hi Christian,
> >
> > The patch is: Tested-by: Pierre-Eric Pelloux-Prayer
> > 
> >
> > Could you add a reference to 
> > https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgitlab.freedesktop.org%2Fdrm%2Famd%2F-%2Fissues%2F2037data=05%7C01%7CRuijing.Dong%40amd.com%7C5ba73dfe91ba47e21dd608da456b0609%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637898623221806051%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000%7C%7C%7Csdata=WgIZD299Xe0XVG%2Ftb2rn14njS%2FgHIhtIHIDAZ2Fj40k%3Dreserved=0
> >  in the commit message?
>  Sure, can you also give me an rb or acked-by so that I can push it?
> >>> Reviewed-by: Alex Deucher 
> >>>
>  Thanks,
>  Christian.
> 
> > Thanks,
> > Pierre-Eric
> >
> > On 03/06/2022 12:21, Christian König wrote:
> >> The job is not yet initialized here.
> >>
> >> Signed-off-by: Christian König 
> >> Fixes: 1027d5d791b7 ("drm/amdgpu: use job and ib structures
> >> directly in CS parsers")
> >> ---
> >>drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c | 17 +++--
> >>1 file changed, 7 insertions(+), 10 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
> >> b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
> >> index 3cabceee5f57..39405f0db824 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
> >> +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
> >> @@ -1761,23 +1761,21 @@ static const struct amdgpu_ring_funcs 
> >> vcn_v3_0_dec_sw_ring_vm_funcs = {
> >>   .emit_reg_write_reg_wait = 
> >> amdgpu_ring_emit_reg_write_reg_wait_helper,
> >>};
> >>
> >> -static int vcn_v3_0_limit_sched(struct amdgpu_cs_parser *p,
> >> -struct amdgpu_job *job)
> >> +static int vcn_v3_0_limit_sched(struct amdgpu_cs_parser *p)
> >>{
> >>   struct drm_gpu_scheduler **scheds;
> >>
> >>   /* The create msg must be in the first IB submitted */
> >> -if (atomic_read(>base.entity->fence_seq))
> >> +if (atomic_read(>entity->fence_seq))
> >>   return -EINVAL;
> >>
> >>   scheds = p->adev->gpu_sched[AMDGPU_HW_IP_VCN_DEC]
> >>   [AMDGPU_RING_PRIO_DEFAULT].sched;
> >> -drm_sched_entity_modify_sched(job->base.entity, scheds, 1);
> >> +drm_sched_entity_modify_sched(p->entity, scheds, 1);
> >>   return 0;
> >>}
> >>
> >> -static int vcn_v3_0_dec_msg(struct amdgpu_cs_parser *p, struct 
> >> amdgpu_job *job,
> >> -uint64_t addr)
> >> +static int vcn_v3_0_dec_msg(struct amdgpu_cs_parser *p, uint64_t
> >> +addr)
> >>{
> >>   struct ttm_operation_ctx ctx = { false, false };
> >>   struct amdgpu_bo_va_mapping *map; @@ -1848,7 +1846,7 @@
> >> static int vcn_v3_0_dec_msg(struct amdgpu_cs_parser *p, struct 
> >> amdgpu_job *job,
> >>   if (create[0] == 0x7 || create[0] == 0x10 || create[0] 
> >> == 0x11)
> >>   continue;
> >>
> >> -r = vcn_v3_0_limit_sched(p, job);
> >> +r = vcn_v3_0_limit_sched(p);
> >>   if (r)
> >>   goto out;
> >>   }
> >> @@ -1862,7 +1860,7 @@ static int 
> >> vcn_v3_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,
> >>  struct amdgpu_job *job,
> >>  struct amdgpu_ib *ib)
> >>{
> >> -struct amdgpu_ring *ring = to_amdgpu_ring(job->base.sched);
> >> +

Re: [PATCH] drm/amdgpu: always flush the TLB on gfx8

2022-06-08 Thread Alex Deucher
yes. will do.

Alex

On Wed, Jun 8, 2022 at 9:58 AM Christian König
 wrote:
>
> Am 07.06.22 um 22:27 schrieb Alex Deucher:
> > On Fri, Jun 3, 2022 at 9:05 AM Christian König
> >  wrote:
> >> The TLB on GFX8 stores each block of 8 PTEs where any of the valid bits
> >> are set.
> >>
> >> Signed-off-by: Christian König 
> > Reviewed-by: Alex Deucher 
>
> Alex could you push this? It's an important fix, but I'm seriously not
> feeling well at the moment.
>
> Thanks,
> Christian.
>
> >
> >> ---
> >>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 5 +
> >>   1 file changed, 5 insertions(+)
> >>
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
> >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> >> index 9596c22fded6..b747488c28ad 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> >> @@ -847,6 +847,11 @@ int amdgpu_vm_update_range(struct amdgpu_device 
> >> *adev, struct amdgpu_vm *vm,
> >>  flush_tlb |= adev->gmc.xgmi.num_physical_nodes &&
> >>   adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 0);
> >>
> >> +   /*
> >> +* On GFX8 and older any 8 PTE block with a valid bit set enters 
> >> the TLB
> >> +*/
> >> +   flush_tlb |= adev->ip_versions[GC_HWIP][0] < IP_VERSION(9, 0, 0);
> >> +
> >>  memset(, 0, sizeof(params));
> >>  params.adev = adev;
> >>  params.vm = vm;
> >> --
> >> 2.25.1
> >>
>


Re: [PATCH] drm/amdgpu: always flush the TLB on gfx8

2022-06-08 Thread Christian König

Am 07.06.22 um 22:27 schrieb Alex Deucher:

On Fri, Jun 3, 2022 at 9:05 AM Christian König
 wrote:

The TLB on GFX8 stores each block of 8 PTEs where any of the valid bits
are set.

Signed-off-by: Christian König 

Reviewed-by: Alex Deucher 


Alex could you push this? It's an important fix, but I'm seriously not 
feeling well at the moment.


Thanks,
Christian.




---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 5 +
  1 file changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 9596c22fded6..b747488c28ad 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -847,6 +847,11 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,
 flush_tlb |= adev->gmc.xgmi.num_physical_nodes &&
  adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 0);

+   /*
+* On GFX8 and older any 8 PTE block with a valid bit set enters the TLB
+*/
+   flush_tlb |= adev->ip_versions[GC_HWIP][0] < IP_VERSION(9, 0, 0);
+
 memset(, 0, sizeof(params));
 params.adev = adev;
 params.vm = vm;
--
2.25.1





Re: [PATCH] drm/amdgpu/display: Remove unnecessary typecasts and fix build issues

2022-06-08 Thread Leo



On 2022-06-07 13:58, Aurabindo Pillai wrote:
> 
> 
> On 2022-06-07 11:34, Leo wrote:
>>
>>
>> On 2022-06-07 05:40, Chandan Vurdigere Nataraj wrote:
>>> [Why]
>>> Getting below errors:
>>> drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn32/display_mode_vba_32.c:1414:5:
>>>  error: implicit conversion from enumeration type 'enum 
>>> scan_direction_class' to different enumeration type 'enum 
>>> dm_rotation_angle' [-Werror,-Wenum-conversion]
>>>  mode_lib->vba.SourceScan[k],
>>>  ^~~
>>> drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn32/display_mode_vba_32.c:1744:22:
>>>  error: implicit conversion from enumeration type 'enum 
>>> scan_direction_class' to different enumeration type 'enum 
>>> dm_rotation_angle' [-Werror,-Wenum-conversion]
>>>  && (!(!IsVertical(mode_lib->vba.SourceScan[k])) || 
>>> mode_lib->vba.DCCEnable[k] == true)) {
>>>     ~~ ^~~
>>> 2 errors generated.
>>>
>>> drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn32/display_mode_vba_util_32.c:5484:18:
>>>  error: implicit conversion from enumeration type 'RequestType' to 
>>> different enumeration type 'enum RequestType' [-Werror,-Wenum-conversion]
>>>  RequestLuma = REQ_256Bytes;
>>>  ~ ^~~~
>>> 18 errors of similar kind
>>>
>>> [How]
>>> 1. Add typecast at relevant places
>>> 2. Move the enum RequestType definition ahead of declarations
>>>
>>> Signed-off-by: Chandan Vurdigere Nataraj 
>>>
>>> diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c 
>>> b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
>>> index b77a1ae792d1..5828e60f291d 100644
>>> --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
>>> +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
>>> @@ -1411,7 +1411,7 @@ static void 
>>> DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
>>>   v->BytePerPixelC[k],
>>>   v->BytePerPixelDETY[k],
>>>   v->BytePerPixelDETC[k],
>>> -    mode_lib->vba.SourceScan[k],
>>> +    (enum dm_rotation_angle) mode_lib->vba.SourceScan[k],
>>
>> Hi Jay,
>>
>> This seems fishy, dm_rotation_angle and scan_dirrection_class are very 
>> different enums.
>> Comparing dml32_CalculateDCCConfiguration() with 
>> CalculateDCCConfiguration(), it seems dm_rotation_angle is new for DCN32. Is 
>> passing vba.SourceScan correct here?
> 
> I agree. It should be typecast to scan_direction_class

>From the build error, it looks like dml32_CalculateDCCConfiguration() wants 
>type `enum dm_rotation_amgle`, but vba.SourceScan[n] is of type `enum 
>scan_direction_class`. The error is complaining that we're implicitly casting 
>from scan_direction_class to dm_rotation_angle. In otherwords, this was an 
>issue prior to Chandan's fix.

I think we'll need to address this separately, but for now this fixes a build 
on chromeos with clang 14 that I've been able to reproduce. Elevating my ack to:

Reviewed-by: Leo Li 

Thanks!
Leo

>>
>> One more comment below.
>>
>>>   /* Output */
>>>   >DCCYMaxUncompressedBlock[k],
>>>   >DCCCMaxUncompressedBlock[k],
>>> @@ -1741,7 +1741,8 @@ void 
>>> dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
>>>   mode_lib->vba.SourceFormatPixelAndScanSupport = true;
>>>   for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
>>>   if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear
>>> -    && (!(!IsVertical(mode_lib->vba.SourceScan[k])) || 
>>> mode_lib->vba.DCCEnable[k] == true)) {
>>> +    && (!(!IsVertical((enum dm_rotation_angle) 
>>> mode_lib->vba.SourceScan[k]))
>>> +    || mode_lib->vba.DCCEnable[k] == true)) {
>>>   mode_lib->vba.SourceFormatPixelAndScanSupport = false;
>>>   }
>>>   }
>>> diff --git 
>>> a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c 
>>> b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
>>> index 6509a84eeb64..07f3a85f8edf 100644
>>> --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
>>> +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
>>> @@ -5314,9 +5314,15 @@ void dml32_CalculateDCCConfiguration(
>>>   unsigned int    *IndependentBlockLuma,
>>>   unsigned int    *IndependentBlockChroma)
>>>   {
>>> +    typedef enum {
>>> +    REQ_256Bytes,
>>> +    REQ_128BytesNonContiguous,
>>> +    REQ_128BytesContiguous,
>>> +    REQ_NA
>>> +    } RequestType;
>>>   -    enum RequestType   RequestLuma;
>>> -    enum RequestType   RequestChroma;
>>> +    RequestType   RequestLuma;
>>> +    RequestType   RequestChroma;
>>
>> This might need a wider 

Re: [PATCH] drm/amdgpu: fix limiting AV1 to the first instance on VCN3

2022-06-08 Thread Christian König

I need to look into this more deeply when I'm back from sick leave.

Till then this workaround should be sufficient since VCN3 is the only 
callback which tries to adjust the instance.


Regards,
Christian.

Am 07.06.22 um 22:22 schrieb Alex Deucher:

We'll need to implement the parse callbacks for vcn4 as well if we
haven't already.

Alex

On Tue, Jun 7, 2022 at 4:20 PM Dong, Ruijing  wrote:

[AMD Official Use Only - General]

I can see for VCN4, AV1 dec/enc also need to limit to the first instance.

Thanks,
Ruijing

-Original Message-
From: amd-gfx  On Behalf Of Alex Deucher
Sent: Friday, June 3, 2022 10:12 AM
To: Christian König 
Cc: Pelloux-Prayer, Pierre-Eric ; amd-gfx list 

Subject: Re: [PATCH] drm/amdgpu: fix limiting AV1 to the first instance on VCN3

Do the other uvd/vce/vcn ring parse functions need a similar fix?

Alex


On Fri, Jun 3, 2022 at 10:08 AM Alex Deucher  wrote:

On Fri, Jun 3, 2022 at 8:10 AM Christian König
 wrote:

Am 03.06.22 um 14:08 schrieb Pierre-Eric Pelloux-Prayer:

Hi Christian,

The patch is: Tested-by: Pierre-Eric Pelloux-Prayer


Could you add a reference to 
https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgitlab.freedesktop.org%2Fdrm%2Famd%2F-%2Fissues%2F2037data=05%7C01%7CRuijing.Dong%40amd.com%7C5ba73dfe91ba47e21dd608da456b0609%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637898623221806051%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000%7C%7C%7Csdata=WgIZD299Xe0XVG%2Ftb2rn14njS%2FgHIhtIHIDAZ2Fj40k%3Dreserved=0
 in the commit message?

Sure, can you also give me an rb or acked-by so that I can push it?

Reviewed-by: Alex Deucher 


Thanks,
Christian.


Thanks,
Pierre-Eric

On 03/06/2022 12:21, Christian König wrote:

The job is not yet initialized here.

Signed-off-by: Christian König 
Fixes: 1027d5d791b7 ("drm/amdgpu: use job and ib structures
directly in CS parsers")
---
   drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c | 17 +++--
   1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
index 3cabceee5f57..39405f0db824 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
@@ -1761,23 +1761,21 @@ static const struct amdgpu_ring_funcs 
vcn_v3_0_dec_sw_ring_vm_funcs = {
  .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
   };

-static int vcn_v3_0_limit_sched(struct amdgpu_cs_parser *p,
-struct amdgpu_job *job)
+static int vcn_v3_0_limit_sched(struct amdgpu_cs_parser *p)
   {
  struct drm_gpu_scheduler **scheds;

  /* The create msg must be in the first IB submitted */
-if (atomic_read(>base.entity->fence_seq))
+if (atomic_read(>entity->fence_seq))
  return -EINVAL;

  scheds = p->adev->gpu_sched[AMDGPU_HW_IP_VCN_DEC]
  [AMDGPU_RING_PRIO_DEFAULT].sched;
-drm_sched_entity_modify_sched(job->base.entity, scheds, 1);
+drm_sched_entity_modify_sched(p->entity, scheds, 1);
  return 0;
   }

-static int vcn_v3_0_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job *job,
-uint64_t addr)
+static int vcn_v3_0_dec_msg(struct amdgpu_cs_parser *p, uint64_t
+addr)
   {
  struct ttm_operation_ctx ctx = { false, false };
  struct amdgpu_bo_va_mapping *map; @@ -1848,7 +1846,7 @@
static int vcn_v3_0_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job *job,
  if (create[0] == 0x7 || create[0] == 0x10 || create[0] == 0x11)
  continue;

-r = vcn_v3_0_limit_sched(p, job);
+r = vcn_v3_0_limit_sched(p);
  if (r)
  goto out;
  }
@@ -1862,7 +1860,7 @@ static int vcn_v3_0_ring_patch_cs_in_place(struct 
amdgpu_cs_parser *p,
 struct amdgpu_job *job,
 struct amdgpu_ib *ib)
   {
-struct amdgpu_ring *ring = to_amdgpu_ring(job->base.sched);
+struct amdgpu_ring *ring =
+ to_amdgpu_ring(p->entity->rq->sched);
  uint32_t msg_lo = 0, msg_hi = 0;
  unsigned i;
  int r;
@@ -1881,8 +1879,7 @@ static int vcn_v3_0_ring_patch_cs_in_place(struct 
amdgpu_cs_parser *p,
  msg_hi = val;
  } else if (reg == PACKET0(p->adev->vcn.internal.cmd, 0) &&
 val == 0) {
-r = vcn_v3_0_dec_msg(p, job,
- ((u64)msg_hi) << 32 | msg_lo);
+r = vcn_v3_0_dec_msg(p, ((u64)msg_hi) << 32
+ | msg_lo);
  if (r)
  return r;
  }





RE: [PATCH] drm/amdgpu/display: Remove unnecessary typecasts and fix build issues

2022-06-08 Thread VURDIGERENATARAJ, CHANDAN
Hi,

>> 
>> 
>> On 2022-06-07 05:40, Chandan Vurdigere Nataraj wrote:
>>> [Why]
>>> Getting below errors:
>>> drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn32/display_mode_vba_32.c:1414:5:
>>>  error: implicit conversion from enumeration type 'enum 
>>> scan_direction_class' to different enumeration type 'enum 
>>> dm_rotation_angle' [-Werror,-Wenum-conversion]
>>>  mode_lib->vba.SourceScan[k],
>>>  ^~~
>>> drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn32/display_mode_vba_32.c:1744:22:
>>>  error: implicit conversion from enumeration type 'enum 
>>> scan_direction_class' to different enumeration type 'enum 
>>> dm_rotation_angle' [-Werror,-Wenum-conversion]
>>>  && (!(!IsVertical(mode_lib->vba.SourceScan[k])) || 
>>> mode_lib->vba.DCCEnable[k] == true)) {
>>> ~~ 
>>> ^~~
>>> 2 errors generated.
>>>
>>> drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn32/display_mode_vba_util_32.c:5484:18:
>>>  error: implicit conversion from enumeration type 'RequestType' to 
>>> different enumeration type 'enum RequestType' [-Werror,-Wenum-conversion]
>>>  RequestLuma = REQ_256Bytes;
>>>  ~ ^~~~
>>> 18 errors of similar kind
>>>
>>> [How]
>>> 1. Add typecast at relevant places
>>> 2. Move the enum RequestType definition ahead of declarations
>>>
>>> Signed-off-by: Chandan Vurdigere Nataraj 
>>> 
>>>
>>> diff --git 
>>> a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c 
>>> b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
>>> index b77a1ae792d1..5828e60f291d 100644
>>> --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
>>> +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
>>> @@ -1411,7 +1411,7 @@ static void 
>>> DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
>>> v->BytePerPixelC[k],
>>> v->BytePerPixelDETY[k],
>>> v->BytePerPixelDETC[k],
>>> -   mode_lib->vba.SourceScan[k],
>>> +   (enum dm_rotation_angle) 
>>> mode_lib->vba.SourceScan[k],
>> 
>> Hi Jay,
>> 
>> This seems fishy, dm_rotation_angle and scan_dirrection_class are very 
>> different enums.
>> Comparing dml32_CalculateDCCConfiguration() with 
>> CalculateDCCConfiguration(), it seems dm_rotation_angle is new for DCN32. Is 
>> passing vba.SourceScan correct here?
>
>I agree. It should be typecast to scan_direction_class

Let me change and re-submit. Thanks for the inputs.

>> 
>> One more comment below.
>> 
>>> /* Output */
>>> >DCCYMaxUncompressedBlock[k],
>>> >DCCCMaxUncompressedBlock[k], @@ -1741,7 
>>> +1741,8 @@ void 
>>> dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
>>> mode_lib->vba.SourceFormatPixelAndScanSupport = true;
>>> for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
>>> if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear
>>> -   && (!(!IsVertical(mode_lib->vba.SourceScan[k])) || 
>>> mode_lib->vba.DCCEnable[k] == true)) {
>>> +   && (!(!IsVertical((enum dm_rotation_angle) 
>>> mode_lib->vba.SourceScan[k]))
>>> +   || mode_lib->vba.DCCEnable[k] == true)) {
>>> mode_lib->vba.SourceFormatPixelAndScanSupport = false;
>>> }
>>> }
>>> diff --git 
>>> a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c 
>>> b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
>>> index 6509a84eeb64..07f3a85f8edf 100644
>>> --- 
>>> a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
>>> +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_
>>> +++ 32.c
>>> @@ -5314,9 +5314,15 @@ void dml32_CalculateDCCConfiguration(
>>> unsigned int*IndependentBlockLuma,
>>> unsigned int*IndependentBlockChroma)
>>>   {
>>> +   typedef enum {
>>> +   REQ_256Bytes,
>>> +   REQ_128BytesNonContiguous,
>>> +   REQ_128BytesContiguous,
>>> +   REQ_NA
>>> +   } RequestType;
>>>   
>>> -   enum RequestType   RequestLuma;
>>> -   enum RequestType   RequestChroma;
>>> +   RequestType   RequestLuma;
>>> +   RequestType   RequestChroma;
>> 
>> This might need a wider cleanup, enum RequestType is defined in 
>> display_mode_enums.h and is already included in all the display_mode_vba*.c 
>> files I've come across. Unless I'm missing something, we shouldn't need to 
>> redefine RequestType.
>> 
>> That said, there doesn't seem to be any functional change, and it 
>> fixes a build error. So
>
>I'm unable to repro this error. Are you using clang? If so, which version ?

Re: [PATCH 3/3] drm/amdkfd: use existing VM helper for PD and PT validation in SVM

2022-06-08 Thread Felix Kuehling

Am 2022-06-07 um 22:21 schrieb Lang Yu:

On 06/07/ , Felix Kuehling wrote:

Am 2022-06-07 um 05:59 schrieb Lang Yu:

This will remove some redundant codes.

Signed-off-by: Lang Yu 

The redundancy is quite small, and amdgpu_amdkfd_gpuvm_validate_pt_pd_bos
and amdgpu_amdkfd_bo_validate are quite a bit more complex and handle more
different cases. Someone changing those functions in the future may not
realize the effect that may have on the SVM code.

I'd prefer to keep the svm_range_bo_validate function in kfd_svm.c to make
the code easier to understand and maintain. If anything, I'd move it closer
to where its used, because it's only used in one place.

Thanks for your comments. I got it. By the way,
is it necessary to update vm->pd_phys_addr here?
I noticed that vm->pd_phys_addr is updated in
vm_validate_pt_pd_bos()? Thanks!


It's not needed here. The only time we need to update this is, when we 
re-enable queues after a PD eviction. This is handled by the PD 
validation in amdgpu_amdkfd_gpuvm_restore_process_bos -> 
process_validate_vms -> vm_validate_pt_pd_bos.


Regards,
  Felix




Regards,
Lang


Regards,
   Felix



---
   drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 13 +
   1 file changed, 1 insertion(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index d6fc00d51c8c..03e07d1d1d1a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -625,15 +625,6 @@ svm_range_get_pdd_by_adev(struct svm_range *prange, struct 
amdgpu_device *adev)
return kfd_process_device_from_gpuidx(p, gpu_idx);
   }
-static int svm_range_bo_validate(void *param, struct amdgpu_bo *bo)
-{
-   struct ttm_operation_ctx ctx = { false, false };
-
-   amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM);
-
-   return ttm_bo_validate(>tbo, >placement, );
-}
-
   static int
   svm_range_check_attr(struct kfd_process *p,
 uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs)
@@ -1428,9 +1419,7 @@ static int svm_range_reserve_bos(struct 
svm_validate_context *ctx)
goto unreserve_out;
}
-   r = amdgpu_vm_validate_pt_bos(pdd->dev->adev,
- drm_priv_to_vm(pdd->drm_priv),
- svm_range_bo_validate, NULL);
+   r = 
amdgpu_amdkfd_gpuvm_validate_pt_pd_bos(drm_priv_to_vm(pdd->drm_priv));
if (r) {
pr_debug("failed %d validate pt bos\n", r);
goto unreserve_out;


Re: [PATCH] drm/amdgpu/display: Remove unnecessary typecasts and fix build issues

2022-06-08 Thread Rodrigo Siqueira Jordao




On 2022-06-07 13:58, Aurabindo Pillai wrote:



On 2022-06-07 11:34, Leo wrote:



On 2022-06-07 05:40, Chandan Vurdigere Nataraj wrote:

[Why]
Getting below errors:
drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn32/display_mode_vba_32.c:1414:5: 
error: implicit conversion from enumeration type 'enum 
scan_direction_class' to different enumeration type 'enum 
dm_rotation_angle' [-Werror,-Wenum-conversion]

 mode_lib->vba.SourceScan[k],
 ^~~
drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn32/display_mode_vba_32.c:1744:22: 
error: implicit conversion from enumeration type 'enum 
scan_direction_class' to different enumeration type 'enum 
dm_rotation_angle' [-Werror,-Wenum-conversion]
 && 
(!(!IsVertical(mode_lib->vba.SourceScan[k])) || 
mode_lib->vba.DCCEnable[k] == true)) {

    ~~ ^~~
2 errors generated.

drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn32/display_mode_vba_util_32.c:5484:18: 
error: implicit conversion from enumeration type 'RequestType' to 
different enumeration type 'enum RequestType' 
[-Werror,-Wenum-conversion]

 RequestLuma = REQ_256Bytes;
 ~ ^~~~
18 errors of similar kind

[How]
1. Add typecast at relevant places
2. Move the enum RequestType definition ahead of declarations

Signed-off-by: Chandan Vurdigere Nataraj 



diff --git 
a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c

index b77a1ae792d1..5828e60f291d 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
@@ -1411,7 +1411,7 @@ static void 
DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman

  v->BytePerPixelC[k],
  v->BytePerPixelDETY[k],
  v->BytePerPixelDETC[k],
-    mode_lib->vba.SourceScan[k],
+    (enum dm_rotation_angle) mode_lib->vba.SourceScan[k],


Hi Jay,

This seems fishy, dm_rotation_angle and scan_dirrection_class are very 
different enums.
Comparing dml32_CalculateDCCConfiguration() with 
CalculateDCCConfiguration(), it seems dm_rotation_angle is new for 
DCN32. Is passing vba.SourceScan correct here?


I agree. It should be typecast to scan_direction_class


One more comment below.


  /* Output */
  >DCCYMaxUncompressedBlock[k],
  >DCCCMaxUncompressedBlock[k],
@@ -1741,7 +1741,8 @@ void 
dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib 
*mode_l

  mode_lib->vba.SourceFormatPixelAndScanSupport = true;
  for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
  if (mode_lib->vba.SurfaceTiling[k] == dm_sw_linear
-    && (!(!IsVertical(mode_lib->vba.SourceScan[k])) || 
mode_lib->vba.DCCEnable[k] == true)) {
+    && (!(!IsVertical((enum dm_rotation_angle) 
mode_lib->vba.SourceScan[k]))

+    || mode_lib->vba.DCCEnable[k] == true)) {
  mode_lib->vba.SourceFormatPixelAndScanSupport = false;
  }
  }
diff --git 
a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c

index 6509a84eeb64..07f3a85f8edf 100644
--- 
a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
+++ 
b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c

@@ -5314,9 +5314,15 @@ void dml32_CalculateDCCConfiguration(
  unsigned int    *IndependentBlockLuma,
  unsigned int    *IndependentBlockChroma)
  {
+    typedef enum {
+    REQ_256Bytes,
+    REQ_128BytesNonContiguous,
+    REQ_128BytesContiguous,
+    REQ_NA
+    } RequestType;
-    enum RequestType   RequestLuma;
-    enum RequestType   RequestChroma;
+    RequestType   RequestLuma;
+    RequestType   RequestChroma;


This might need a wider cleanup, enum RequestType is defined in 
display_mode_enums.h and is already included in all the 
display_mode_vba*.c files I've come across. Unless I'm missing 
something, we shouldn't need to redefine RequestType.


That said, there doesn't seem to be any functional change, and it 
fixes a build error. So


I'm unable to repro this error. Are you using clang? If so, which version ?


Hey Jay,

I was checking this patch in our CI, and we can reproduce this issue 
with a clang.


Thanks
Siqueira




Acked-by: Leo Li 

Thanks,
Leo


  unsigned int   segment_order_horz_contiguous_luma;
  unsigned int   segment_order_horz_contiguous_chroma;
@@ -5350,13 +5356,6 @@ void dml32_CalculateDCCConfiguration(
  double   detile_buf_vp_horz_limit;
  double   detile_buf_vp_vert_limit;
-    typedef enum {
-    REQ_256Bytes,
-    REQ_128BytesNonContiguous,

Re: [PATCH 1/1] drm/amdgpu/jpeg2: Add jpeg vmid update under IB submit

2022-06-08 Thread James Zhu

Reviewed-by:JamesZhuforthis patch James

On 2022-06-08 1:36 a.m., Mohammad Zafar Ziya wrote:

Add jpeg vmid update under IB submit

Signed-off-by: Mohammad Zafar Ziya
Acked-by: Christian König
---
  drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c | 6 +-
  drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h | 1 +
  2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c 
b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c
index d2722adabd1b..f3c1af5130ab 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c
@@ -535,6 +535,10 @@ void jpeg_v2_0_dec_ring_emit_ib(struct amdgpu_ring *ring,
  {
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
  
+	amdgpu_ring_write(ring,	PACKETJ(mmUVD_JPEG_IH_CTRL_INTERNAL_OFFSET,

+   0, 0, PACKETJ_TYPE0));
+   amdgpu_ring_write(ring, (vmid << JPEG_IH_CTRL__IH_VMID__SHIFT));
+
amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET,
0, 0, PACKETJ_TYPE0));
amdgpu_ring_write(ring, (vmid | (vmid << 4)));
@@ -768,7 +772,7 @@ static const struct amdgpu_ring_funcs 
jpeg_v2_0_dec_ring_vm_funcs = {
8 + /* jpeg_v2_0_dec_ring_emit_vm_flush */
18 + 18 + /* jpeg_v2_0_dec_ring_emit_fence x2 vm fence */
8 + 16,
-   .emit_ib_size = 22, /* jpeg_v2_0_dec_ring_emit_ib */
+   .emit_ib_size = 24, /* jpeg_v2_0_dec_ring_emit_ib */
.emit_ib = jpeg_v2_0_dec_ring_emit_ib,
.emit_fence = jpeg_v2_0_dec_ring_emit_fence,
.emit_vm_flush = jpeg_v2_0_dec_ring_emit_vm_flush,
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h 
b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h
index 1a03baa59755..654e43e83e2c 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h
@@ -41,6 +41,7 @@
  #define mmUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET
0x4084
  #define mmUVD_JRBC_STATUS_INTERNAL_OFFSET 0x4089
  #define mmUVD_JPEG_PITCH_INTERNAL_OFFSET  0x401f
+#define mmUVD_JPEG_IH_CTRL_INTERNAL_OFFSET 0x4149
  
  #define JRBC_DEC_EXTERNAL_REG_WRITE_ADDR0x18000

  

[PATCH] drm/amdkfd: Remove field io_link_count from struct kfd_topology_device

2022-06-08 Thread Ramesh Errabolu
The field is redundant and does not serve any functional role

Signed-off-by: Ramesh Errabolu 
---
 drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 2 --
 drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 1 -
 drivers/gpu/drm/amd/amdkfd/kfd_topology.h | 1 -
 3 files changed, 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index cbfb32b3d235..a5409531a2fd 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -1040,7 +1040,6 @@ static int kfd_parse_subtype_iolink(struct 
crat_subtype_iolink *iolink,
props->rec_transfer_size =
iolink->recommended_transfer_size;
 
-   dev->io_link_count++;
dev->node_props.io_links_count++;
list_add_tail(>list, >io_link_props);
break;
@@ -1067,7 +1066,6 @@ static int kfd_parse_subtype_iolink(struct 
crat_subtype_iolink *iolink,
props2->node_from = id_to;
props2->node_to = id_from;
props2->kobj = NULL;
-   to_dev->io_link_count++;
to_dev->node_props.io_links_count++;
list_add_tail(>list, _dev->io_link_props);
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index 3e240b22ec91..304322ac39e6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -1819,7 +1819,6 @@ static void kfd_topology_update_io_links(int 
proximity_domain)
 */
if (iolink->node_to == proximity_domain) {
list_del(>list);
-   dev->io_link_count--;
dev->node_props.io_links_count--;
} else {
if (iolink->node_from > proximity_domain)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
index 2fb5664e1041..9f6c949186c1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
@@ -130,7 +130,6 @@ struct kfd_topology_device {
struct list_headmem_props;
uint32_tcache_count;
struct list_headcache_props;
-   uint32_tio_link_count;
struct list_headio_link_props;
struct list_headp2p_link_props;
struct list_headperf_props;
-- 
2.35.1



[PATCH] drm/amdgpu: Unpin MMIO and DOORBELL BOs only after map count goes to zero

2022-06-08 Thread Ramesh Errabolu
In existing code MMIO and DOORBELL BOs are unpinned without ensuring the
condition that their map count has reached zero. Unpinning without checking
this constraint could lead to an error while BO is being freed. The patch
fixes this issue.

Signed-off-by: Ramesh Errabolu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 15 +++
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index a1de900ba677..e5dc94b745b1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1832,13 +1832,6 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
 
mutex_lock(>lock);
 
-   /* Unpin MMIO/DOORBELL BO's that were pinned during allocation */
-   if (mem->alloc_flags &
-   (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
-KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) {
-   amdgpu_amdkfd_gpuvm_unpin_bo(mem->bo);
-   }
-
mapped_to_gpu_memory = mem->mapped_to_gpu_memory;
is_imported = mem->is_imported;
mutex_unlock(>lock);
@@ -1855,7 +1848,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
/* Make sure restore workers don't access the BO any more */
bo_list_entry = >validate_list;
mutex_lock(_info->lock);
-   list_del(_list_entry->head);
+   list_del_init(_list_entry->head);
mutex_unlock(_info->lock);
 
/* No more MMU notifiers */
@@ -1880,6 +1873,12 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
 
ret = unreserve_bo_and_vms(, false, false);
 
+   /* Unpin MMIO/DOORBELL BO's that were pinned during allocation */
+   if (mem->alloc_flags &
+   (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
+KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))
+   amdgpu_amdkfd_gpuvm_unpin_bo(mem->bo);
+
/* Free the sync object */
amdgpu_sync_free(>sync);
 
-- 
2.35.1



RE: [PATCH] drm/amdkfd: Remove field io_link_count from struct kfd_topology_device

2022-06-08 Thread Errabolu, Ramesh
[AMD Official Use Only - General]

Ignore the patch sent a bit early.

Regards,
Ramesh

-Original Message-
From: Errabolu, Ramesh  
Sent: Wednesday, June 8, 2022 5:07 PM
To: amd-gfx@lists.freedesktop.org
Cc: Errabolu, Ramesh 
Subject: [PATCH] drm/amdkfd: Remove field io_link_count from struct 
kfd_topology_device

The field is redundant and does not serve any functional role

Signed-off-by: Ramesh Errabolu 
---
 drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 2 --
 drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 1 -  
drivers/gpu/drm/amd/amdkfd/kfd_topology.h | 1 -
 3 files changed, 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index cbfb32b3d235..a5409531a2fd 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -1040,7 +1040,6 @@ static int kfd_parse_subtype_iolink(struct 
crat_subtype_iolink *iolink,
props->rec_transfer_size =
iolink->recommended_transfer_size;
 
-   dev->io_link_count++;
dev->node_props.io_links_count++;
list_add_tail(>list, >io_link_props);
break;
@@ -1067,7 +1066,6 @@ static int kfd_parse_subtype_iolink(struct 
crat_subtype_iolink *iolink,
props2->node_from = id_to;
props2->node_to = id_from;
props2->kobj = NULL;
-   to_dev->io_link_count++;
to_dev->node_props.io_links_count++;
list_add_tail(>list, _dev->io_link_props);
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index 3e240b22ec91..304322ac39e6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -1819,7 +1819,6 @@ static void kfd_topology_update_io_links(int 
proximity_domain)
 */
if (iolink->node_to == proximity_domain) {
list_del(>list);
-   dev->io_link_count--;
dev->node_props.io_links_count--;
} else {
if (iolink->node_from > proximity_domain) diff 
--git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
index 2fb5664e1041..9f6c949186c1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
@@ -130,7 +130,6 @@ struct kfd_topology_device {
struct list_headmem_props;
uint32_tcache_count;
struct list_headcache_props;
-   uint32_tio_link_count;
struct list_headio_link_props;
struct list_headp2p_link_props;
struct list_headperf_props;
--
2.35.1


[PATCH] drm/amdkfd: Remove field io_link_count from struct kfd_topology_device

2022-06-08 Thread Ramesh Errabolu
The field is redundant and does not serve any functional role

Signed-off-by: Ramesh Errabolu 
---
 drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 2 --
 drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 1 -
 drivers/gpu/drm/amd/amdkfd/kfd_topology.h | 1 -
 3 files changed, 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index cbfb32b3d235..a5409531a2fd 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -1040,7 +1040,6 @@ static int kfd_parse_subtype_iolink(struct 
crat_subtype_iolink *iolink,
props->rec_transfer_size =
iolink->recommended_transfer_size;
 
-   dev->io_link_count++;
dev->node_props.io_links_count++;
list_add_tail(>list, >io_link_props);
break;
@@ -1067,7 +1066,6 @@ static int kfd_parse_subtype_iolink(struct 
crat_subtype_iolink *iolink,
props2->node_from = id_to;
props2->node_to = id_from;
props2->kobj = NULL;
-   to_dev->io_link_count++;
to_dev->node_props.io_links_count++;
list_add_tail(>list, _dev->io_link_props);
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index 3e240b22ec91..304322ac39e6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -1819,7 +1819,6 @@ static void kfd_topology_update_io_links(int 
proximity_domain)
 */
if (iolink->node_to == proximity_domain) {
list_del(>list);
-   dev->io_link_count--;
dev->node_props.io_links_count--;
} else {
if (iolink->node_from > proximity_domain)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
index 2fb5664e1041..9f6c949186c1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
@@ -130,7 +130,6 @@ struct kfd_topology_device {
struct list_headmem_props;
uint32_tcache_count;
struct list_headcache_props;
-   uint32_tio_link_count;
struct list_headio_link_props;
struct list_headp2p_link_props;
struct list_headperf_props;
-- 
2.35.1



Re: [PATCH 1/5] drm/amd/pm: add interface to deallocate power_context for smu_v13_0_7

2022-06-08 Thread Wang, Yang(Kevin)
[AMD Official Use Only - General]

Series is

Reviewed-by: Yang Wang 

Best Regards,
Kevin

From: amd-gfx  on behalf of Kenneth Feng 

Sent: Wednesday, June 8, 2022 5:05 PM
To: amd-gfx@lists.freedesktop.org 
Cc: Feng, Kenneth 
Subject: [PATCH 1/5] drm/amd/pm: add interface to deallocate power_context for 
smu_v13_0_7

add interface to deallocate power_context for smu_v13_0_7

Signed-off-by: Kenneth Feng 
---
 drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
index bdea7bca3805..7da42cae5d6e 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
@@ -1541,6 +1541,7 @@ static const struct pptable_funcs smu_v13_0_7_ppt_funcs = 
{
 .load_microcode = smu_v13_0_load_microcode,
 .init_smc_tables = smu_v13_0_7_init_smc_tables,
 .init_power = smu_v13_0_init_power,
+   .fini_power = smu_v13_0_fini_power,
 .check_fw_status = smu_v13_0_7_check_fw_status,
 .setup_pptable = smu_v13_0_7_setup_pptable,
 .check_fw_version = smu_v13_0_check_fw_version,
--
2.25.1



答复: [PATCH 2/5] drm/amd/pm: enable BACO on smu_v13_0_7

2022-06-08 Thread Feng, Kenneth
[AMD Official Use Only - General]


[AMD Official Use Only - General]
Yeah, synced to the latest code. Somehow ‘drm/amd/pm: drop redundant 
declarations’was reverted somehow at a point.
Thanks.


Best wishes
Kenneth Feng


发件人: Lazar, Lijo 
日期: 星期三, 2022年6月8日 17:14
收件人: Feng, Kenneth , amd-gfx@lists.freedesktop.org 

主题: Re: [PATCH 2/5] drm/amd/pm: enable BACO on smu_v13_0_7


On 6/8/2022 2:35 PM, Kenneth Feng wrote:
> enable BACO on smu_v13_0_7
>
> Signed-off-by: Kenneth Feng 
> ---
>   drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c| 1 +
>   drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 7 +++
>   2 files changed, 8 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c 
> b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> index fb04d82f66e6..f57710790b8c 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> @@ -1456,6 +1456,7 @@ static int smu_disable_dpms(struct smu_context *smu)
>case IP_VERSION(11, 0, 0):
>case IP_VERSION(11, 0, 5):
>case IP_VERSION(11, 0, 9):
> + case IP_VERSION(13, 0, 7):
>return 0;
>default:
>break;
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c 
> b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
> index 7da42cae5d6e..dc614befcdf5 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
> @@ -97,6 +97,7 @@ static struct cmn2asic_msg_mapping 
> smu_v13_0_7_message_map[SMU_MSG_MAX_COUNT] =
>MSG_MAP(UseDefaultPPTable,  PPSMC_MSG_UseDefaultPPTable,   
> 0),
>MSG_MAP(RunDcBtc,   PPSMC_MSG_RunDcBtc,
> 0),
>MSG_MAP(EnterBaco,  PPSMC_MSG_EnterBaco,   
> 0),
> + MSG_MAP(ExitBaco,   PPSMC_MSG_ExitBaco, 
> 0),
>MSG_MAP(SetSoftMinByFreq,   PPSMC_MSG_SetSoftMinByFreq,
> 1),
>MSG_MAP(SetSoftMaxByFreq,   PPSMC_MSG_SetSoftMaxByFreq,
> 1),
>MSG_MAP(SetHardMinByFreq,   PPSMC_MSG_SetHardMinByFreq,
> 1),
> @@ -281,6 +282,7 @@ smu_v13_0_7_get_allowed_feature_mask(struct smu_context 
> *smu,
>*(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_BACO_MPCLK_DS_BIT);
>*(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_GFX_PCC_DFLL_BIT);
>*(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_SOC_CG_BIT);
> + *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_BACO_BIT);
>
>if (adev->pm.pp_feature & PP_DCEFCLK_DPM_MASK)
>*(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DPM_DCN_BIT);
> @@ -1584,6 +1586,11 @@ static const struct pptable_funcs 
> smu_v13_0_7_ppt_funcs = {
>.set_tool_table_location = smu_v13_0_set_tool_table_location,
>.get_pp_feature_mask = smu_cmn_get_pp_feature_mask,
>.set_pp_feature_mask = smu_cmn_set_pp_feature_mask,
> + .baco_is_support = smu_v13_0_baco_is_support,
> + .baco_get_state = smu_v13_0_baco_get_state,
> + .baco_set_state = smu_v13_0_baco_set_state,
> + .baco_enter = smu_v13_0_baco_enter,
> + .baco_exit = smu_v13_0_baco_exit,

I remember seeing this one -  "drm/amd/pm: drop redundant declarations"
which drops smu13 baco common functions. Is this in sync with the latest
source?

Thanks,
Lijo


>   };
>
>   void smu_v13_0_7_set_ppt_funcs(struct smu_context *smu)
>


Re: [PATCH 2/5] drm/amd/pm: enable BACO on smu_v13_0_7

2022-06-08 Thread Lazar, Lijo




On 6/8/2022 2:35 PM, Kenneth Feng wrote:

enable BACO on smu_v13_0_7

Signed-off-by: Kenneth Feng 
---
  drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c| 1 +
  drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 7 +++
  2 files changed, 8 insertions(+)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c 
b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index fb04d82f66e6..f57710790b8c 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -1456,6 +1456,7 @@ static int smu_disable_dpms(struct smu_context *smu)
case IP_VERSION(11, 0, 0):
case IP_VERSION(11, 0, 5):
case IP_VERSION(11, 0, 9):
+   case IP_VERSION(13, 0, 7):
return 0;
default:
break;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
index 7da42cae5d6e..dc614befcdf5 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
@@ -97,6 +97,7 @@ static struct cmn2asic_msg_mapping 
smu_v13_0_7_message_map[SMU_MSG_MAX_COUNT] =
MSG_MAP(UseDefaultPPTable,  PPSMC_MSG_UseDefaultPPTable,
   0),
MSG_MAP(RunDcBtc,   PPSMC_MSG_RunDcBtc, 
   0),
MSG_MAP(EnterBaco,  PPSMC_MSG_EnterBaco,
   0),
+   MSG_MAP(ExitBaco,   PPSMC_MSG_ExitBaco, 
   0),
MSG_MAP(SetSoftMinByFreq,   PPSMC_MSG_SetSoftMinByFreq, 
   1),
MSG_MAP(SetSoftMaxByFreq,   PPSMC_MSG_SetSoftMaxByFreq, 
   1),
MSG_MAP(SetHardMinByFreq,   PPSMC_MSG_SetHardMinByFreq, 
   1),
@@ -281,6 +282,7 @@ smu_v13_0_7_get_allowed_feature_mask(struct smu_context 
*smu,
*(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_BACO_MPCLK_DS_BIT);
*(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_GFX_PCC_DFLL_BIT);
*(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_SOC_CG_BIT);
+   *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_BACO_BIT);
  
  	if (adev->pm.pp_feature & PP_DCEFCLK_DPM_MASK)

*(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DPM_DCN_BIT);
@@ -1584,6 +1586,11 @@ static const struct pptable_funcs smu_v13_0_7_ppt_funcs 
= {
.set_tool_table_location = smu_v13_0_set_tool_table_location,
.get_pp_feature_mask = smu_cmn_get_pp_feature_mask,
.set_pp_feature_mask = smu_cmn_set_pp_feature_mask,
+   .baco_is_support = smu_v13_0_baco_is_support,
+   .baco_get_state = smu_v13_0_baco_get_state,
+   .baco_set_state = smu_v13_0_baco_set_state,
+   .baco_enter = smu_v13_0_baco_enter,
+   .baco_exit = smu_v13_0_baco_exit,


I remember seeing this one -  "drm/amd/pm: drop redundant declarations" 
which drops smu13 baco common functions. Is this in sync with the latest 
source?


Thanks,
Lijo



  };
  
  void smu_v13_0_7_set_ppt_funcs(struct smu_context *smu)




[PATCH 5/5] drm/amd/pm: support BAMACO reset on smu_v13_0_7

2022-06-08 Thread Kenneth Feng
support BAMACO reset on smu_v13_0_7, take BAMACO as a subset of BACO
for the low latency, and it only happens on specific platforms.

Signed-off-by: Kenneth Feng 
---
 drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h |  1 +
 .../drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c  | 57 ++-
 2 files changed, 56 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
index 6d51e4340aad..b81c657c7386 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
@@ -432,6 +432,7 @@ struct smu_baco_context
 {
uint32_t state;
bool platform_support;
+   bool maco_support;
 };
 
 struct smu_freq_info {
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
index b635c2b4f81c..369e84fef5a6 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
@@ -303,6 +303,8 @@ static int smu_v13_0_7_check_powerplay_table(struct 
smu_context *smu)
struct smu_13_0_7_powerplay_table *powerplay_table =
table_context->power_play_table;
struct smu_baco_context *smu_baco = >smu_baco;
+   PPTable_t *smc_pptable = table_context->driver_pptable;
+   BoardTable_t *BoardTable = _pptable->BoardTable;
 
if (powerplay_table->platform_caps & 
SMU_13_0_7_PP_PLATFORM_CAP_HARDWAREDC)
smu->dc_controlled_by_gpio = true;
@@ -311,6 +313,9 @@ static int smu_v13_0_7_check_powerplay_table(struct 
smu_context *smu)
powerplay_table->platform_caps & SMU_13_0_7_PP_PLATFORM_CAP_MACO)
smu_baco->platform_support = true;
 
+   if (smu_baco->platform_support && (BoardTable->HsrEnabled || 
BoardTable->VddqOffEnabled))
+   smu_baco->maco_support = true;
+
table_context->thermal_controller_type =
powerplay_table->thermal_controller_type;
 
@@ -1537,6 +1542,54 @@ static int smu_v13_0_7_set_power_profile_mode(struct 
smu_context *smu, long *inp
return ret;
 }
 
+static int smu_v13_0_7_baco_set_state(struct smu_context *smu,
+enum smu_baco_state state)
+{
+   struct smu_baco_context *smu_baco = >smu_baco;
+   struct amdgpu_device *adev = smu->adev;
+   bool is_maco_support = smu_baco->maco_support;
+   int ret = 0;
+
+   if (smu_v13_0_baco_get_state(smu) == state)
+   return 0;
+
+   if (state == SMU_BACO_STATE_ENTER) {
+   ret = smu_cmn_send_smc_msg_with_param(smu,
+ SMU_MSG_EnterBaco,
+ (is_maco_support ? 2 : 0),
+ NULL);
+   } else {
+   ret = smu_cmn_send_smc_msg(smu,
+  SMU_MSG_ExitBaco,
+  NULL);
+   if (ret)
+   return ret;
+
+   /* clear vbios scratch 6 and 7 for coming asic reinit */
+   WREG32(adev->bios_scratch_reg_offset + 6, 0);
+   WREG32(adev->bios_scratch_reg_offset + 7, 0);
+   }
+
+   if (!ret)
+   smu_baco->state = state;
+
+   return ret;
+}
+
+static int smu_v13_0_7_baco_enter(struct smu_context *smu)
+{
+   int ret = 0;
+
+   ret = smu_v13_0_7_baco_set_state(smu,
+  SMU_BACO_STATE_ENTER);
+   if (ret)
+   return ret;
+
+   msleep(10);
+
+   return ret;
+}
+
 static const struct pptable_funcs smu_v13_0_7_ppt_funcs = {
.get_allowed_feature_mask = smu_v13_0_7_get_allowed_feature_mask,
.set_default_dpm_table = smu_v13_0_7_set_default_dpm_table,
@@ -1591,8 +1644,8 @@ static const struct pptable_funcs smu_v13_0_7_ppt_funcs = 
{
.set_pp_feature_mask = smu_cmn_set_pp_feature_mask,
.baco_is_support = smu_v13_0_baco_is_support,
.baco_get_state = smu_v13_0_baco_get_state,
-   .baco_set_state = smu_v13_0_baco_set_state,
-   .baco_enter = smu_v13_0_baco_enter,
+   .baco_set_state = smu_v13_0_7_baco_set_state,
+   .baco_enter = smu_v13_0_7_baco_enter,
.baco_exit = smu_v13_0_baco_exit,
 };
 
-- 
2.25.1



[PATCH 4/5] drm/amd/pm: enable gfxoff on smu_v13_0_7

2022-06-08 Thread Kenneth Feng
enable gfxoff on smu_v13_0_7

Signed-off-by: Kenneth Feng 
---
 drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
index dc614befcdf5..b635c2b4f81c 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
@@ -249,6 +249,9 @@ smu_v13_0_7_get_allowed_feature_mask(struct smu_context 
*smu,
*(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_GFX_IMU_BIT);
}
 
+   if (adev->pm.pp_feature & PP_GFXOFF_MASK)
+   *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_GFXOFF_BIT);
+
if (adev->pm.pp_feature & PP_MCLK_DPM_MASK) {
*(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DPM_UCLK_BIT);
*(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DPM_FCLK_BIT);
-- 
2.25.1



[PATCH 3/5] drm/amd/pm: update the driver if header for smu_v13_0_7

2022-06-08 Thread Kenneth Feng
update the driver if header for smu_v13_0_7

Signed-off-by: Kenneth Feng 
---
 .../inc/pmfw_if/smu13_driver_if_v13_0_7.h | 62 +--
 drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h  |  2 +-
 2 files changed, 45 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h 
b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h
index d99b4b47d49d..132da684e379 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h
@@ -25,10 +25,10 @@
 
 // *** IMPORTANT ***
 // PMFW TEAM: Always increment the interface version on any change to this file
-#define SMU13_DRIVER_IF_VERSION  0x28
+#define SMU13_DRIVER_IF_VERSION  0x2A
 
 //Increment this version if SkuTable_t or BoardTable_t change
-#define PPTABLE_VERSION 0x1D
+#define PPTABLE_VERSION 0x1E
 
 #define NUM_GFXCLK_DPM_LEVELS16
 #define NUM_SOCCLK_DPM_LEVELS8
@@ -112,6 +112,22 @@
 #define FEATURE_SPARE_63_BIT  63
 #define NUM_FEATURES  64
 
+#define ALLOWED_FEATURE_CTRL_DEFAULT 0xULL
+#define ALLOWED_FEATURE_CTRL_SCPM(1 << FEATURE_DPM_GFXCLK_BIT) | \
+ (1 << 
FEATURE_DPM_GFX_POWER_OPTIMIZER_BIT) | \
+ (1 << FEATURE_DPM_UCLK_BIT) | \
+ (1 << FEATURE_DPM_FCLK_BIT) | \
+ (1 << FEATURE_DPM_SOCCLK_BIT) | \
+ (1 << FEATURE_DPM_MP0CLK_BIT) | \
+ (1 << FEATURE_DPM_LINK_BIT) | \
+ (1 << FEATURE_DPM_DCN_BIT) | \
+ (1 << FEATURE_DS_GFXCLK_BIT) | \
+ (1 << FEATURE_DS_SOCCLK_BIT) | \
+ (1 << FEATURE_DS_FCLK_BIT) | \
+ (1 << FEATURE_DS_LCLK_BIT) | \
+ (1 << FEATURE_DS_DCFCLK_BIT) | \
+ (1 << FEATURE_DS_UCLK_BIT)
+
 //For use with feature control messages
 typedef enum {
   FEATURE_PWR_ALL,
@@ -662,7 +678,7 @@ typedef struct {
 #define PP_NUM_OD_VF_CURVE_POINTS PP_NUM_RTAVFS_PWL_ZONES + 1
 
 
-#define PP_OD_FEATURE_VF_CURVE_BIT  0
+#define PP_OD_FEATURE_GFX_VF_CURVE_BIT  0
 #define PP_OD_FEATURE_VMAX_BIT  1
 #define PP_OD_FEATURE_PPT_BIT   2
 #define PP_OD_FEATURE_FAN_CURVE_BIT 3
@@ -671,6 +687,8 @@ typedef struct {
 #define PP_OD_FEATURE_TDC_BIT  6
 #define PP_OD_FEATURE_GFXCLK_BIT  7
 #define PP_OD_FEATURE_UCLK_BIT  8
+#define PP_OD_FEATURE_ZERO_FAN_BIT  9
+#define PP_OD_FEATURE_TEMPERATURE_BIT 10
 
 typedef enum {
   PP_OD_POWER_FEATURE_ALWAYS_ENABLED,
@@ -689,8 +707,8 @@ typedef struct {
   uint8_tRuntimePwrSavingFeaturesCtrl;
 
   //Frequency changes
-  uint16_t   GfxclkFmin;   // MHz
-  uint16_t   GfxclkFmax;   // MHz
+  int16_t   GfxclkFmin;   // MHz
+  int16_t   GfxclkFmax;   // MHz
   uint16_t   UclkFmin; // MHz
   uint16_t   UclkFmax; // MHz
 
@@ -701,17 +719,17 @@ typedef struct {
   //Fan control
   uint8_tFanLinearPwmPoints[NUM_OD_FAN_MAX_POINTS];
   uint8_tFanLinearTempPoints[NUM_OD_FAN_MAX_POINTS];
-  uint16_t   FanMaximumRpm;
   uint16_t   FanMinimumPwm;
-  uint16_t   FanAcousticLimitRpm;
+  uint16_t   AcousticTargetRpmThreshold;
+  uint16_t   AcousticLimitRpmThreshold;
   uint16_t   FanTargetTemperature; // Degree Celcius
   uint8_tFanZeroRpmEnable;
   uint8_tFanZeroRpmStopTemp;
   uint8_tFanMode;
-  uint8_tPadding[1];
-
+  uint8_tMaxOpTemp;
+  uint8_tPadding[4];
 
-  uint32_t   Spare[13];
+  uint32_t   Spare[12];
   uint32_t   MmHubPadding[8]; // SMU internal use. Adding here 
instead of external as a workaround
 } OverDriveTable_t;
 
@@ -740,17 +758,17 @@ typedef struct {
 
   uint8_tFanLinearPwmPoints;
   uint8_tFanLinearTempPoints;
-  uint16_t   FanMaximumRpm;
   uint16_t   FanMinimumPwm;
-  uint16_t   FanAcousticLimitRpm;
+  uint16_t   AcousticTargetRpmThreshold;
+  uint16_t   AcousticLimitRpmThreshold;
   uint16_t   FanTargetTemperature; // Degree Celcius
   uint8_tFanZeroRpmEnable;
   uint8_tFanZeroRpmStopTemp;
   uint8_tFanMode;
-  uint8_tPadding[1];
+  uint8_tMaxOpTemp;
+  uint8_tPadding[4];
 
-
-  uint32_t 

[PATCH 2/5] drm/amd/pm: enable BACO on smu_v13_0_7

2022-06-08 Thread Kenneth Feng
enable BACO on smu_v13_0_7

Signed-off-by: Kenneth Feng 
---
 drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c| 1 +
 drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 7 +++
 2 files changed, 8 insertions(+)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c 
b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index fb04d82f66e6..f57710790b8c 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -1456,6 +1456,7 @@ static int smu_disable_dpms(struct smu_context *smu)
case IP_VERSION(11, 0, 0):
case IP_VERSION(11, 0, 5):
case IP_VERSION(11, 0, 9):
+   case IP_VERSION(13, 0, 7):
return 0;
default:
break;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
index 7da42cae5d6e..dc614befcdf5 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
@@ -97,6 +97,7 @@ static struct cmn2asic_msg_mapping 
smu_v13_0_7_message_map[SMU_MSG_MAX_COUNT] =
MSG_MAP(UseDefaultPPTable,  PPSMC_MSG_UseDefaultPPTable,
   0),
MSG_MAP(RunDcBtc,   PPSMC_MSG_RunDcBtc, 
   0),
MSG_MAP(EnterBaco,  PPSMC_MSG_EnterBaco,
   0),
+   MSG_MAP(ExitBaco,   PPSMC_MSG_ExitBaco, 
   0),
MSG_MAP(SetSoftMinByFreq,   PPSMC_MSG_SetSoftMinByFreq, 
   1),
MSG_MAP(SetSoftMaxByFreq,   PPSMC_MSG_SetSoftMaxByFreq, 
   1),
MSG_MAP(SetHardMinByFreq,   PPSMC_MSG_SetHardMinByFreq, 
   1),
@@ -281,6 +282,7 @@ smu_v13_0_7_get_allowed_feature_mask(struct smu_context 
*smu,
*(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_BACO_MPCLK_DS_BIT);
*(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_GFX_PCC_DFLL_BIT);
*(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_SOC_CG_BIT);
+   *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_BACO_BIT);
 
if (adev->pm.pp_feature & PP_DCEFCLK_DPM_MASK)
*(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DPM_DCN_BIT);
@@ -1584,6 +1586,11 @@ static const struct pptable_funcs smu_v13_0_7_ppt_funcs 
= {
.set_tool_table_location = smu_v13_0_set_tool_table_location,
.get_pp_feature_mask = smu_cmn_get_pp_feature_mask,
.set_pp_feature_mask = smu_cmn_set_pp_feature_mask,
+   .baco_is_support = smu_v13_0_baco_is_support,
+   .baco_get_state = smu_v13_0_baco_get_state,
+   .baco_set_state = smu_v13_0_baco_set_state,
+   .baco_enter = smu_v13_0_baco_enter,
+   .baco_exit = smu_v13_0_baco_exit,
 };
 
 void smu_v13_0_7_set_ppt_funcs(struct smu_context *smu)
-- 
2.25.1



[PATCH 1/5] drm/amd/pm: add interface to deallocate power_context for smu_v13_0_7

2022-06-08 Thread Kenneth Feng
add interface to deallocate power_context for smu_v13_0_7

Signed-off-by: Kenneth Feng 
---
 drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
index bdea7bca3805..7da42cae5d6e 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
@@ -1541,6 +1541,7 @@ static const struct pptable_funcs smu_v13_0_7_ppt_funcs = 
{
.load_microcode = smu_v13_0_load_microcode,
.init_smc_tables = smu_v13_0_7_init_smc_tables,
.init_power = smu_v13_0_init_power,
+   .fini_power = smu_v13_0_fini_power,
.check_fw_status = smu_v13_0_7_check_fw_status,
.setup_pptable = smu_v13_0_7_setup_pptable,
.check_fw_version = smu_v13_0_check_fw_version,
-- 
2.25.1



Re: [PATCH 1/1] drm/radeon: Initialize fences array entries in radeon_sa_bo_next_hole

2022-06-08 Thread Christian König

Am 07.06.22 um 17:19 schrieb Xiaohui Zhang:

Similar to the handling of amdgpu_sa_bo_next_hole in commit 6a15f3ff19a8
("drm/amdgpu: Initialize fences array entries in amdgpu_sa_bo_next_hole"),
we thought a patch might be needed here as well.

The entries were only initialized once in radeon_sa_bo_new. If a fence
wasn't signalled yet in the first radeon_sa_bo_next_hole call, but then
got signalled before a later radeon_sa_bo_next_hole call, it could
destroy the fence but leave its pointer in the array, resulting in
use-after-free in radeon_sa_bo_new.


I would rather like to see the sub allocator moved into a common drm helper.

Regards,
Christian.



Signed-off-by: Xiaohui Zhang 
---
  drivers/gpu/drm/radeon/radeon_sa.c | 6 +++---
  1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_sa.c 
b/drivers/gpu/drm/radeon/radeon_sa.c
index 310c322c7112..0981948bd9ed 100644
--- a/drivers/gpu/drm/radeon/radeon_sa.c
+++ b/drivers/gpu/drm/radeon/radeon_sa.c
@@ -267,6 +267,8 @@ static bool radeon_sa_bo_next_hole(struct radeon_sa_manager 
*sa_manager,
for (i = 0; i < RADEON_NUM_RINGS; ++i) {
struct radeon_sa_bo *sa_bo;
  
+		fences[i] = NULL;

+
if (list_empty(_manager->flist[i])) {
continue;
}
@@ -332,10 +334,8 @@ int radeon_sa_bo_new(struct radeon_device *rdev,
  
  	spin_lock(_manager->wq.lock);

do {
-   for (i = 0; i < RADEON_NUM_RINGS; ++i) {
-   fences[i] = NULL;
+   for (i = 0; i < RADEON_NUM_RINGS; ++i)
tries[i] = 0;
-   }
  
  		do {

radeon_sa_bo_try_free(sa_manager);




Re: [PATCH 1/1] drm/amdgpu/jpeg2: Add jpeg vmid update under IB submit

2022-06-08 Thread Lazar, Lijo




On 6/8/2022 11:06 AM, Mohammad Zafar Ziya wrote:

Add jpeg vmid update under IB submit

Signed-off-by: Mohammad Zafar Ziya 
Acked-by: Christian König 


Reviewed-by: Lijo Lazar 

Thanks,
Lijo


---
  drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c | 6 +-
  drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h | 1 +
  2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c 
b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c
index d2722adabd1b..f3c1af5130ab 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c
@@ -535,6 +535,10 @@ void jpeg_v2_0_dec_ring_emit_ib(struct amdgpu_ring *ring,
  {
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
  
+	amdgpu_ring_write(ring,	PACKETJ(mmUVD_JPEG_IH_CTRL_INTERNAL_OFFSET,

+   0, 0, PACKETJ_TYPE0));
+   amdgpu_ring_write(ring, (vmid << JPEG_IH_CTRL__IH_VMID__SHIFT));
+
amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET,
0, 0, PACKETJ_TYPE0));
amdgpu_ring_write(ring, (vmid | (vmid << 4)));
@@ -768,7 +772,7 @@ static const struct amdgpu_ring_funcs 
jpeg_v2_0_dec_ring_vm_funcs = {
8 + /* jpeg_v2_0_dec_ring_emit_vm_flush */
18 + 18 + /* jpeg_v2_0_dec_ring_emit_fence x2 vm fence */
8 + 16,
-   .emit_ib_size = 22, /* jpeg_v2_0_dec_ring_emit_ib */
+   .emit_ib_size = 24, /* jpeg_v2_0_dec_ring_emit_ib */
.emit_ib = jpeg_v2_0_dec_ring_emit_ib,
.emit_fence = jpeg_v2_0_dec_ring_emit_fence,
.emit_vm_flush = jpeg_v2_0_dec_ring_emit_vm_flush,
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h 
b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h
index 1a03baa59755..654e43e83e2c 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h
@@ -41,6 +41,7 @@
  #define mmUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET
0x4084
  #define mmUVD_JRBC_STATUS_INTERNAL_OFFSET 0x4089
  #define mmUVD_JPEG_PITCH_INTERNAL_OFFSET  0x401f
+#define mmUVD_JPEG_IH_CTRL_INTERNAL_OFFSET 0x4149
  
  #define JRBC_DEC_EXTERNAL_REG_WRITE_ADDR0x18000
  



Re: [PATCH v5 02/13] mm: handling Non-LRU pages returned by vm_normal_pages

2022-06-08 Thread Alistair Popple


I can't see any issues with this now so:

Reviewed-by: Alistair Popple 

Alex Sierra  writes:

> With DEVICE_COHERENT, we'll soon have vm_normal_pages() return
> device-managed anonymous pages that are not LRU pages. Although they
> behave like normal pages for purposes of mapping in CPU page, and for
> COW. They do not support LRU lists, NUMA migration or THP.
>
> We also introduced a FOLL_LRU flag that adds the same behaviour to
> follow_page and related APIs, to allow callers to specify that they
> expect to put pages on an LRU list.
>
> Signed-off-by: Alex Sierra 
> Acked-by: Felix Kuehling 
> ---
>  fs/proc/task_mmu.c | 2 +-
>  include/linux/mm.h | 3 ++-
>  mm/gup.c   | 6 +-
>  mm/huge_memory.c   | 2 +-
>  mm/khugepaged.c| 9 ++---
>  mm/ksm.c   | 6 +++---
>  mm/madvise.c   | 4 ++--
>  mm/memory.c| 9 -
>  mm/mempolicy.c | 2 +-
>  mm/migrate.c   | 4 ++--
>  mm/mlock.c | 2 +-
>  mm/mprotect.c  | 2 +-
>  12 files changed, 33 insertions(+), 18 deletions(-)
>
> diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
> index 2d04e3470d4c..2dd8c8a66924 100644
> --- a/fs/proc/task_mmu.c
> +++ b/fs/proc/task_mmu.c
> @@ -1792,7 +1792,7 @@ static struct page *can_gather_numa_stats(pte_t pte, 
> struct vm_area_struct *vma,
>   return NULL;
>
>   page = vm_normal_page(vma, addr, pte);
> - if (!page)
> + if (!page || is_zone_device_page(page))
>   return NULL;
>
>   if (PageReserved(page))
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index bc8f326be0ce..d3f43908ff8d 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -601,7 +601,7 @@ struct vm_operations_struct {
>  #endif
>   /*
>* Called by vm_normal_page() for special PTEs to find the
> -  * page for @addr.  This is useful if the default behavior
> +  * page for @addr. This is useful if the default behavior
>* (using pte_page()) would not find the correct page.
>*/
>   struct page *(*find_special_page)(struct vm_area_struct *vma,
> @@ -2934,6 +2934,7 @@ struct page *follow_page(struct vm_area_struct *vma, 
> unsigned long address,
>  #define FOLL_NUMA0x200   /* force NUMA hinting page fault */
>  #define FOLL_MIGRATION   0x400   /* wait for page to replace migration 
> entry */
>  #define FOLL_TRIED   0x800   /* a retry, previous pass started an IO */
> +#define FOLL_LRU0x1000  /* return only LRU (anon or page cache) */
>  #define FOLL_REMOTE  0x2000  /* we are working on non-current tsk/mm */
>  #define FOLL_COW 0x4000  /* internal GUP flag */
>  #define FOLL_ANON0x8000  /* don't do file mappings */
> diff --git a/mm/gup.c b/mm/gup.c
> index 551264407624..48b45bcc8501 100644
> --- a/mm/gup.c
> +++ b/mm/gup.c
> @@ -532,7 +532,11 @@ static struct page *follow_page_pte(struct 
> vm_area_struct *vma,
>   }
>
>   page = vm_normal_page(vma, address, pte);
> - if (!page && pte_devmap(pte) && (flags & (FOLL_GET | FOLL_PIN))) {
> + if ((flags & FOLL_LRU) && ((page && is_zone_device_page(page)) ||
> + (!page && pte_devmap(pte {
> + page = ERR_PTR(-EEXIST);
> + goto out;
> + } else if (!page && pte_devmap(pte) && (flags & (FOLL_GET | FOLL_PIN))) 
> {
>   /*
>* Only return device mapping pages in the FOLL_GET or FOLL_PIN
>* case since they are only valid while holding the pgmap
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index a77c78a2b6b5..48182c8fe151 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -2906,7 +2906,7 @@ static int split_huge_pages_pid(int pid, unsigned long 
> vaddr_start,
>   }
>
>   /* FOLL_DUMP to ignore special (like zero) pages */
> - page = follow_page(vma, addr, FOLL_GET | FOLL_DUMP);
> + page = follow_page(vma, addr, FOLL_GET | FOLL_DUMP | FOLL_LRU);
>
>   if (IS_ERR(page))
>   continue;
> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> index 16be62d493cd..671ac7800e53 100644
> --- a/mm/khugepaged.c
> +++ b/mm/khugepaged.c
> @@ -618,7 +618,7 @@ static int __collapse_huge_page_isolate(struct 
> vm_area_struct *vma,
>   goto out;
>   }
>   page = vm_normal_page(vma, address, pteval);
> - if (unlikely(!page)) {
> + if (unlikely(!page) || unlikely(is_zone_device_page(page))) {
>   result = SCAN_PAGE_NULL;
>   goto out;
>   }
> @@ -1267,7 +1267,7 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
>   writable = true;
>
>   page = vm_normal_page(vma, _address, pteval);
> - if (unlikely(!page)) {
> + if (unlikely(!page) || unlikely(is_zone_device_page(page))) {
>   result = SCAN_PAGE_NULL;
>   goto out_unmap;
>   }
> @@ -1479,7