On 2026-01-20 05:34, Six, Lancelot wrote:
[AMD Official Use Only - AMD Internal Distribution Only]

Hi,


-----Original Message-----
From: Zhu, James<[email protected]>
Sent: 19 January 2026 17:15
To:[email protected]
Cc: Six, Lancelot<[email protected]>; Kim, Jonathan
<[email protected]>; Zhu, James<[email protected]>
Subject: [PATCH] drm/amdkfd: add mask for debug trap flag setting

to specify which bits are valid setting on flags.

Signed-off-by: James Zhu<[email protected]>
---
  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c |  2 +-
  drivers/gpu/drm/amd/amdkfd/kfd_debug.c   | 48 ++++++++++++------------
  drivers/gpu/drm/amd/amdkfd/kfd_debug.h   |  3 +-
  include/uapi/linux/kfd_ioctl.h           |  3 +-
  4 files changed, 30 insertions(+), 26 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 79586abad7fd..fd43ef7c9076 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -3377,7 +3377,7 @@ static int kfd_ioctl_set_debug_trap(struct file *filep,
struct kfd_process *p, v
                               args->clear_node_address_watch.id);
               break;
       case KFD_IOC_DBG_TRAP_SET_FLAGS:
-             r = kfd_dbg_trap_set_flags(target, &args->set_flags.flags);
+             r = kfd_dbg_trap_set_flags(target, &args->set_flags);
               break;
       case KFD_IOC_DBG_TRAP_QUERY_DEBUG_EVENT:
               r = kfd_dbg_ev_query_debug_event(target,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
index a04875236647..279160ca71a9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
@@ -512,38 +512,42 @@ static void
kfd_dbg_clear_process_address_watch(struct kfd_process *target)
                       kfd_dbg_trap_clear_dev_address_watch(target-
pdds[i], j);
  }

-int kfd_dbg_trap_set_flags(struct kfd_process *target, uint32_t *flags)
+int kfd_dbg_trap_set_flags(struct kfd_process *target,
+     struct kfd_ioctl_dbg_trap_set_flags_args *set_flags)
  {
       uint32_t prev_flags = target->dbg_flags;
       int i, r = 0, rewind_count = 0;

+     if (!((set_flags->flags ^ prev_flags) & set_flags->mask))
+             return 0;
+
       for (i = 0; i < target->n_pdds; i++) {
               struct kfd_topology_device *topo_dev =
                               kfd_topology_device_by_id(target->pdds[i]-
dev->id);
               uint32_t caps = topo_dev->node_props.capability;
               uint32_t caps2 = topo_dev->node_props.capability2;
+             struct amdgpu_device *adev = target->pdds[i]->dev->adev;

-             if (!(caps &
HSA_CAP_TRAP_DEBUG_PRECISE_MEMORY_OPERATIONS_SUPPORTED) &&
-                     (*flags & KFD_DBG_TRAP_FLAG_SINGLE_MEM_OP)) {
-                     *flags = prev_flags;
-                     return -EACCES;
-             }
-
-             if (!(caps &
HSA_CAP_TRAP_DEBUG_PRECISE_ALU_OPERATIONS_SUPPORTED) &&
-                 (*flags & KFD_DBG_TRAP_FLAG_SINGLE_ALU_OP)) {
-                     *flags = prev_flags;
-                     return -EACCES;
-             }
-
-             if (!(caps2 &
HSA_CAP2_TRAP_DEBUG_LDS_OUT_OF_ADDR_RANGE_SUPPORTED) &&
-                 (*flags &
KFD_DBG_TRAP_FLAG_LDS_OUT_OF_ADDR_RANGE)) {
-                     *flags = prev_flags;
+             if (set_flags->mask == 0xFFFFFFFF && !set_flags->flags)
+                     break;
+             if ((!(caps &
HSA_CAP_TRAP_DEBUG_PRECISE_MEMORY_OPERATIONS_SUPPORTED) &&
+                     (set_flags->mask &
KFD_DBG_TRAP_FLAG_SINGLE_MEM_OP)) ||
+                     (!(caps &
HSA_CAP_TRAP_DEBUG_PRECISE_ALU_OPERATIONS_SUPPORTED) &&
+                 (set_flags->mask & KFD_DBG_TRAP_FLAG_SINGLE_ALU_OP))
||
+                     (!(caps2 &
HSA_CAP2_TRAP_DEBUG_LDS_OUT_OF_ADDR_RANGE_SUPPORTED) &&
+                 (set_flags->mask &
KFD_DBG_TRAP_FLAG_LDS_OUT_OF_ADDR_RANGE))) {
+                     set_flags->flags = prev_flags;
+                     dev_dbg(adev->dev, "Debug Trap set mask 0x%x caps
0x%x caps2 0x%x",
+                             set_flags->mask, caps, caps2);
The logic here seems odd.  If "set_flags->mask" is the set of flags a caller 
wants to modify (I don't think it should, see below), a call which asks for PRECISE_MEMORY 
to be disabled would return EACCESS if PRECISE_MEMORY is not supported.  Before this patch, 
it would have been perfectly valid set (or confirm) the PRECISE_MEMORY flag to 0.
[JZ] the logical here is checking input setting, if caps does not support such 
feature, mask trys to set this feature bit, then exit with -EACCESS.
Also, consider the following scenario where a user wants to set PRECISE_ALU=0 
and PRECISE_MEMORY=1:

   set_flags->flags = KFD_DBG_TRAP_FLAG_SINGLE_MEM_OP;
   set_flags->mask = KFD_DBG_TRAP_FLAG_SINGLE_MEM_OP | 
KFD_DBG_TRAP_FLAG_SINGLE_ALU_OP;

If the platform only supports PRECISE_MEMORY (like gfx11 for example), the call 
would erroneously fail with EACCESS.  This was possible before this patch.

[JZ] then mask should not include KFD_DBG_TRAP_FLAG_SINGLE_ALU_OP which tells user want to clear KFD_DBG_TRAP_FLAG_SINGLE_ALU_OP on this

ASIC, but it actually not been supported. err tells user that the operation clearing PRECISE_ALU is invalid.


                       return -EACCES;
               }
       }

-     target->dbg_flags = *flags;
-     *flags = prev_flags;
+     target->dbg_flags ^= (target->dbg_flags ^ set_flags->flags) & set_flags-
mask;
+     pr_debug("Debug Trap previous flags 0x%x set flags 0x%x set mask
0x%x target flags 0x%x",
+             prev_flags, set_flags->flags, set_flags->mask, target->dbg_flags);
+
+     set_flags->flags = prev_flags;
       for (i = 0; i < target->n_pdds; i++) {
               struct kfd_process_device *pdd = target->pdds[i];

@@ -555,10 +559,8 @@ int kfd_dbg_trap_set_flags(struct kfd_process *target,
uint32_t *flags)
               else
                       r = kfd_dbg_set_mes_debug_mode(pdd, true);

-             if (r) {
-                     target->dbg_flags = prev_flags;
+             if (r)
                       break;
-             }

               rewind_count++;
       }
@@ -596,7 +598,7 @@ void kfd_dbg_trap_deactivate(struct kfd_process
*target, bool unwind, int unwind
       int i;

       if (!unwind) {
-             uint32_t flags = 0;
+             struct kfd_ioctl_dbg_trap_set_flags_args set_flags = {0,
0xFFFFFFFF};
               int resume_count = resume_queues(target, 0, NULL);

               if (resume_count)
@@ -606,7 +608,7 @@ void kfd_dbg_trap_deactivate(struct kfd_process
*target, bool unwind, int unwind
               kfd_dbg_clear_process_address_watch(target);
               kfd_dbg_trap_set_wave_launch_mode(target, 0);

-             kfd_dbg_trap_set_flags(target, &flags);
+             kfd_dbg_trap_set_flags(target, &set_flags);
       }

       for (i = 0; i < target->n_pdds; i++) {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
index 894753818cba..34d27eb500a5 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
@@ -62,7 +62,8 @@ int kfd_dbg_trap_set_dev_address_watch(struct
kfd_process_device *pdd,
                                       uint32_t watch_address_mask,
                                       uint32_t *watch_id,
                                       uint32_t watch_mode);
-int kfd_dbg_trap_set_flags(struct kfd_process *target, uint32_t *flags);
+int kfd_dbg_trap_set_flags(struct kfd_process *target,
+             struct kfd_ioctl_dbg_trap_set_flags_args *set_flags);
  int kfd_dbg_trap_query_exception_info(struct kfd_process *target,
               uint32_t source_id,
               uint32_t exception_code,
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index e9b756ca228c..0522fe7344e4 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -1515,6 +1515,7 @@ struct
kfd_ioctl_dbg_trap_clear_node_address_watch_args {
   *     Sets flags for wave behaviour.
   *
   *     @flags (IN/OUT) - IN = flags to enable, OUT = flags previously enabled
+ *     @mask  (IN)     - IN = specified debug trap operation bits on flag
I am not sure I fully understand this comment.  Is it meant to say which bits 
from FLAGS will be set?

Also, this seems to break backwards compatibility.  Before this patch, a caller would 
call this ioctl with "flags" set, and the PAD field would be 0.  The semantics 
of this call is to set all bits from flags.  The updated semantics of MASK would make it 
so no flags are set instead of alle.
[JZ] Jon has pointed out this. I have a new version to fix it.  I will add below code fallback to old version.
    if (!set_flags->mask)
        set_flags->mask = set_flags->flags;


The updated ioctl must preserve this existing pattern, so instead of mask 
telling which bits must be changed, it should specify which bits should be 
preserved (or ignored from FLAGS).

To make it clearer, I'd probably call this "preserve_mask"
[JZ] to use mask to tell which bits in flag will be modified seem more straightforward.

         @preserve_mask (IN)     - IN = Specify flags which must not be 
modified.

Best,
Lancelot.

   *
   *     Generic errors apply (see kfd_dbg_trap_operations).
   *     Return - 0 on SUCCESS.
@@ -1522,7 +1523,7 @@ struct
kfd_ioctl_dbg_trap_clear_node_address_watch_args {
   */
  struct kfd_ioctl_dbg_trap_set_flags_args {
       __u32 flags;
-     __u32 pad;
+     __u32 mask;
  };

  /**
--
2.34.1

Reply via email to