[Public] > -----Original Message----- > From: Zhu, James <[email protected]> > Sent: Monday, January 19, 2026 12:15 PM > To: [email protected] > Cc: Six, Lancelot <[email protected]>; Kim, Jonathan > <[email protected]>; Zhu, James <[email protected]> > Subject: [PATCH] drm/amdkfd: add mask for debug trap flag setting > > to specify which bits are valid setting on flags.
Can you elaborate? > > Signed-off-by: James Zhu <[email protected]> > --- > drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 2 +- > drivers/gpu/drm/amd/amdkfd/kfd_debug.c | 48 ++++++++++++------------ > drivers/gpu/drm/amd/amdkfd/kfd_debug.h | 3 +- > include/uapi/linux/kfd_ioctl.h | 3 +- > 4 files changed, 30 insertions(+), 26 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c > b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c > index 79586abad7fd..fd43ef7c9076 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c > @@ -3377,7 +3377,7 @@ static int kfd_ioctl_set_debug_trap(struct file *filep, > struct kfd_process *p, v > args->clear_node_address_watch.id); > break; > case KFD_IOC_DBG_TRAP_SET_FLAGS: > - r = kfd_dbg_trap_set_flags(target, &args->set_flags.flags); > + r = kfd_dbg_trap_set_flags(target, &args->set_flags); > break; > case KFD_IOC_DBG_TRAP_QUERY_DEBUG_EVENT: > r = kfd_dbg_ev_query_debug_event(target, > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c > b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c > index a04875236647..279160ca71a9 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c > @@ -512,38 +512,42 @@ static void > kfd_dbg_clear_process_address_watch(struct kfd_process *target) > kfd_dbg_trap_clear_dev_address_watch(target- > >pdds[i], j); > } > > -int kfd_dbg_trap_set_flags(struct kfd_process *target, uint32_t *flags) > +int kfd_dbg_trap_set_flags(struct kfd_process *target, > + struct kfd_ioctl_dbg_trap_set_flags_args *set_flags) > { > uint32_t prev_flags = target->dbg_flags; > int i, r = 0, rewind_count = 0; > > + if (!((set_flags->flags ^ prev_flags) & set_flags->mask)) Does this block old debuggers from setting debug mode if pad is 0? > + return 0; > + > for (i = 0; i < target->n_pdds; i++) { > struct kfd_topology_device *topo_dev = > kfd_topology_device_by_id(target->pdds[i]- > >dev->id); > uint32_t caps = topo_dev->node_props.capability; > uint32_t caps2 = topo_dev->node_props.capability2; > + struct amdgpu_device *adev = target->pdds[i]->dev->adev; > > - if (!(caps & > HSA_CAP_TRAP_DEBUG_PRECISE_MEMORY_OPERATIONS_SUPPORTED) && > - (*flags & KFD_DBG_TRAP_FLAG_SINGLE_MEM_OP)) { > - *flags = prev_flags; > - return -EACCES; > - } > - > - if (!(caps & > HSA_CAP_TRAP_DEBUG_PRECISE_ALU_OPERATIONS_SUPPORTED) && > - (*flags & KFD_DBG_TRAP_FLAG_SINGLE_ALU_OP)) { > - *flags = prev_flags; > - return -EACCES; > - } > - > - if (!(caps2 & > HSA_CAP2_TRAP_DEBUG_LDS_OUT_OF_ADDR_RANGE_SUPPORTED) && > - (*flags & > KFD_DBG_TRAP_FLAG_LDS_OUT_OF_ADDR_RANGE)) { > - *flags = prev_flags; > + if (set_flags->mask == 0xFFFFFFFF && !set_flags->flags) > + break; Seems like this is only for a deactivation system call. Probably don't want to let users abuse this with a magic number then. Maybe breakout into new static call in deactivation or just device loop deactivate directly there since this logic is starting to get complicated. > + if ((!(caps & > HSA_CAP_TRAP_DEBUG_PRECISE_MEMORY_OPERATIONS_SUPPORTED) && > + (set_flags->mask & > KFD_DBG_TRAP_FLAG_SINGLE_MEM_OP)) || > + (!(caps & > HSA_CAP_TRAP_DEBUG_PRECISE_ALU_OPERATIONS_SUPPORTED) && > + (set_flags->mask & KFD_DBG_TRAP_FLAG_SINGLE_ALU_OP)) > || > + (!(caps2 & > HSA_CAP2_TRAP_DEBUG_LDS_OUT_OF_ADDR_RANGE_SUPPORTED) && > + (set_flags->mask & > KFD_DBG_TRAP_FLAG_LDS_OUT_OF_ADDR_RANGE))) { Please vertically indent align "caps" and "set_flags" underneath each other for legibility. > + set_flags->flags = prev_flags; > + dev_dbg(adev->dev, "Debug Trap set mask 0x%x caps > 0x%x caps2 0x%x", > + set_flags->mask, caps, caps2); > return -EACCES; > } > } > > - target->dbg_flags = *flags; > - *flags = prev_flags; > + target->dbg_flags ^= (target->dbg_flags ^ set_flags->flags) & set_flags- > >mask; I assume we can only set/unset 1 flag at a time, which is why this works. Otherwise, maybe I don't have enough background on this, but please do elaborate. > + pr_debug("Debug Trap previous flags 0x%x set flags 0x%x set mask 0x%x > target flags 0x%x", > + prev_flags, set_flags->flags, set_flags->mask, target- > >dbg_flags); > + > + set_flags->flags = prev_flags; > for (i = 0; i < target->n_pdds; i++) { > struct kfd_process_device *pdd = target->pdds[i]; > > @@ -555,10 +559,8 @@ int kfd_dbg_trap_set_flags(struct kfd_process *target, > uint32_t *flags) > else > r = kfd_dbg_set_mes_debug_mode(pdd, true); > > - if (r) { > - target->dbg_flags = prev_flags; Doesn't rewind require the previous value for something to rewind to? > + if (r) > break; > - } > > rewind_count++; > } > @@ -596,7 +598,7 @@ void kfd_dbg_trap_deactivate(struct kfd_process *target, > bool unwind, int unwind > int i; > > if (!unwind) { > - uint32_t flags = 0; > + struct kfd_ioctl_dbg_trap_set_flags_args set_flags = {0, > 0xFFFFFFFF}; Similar to 3 comments above. Just call a new static or loop directly to reset debug mode (i.e. all flags clear). i.e. you don't have to rewind on preemption failure here because deactivation itself is a rewind we're pretty much out of luck at this point if we fail. Jon > int resume_count = resume_queues(target, 0, NULL); > > if (resume_count) > @@ -606,7 +608,7 @@ void kfd_dbg_trap_deactivate(struct kfd_process *target, > bool unwind, int unwind > kfd_dbg_clear_process_address_watch(target); > kfd_dbg_trap_set_wave_launch_mode(target, 0); > > - kfd_dbg_trap_set_flags(target, &flags); > + kfd_dbg_trap_set_flags(target, &set_flags); > } > > for (i = 0; i < target->n_pdds; i++) { > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h > b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h > index 894753818cba..34d27eb500a5 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h > @@ -62,7 +62,8 @@ int kfd_dbg_trap_set_dev_address_watch(struct > kfd_process_device *pdd, > uint32_t watch_address_mask, > uint32_t *watch_id, > uint32_t watch_mode); > -int kfd_dbg_trap_set_flags(struct kfd_process *target, uint32_t *flags); > +int kfd_dbg_trap_set_flags(struct kfd_process *target, > + struct kfd_ioctl_dbg_trap_set_flags_args *set_flags); > int kfd_dbg_trap_query_exception_info(struct kfd_process *target, > uint32_t source_id, > uint32_t exception_code, > diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h > index e9b756ca228c..0522fe7344e4 100644 > --- a/include/uapi/linux/kfd_ioctl.h > +++ b/include/uapi/linux/kfd_ioctl.h > @@ -1515,6 +1515,7 @@ struct > kfd_ioctl_dbg_trap_clear_node_address_watch_args { > * Sets flags for wave behaviour. > * > * @flags (IN/OUT) - IN = flags to enable, OUT = flags previously enabled > + * @mask (IN) - IN = specified debug trap operation bits on flag > * > * Generic errors apply (see kfd_dbg_trap_operations). > * Return - 0 on SUCCESS. > @@ -1522,7 +1523,7 @@ struct > kfd_ioctl_dbg_trap_clear_node_address_watch_args { > */ > struct kfd_ioctl_dbg_trap_set_flags_args { > __u32 flags; > - __u32 pad; > + __u32 mask; > }; > > /** > -- > 2.34.1
