On 9/25/2025 5:41 AM, Kuehling, Felix wrote:
> On 2025-09-23 03:25, Zhu Lingshan wrote:
>> This commit introduces a new id field for
>> struct kfd_process, which helps identify
>> a kfd process among multiple contexts that
>> all belong to a single user space program.
>>
>> The sysfs entry of a secondary kfd process
>> is placed under the sysfs entry folder of
>> its primary kfd process.
>>
>> The naming format of the sysfs entry of a secondary
>> kfd process is "context_%u" where %u is the process id.
>>
>> Signed-off-by: Zhu Lingshan <[email protected]>
>> Reviewed-by: Felix Kuehling <[email protected]>
>> ---
>> drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 6 ++
>> drivers/gpu/drm/amd/amdkfd/kfd_process.c | 83 +++++++++++++++++++++++-
>> 2 files changed, 86 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> index d1436f1f527d..d140463e221b 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> @@ -998,6 +998,9 @@ struct kfd_process {
>> /* Tracks debug per-vmid request for debug flags */
>> u32 dbg_flags;
>> + /* kfd process id */
>> + u16 id;
>
> Can this subsume the "primary" flag? E.g. process->id == 0 could mean
> "primary context", and all the secondary contexts could have non-0 IDs.
I will remove the primary flag and try to use process->id to identify the
contexts instead. However, 0 is then no longer a good value for the
process->id of the primary kfd context, because 0 is the default-initialized
value for all kfd contexts. I will assign a different default value, 0xFFFF,
to the primary kfd context.
This change will affect some other patches in this series, so I will remove
their "Reviewed-by" tags.
Thanks
Lingshan
>
> Regards,
> Felix
>
>
>> +
>> atomic_t poison;
>> /* Queues are in paused stated because we are in the process of
>> doing a CRIU checkpoint */
>> bool queues_paused;
>> @@ -1012,6 +1015,9 @@ struct kfd_process {
>> /* indicating whether this is a primary kfd_process */
>> bool primary;
>> +
>> + /* The primary kfd_process allocating IDs for its secondary
>> kfd_process, 0 for primary kfd_process */
>> + struct ida id_table;
>> };
>> #define KFD_PROCESS_TABLE_SIZE 8 /* bits: 256 entries */
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>> b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>> index 5d59a4d994d5..8e498fd35b8c 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>> @@ -54,6 +54,9 @@ DEFINE_MUTEX(kfd_processes_mutex);
>> DEFINE_SRCU(kfd_processes_srcu);
>> +#define KFD_PROCESS_ID_MIN 1
>> +#define KFD_PROCESS_ID_WIDTH 16
>> +
>> /* For process termination handling */
>> static struct workqueue_struct *kfd_process_wq;
>> @@ -827,6 +830,7 @@ static void
>> kfd_process_device_destroy_ib_mem(struct kfd_process_device *pdd)
>> int kfd_create_process_sysfs(struct kfd_process *process)
>> {
>> + struct kfd_process *primary_process;
>> int ret;
>> if (process->kobj) {
>> @@ -839,9 +843,22 @@ int kfd_create_process_sysfs(struct kfd_process
>> *process)
>> pr_warn("Creating procfs kobject failed");
>> return -ENOMEM;
>> }
>> - ret = kobject_init_and_add(process->kobj, &procfs_type,
>> - procfs.kobj, "%d",
>> - (int)process->lead_thread->pid);
>> +
>> + if (process->primary)
>> + ret = kobject_init_and_add(process->kobj, &procfs_type,
>> + procfs.kobj, "%d",
>> + (int)process->lead_thread->pid);
>> + else {
>> + primary_process =
>> kfd_lookup_process_by_mm(process->lead_thread->mm);
>> + if (!primary_process)
>> + return -ESRCH;
>> +
>> + ret = kobject_init_and_add(process->kobj, &procfs_type,
>> + primary_process->kobj, "context_%u",
>> + process->id);
>> + kfd_unref_process(primary_process);
>> + }
>> +
>> if (ret) {
>> pr_warn("Creating procfs pid directory failed");
>> kobject_put(process->kobj);
>> @@ -863,6 +880,51 @@ int kfd_create_process_sysfs(struct kfd_process
>> *process)
>> return 0;
>> }
>> +static int kfd_process_alloc_id(struct kfd_process *process)
>> +{
>> + int ret;
>> + struct kfd_process *primary_process;
>> +
>> + if (process->primary) {
>> + process->id = 0;
>> +
>> + return 0;
>> + }
>> +
>> + primary_process =
>> kfd_lookup_process_by_mm(process->lead_thread->mm);
>> + if (!primary_process)
>> + return -ESRCH;
>> +
>> + ret = ida_alloc_range(&primary_process->id_table,
>> KFD_PROCESS_ID_MIN,
>> + (1 << KFD_PROCESS_ID_WIDTH) - 1, GFP_KERNEL);
>> + if (ret < 0)
>> + goto out;
>> +
>> + process->id = ret;
>> + ret = 0;
>> +
>> +out:
>> + kfd_unref_process(primary_process);
>> +
>> + return ret;
>> +}
>> +
>> +static void kfd_process_free_id(struct kfd_process *process)
>> +{
>> + struct kfd_process *primary_process;
>> +
>> + if (process->primary)
>> + return;
>> +
>> + primary_process =
>> kfd_lookup_process_by_mm(process->lead_thread->mm);
>> + if (!primary_process)
>> + return;
>> +
>> + ida_free(&primary_process->id_table, process->id);
>> +
>> + kfd_unref_process(primary_process);
>> +}
>> +
>> struct kfd_process *kfd_create_process(struct task_struct *thread)
>> {
>> struct kfd_process *process;
>> @@ -1193,6 +1255,11 @@ static void kfd_process_wq_release(struct
>> work_struct *work)
>> if (ef)
>> dma_fence_signal(ef);
>> + if (!p->primary)
>> + kfd_process_free_id(p);
>> + else
>> + ida_destroy(&p->id_table);
>> +
>> kfd_process_remove_sysfs(p);
>> kfd_debugfs_remove_process(p);
>> @@ -1549,6 +1616,12 @@ static struct kfd_process
>> *create_process(const struct task_struct *thread, bool
>> process->queues_paused = false;
>> process->primary = primary;
>> + err = kfd_process_alloc_id(process);
>> + if (err) {
>> + pr_err("Creating kfd process: failed to alloc an id\n");
>> + goto err_alloc_id;
>> + }
>> +
>> INIT_DELAYED_WORK(&process->eviction_work, evict_process_worker);
>> INIT_DELAYED_WORK(&process->restore_work, restore_process_worker);
>> process->last_restore_timestamp = get_jiffies_64();
>> @@ -1599,6 +1672,8 @@ static struct kfd_process *create_process(const
>> struct task_struct *thread, bool
>> goto err_register_notifier;
>> }
>> BUG_ON(mn != &process->mmu_notifier);
>> +
>> + ida_init(&process->id_table);
>> }
>> kfd_unref_process(process);
>> @@ -1619,6 +1694,8 @@ static struct kfd_process *create_process(const
>> struct task_struct *thread, bool
>> err_process_pqm_init:
>> kfd_event_free_process(process);
>> err_event_init:
>> + kfd_process_free_id(process);
>> +err_alloc_id:
>> mutex_destroy(&process->mutex);
>> kfree(process);
>> err_alloc_process: