Introduce a per-drm_file eventfd manager to support render-node event subscriptions.
The manager is implemented in amdgpu_eventfd.[ch] and is owned by the drm_file (amdgpu_fpriv). It maps event_id -> eventfd_id object, where each eventfd_id can have multiple eventfds bound (fan-out). The design is IRQ-safe for signaling: IRQ path takes the xarray lock (irqsave) and signals eventfds while still holding the lock. This patch only adds the core manager and wires its lifetime into open/postclose. Cc: Harish Kasiviswanathan <[email protected]> Cc: Felix Kuehling <[email protected]> Cc: Alex Deucher <[email protected]> Suggested-by: Christian König <[email protected]> Signed-off-by: Srinivasan Shanmugam <[email protected]> Change-Id: Iac025dcb7c1b4f9ed74ac4190085e1925e2c8e68 --- drivers/gpu/drm/amd/amdgpu/Makefile | 3 +- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 5 + drivers/gpu/drm/amd/amdgpu/amdgpu_eventfd.c | 321 ++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_eventfd.h | 76 +++++ drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 28 +- 5 files changed, 430 insertions(+), 3 deletions(-) create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_eventfd.c create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_eventfd.h diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 006d49d6b4af..30b1cf3c6cdf 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -67,7 +67,8 @@ amdgpu-y += amdgpu_device.o amdgpu_reg_access.o amdgpu_doorbell_mgr.o amdgpu_kms amdgpu_fw_attestation.o amdgpu_securedisplay.o \ amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \ amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o amdgpu_dev_coredump.o \ - amdgpu_cper.o amdgpu_userq_fence.o amdgpu_eviction_fence.o amdgpu_ip.o + amdgpu_cper.o amdgpu_userq_fence.o amdgpu_eviction_fence.o amdgpu_ip.o \ + amdgpu_eventfd.o amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 1e71a03c8bba..9e650b3707e3 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -44,6 +44,7 @@ #include <linux/hashtable.h> #include <linux/dma-fence.h> #include <linux/pci.h> +#include <linux/xarray.h> #include <drm/ttm/ttm_bo.h> #include <drm/ttm/ttm_placement.h> @@ -434,6 +435,8 @@ struct amdgpu_flip_work { bool async; }; +struct amdgpu_eventfd_mgr; + /* * file private structure */ @@ -453,6 +456,8 @@ struct amdgpu_fpriv { /** GPU partition selection */ uint32_t xcp_id; + + struct amdgpu_eventfd_mgr *eventfd_mgr; }; int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eventfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_eventfd.c new file mode 100644 index 000000000000..15ffb74c1de3 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eventfd.c @@ -0,0 +1,321 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2026 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/*
+ * Render-node eventfd subscription infrastructure.
+ */
+
+#include <linux/slab.h>
+#include <linux/err.h>
+
+#include "amdgpu_eventfd.h"
+
+/*
+ * Final teardown when the last reference is dropped: empty the xarray and
+ * release every bound eventfd.  By contract no producer still holds a
+ * reference here, so no concurrent amdgpu_eventfd_signal() can be running.
+ *
+ * mgr->lock is also taken from the IRQ-side signal path, so every
+ * process-context acquisition must use the irqsave variant.
+ */
+static void amdgpu_eventfd_mgr_release(struct kref *ref)
+{
+	struct amdgpu_eventfd_mgr *mgr =
+		container_of(ref, struct amdgpu_eventfd_mgr, refcount);
+	unsigned long index;
+	unsigned long flags;
+	struct amdgpu_eventfd_id *id;
+
+	/* Mark closing so any late caller bails out early. */
+	spin_lock_irqsave(&mgr->lock, flags);
+	mgr->fd_closing = true;
+	spin_unlock_irqrestore(&mgr->lock, flags);
+
+	xa_lock(&mgr->ids);
+	xa_for_each(&mgr->ids, index, id) {
+		struct amdgpu_eventfd_entry *e;
+		struct hlist_node *tmp;
+
+		/* xarray lock already held: __xa_erase(), not xa_erase(). */
+		__xa_erase(&mgr->ids, index);
+
+		spin_lock(&id->lock);
+		hlist_for_each_entry_safe(e, tmp, &id->entries, hnode) {
+			hlist_del(&e->hnode);
+			id->n_entries--;
+			if (e->ctx)
+				eventfd_ctx_put(e->ctx);
+			kfree(e);
+		}
+		spin_unlock(&id->lock);
+		kfree(id);
+	}
+	xa_unlock(&mgr->ids);
+
+	xa_destroy(&mgr->ids);
+	kfree(mgr);
+}
+
+/**
+ * amdgpu_eventfd_mgr_alloc - allocate and initialise an eventfd manager
+ *
+ * Return: a new manager with refcount 1, or NULL on allocation failure.
+ */
+struct amdgpu_eventfd_mgr *amdgpu_eventfd_mgr_alloc(void)
+{
+	struct amdgpu_eventfd_mgr *mgr;
+
+	mgr = kzalloc(sizeof(*mgr), GFP_KERNEL);
+	if (!mgr)
+		return NULL;
+
+	kref_init(&mgr->refcount);
+	xa_init(&mgr->ids);
+	spin_lock_init(&mgr->lock);
+	mgr->bind_count = 0;
+	mgr->fd_closing = false;
+
+	return mgr;
+}
+
+/* Take an extra reference; tolerates a NULL manager. */
+void amdgpu_eventfd_mgr_get(struct amdgpu_eventfd_mgr *mgr)
+{
+	if (mgr)
+		kref_get(&mgr->refcount);
+}
+
+/* Drop a reference; frees the manager when the last one goes away. */
+void amdgpu_eventfd_mgr_put(struct amdgpu_eventfd_mgr *mgr)
+{
+	if (mgr)
+		kref_put(&mgr->refcount, amdgpu_eventfd_mgr_release);
+}
+
+/*
+ * Flag the manager as closing so new binds are refused and the signal
+ * fast path becomes a no-op.  irqsave is required: mgr->lock is also
+ * taken from the IRQ signal path, and holding it here with interrupts
+ * enabled could deadlock against that path on the same CPU.
+ */
+void amdgpu_eventfd_mgr_mark_closing(struct amdgpu_eventfd_mgr *mgr)
+{
+	unsigned long flags;
+
+	if (!mgr)
+		return;
+
+	spin_lock_irqsave(&mgr->lock, flags);
+	mgr->fd_closing = true;
+	spin_unlock_irqrestore(&mgr->lock, flags);
+}
+
+static struct amdgpu_eventfd_id *amdgpu_eventfd_id_alloc(u32 event_id)
+{
+	struct amdgpu_eventfd_id *id;
+
+	id = kzalloc(sizeof(*id), GFP_KERNEL);
+	if (!id)
+		return NULL;
+
+	id->event_id = event_id;
+	spin_lock_init(&id->lock);
+	INIT_HLIST_HEAD(&id->entries);
+	id->n_entries = 0;
+	return id;
+}
+
+/**
+ * amdgpu_eventfd_bind - bind an eventfd to an event id
+ * @mgr: the per-file eventfd manager
+ * @event_id: non-zero event identifier
+ * @eventfd: eventfd file descriptor to bind
+ *
+ * Binding the same @eventfd to the same @event_id twice is a no-op that
+ * returns 0.
+ *
+ * Return: 0 on success, -EINVAL on bad arguments, -EBADF if the file is
+ * closing, -ENOMEM on allocation failure, -ENOSPC if the per-file bind
+ * limit is reached.
+ */
+int amdgpu_eventfd_bind(struct amdgpu_eventfd_mgr *mgr, u32 event_id, int eventfd)
+{
+	struct amdgpu_eventfd_id *id, *new_id = NULL;
+	struct amdgpu_eventfd_entry *e;
+	struct eventfd_ctx *ctx;
+	unsigned long flags;
+	bool found = false;
+
+	if (!mgr || !event_id || eventfd < 0)
+		return -EINVAL;
+
+	/*
+	 * Refuse new binds once the file is closing.  irqsave: mgr->lock is
+	 * also taken from the IRQ signal path.
+	 */
+	spin_lock_irqsave(&mgr->lock, flags);
+	if (mgr->fd_closing) {
+		spin_unlock_irqrestore(&mgr->lock, flags);
+		return -EBADF;
+	}
+	spin_unlock_irqrestore(&mgr->lock, flags);
+
+	ctx = eventfd_ctx_fdget(eventfd);
+	if (IS_ERR(ctx))
+		return PTR_ERR(ctx);
+
+	e = kzalloc(sizeof(*e), GFP_KERNEL);
+	if (!e) {
+		eventfd_ctx_put(ctx);
+		return -ENOMEM;
+	}
+	e->ctx = ctx;
+	e->eventfd = eventfd;
+
+	/*
+	 * Pre-allocate the id object with GFP_KERNEL before taking any
+	 * spinlock: sleeping allocations are not allowed with irqs disabled.
+	 */
+	new_id = amdgpu_eventfd_id_alloc(event_id);
+
+	/*
+	 * IRQ-safe design:
+	 * - id lookup + insertion are protected by the xarray lock (irqsave)
+	 * - the entry list is protected by id->lock
+	 */
+	xa_lock_irqsave(&mgr->ids, flags);
+
+	/*
+	 * Enforce the per-file bind limit before inserting a new id object,
+	 * so a failed bind never strands an empty id in the tree.
+	 */
+	spin_lock(&mgr->lock);
+	if (mgr->bind_count >= AMDGPU_EVENTFD_MAX_BINDS) {
+		spin_unlock(&mgr->lock);
+		xa_unlock_irqrestore(&mgr->ids, flags);
+		kfree(new_id);
+		eventfd_ctx_put(ctx);
+		kfree(e);
+		return -ENOSPC;
+	}
+	spin_unlock(&mgr->lock);
+
+	id = xa_load(&mgr->ids, event_id);
+	if (!id) {
+		if (!new_id) {
+			xa_unlock_irqrestore(&mgr->ids, flags);
+			eventfd_ctx_put(ctx);
+			kfree(e);
+			return -ENOMEM;
+		}
+		/*
+		 * The xarray lock is already held, so the unlocked
+		 * xa_store() would self-deadlock: use __xa_store().
+		 */
+		if (xa_err(__xa_store(&mgr->ids, event_id, new_id, GFP_NOWAIT))) {
+			xa_unlock_irqrestore(&mgr->ids, flags);
+			kfree(new_id);
+			eventfd_ctx_put(ctx);
+			kfree(e);
+			return -ENOMEM;
+		}
+		id = new_id;
+		new_id = NULL;
+	}
+
+	spin_lock(&id->lock);
+	{
+		struct amdgpu_eventfd_entry *it;
+
+		hlist_for_each_entry(it, &id->entries, hnode) {
+			if (it->eventfd == eventfd) {
+				found = true;
+				break;
+			}
+		}
+		if (!found) {
+			hlist_add_head(&e->hnode, &id->entries);
+			id->n_entries++;
+		}
+	}
+	spin_unlock(&id->lock);
+
+	if (!found) {
+		spin_lock(&mgr->lock);
+		mgr->bind_count++;
+		spin_unlock(&mgr->lock);
+	}
+
+	xa_unlock_irqrestore(&mgr->ids, flags);
+
+	/* Drop the pre-allocated id if the event_id already existed. */
+	kfree(new_id);
+
+	/* Already bound: keep the existing entry, drop the new reference. */
+	if (found) {
+		eventfd_ctx_put(ctx);
+		kfree(e);
+	}
+	return 0;
+}
+
+/**
+ * amdgpu_eventfd_unbind - remove one eventfd binding from an event id
+ * @mgr: the per-file eventfd manager
+ * @event_id: non-zero event identifier
+ * @eventfd: the bound eventfd file descriptor
+ *
+ * Return: 0 if the binding was removed, -EINVAL on bad arguments,
+ * -ENOENT if no such binding exists.
+ */
+int amdgpu_eventfd_unbind(struct amdgpu_eventfd_mgr *mgr, u32 event_id, int eventfd)
+{
+	struct amdgpu_eventfd_id *id;
+	struct amdgpu_eventfd_entry *e;
+	struct hlist_node *tmp;
+	unsigned long flags;
+	bool removed = false;
+
+	if (!mgr || !event_id || eventfd < 0)
+		return -EINVAL;
+
+	xa_lock_irqsave(&mgr->ids, flags);
+	id = xa_load(&mgr->ids, event_id);
+	if (!id) {
+		xa_unlock_irqrestore(&mgr->ids, flags);
+		return -ENOENT;
+	}
+
+	spin_lock(&id->lock);
+	hlist_for_each_entry_safe(e, tmp, &id->entries, hnode) {
+		if (e->eventfd == eventfd) {
+			hlist_del(&e->hnode);
+			id->n_entries--;
+			removed = true;
+			if (e->ctx)
+				eventfd_ctx_put(e->ctx);
+			kfree(e);
+			break;
+		}
+	}
+	spin_unlock(&id->lock);
+
+	if (removed) {
+		spin_lock(&mgr->lock);
+		if (mgr->bind_count)
+			mgr->bind_count--;
+		spin_unlock(&mgr->lock);
+	}
+
+	/*
+	 * Drop the id object once its last eventfd is gone.  The xarray
+	 * lock is already held, so xa_erase() would self-deadlock: use
+	 * __xa_erase().
+	 */
+	if (removed && id->n_entries == 0) {
+		__xa_erase(&mgr->ids, event_id);
+		kfree(id);
+	}
+
+	xa_unlock_irqrestore(&mgr->ids, flags);
+
+	return removed ?
0 : -ENOENT;
+}
+
+/**
+ * amdgpu_eventfd_signal - signal every eventfd bound to an event id
+ * @mgr: the per-file eventfd manager (may be NULL; then a no-op)
+ * @event_id: non-zero event identifier
+ * @count: must be non-zero; NOTE(review): beyond the zero check, @count is
+ *         not otherwise used -- each bound eventfd is signalled exactly
+ *         once per call.  Confirm this matches the intended semantics.
+ *
+ * Safe to call from IRQ context.  The xarray stays locked while
+ * signaling so the id object cannot be torn down underneath us;
+ * eventfd_signal() itself is IRQ-safe.
+ */
+void amdgpu_eventfd_signal(struct amdgpu_eventfd_mgr *mgr, u32 event_id, u64 count)
+{
+	struct amdgpu_eventfd_id *id;
+	struct amdgpu_eventfd_entry *e;
+	unsigned long flags;
+	bool closing;
+
+	if (!mgr || !event_id || !count)
+		return;
+
+	/*
+	 * Best-effort fast path: skip work once the file is closing.
+	 * irqsave so this is safe regardless of the caller's context.
+	 */
+	spin_lock_irqsave(&mgr->lock, flags);
+	closing = mgr->fd_closing;
+	spin_unlock_irqrestore(&mgr->lock, flags);
+	if (closing)
+		return;
+
+	xa_lock_irqsave(&mgr->ids, flags);
+	id = xa_load(&mgr->ids, event_id);
+	if (!id) {
+		xa_unlock_irqrestore(&mgr->ids, flags);
+		return;
+	}
+
+	spin_lock(&id->lock);
+	hlist_for_each_entry(e, &id->entries, hnode) {
+		if (e->ctx)
+			eventfd_signal(e->ctx);
+	}
+	spin_unlock(&id->lock);
+
+	xa_unlock_irqrestore(&mgr->ids, flags);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eventfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_eventfd.h
new file mode 100644
index 000000000000..542a1d65e13b
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eventfd.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2026 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/*
+ * Render-node eventfd subscription infrastructure.
+ */
+
+#ifndef __AMDGPU_EVENTFD_H__
+#define __AMDGPU_EVENTFD_H__
+
+#include <linux/kref.h>
+#include <linux/spinlock.h>
+#include <linux/xarray.h>
+#include <linux/eventfd.h>
+
+/*
+ * Manager lifetime:
+ * - owned by drm_file (amdgpu_fpriv)
+ * - can also be referenced by longer-lived producer objects (e.g. USERQ fence
+ *   driver) via kref.
+ *
+ * Locking: mgr->lock is taken from the IRQ signal path; process-context
+ * holders must therefore use spin_lock_irqsave().
+ */
+struct amdgpu_eventfd_mgr {
+	struct kref refcount;
+	struct xarray ids;	/* key: event_id(u32) -> struct amdgpu_eventfd_id* */
+	spinlock_t lock;	/* protects bind_count + fd_closing */
+	u32 bind_count;
+	bool fd_closing;
+};
+
+struct amdgpu_eventfd_entry {
+	struct hlist_node hnode;
+	struct eventfd_ctx *ctx;
+	int eventfd;
+};
+
+struct amdgpu_eventfd_id {
+	u32 event_id;
+	spinlock_t lock;		/* protects entries */
+	struct hlist_head entries;	/* struct amdgpu_eventfd_entry */
+	u32 n_entries;
+};
+
+/* Per-file cap to align with common KFD-style event-id space and avoid abuse */
+#define AMDGPU_EVENTFD_MAX_BINDS	4096U
+
+struct amdgpu_eventfd_mgr *amdgpu_eventfd_mgr_alloc(void);
+void amdgpu_eventfd_mgr_get(struct amdgpu_eventfd_mgr *mgr);
+void amdgpu_eventfd_mgr_put(struct amdgpu_eventfd_mgr *mgr);
+void amdgpu_eventfd_mgr_mark_closing(struct amdgpu_eventfd_mgr *mgr);
+
+int amdgpu_eventfd_bind(struct amdgpu_eventfd_mgr *mgr, u32 event_id, int eventfd);
+int amdgpu_eventfd_unbind(struct amdgpu_eventfd_mgr *mgr, u32 event_id, int eventfd);
+void amdgpu_eventfd_signal(struct amdgpu_eventfd_mgr *mgr, u32 event_id, u64 count);
+
+#endif /* __AMDGPU_EVENTFD_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index f69332eed051..8ab8f9dc4cfa
100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -42,6 +42,7 @@
 #include "amdgpu_amdkfd.h"
 #include "amdgpu_gem.h"
 #include "amdgpu_display.h"
+#include "amdgpu_eventfd.h"
 #include "amdgpu_ras.h"
 #include "amdgpu_reset.h"
 #include "amd_pcie.h"
@@ -1469,10 +1470,16 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
 		goto out_suspend;
 	}
 
+	fpriv->eventfd_mgr = amdgpu_eventfd_mgr_alloc();
+	if (!fpriv->eventfd_mgr) {
+		r = -ENOMEM;
+		goto free_fpriv;
+	}
+
 	pasid = amdgpu_pasid_alloc(16);
 	if (pasid < 0) {
 		dev_warn(adev->dev, "No more PASIDs available!");
 		pasid = 0;
 	}
 
 	r = amdgpu_xcp_open_device(adev, fpriv, file_priv);
@@ -1538,6 +1545,12 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
 	if (pasid)
 		amdgpu_pasid_free(pasid);
 
+free_fpriv:
+	if (fpriv && fpriv->eventfd_mgr) {
+		amdgpu_eventfd_mgr_put(fpriv->eventfd_mgr);
+		fpriv->eventfd_mgr = NULL;
+	}
+
 	kfree(fpriv);
 
 out_suspend:
@@ -1568,6 +1581,7 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
 	if (!fpriv)
 		return;
 
+	amdgpu_eventfd_mgr_mark_closing(fpriv->eventfd_mgr);
 	pm_runtime_get_sync(dev->dev);
 
 	if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_UVD) != NULL)
@@ -1605,10 +1619,19 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
 	idr_destroy(&fpriv->bo_list_handles);
 	mutex_destroy(&fpriv->bo_list_lock);
 
+	pm_runtime_put_autosuspend(dev->dev);
+
+	/*
+	 * Drop the eventfd manager BEFORE freeing fpriv.
+	 * (mgr can be refcounted by longer-lived producers)
+	 */
+	if (fpriv->eventfd_mgr) {
+		amdgpu_eventfd_mgr_put(fpriv->eventfd_mgr);
+		fpriv->eventfd_mgr = NULL;
+	}
+
 	kfree(fpriv);
 	file_priv->driver_priv = NULL;
-
-	pm_runtime_put_autosuspend(dev->dev);
 }
-- 
2.34.1
