Signal slots are identical to event IDs.

Replace the used_slot_bitmap and events hash table with an IDR to
allocate and lookup event IDs and signal slots more efficiently.

Signed-off-by: Felix Kuehling <felix.kuehl...@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_events.c | 230 ++++++++++----------------------
 drivers/gpu/drm/amd/amdkfd/kfd_events.h |  14 +-
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h   |   6 +-
 3 files changed, 80 insertions(+), 170 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index f800e48..cddd4b9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -41,24 +41,16 @@ struct kfd_event_waiter {
        bool activated;          /* Becomes true when event is signaled */
 };
 
-#define SLOTS_PER_PAGE KFD_SIGNAL_EVENT_LIMIT
-#define SLOT_BITMAP_LONGS BITS_TO_LONGS(SLOTS_PER_PAGE)
-
 /*
- * Over-complicated pooled allocator for event notification slots.
- *
  * Each signal event needs a 64-bit signal slot where the signaler will write
- * a 1 before sending an interrupt.l (This is needed because some interrupts
+ * a 1 before sending an interrupt. (This is needed because some interrupts
  * do not contain enough spare data bits to identify an event.)
- * We get whole pages from vmalloc and map them to the process VA.
- * Individual signal events are then allocated a slot in a page.
+ * We get whole pages and map them to the process VA.
+ * Individual signal events use their event_id as slot index.
  */
-
 struct kfd_signal_page {
        uint64_t *kernel_address;
        uint64_t __user *user_address;
-       unsigned int free_slots;
-       unsigned long used_slot_bitmap[SLOT_BITMAP_LONGS];
 };
 
 /*
@@ -73,34 +65,6 @@ static uint64_t *page_slots(struct kfd_signal_page *page)
        return page->kernel_address;
 }
 
-static bool allocate_free_slot(struct kfd_process *process,
-                              unsigned int *out_slot_index)
-{
-       struct kfd_signal_page *page = process->signal_page;
-       unsigned int slot;
-
-       if (!page || page->free_slots == 0) {
-               pr_debug("No free event signal slots were found for process 
%p\n",
-                        process);
-
-               return false;
-       }
-
-       slot = find_first_zero_bit(page->used_slot_bitmap, SLOTS_PER_PAGE);
-
-       __set_bit(slot, page->used_slot_bitmap);
-       page->free_slots--;
-
-       page_slots(page)[slot] = UNSIGNALED_EVENT_SLOT;
-
-       *out_slot_index = slot;
-
-       pr_debug("Allocated event signal slot in page %p, slot %d\n",
-                page, slot);
-
-       return true;
-}
-
 static struct kfd_signal_page *allocate_signal_page(struct kfd_process *p)
 {
        void *backing_store;
@@ -110,8 +74,6 @@ static struct kfd_signal_page *allocate_signal_page(struct 
kfd_process *p)
        if (!page)
                return NULL;
 
-       page->free_slots = SLOTS_PER_PAGE;
-
        backing_store = (void *) __get_free_pages(GFP_KERNEL,
                                        get_order(KFD_SIGNAL_EVENT_LIMIT * 8));
        if (!backing_store)
@@ -132,28 +94,26 @@ static struct kfd_signal_page *allocate_signal_page(struct 
kfd_process *p)
        return NULL;
 }
 
-static bool allocate_event_notification_slot(struct kfd_process *p,
-                                            unsigned int *signal_slot_index)
+static int allocate_event_notification_slot(struct kfd_process *p,
+                                           struct kfd_event *ev)
 {
+       int id;
+
        if (!p->signal_page) {
                p->signal_page = allocate_signal_page(p);
                if (!p->signal_page)
-                       return false;
+                       return -ENOMEM;
        }
 
-       return allocate_free_slot(p, signal_slot_index);
-}
+       id = idr_alloc(&p->event_idr, ev, 0, KFD_SIGNAL_EVENT_LIMIT,
+                      GFP_KERNEL);
+       if (id < 0)
+               return id;
 
-/* Assumes that the process's event_mutex is locked. */
-static void release_event_notification_slot(struct kfd_signal_page *page,
-                                               size_t slot_index)
-{
-       __clear_bit(slot_index, page->used_slot_bitmap);
-       page->free_slots++;
+       ev->event_id = id;
+       page_slots(p->signal_page)[id] = UNSIGNALED_EVENT_SLOT;
 
-       /* We don't free signal pages, they are retained by the process
-        * and reused until it exits.
-        */
+       return 0;
 }
 
 /*
@@ -162,89 +122,32 @@ static void release_event_notification_slot(struct 
kfd_signal_page *page,
  */
 static struct kfd_event *lookup_event_by_id(struct kfd_process *p, uint32_t id)
 {
-       struct kfd_event *ev;
-
-       hash_for_each_possible(p->events, ev, events, id)
-               if (ev->event_id == id)
-                       return ev;
-
-       return NULL;
-}
-
-/*
- * Produce a kfd event id for a nonsignal event.
- * These are arbitrary numbers, so we do a sequential search through
- * the hash table for an unused number.
- */
-static u32 make_nonsignal_event_id(struct kfd_process *p)
-{
-       u32 id;
-
-       for (id = p->next_nonsignal_event_id;
-               id < KFD_LAST_NONSIGNAL_EVENT_ID &&
-               lookup_event_by_id(p, id);
-               id++)
-               ;
-
-       if (id < KFD_LAST_NONSIGNAL_EVENT_ID) {
-
-               /*
-                * What if id == LAST_NONSIGNAL_EVENT_ID - 1?
-                * Then next_nonsignal_event_id = LAST_NONSIGNAL_EVENT_ID so
-                * the first loop fails immediately and we proceed with the
-                * wraparound loop below.
-                */
-               p->next_nonsignal_event_id = id + 1;
-
-               return id;
-       }
-
-       for (id = KFD_FIRST_NONSIGNAL_EVENT_ID;
-               id < KFD_LAST_NONSIGNAL_EVENT_ID &&
-               lookup_event_by_id(p, id);
-               id++)
-               ;
-
-
-       if (id < KFD_LAST_NONSIGNAL_EVENT_ID) {
-               p->next_nonsignal_event_id = id + 1;
-               return id;
-       }
-
-       p->next_nonsignal_event_id = KFD_FIRST_NONSIGNAL_EVENT_ID;
-       return 0;
-}
-
-static struct kfd_event *lookup_event_by_page_slot(struct kfd_process *p,
-                                               unsigned int signal_slot)
-{
-       return lookup_event_by_id(p, signal_slot);
+       return idr_find(&p->event_idr, id);
 }
 
 static int create_signal_event(struct file *devkfd,
                                struct kfd_process *p,
                                struct kfd_event *ev)
 {
+       int ret;
+
        if (p->signal_event_count == KFD_SIGNAL_EVENT_LIMIT) {
                if (!p->signal_event_limit_reached) {
                        pr_warn("Signal event wasn't created because limit was 
reached\n");
                        p->signal_event_limit_reached = true;
                }
-               return -ENOMEM;
+               return -ENOSPC;
        }
 
-       if (!allocate_event_notification_slot(p, &ev->signal_slot_index)) {
+       ret = allocate_event_notification_slot(p, ev);
+       if (ret) {
                pr_warn("Signal event wasn't created because out of kernel 
memory\n");
-               return -ENOMEM;
+               return ret;
        }
 
        p->signal_event_count++;
 
-       ev->user_signal_address =
-                       &p->signal_page->user_address[ev->signal_slot_index];
-
-       ev->event_id = ev->signal_slot_index;
-
+       ev->user_signal_address = &p->signal_page->user_address[ev->event_id];
        pr_debug("Signal event number %zu created with id %d, address %p\n",
                        p->signal_event_count, ev->event_id,
                        ev->user_signal_address);
@@ -252,16 +155,20 @@ static int create_signal_event(struct file *devkfd,
        return 0;
 }
 
-/*
- * No non-signal events are supported yet.
- * We create them as events that never signal.
- * Set event calls from user-mode are failed.
- */
 static int create_other_event(struct kfd_process *p, struct kfd_event *ev)
 {
-       ev->event_id = make_nonsignal_event_id(p);
-       if (ev->event_id == 0)
-               return -ENOMEM;
+       /* Cast KFD_LAST_NONSIGNAL_EVENT to uint32_t. This allows an
+        * intentional integer overflow to -1 without a compiler
+        * warning. idr_alloc treats a negative value as "maximum
+        * signed integer".
+        */
+       int id = idr_alloc(&p->event_idr, ev, KFD_FIRST_NONSIGNAL_EVENT_ID,
+                          (uint32_t)KFD_LAST_NONSIGNAL_EVENT_ID + 1,
+                          GFP_KERNEL);
+
+       if (id < 0)
+               return id;
+       ev->event_id = id;
 
        return 0;
 }
@@ -269,9 +176,8 @@ static int create_other_event(struct kfd_process *p, struct 
kfd_event *ev)
 void kfd_event_init_process(struct kfd_process *p)
 {
        mutex_init(&p->event_mutex);
-       hash_init(p->events);
+       idr_init(&p->event_idr);
        p->signal_page = NULL;
-       p->next_nonsignal_event_id = KFD_FIRST_NONSIGNAL_EVENT_ID;
        p->signal_event_count = 0;
 }
 
@@ -284,25 +190,22 @@ static void destroy_event(struct kfd_process *p, struct 
kfd_event *ev)
                waiter->event = NULL;
        wake_up_all(&ev->wq);
 
-       if ((ev->type == KFD_EVENT_TYPE_SIGNAL ||
-            ev->type == KFD_EVENT_TYPE_DEBUG) && p->signal_page) {
-               release_event_notification_slot(p->signal_page,
-                                               ev->signal_slot_index);
+       if (ev->type == KFD_EVENT_TYPE_SIGNAL ||
+           ev->type == KFD_EVENT_TYPE_DEBUG)
                p->signal_event_count--;
-       }
 
-       hash_del(&ev->events);
+       idr_remove(&p->event_idr, ev->event_id);
        kfree(ev);
 }
 
 static void destroy_events(struct kfd_process *p)
 {
        struct kfd_event *ev;
-       struct hlist_node *tmp;
-       unsigned int hash_bkt;
+       uint32_t id;
 
-       hash_for_each_safe(p->events, hash_bkt, tmp, ev, events)
+       idr_for_each_entry(&p->event_idr, ev, id)
                destroy_event(p, ev);
+       idr_destroy(&p->event_idr);
 }
 
 /*
@@ -365,7 +268,7 @@ int kfd_event_create(struct file *devkfd, struct 
kfd_process *p,
                if (!ret) {
                        *event_page_offset = KFD_MMAP_EVENTS_MASK;
                        *event_page_offset <<= PAGE_SHIFT;
-                       *event_slot_index = ev->signal_slot_index;
+                       *event_slot_index = ev->event_id;
                }
                break;
        default:
@@ -374,8 +277,6 @@ int kfd_event_create(struct file *devkfd, struct 
kfd_process *p,
        }
 
        if (!ret) {
-               hash_add(p->events, &ev->events, ev->event_id);
-
                *event_id = ev->event_id;
                *event_trigger_data = ev->event_id;
        } else {
@@ -465,17 +366,7 @@ int kfd_reset_event(struct kfd_process *p, uint32_t 
event_id)
 
 static void acknowledge_signal(struct kfd_process *p, struct kfd_event *ev)
 {
-       page_slots(p->signal_page)[ev->signal_slot_index] =
-                                               UNSIGNALED_EVENT_SLOT;
-}
-
-static bool is_slot_signaled(struct kfd_process *p, unsigned int index)
-{
-       if (!p->signal_page)
-               return false;
-       else
-               return page_slots(p->signal_page)[index] !=
-                       UNSIGNALED_EVENT_SLOT;
+       page_slots(p->signal_page)[ev->event_id] = UNSIGNALED_EVENT_SLOT;
 }
 
 static void set_event_from_interrupt(struct kfd_process *p,
@@ -514,13 +405,31 @@ void kfd_signal_event_interrupt(unsigned int pasid, 
uint32_t partial_id,
                 * ignore it, but we could use any bits we did receive to
                 * search faster.
                 */
-               unsigned int i;
+               uint64_t *slots = page_slots(p->signal_page);
+               uint32_t id;
+
+               if (p->signal_event_count < KFD_SIGNAL_EVENT_LIMIT/2) {
+                       /* With relatively few events, it's faster to
+                        * iterate over the event IDR
+                        */
+                       idr_for_each_entry(&p->event_idr, ev, id) {
+                               if (id >= KFD_SIGNAL_EVENT_LIMIT)
+                                       break;
 
-               for (i = 0; i < SLOTS_PER_PAGE; i++)
-                       if (is_slot_signaled(p, i)) {
-                               ev = lookup_event_by_page_slot(p, i);
-                               set_event_from_interrupt(p, ev);
+                               if (slots[id] != UNSIGNALED_EVENT_SLOT)
+                                       set_event_from_interrupt(p, ev);
                        }
+               } else {
+                       /* With relatively many events, it's faster to
+                        * iterate over the signal slots and lookup
+                        * only signaled events from the IDR.
+                        */
+                       for (id = 0; id < KFD_SIGNAL_EVENT_LIMIT; id++)
+                               if (slots[id] != UNSIGNALED_EVENT_SLOT) {
+                                       ev = lookup_event_by_id(p, id);
+                                       set_event_from_interrupt(p, ev);
+                               }
+               }
        }
 
        mutex_unlock(&p->event_mutex);
@@ -832,12 +741,13 @@ static void lookup_events_by_type_and_signal(struct 
kfd_process *p,
 {
        struct kfd_hsa_memory_exception_data *ev_data;
        struct kfd_event *ev;
-       int bkt;
+       uint32_t id;
        bool send_signal = true;
 
        ev_data = (struct kfd_hsa_memory_exception_data *) event_data;
 
-       hash_for_each(p->events, bkt, ev, events)
+       id = KFD_FIRST_NONSIGNAL_EVENT_ID;
+       idr_for_each_entry_continue(&p->event_idr, ev, id)
                if (ev->type == type) {
                        send_signal = false;
                        dev_dbg(kfd_device,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_events.h
index f85fcee..abca5bf 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.h
@@ -31,9 +31,13 @@
 #include "kfd_priv.h"
 #include <uapi/linux/kfd_ioctl.h>
 
-#define KFD_EVENT_ID_NONSIGNAL_MASK 0x80000000U
-#define KFD_FIRST_NONSIGNAL_EVENT_ID KFD_EVENT_ID_NONSIGNAL_MASK
-#define KFD_LAST_NONSIGNAL_EVENT_ID UINT_MAX
+/*
+ * IDR supports non-negative integer IDs. Small IDs are used for
+ * signal events to match their signal slot. Use the upper half of the
+ * ID space for non-signal events.
+ */
+#define KFD_FIRST_NONSIGNAL_EVENT_ID ((INT_MAX >> 1) + 1)
+#define KFD_LAST_NONSIGNAL_EVENT_ID INT_MAX
 
 /*
  * Written into kfd_signal_slot_t to indicate that the event is not signaled.
@@ -47,9 +51,6 @@ struct kfd_event_waiter;
 struct signal_page;
 
 struct kfd_event {
-       /* All events in process, rooted at kfd_process.events. */
-       struct hlist_node events;
-
        u32 event_id;
 
        bool signaled;
@@ -60,7 +61,6 @@ struct kfd_event {
        wait_queue_head_t wq; /* List of event waiters. */
 
        /* Only for signal events. */
-       unsigned int signal_slot_index;
        uint64_t __user *user_signal_address;
 
        /* type specific data */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index c1b3ee2..ebae8e1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -31,6 +31,7 @@
 #include <linux/workqueue.h>
 #include <linux/spinlock.h>
 #include <linux/kfd_ioctl.h>
+#include <linux/idr.h>
 #include <kgd_kfd_interface.h>
 
 #include "amd_shared.h"
@@ -538,11 +539,10 @@ struct kfd_process {
 
        /* Event-related data */
        struct mutex event_mutex;
-       /* All events in process hashed by ID, linked on kfd_event.events. */
-       DECLARE_HASHTABLE(events, 4);
+       /* Event ID allocator and lookup */
+       struct idr event_idr;
        /* Event page */
        struct kfd_signal_page *signal_page;
-       u32 next_nonsignal_event_id;
        size_t signal_event_count;
        bool signal_event_limit_reached;
 };
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Reply via email to