Problems with lib rte_malloc:
 1. Rte_malloc searches a heap's entire free list looking for
    the best fit, resulting in linear complexity.
 2. Heaps store free blocks in a singly-linked list, resulting
    in linear complexity when rte_free needs to remove an
    adjacent block.
 3. The library inserts and removes free blocks with ad hoc,
    in-line code, rather than using linked-list functions or
    macros.

This patch addresses those problems as follows:
 1. Replace single free list with a handful of free lists.
    Each free list contains blocks of a specified size range,
    for example:
      list[0]: (0   , 2^7]
      list[1]: (2^7 , 2^9]
      list[2]: (2^9 , 2^11]
      list[3]: (2^11, 2^13]
      list[4]: (2^13, MAX_SIZE]

    When allocating a block, start at the first list that can
    contain a big enough block. Search subsequent lists, if
    necessary. Terminate the search as soon as we find a block
    that is big enough.
 2. Use doubly-linked lists, so that we can remove free blocks
    in constant time.
 3. Use BSD LIST macros, as defined in sys/queue.h and the
    QUEUE(3) man page.

Signed-off-by: Robert Sanford <rsanford2 at gmail.com>
---
 lib/librte_eal/common/include/rte_malloc_heap.h |    6 +-
 lib/librte_malloc/malloc_elem.c                 |  121 +++++++++++++++--------
 lib/librte_malloc/malloc_elem.h                 |   17 +++-
 lib/librte_malloc/malloc_heap.c                 |   67 ++++++-------
 4 files changed, 128 insertions(+), 83 deletions(-)

diff --git a/lib/librte_eal/common/include/rte_malloc_heap.h 
b/lib/librte_eal/common/include/rte_malloc_heap.h
index 5e139cf..1f5d653 100644
--- a/lib/librte_eal/common/include/rte_malloc_heap.h
+++ b/lib/librte_eal/common/include/rte_malloc_heap.h
@@ -35,14 +35,18 @@
 #define _RTE_MALLOC_HEAP_H_

 #include <stddef.h>
+#include <sys/queue.h>
 #include <rte_spinlock.h>

+/* Number of free lists per heap, grouped by size. */
+#define RTE_HEAP_NUM_FREELISTS  5
+
 /**
  * Structure to hold malloc heap
  */
 struct malloc_heap {
        rte_spinlock_t lock;
-       struct malloc_elem * volatile free_head;
+       LIST_HEAD(, malloc_elem) free_head[RTE_HEAP_NUM_FREELISTS];
        unsigned mz_count;
        unsigned alloc_count;
        size_t total_size;
diff --git a/lib/librte_malloc/malloc_elem.c b/lib/librte_malloc/malloc_elem.c
index f0da640..13cd5d3 100644
--- a/lib/librte_malloc/malloc_elem.c
+++ b/lib/librte_malloc/malloc_elem.c
@@ -33,6 +33,7 @@
 #include <stdint.h>
 #include <stddef.h>
 #include <stdio.h>
+#include <string.h>
 #include <sys/queue.h>

 #include <rte_memory.h>
@@ -60,7 +61,8 @@ malloc_elem_init(struct malloc_elem *elem,
 {
        elem->heap = heap;
        elem->mz = mz;
-       elem->prev = elem->next_free = NULL;
+       elem->prev = NULL;
+       memset(&elem->free_list, 0, sizeof(elem->free_list));
        elem->state = ELEM_FREE;
        elem->size = size;
        elem->pad = 0;
@@ -125,14 +127,71 @@ split_elem(struct malloc_elem *elem, struct malloc_elem 
*split_pt)
 }

 /*
+ * Given an element size, compute its freelist index.
+ * We free an element into the freelist containing similarly-sized elements.
+ * We try to allocate elements starting with the freelist containing
+ * similarly-sized elements, and if necessary, we search freelists
+ * containing larger elements.
+ *
+ * Example element size ranges for a heap with five free lists:
+ *   heap->free_head[0] - (0   , 2^7]
+ *   heap->free_head[1] - (2^7 , 2^9]
+ *   heap->free_head[2] - (2^9 , 2^11]
+ *   heap->free_head[3] - (2^11, 2^13]
+ *   heap->free_head[4] - (2^13, MAX_SIZE]
+ */
+size_t
+malloc_elem_free_list_index(size_t size)
+{
+#define MALLOC_MINSIZE_LOG2   7
+#define MALLOC_LOG2_INCREMENT 2
+
+       size_t log2;
+       size_t index;
+
+       if (size <= (1UL << MALLOC_MINSIZE_LOG2))
+               return 0;
+
+       /* Find next power of 2 >= size. */
+       log2 = sizeof(size) * 8 - __builtin_clzl(size-1);
+
+       /* Compute freelist index, based on log2(size). */
+       index = (log2 - MALLOC_MINSIZE_LOG2 + MALLOC_LOG2_INCREMENT - 1) /
+               MALLOC_LOG2_INCREMENT;
+
+       return (index <= RTE_HEAP_NUM_FREELISTS-1?
+               index: RTE_HEAP_NUM_FREELISTS-1);
+}
+
+/*
+ * Add the specified element to its heap's free list.
+ */
+void
+malloc_elem_free_list_insert(struct malloc_elem *elem)
+{
+       size_t idx = malloc_elem_free_list_index(elem->size - 
MALLOC_ELEM_HEADER_LEN);
+
+       elem->state = ELEM_FREE;
+       LIST_INSERT_HEAD(&elem->heap->free_head[idx], elem, free_list);
+}
+
+/*
+ * Remove the specified element from its heap's free list.
+ */
+static void
+elem_free_list_remove(struct malloc_elem *elem)
+{
+       LIST_REMOVE(elem, free_list);
+}
+
+/*
  * reserve a block of data in an existing malloc_elem. If the malloc_elem
  * is much larger than the data block requested, we split the element in two.
  * This function is only called from malloc_heap_alloc so parameter checking
  * is not done here, as it's done there previously.
  */
 struct malloc_elem *
-malloc_elem_alloc(struct malloc_elem *elem, size_t size,
-               unsigned align, struct malloc_elem *prev_free)
+malloc_elem_alloc(struct malloc_elem *elem, size_t size, unsigned align)
 {
        struct malloc_elem *new_elem = elem_start_pt(elem, size, align);
        const unsigned old_elem_size = (uintptr_t)new_elem - (uintptr_t)elem;
@@ -151,20 +210,20 @@ malloc_elem_alloc(struct malloc_elem *elem, size_t size,
                        set_header(new_elem);
                }
                /* remove element from free list */
-               if (prev_free == NULL)
-                       elem->heap->free_head = elem->next_free;
-               else
-                       prev_free->next_free = elem->next_free;
+               elem_free_list_remove(elem);

                return new_elem;
        }

        /* we are going to split the element in two. The original element
-        * remains free, and the new element is the one allocated, so no free 
list
-        * changes need to be made.
+        * remains free, and the new element is the one allocated.
+        * Re-insert original element, in case its new size makes it
+        * belong on a different list.
         */
+       elem_free_list_remove(elem);
        split_elem(elem, new_elem);
        new_elem->state = ELEM_BUSY;
+       malloc_elem_free_list_insert(elem);

        return new_elem;
 }
@@ -182,25 +241,6 @@ join_elem(struct malloc_elem *elem1, struct malloc_elem 
*elem2)
 }

 /*
- * scan the free list, and remove the request element from that
- * free list. (Free list to scan is got from heap pointer in element)
- */
-static inline void
-remove_from_free_list(struct malloc_elem *elem)
-{
-       if (elem == elem->heap->free_head)
-               elem->heap->free_head = elem->next_free;
-       else{
-               struct malloc_elem *prev_free = elem->heap->free_head;
-               while (prev_free && prev_free->next_free != elem)
-                       prev_free = prev_free->next_free;
-               if (!prev_free)
-                       rte_panic("Corrupted free list\n");
-               prev_free->next_free = elem->next_free;
-       }
-}
-
-/*
  * free a malloc_elem block by adding it to the free list. If the
  * blocks either immediately before or immediately after newly freed block
  * are also free, the blocks are merged together.
@@ -214,21 +254,22 @@ malloc_elem_free(struct malloc_elem *elem)
        rte_spinlock_lock(&(elem->heap->lock));
        struct malloc_elem *next = RTE_PTR_ADD(elem, elem->size);
        if (next->state == ELEM_FREE){
-               /* join to this one, and remove from free list */
+               /* remove from free list, join to this one */
+               elem_free_list_remove(next);
                join_elem(elem, next);
-               remove_from_free_list(next);
        }

        /* check if previous element is free, if so join with it and return,
-        * no need to update free list, as that element is already there
+        * need to re-insert in free list, as that element's size is changing
         */
-       if (elem->prev != NULL && elem->prev->state == ELEM_FREE)
+       if (elem->prev != NULL && elem->prev->state == ELEM_FREE) {
+               elem_free_list_remove(elem->prev);
                join_elem(elem->prev, elem);
+               malloc_elem_free_list_insert(elem->prev);
+       }
        /* otherwise add ourselves to the free list */
        else {
-               elem->next_free = elem->heap->free_head;
-               elem->heap->free_head = elem;
-               elem->state = ELEM_FREE;
+               malloc_elem_free_list_insert(elem);
                elem->pad = 0;
        }
        /* decrease heap's count of allocated elements */
@@ -258,20 +299,18 @@ malloc_elem_resize(struct malloc_elem *elem, size_t size)
        if (current_size + next->size < new_size)
                goto err_return;

-       /* we now know the element fits, so join the two, then remove from free
-        * list
+       /* we now know the element fits, so remove from free list,
+        * join the two
         */
+       elem_free_list_remove(next);
        join_elem(elem, next);
-       remove_from_free_list(next);

        if (elem->size - new_size > MIN_DATA_SIZE + MALLOC_ELEM_OVERHEAD){
                /* now we have a big block together. Lets cut it down a bit, by 
splitting */
                struct malloc_elem *split_pt = RTE_PTR_ADD(elem, new_size);
                split_pt = RTE_PTR_ALIGN_CEIL(split_pt, CACHE_LINE_SIZE);
                split_elem(elem, split_pt);
-               split_pt->state = ELEM_FREE;
-               split_pt->next_free = elem->heap->free_head;
-               elem->heap->free_head = split_pt;
+               malloc_elem_free_list_insert(split_pt);
        }
        rte_spinlock_unlock(&elem->heap->lock);
        return 0;
diff --git a/lib/librte_malloc/malloc_elem.h b/lib/librte_malloc/malloc_elem.h
index eadecf9..63c69e1 100644
--- a/lib/librte_malloc/malloc_elem.h
+++ b/lib/librte_malloc/malloc_elem.h
@@ -46,7 +46,7 @@ enum elem_state {
 struct malloc_elem {
        struct malloc_heap *heap;
        struct malloc_elem *volatile prev;      /* points to prev elem in 
memzone */
-       struct malloc_elem *volatile next_free; /* to make list of free 
elements */
+       LIST_ENTRY(malloc_elem) free_list;      /* list of free elements in 
heap */
        const struct rte_memzone *mz;
        volatile enum elem_state state;
        uint32_t pad;
@@ -156,8 +156,7 @@ malloc_elem_can_hold(struct malloc_elem *elem, size_t size, 
unsigned align);
  * is much larger than the data block requested, we split the element in two.
  */
 struct malloc_elem *
-malloc_elem_alloc(struct malloc_elem *elem, size_t size,
-               unsigned align, struct malloc_elem *prev_free);
+malloc_elem_alloc(struct malloc_elem *elem, size_t size, unsigned align);

 /*
  * free a malloc_elem block by adding it to the free list. If the
@@ -174,4 +173,16 @@ malloc_elem_free(struct malloc_elem *elem);
 int
 malloc_elem_resize(struct malloc_elem *elem, size_t size);

+/*
+ * Given an element size, compute its freelist index.
+ */
+size_t
+malloc_elem_free_list_index(size_t size);
+
+/*
+ * Add element to its heap's free list.
+ */
+void
+malloc_elem_free_list_insert(struct malloc_elem *elem);
+
 #endif /* MALLOC_ELEM_H_ */
diff --git a/lib/librte_malloc/malloc_heap.c b/lib/librte_malloc/malloc_heap.c
index 882749c..cfc02f1 100644
--- a/lib/librte_malloc/malloc_heap.c
+++ b/lib/librte_malloc/malloc_heap.c
@@ -114,9 +114,8 @@ malloc_heap_add_memzone(struct malloc_heap *heap, size_t 
size, unsigned align)
        const unsigned elem_size = (uintptr_t)end_elem - (uintptr_t)start_elem;
        malloc_elem_init(start_elem, heap, mz, elem_size);
        malloc_elem_mkend(end_elem, start_elem);
+       malloc_elem_free_list_insert(start_elem);

-       start_elem->next_free = heap->free_head;
-       heap->free_head = start_elem;
        /* increase heap total size by size of new memzone */
        heap->total_size+=mz_size - MALLOC_ELEM_OVERHEAD;
        return 0;
@@ -125,35 +124,25 @@ malloc_heap_add_memzone(struct malloc_heap *heap, size_t 
size, unsigned align)
 /*
  * Iterates through the freelist for a heap to find a free element
  * which can store data of the required size and with the requested alignment.
- * Returns null on failure, or pointer to element on success, with the pointer
- * to the previous element in the list, if any, being returned in a parameter
- * (to make removing the element from the free list faster).
+ * Returns null on failure, or pointer to element on success.
  */
 static struct malloc_elem *
-find_suitable_element(struct malloc_heap *heap, size_t size,
-               unsigned align, struct malloc_elem **prev)
+find_suitable_element(struct malloc_heap *heap, size_t size, unsigned align)
 {
-       struct malloc_elem *elem, *min_elem, *min_prev;
-       size_t min_sz;
-
-       elem = heap->free_head;
-       min_elem = NULL;
-       min_prev = NULL;
-       min_sz = (size_t) SIZE_MAX;
-       *prev = NULL;
-
-       while(elem){
-               if (malloc_elem_can_hold(elem, size, align)) {
-                       if (min_sz > elem->size) {
-                               min_elem = elem;
-                               *prev = min_prev;
-                               min_sz = elem->size;
-                       }
+       size_t idx;
+       struct malloc_elem *elem;
+
+       for (idx = malloc_elem_free_list_index(size);
+               idx < RTE_HEAP_NUM_FREELISTS; idx++)
+       {
+               for (elem = LIST_FIRST(&heap->free_head[idx]);
+                       !!elem; elem = LIST_NEXT(elem, free_list))
+               {
+                       if (malloc_elem_can_hold(elem, size, align))
+                               return elem;
                }
-               min_prev = elem;
-               elem = elem->next_free;
        }
-       return (min_elem);
+       return NULL;
 }

 /*
@@ -169,15 +158,14 @@ malloc_heap_alloc(struct malloc_heap *heap,
        size = CACHE_LINE_ROUNDUP(size);
        align = CACHE_LINE_ROUNDUP(align);
        rte_spinlock_lock(&heap->lock);
-       struct malloc_elem *prev, *elem = find_suitable_element(heap,
-                       size, align, &prev);
+       struct malloc_elem *elem = find_suitable_element(heap, size, align);
        if (elem == NULL){
                if ((malloc_heap_add_memzone(heap, size, align)) == 0)
-                       elem = find_suitable_element(heap, size, align, &prev);
+                       elem = find_suitable_element(heap, size, align);
        }

        if (elem != NULL){
-               elem = malloc_elem_alloc(elem, size, align, prev);
+               elem = malloc_elem_alloc(elem, size, align);
                /* increase heap's count of allocated elements */
                heap->alloc_count++;
        }
@@ -193,7 +181,8 @@ int
 malloc_heap_get_stats(const struct malloc_heap *heap,
                struct rte_malloc_socket_stats *socket_stats)
 {
-       struct malloc_elem *elem = heap->free_head;
+       size_t idx;
+       struct malloc_elem *elem;

        /* Initialise variables for heap */
        socket_stats->free_count = 0;
@@ -201,13 +190,15 @@ malloc_heap_get_stats(const struct malloc_heap *heap,
        socket_stats->greatest_free_size = 0;

        /* Iterate through free list */
-       while(elem) {
-               socket_stats->free_count++;
-               socket_stats->heap_freesz_bytes += elem->size;
-               if (elem->size > socket_stats->greatest_free_size)
-                       socket_stats->greatest_free_size = elem->size;
-
-               elem = elem->next_free;
+       for (idx = 0; idx < RTE_HEAP_NUM_FREELISTS; idx++) {
+               for (elem = LIST_FIRST(&heap->free_head[idx]);
+                       !!elem; elem = LIST_NEXT(elem, free_list))
+               {
+                       socket_stats->free_count++;
+                       socket_stats->heap_freesz_bytes += elem->size;
+                       if (elem->size > socket_stats->greatest_free_size)
+                               socket_stats->greatest_free_size = elem->size;
+               }
        }
        /* Get stats on overall heap and allocated memory on this heap */
        socket_stats->heap_totalsz_bytes = heap->total_size;
-- 
1.7.1

Reply via email to