Currently we only record cached_hole_size, which is consulted when
both 'free_vmap_cache == NULL' and 'size < cached_hole_size' hold.
In that scenario, however, the search still starts over from rb_root
and walks down to the node just in front of the cached hole.
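
For reference, the relevant check in alloc_vmap_area() (an abridged
excerpt of the code this patch touches, see the first hunk below) is:

	if (!free_vmap_cache ||
			size < cached_hole_size ||
			vstart < cached_vstart ||
			align < cached_align) {
nocache:
		cached_hole_size = 0;
		free_vmap_cache = NULL;
	}

So even though 'size < cached_hole_size' guarantees a large-enough
hole exists, its position is thrown away and the search restarts from
the root: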

free_vmap_cache miss:
      vmap_area_root
          /      \
       _next     U
        /  (T1)
 cached_hole_node
       /
     ...   (T2)
      /
    first

vmap_area_list->first->......->cached_hole_node->cached_hole_node.list.next
                  |-------(T3)-------| | <<< cached_hole_size >>> |

vmap_area_list->......->cached_hole_node->cached_hole_node.list.next
                               | <<< cached_hole_size >>> |

The time cost of the search is therefore T = T1 + T2 + T3.
This patch adds cached_hole_node to record the vmap_area just in front
of the cached hole, which avoids walking both the rb tree and the list
and makes T = 0.
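
With the node cached, the fast path on a cache hit boils down to the
following (a sketch mirroring the first hunk below; 'found' is the
existing label in alloc_vmap_area()):

	/* if we have a cached node, just use it */
	if (size < cached_hole_size && cached_hole_node != NULL) {
		/* the hole starts right after the cached node */
		addr = ALIGN(cached_hole_node->va_end, align);
		cached_hole_node = NULL;	/* single use */
		goto found;
	}

Clearing cached_hole_node makes the cache single-use; it is
re-populated during the next list walk.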

Signed-off-by: Zhaoyang Huang <[email protected]>
---
 mm/vmalloc.c | 23 +++++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 8698c1c..4e76e7f 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -336,6 +336,7 @@ unsigned long vmalloc_to_pfn(const void *vmalloc_addr)
 
 /* The vmap cache globals are protected by vmap_area_lock */
 static struct rb_node *free_vmap_cache;
+static struct vmap_area *cached_hole_node;
 static unsigned long cached_hole_size;
 static unsigned long cached_vstart;
 static unsigned long cached_align;
@@ -444,6 +445,12 @@ static struct vmap_area *alloc_vmap_area(unsigned long size,
                        size < cached_hole_size ||
                        vstart < cached_vstart ||
                        align < cached_align) {
+       /* if we have a cached node, just use it */
+       if ((size < cached_hole_size) && cached_hole_node != NULL) {
+               addr = ALIGN(cached_hole_node->va_end, align);
+               cached_hole_node = NULL;
+               goto found;
+       }
 nocache:
                cached_hole_size = 0;
                free_vmap_cache = NULL;
@@ -487,8 +494,13 @@ static struct vmap_area *alloc_vmap_area(unsigned long size,
 
        /* from the starting point, walk areas until a suitable hole is found */
        while (addr + size > first->va_start && addr + size <= vend) {
-               if (addr + cached_hole_size < first->va_start)
+               if (addr + cached_hole_size < first->va_start) {
                        cached_hole_size = first->va_start - addr;
+                       /* record the node corresponding to the hole */
+                       cached_hole_node = (first->list.prev ==
+                                           &vmap_area_list) ?
+                                           NULL : list_prev_entry(first, list);
+               }
                addr = ALIGN(first->va_end, align);
                if (addr + size < addr)
                        goto overflow;
@@ -571,10 +583,17 @@ static void __free_vmap_area(struct vmap_area *va)
                        }
                }
        }
+       if (va == cached_hole_node) {
+               /* cached node is being freed; the hole gets bigger */
+               if (cached_hole_node->list.prev != &vmap_area_list)
+                       cached_hole_node = list_prev_entry(cached_hole_node,
+                                                          list);
+               else
+                       cached_hole_node = NULL;
+       }
        rb_erase(&va->rb_node, &vmap_area_root);
        RB_CLEAR_NODE(&va->rb_node);
        list_del_rcu(&va->list);
-
        /*
         * Track the highest possible candidate for pcpu area
         * allocation.  Areas outside of vmalloc area can be returned
-- 
1.9.1
