OK, so I got rid of the global stuff; this also obsoletes 3/10.

---
Subject: mm: slub: add knowledge of reserve pages

Restrict objects from reserve slabs (ALLOC_NO_WATERMARKS) to allocation
contexts that are entitled to them.

Care is taken to only touch the SLUB slow path.
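
For reference, a minimal stand-alone sketch of the pointer-tagging trick the
patch relies on: struct page pointers are at least word-aligned, so bit 0 of
->cpu_slab[cpu] is free to record whether the slab was allocated from the
reserves. All names below are made up for illustration; this is not kernel
code.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

struct fake_page { int dummy; };

/* Tag the pointer: set bit 0 when the slab came from the reserves. */
static void *slab_tag(struct fake_page *page, int reserve)
{
        uintptr_t p = (uintptr_t)page;

        assert((p & 1) == 0);           /* relies on pointer alignment */
        return (void *)(p | (reserve ? 1 : 0));
}

/* Strip the tag to get the usable pointer back. */
static struct fake_page *slab_ptr(void *tagged)
{
        return (struct fake_page *)((uintptr_t)tagged & ~(uintptr_t)1);
}

/* Read the tag: was this slab allocated from the reserves? */
static int slab_is_reserve(void *tagged)
{
        return (uintptr_t)tagged & 1;
}

int main(void)
{
        struct fake_page page;
        void *slot = slab_tag(&page, 1);        /* pretend it needed reserves */

        printf("reserve=%d ptr_ok=%d\n",
               slab_is_reserve(slot), slab_ptr(slot) == &page);

        slot = slab_tag(&page, 0);              /* pressure lifted, untag */
        printf("reserve=%d ptr_ok=%d\n",
               slab_is_reserve(slot), slab_ptr(slot) == &page);
        return 0;
}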

Signed-off-by: Peter Zijlstra <[EMAIL PROTECTED]>
Cc: Christoph Lameter <[EMAIL PROTECTED]>
---
 mm/slub.c |   87 +++++++++++++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 69 insertions(+), 18 deletions(-)

Index: linux-2.6-2/mm/slub.c
===================================================================
--- linux-2.6-2.orig/mm/slub.c
+++ linux-2.6-2/mm/slub.c
@@ -20,11 +20,12 @@
 #include <linux/mempolicy.h>
 #include <linux/ctype.h>
 #include <linux/kallsyms.h>
+#include "internal.h"
 
 /*
  * Lock order:
  *   1. slab_lock(page)
- *   2. slab->list_lock
+ *   2. node->list_lock
  *
  *   The slab_lock protects operations on the object of a particular
  *   slab and its metadata in the page struct. If the slab lock
@@ -1069,7 +1070,7 @@ static void setup_object(struct kmem_cac
                s->ctor(object, s, 0);
 }
 
-static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
+static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node, int *reserve)
 {
        struct page *page;
        struct kmem_cache_node *n;
@@ -1087,6 +1088,7 @@ static struct page *new_slab(struct kmem
        if (!page)
                goto out;
 
+       *reserve = page->reserve;
        n = get_node(s, page_to_nid(page));
        if (n)
                atomic_long_inc(&n->nr_slabs);
@@ -1403,12 +1405,36 @@ static inline void flush_slab(struct kme
 }
 
 /*
+ * cpu slab reserve magic
+ *
+ * we mark reserve status in the lsb of the ->cpu_slab[] pointer.
+ */
+static inline unsigned long cpu_slab_reserve(struct kmem_cache *s, int cpu)
+{
+       return unlikely((unsigned long)s->cpu_slab[cpu] & 1);
+}
+
+static inline void
+cpu_slab_set(struct kmem_cache *s, int cpu, struct page *page, int reserve)
+{
+       if (unlikely(reserve))
+               page = (struct page *)((unsigned long)page | 1);
+
+       s->cpu_slab[cpu] = page;
+}
+
+static inline struct page *cpu_slab(struct kmem_cache *s, int cpu)
+{
+       return (struct page *)((unsigned long)s->cpu_slab[cpu] & ~1UL);
+}
+
+/*
  * Flush cpu slab.
  * Called from IPI handler with interrupts disabled.
  */
 static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
 {
-       struct page *page = s->cpu_slab[cpu];
+       struct page *page = cpu_slab(s, cpu);
 
        if (likely(page))
                flush_slab(s, page, cpu);
@@ -1457,10 +1483,22 @@ static void *__slab_alloc(struct kmem_ca
 {
        void **object;
        int cpu = smp_processor_id();
+       int reserve = 0;
 
        if (!page)
                goto new_slab;
 
+       if (cpu_slab_reserve(s, cpu)) {
+               /*
+                * If the current slab is a reserve slab and the current
+                * allocation context does not allow access to the reserves
+                * we must force an allocation to test the current levels.
+                */
+               if (!(gfp_to_alloc_flags(gfpflags) & ALLOC_NO_WATERMARKS))
+                       goto alloc_slab;
+               reserve = 1;
+       }
+
        slab_lock(page);
        if (unlikely(node != -1 && page_to_nid(page) != node))
                goto another_slab;
@@ -1468,10 +1506,9 @@ load_freelist:
        object = page->freelist;
        if (unlikely(!object))
                goto another_slab;
-       if (unlikely(SlabDebug(page)))
+       if (unlikely(SlabDebug(page) || reserve))
                goto debug;
 
-       object = page->freelist;
        page->lockless_freelist = object[page->offset];
        page->inuse = s->objects;
        page->freelist = NULL;
@@ -1484,14 +1521,28 @@ another_slab:
 new_slab:
        page = get_partial(s, gfpflags, node);
        if (page) {
-               s->cpu_slab[cpu] = page;
+               cpu_slab_set(s, cpu, page, reserve);
                goto load_freelist;
        }
 
-       page = new_slab(s, gfpflags, node);
+alloc_slab:
+       page = new_slab(s, gfpflags, node, &reserve);
        if (page) {
+               struct page *slab;
+
                cpu = smp_processor_id();
-               if (s->cpu_slab[cpu]) {
+               slab = cpu_slab(s, cpu);
+
+               if (cpu_slab_reserve(s, cpu) && !reserve) {
+                       /*
+                        * If the current cpu_slab is a reserve slab but we
+                        * managed to allocate a new slab the pressure is
+                        * lifted and we can unmark the current one.
+                        */
+                       cpu_slab_set(s, cpu, slab, 0);
+               }
+
+               if (slab) {
                        /*
                         * Someone else populated the cpu_slab while we
                         * enabled interrupts, or we have gotten scheduled
@@ -1499,29 +1550,28 @@ new_slab:
                         * requested node even if __GFP_THISNODE was
                         * specified. So we need to recheck.
                         */
-                       if (node == -1 ||
-                               page_to_nid(s->cpu_slab[cpu]) == node) {
+                       if (node == -1 || page_to_nid(slab) == node) {
                                /*
                                 * Current cpuslab is acceptable and we
                                 * want the current one since its cache hot
                                 */
                                discard_slab(s, page);
-                               page = s->cpu_slab[cpu];
+                               page = slab;
                                slab_lock(page);
                                goto load_freelist;
                        }
                        /* New slab does not fit our expectations */
-                       flush_slab(s, s->cpu_slab[cpu], cpu);
+                       flush_slab(s, slab, cpu);
                }
                slab_lock(page);
                SetSlabFrozen(page);
-               s->cpu_slab[cpu] = page;
+               cpu_slab_set(s, cpu, page, reserve);
                goto load_freelist;
        }
        return NULL;
 debug:
-       object = page->freelist;
-       if (!alloc_debug_processing(s, page, object, addr))
+       if (SlabDebug(page) &&
+                       !alloc_debug_processing(s, page, object, addr))
                goto another_slab;
 
        page->inuse++;
@@ -1548,7 +1598,7 @@ static void __always_inline *slab_alloc(
        unsigned long flags;
 
        local_irq_save(flags);
-       page = s->cpu_slab[smp_processor_id()];
+       page = cpu_slab(s, smp_processor_id());
        if (unlikely(!page || !page->lockless_freelist ||
                        (node != -1 && page_to_nid(page) != node)))
 
@@ -1873,10 +1923,11 @@ static struct kmem_cache_node * __init e
 {
        struct page *page;
        struct kmem_cache_node *n;
+       int reserve;
 
        BUG_ON(kmalloc_caches->size < sizeof(struct kmem_cache_node));
 
-       page = new_slab(kmalloc_caches, gfpflags | GFP_THISNODE, node);
+       page = new_slab(kmalloc_caches, gfpflags | GFP_THISNODE, node, &reserve);
 
        BUG_ON(!page);
        n = page->freelist;
@@ -3189,7 +3240,7 @@ static unsigned long slab_objects(struct
        per_cpu = nodes + nr_node_ids;
 
        for_each_possible_cpu(cpu) {
-               struct page *page = s->cpu_slab[cpu];
+               struct page *page = cpu_slab(s, cpu);
                int node;
 
                if (page) {
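
For what it's worth, a compact stand-alone model of the slow-path policy the
hunks above add. The real gfp_to_alloc_flags()/ALLOC_NO_WATERMARKS test lives
in the page allocator side of the series; entitled_to_reserves() and the other
names here are stand-ins invented for the example, and the flow is simplified.

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-in for the ALLOC_NO_WATERMARKS entitlement check. */
static bool entitled_to_reserves(unsigned gfpflags)
{
        return gfpflags & 1;
}

struct cpu_slab_model {
        bool present;   /* do we have a cpu slab at all? */
        bool reserve;   /* the lsb tag: slab was allocated from reserves */
};

/*
 * Model of the added policy:
 *  - a reserve cpu slab may only feed contexts entitled to the reserves;
 *    everyone else is forced into new_slab() to re-test the watermarks;
 *  - if that allocation succeeds without dipping into the reserves, the
 *    pressure has lifted and the reserve tag on the cpu slab is cleared.
 */
static const char *slow_path(struct cpu_slab_model *cs, unsigned gfpflags,
                             bool page_alloc_needs_reserves)
{
        if (cs->present && cs->reserve && !entitled_to_reserves(gfpflags)) {
                /* force an allocation to test the current levels */
                if (!page_alloc_needs_reserves) {
                        cs->reserve = false;    /* pressure lifted: unmark */
                        return "new slab, reserve tag cleared";
                }
                return "still needs reserves: fail for this context";
        }
        return "serve from current cpu slab";
}

int main(void)
{
        struct cpu_slab_model cs = { .present = true, .reserve = true };

        puts(slow_path(&cs, 0, true));  /* not entitled, still under pressure */
        puts(slow_path(&cs, 0, false)); /* not entitled, pressure lifted */
        puts(slow_path(&cs, 0, true));  /* tag cleared, normal path again */
        return 0;
}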
