Stuart Henderson wrote:
> While running some fairly memory-hungry jobs I hit a state where wchan
> in top was showing "fltamap" and the machine locked up (couldn't enter
> DDB).
> 
> Which must be this:
> 
>               /* didn't work?  must be out of RAM.  sleep. */
>               if (UVM_ET_ISNEEDSCOPY(ufi->entry)) {
>                       uvmfault_unlockmaps(ufi, TRUE);
>                       uvm_wait("fltamapcopy");
>                       continue;
>               }

Correct. You still have plenty of RAM free, but the kernel cannot
allocate an amap and the slot arrays needed to maintain it, because it
runs out of kernel virtual address space.
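
To put a rough number on it: each slot costs MALLOC_SLOT_UNIT bytes
(2 * sizeof(int) + sizeof(struct vm_anon *), i.e. 16 bytes on amd64),
there is one slot per page of the mapping, and the whole array comes
from malloc(9). A userland back-of-the-envelope sketch (assuming amd64
type sizes and 4K pages; this is only an illustration, not kernel code):

        #include <stdio.h>

        /* mirrors the kernel's MALLOC_SLOT_UNIT: 16 bytes on amd64 */
        #define SLOT_UNIT (2 * sizeof(int) + sizeof(void *))

        int
        main(void)
        {
                size_t map = 3000UL << 20;  /* ~3G mapping, like the big perl below */
                size_t slots = map >> 12;   /* one amap slot per 4K page */

                printf("%zu slots -> %zu KB of kernel VA\n",
                    slots, slots * SLOT_UNIT / 1024);
                return 0;
        }

That works out to roughly 12MB of malloc'd kernel VA for one large
anonymous mapping, so such mappings tie up a lot of kernel address
space even while plenty of RAM remains free.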

Here's a diff that allocates the most commonly used amap slot sizes
(between 1 and 16 slots) from dedicated pools via pool_get(9) instead of
malloc(9). That should reduce the pressure on kernel virtual address
space somewhat (on amd64 at least).

As discussed with Theo, there's more optimization potential
for amaps with up to 4 slots: the slots can be stored in the amap
itself. This is still brewing though, and will later be added on top
of the diff below.
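
Very roughly, that idea would look something like the sketch below.
This is only an illustration of the direction, not the pending diff:
the struct and field names are invented, and the real struct vm_amap
has more per-slot state (e.g. am_slots) that would get the same
treatment.

        /*
         * Hypothetical sketch only: keep room for a few anon pointers
         * inside the amap itself, and fall back to the pools/malloc(9)
         * for anything bigger.
         */
        struct vm_amap_sketch {
                int              am_nslot;
                struct vm_anon **am_anon;      /* points at am_inline for small
                                                * amaps, external memory otherwise */
                struct vm_anon  *am_inline[4]; /* inline slots, am_nslot <= 4 */
        };

For amaps with up to 4 slots that would avoid the separate slot
allocation entirely.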

First, though, testing and review are still appreciated, to make sure
we have the basics right before doing more optimizations.

Index: uvm/uvm_amap.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_amap.c,v
retrieving revision 1.60
diff -u -p -r1.60 uvm_amap.c
--- uvm/uvm_amap.c      6 Mar 2016 14:47:07 -0000       1.60
+++ uvm/uvm_amap.c      9 Mar 2016 19:37:55 -0000
@@ -52,8 +52,13 @@
 
 struct pool uvm_amap_pool;
 
+/* Pools for amap slots for the most common amap slot sizes */
+struct pool uvm_amap_slot_pools[UVM_AMAP_CHUNK];
+
 LIST_HEAD(, vm_amap) amap_list;
 
+static char amap_slot_pool_names[UVM_AMAP_CHUNK][13];
+
 #define MALLOC_SLOT_UNIT (2 * sizeof(int) + sizeof(struct vm_anon *))
 
 /*
@@ -151,10 +156,20 @@ pp_setreflen(int *ppref, int offset, int
 void
 amap_init(void)
 {
+       int i;
+
        /* Initialize the vm_amap pool. */
        pool_init(&uvm_amap_pool, sizeof(struct vm_amap), 0, 0, PR_WAITOK,
            "amappl", NULL);
        pool_sethiwat(&uvm_amap_pool, 4096);
+
+       for (i = 0; i < nitems(uvm_amap_slot_pools); i++) {
+               snprintf(amap_slot_pool_names[i],
+                   sizeof(amap_slot_pool_names[0]), "amapslotpl%d", i + 1);
+               pool_init(&uvm_amap_slot_pools[i], (i + 1) * MALLOC_SLOT_UNIT,
+                   0, 0, PR_WAITOK, amap_slot_pool_names[i], NULL);
+               pool_sethiwat(&uvm_amap_slot_pools[i], 4096);
+       }
 }
 
 /*
@@ -172,8 +187,13 @@ amap_alloc1(int slots, int padslots, int
        if (amap == NULL)
                return(NULL);
 
-       totalslots = malloc_roundup((slots + padslots) * MALLOC_SLOT_UNIT) /
-           MALLOC_SLOT_UNIT;
+       totalslots = slots + padslots;
+       KASSERT(totalslots > 0);
+
+       if (totalslots > UVM_AMAP_CHUNK)
+               totalslots = malloc_roundup(
+                   (slots + padslots) * MALLOC_SLOT_UNIT) / MALLOC_SLOT_UNIT;
+
        amap->am_ref = 1;
        amap->am_flags = 0;
 #ifdef UVM_AMAP_PPREF
@@ -183,8 +203,14 @@ amap_alloc1(int slots, int padslots, int
        amap->am_nslot = slots;
        amap->am_nused = 0;
 
-       amap->am_slots = malloc(totalslots * MALLOC_SLOT_UNIT, M_UVMAMAP,
-           waitf);
+       if (totalslots > UVM_AMAP_CHUNK)
+               amap->am_slots = malloc(totalslots * MALLOC_SLOT_UNIT,
+                   M_UVMAMAP, waitf);
+       else
+               amap->am_slots = pool_get(
+                   &uvm_amap_slot_pools[totalslots - 1],
+                   (waitf == M_WAITOK) ? PR_WAITOK : PR_NOWAIT);
+
        if (amap->am_slots == NULL)
                goto fail1;
 
@@ -238,7 +264,12 @@ amap_free(struct vm_amap *amap)
        KASSERT(amap->am_ref == 0 && amap->am_nused == 0);
        KASSERT((amap->am_flags & AMAP_SWAPOFF) == 0);
 
-       free(amap->am_slots, M_UVMAMAP, 0);
+       if (amap->am_maxslot > UVM_AMAP_CHUNK)
+               free(amap->am_slots, M_UVMAMAP, 0);
+       else
+               pool_put(&uvm_amap_slot_pools[amap->am_maxslot - 1],
+                   amap->am_slots);
+
 #ifdef UVM_AMAP_PPREF
        if (amap->am_ppref && amap->am_ppref != PPREF_NONE)
                free(amap->am_ppref, M_UVMAMAP, 0);
@@ -338,8 +369,12 @@ amap_extend(struct vm_map_entry *entry, 
                }
        }
 #endif
-       newsl = malloc(slotalloc * MALLOC_SLOT_UNIT, M_UVMAMAP,
-           M_WAITOK | M_CANFAIL);
+       if (slotneed > UVM_AMAP_CHUNK)
+               newsl = malloc(slotalloc * MALLOC_SLOT_UNIT, M_UVMAMAP,
+                   M_WAITOK | M_CANFAIL);
+       else
+               newsl = pool_get(&uvm_amap_slot_pools[slotneed - 1],
+                   PR_WAITOK | PR_LIMITFAIL);
        if (newsl == NULL) {
 #ifdef UVM_AMAP_PPREF
                if (newppref != NULL) {
@@ -389,12 +424,17 @@ amap_extend(struct vm_map_entry *entry, 
        }
 #endif
 
-       /* update master values */
+       /* free */
+       if (amap->am_maxslot > UVM_AMAP_CHUNK)
+               free(oldsl, M_UVMAMAP, 0);
+       else
+               pool_put(&uvm_amap_slot_pools[amap->am_maxslot - 1],
+                   oldsl);
+
+       /* and update master values */
        amap->am_nslot = slotneed;
        amap->am_maxslot = slotalloc;
 
-       /* and free */
-       free(oldsl, M_UVMAMAP, 0);
 #ifdef UVM_AMAP_PPREF
        if (oldppref && oldppref != PPREF_NONE)
                free(oldppref, M_UVMAMAP, 0);
 
> I was monitoring top to see if I was getting close to the available
> memory and it reported plenty free.
> 
> Is there a way I can identify when I'm getting close to this state
> so I can kill a job rather than crash the machine? (I have a few that
> I need to get run as quickly as possible ..)
> 
> 
> Memory: Real: 2920M/5017M act/tot Free: 11G Cache: 165M Swap: 0K/16G
> 
>   PID USERNAME PRI NICE  SIZE   RES STATE     WAIT      TIME    CPU COMMAND
> 15768 sthen    -18    0 3000M 2192M sleep     fltamap  55:29 88.92% perl
> 25439 sthen    -18    0  377M  392M sleep     fltamap   3:08 87.94% perl
>  8523 sthen    -18    0  306M  319M sleep     fltamap   1:04 83.35% perl
> 15304 _tomcat   54    0 1156M 2208K idle      thrslee  10:13 19.78% java
> 26520 sthen    -18    0  804K 2224K sleep     fltamap   0:03 14.89% top
> 27905 _postgre -18    0  147M 5520K sleep     fltamap   3:52 12.30% postgres
> ...
> 
