Stuart Henderson wrote: > While running some fairly memory-hungry jobs I hit a state where wchan > in top was showing "fltamap" and the machine locked up (couldn't enter > DDB). > > Which must be this: > > /* didn't work? must be out of RAM. sleep. */ > if (UVM_ET_ISNEEDSCOPY(ufi->entry)) { > uvmfault_unlockmaps(ufi, TRUE); > uvm_wait("fltamapcopy"); > continue; > }
Correct. You still have plenty of RAM free, but the kernel cannot allocate an amap and amap slots to maintain it because it runs out of kernel virtual address space. Here's a diff that allocates the most commonly used amap slot sizes (between 1 and 16) using pool_get(9) instead of malloc(9). That should reduce the pressure on kernel virtual address space somewhat (on amd64 at least). As discussed with Theo, there's more optimization potential for amaps with up to 4 slots: the slots can be stored in the amap itself. This is still brewing though, and will later be added on top of the diff below. First, testing and review are still appreciated to know we have the basics right before doing more optimizations. Index: uvm/uvm_amap.c =================================================================== RCS file: /cvs/src/sys/uvm/uvm_amap.c,v retrieving revision 1.60 diff -u -p -r1.60 uvm_amap.c --- uvm/uvm_amap.c 6 Mar 2016 14:47:07 -0000 1.60 +++ uvm/uvm_amap.c 9 Mar 2016 19:37:55 -0000 @@ -52,8 +52,13 @@ struct pool uvm_amap_pool; +/* Pools for amap slots for the most common amap slot sizes */ +struct pool uvm_amap_slot_pools[UVM_AMAP_CHUNK]; + LIST_HEAD(, vm_amap) amap_list; +static char amap_slot_pool_names[UVM_AMAP_CHUNK][13]; + #define MALLOC_SLOT_UNIT (2 * sizeof(int) + sizeof(struct vm_anon *)) /* @@ -151,10 +156,20 @@ pp_setreflen(int *ppref, int offset, int void amap_init(void) { + int i; + /* Initialize the vm_amap pool. 
*/ pool_init(&uvm_amap_pool, sizeof(struct vm_amap), 0, 0, PR_WAITOK, "amappl", NULL); pool_sethiwat(&uvm_amap_pool, 4096); + + for (i = 0; i < nitems(uvm_amap_slot_pools); i++) { + snprintf(amap_slot_pool_names[i], + sizeof(amap_slot_pool_names[0]), "amapslotpl%d", i + 1); + pool_init(&uvm_amap_slot_pools[i], (i + 1) * MALLOC_SLOT_UNIT, + 0, 0, PR_WAITOK, amap_slot_pool_names[i], NULL); + pool_sethiwat(&uvm_amap_slot_pools[i], 4096); + } } /* @@ -172,8 +187,13 @@ amap_alloc1(int slots, int padslots, int if (amap == NULL) return(NULL); - totalslots = malloc_roundup((slots + padslots) * MALLOC_SLOT_UNIT) / - MALLOC_SLOT_UNIT; + totalslots = slots + padslots; + KASSERT(totalslots > 0); + + if (totalslots > UVM_AMAP_CHUNK) + totalslots = malloc_roundup( + (slots + padslots) * MALLOC_SLOT_UNIT) / MALLOC_SLOT_UNIT; + amap->am_ref = 1; amap->am_flags = 0; #ifdef UVM_AMAP_PPREF @@ -183,8 +203,14 @@ amap_alloc1(int slots, int padslots, int amap->am_nslot = slots; amap->am_nused = 0; - amap->am_slots = malloc(totalslots * MALLOC_SLOT_UNIT, M_UVMAMAP, - waitf); + if (totalslots > UVM_AMAP_CHUNK) + amap->am_slots = malloc(totalslots * MALLOC_SLOT_UNIT, + M_UVMAMAP, waitf); + else + amap->am_slots = pool_get( + &uvm_amap_slot_pools[totalslots - 1], + (waitf == M_WAITOK) ? 
PR_WAITOK : PR_NOWAIT); + if (amap->am_slots == NULL) goto fail1; @@ -238,7 +264,12 @@ amap_free(struct vm_amap *amap) KASSERT(amap->am_ref == 0 && amap->am_nused == 0); KASSERT((amap->am_flags & AMAP_SWAPOFF) == 0); - free(amap->am_slots, M_UVMAMAP, 0); + if (amap->am_maxslot > UVM_AMAP_CHUNK) + free(amap->am_slots, M_UVMAMAP, 0); + else + pool_put(&uvm_amap_slot_pools[amap->am_maxslot - 1], + amap->am_slots); + #ifdef UVM_AMAP_PPREF if (amap->am_ppref && amap->am_ppref != PPREF_NONE) free(amap->am_ppref, M_UVMAMAP, 0); @@ -338,8 +369,12 @@ amap_extend(struct vm_map_entry *entry, } } #endif - newsl = malloc(slotalloc * MALLOC_SLOT_UNIT, M_UVMAMAP, - M_WAITOK | M_CANFAIL); + if (slotneed > UVM_AMAP_CHUNK) + newsl = malloc(slotalloc * MALLOC_SLOT_UNIT, M_UVMAMAP, + M_WAITOK | M_CANFAIL); + else + newsl = pool_get(&uvm_amap_slot_pools[slotneed - 1], + PR_WAITOK | PR_LIMITFAIL); if (newsl == NULL) { #ifdef UVM_AMAP_PPREF if (newppref != NULL) { @@ -389,12 +424,17 @@ amap_extend(struct vm_map_entry *entry, } #endif - /* update master values */ + /* free */ + if (amap->am_maxslot > UVM_AMAP_CHUNK) + free(oldsl, M_UVMAMAP, 0); + else + pool_put(&uvm_amap_slot_pools[amap->am_maxslot - 1], + oldsl); + + /* and update master values */ amap->am_nslot = slotneed; amap->am_maxslot = slotalloc; - /* and free */ - free(oldsl, M_UVMAMAP, 0); #ifdef UVM_AMAP_PPREF if (oldppref && oldppref != PPREF_NONE) free(oldppref, M_UVMAMAP, 0); > I was monitoring top to see if I was getting close to the available > memory and it reported plenty free. > > Is there a way I can identify when I'm getting close to this state > so I can kill a job rather than crash the machine? (I have a few that > I need to get run as quickly as possible ..) 
> > > Memory: Real: 2920M/5017M act/tot Free: 11G Cache: 165M Swap: 0K/16G > > PID USERNAME PRI NICE SIZE RES STATE WAIT TIME CPU COMMAND > 15768 sthen -18 0 3000M 2192M sleep fltamap 55:29 88.92% perl > 25439 sthen -18 0 377M 392M sleep fltamap 3:08 87.94% perl > 8523 sthen -18 0 306M 319M sleep fltamap 1:04 83.35% perl > 15304 _tomcat 54 0 1156M 2208K idle thrslee 10:13 19.78% java > 26520 sthen -18 0 804K 2224K sleep fltamap 0:03 14.89% top > 27905 _postgre -18 0 147M 5520K sleep fltamap 3:52 12.30% postgres > ... >