Diff below makes out-of-swap checks more robust.
When a system is low on swap, two variables are used to adapt the
behavior of the page daemon and fault handler:
`swpginuse'
indicates how much swap space is being currently used
`swpgonly'
indicates how much swap space stores content of pages that are no
longer living in memory.
The diff below changes the heuristic to detect if the system is currently
out-of-swap. In my tests it makes the system more stable by preventing
hangs. It prevents hangs that occur when the system has more than 99%
of its swap space filled. When this happens, the checks using the variables
above to figure out if we are out-of-swap might never be true because of:
- Races between the fault-handler and the accounting of the page daemon
due to asynchronous swapping
- The swap partition being never completely allocated (bad pages,
off-by-one, rounding error, size of a swap cluster...)
- Possible off-by-one accounting errors in swap space
So I'm adapting uvm_swapisfull() to return true as soon as more than
99% of the swap space is filled with pages which are no longer in memory.
I also introduce uvm_swapisfilled() to prevent later failures if there is
less than a cluster of space available (the minimum we try to swap out).
This prevents deadlocking if a few slots (<SWCLUSTPAGES) are never allocated.
While here document that `swpginuse' is now also protected by the global
mutex. This helps towards untangling the page daemon.
Stuart, Miod, I wonder if this also helps with the off-by-one issue you
are seeing. It might not.
Comments, ok?
diff --git sys/uvm/uvm_pdaemon.c sys/uvm/uvm_pdaemon.c
index 284211d226c..a26a776df67 100644
--- sys/uvm/uvm_pdaemon.c
+++ sys/uvm/uvm_pdaemon.c
@@ -595,11 +595,8 @@ uvmpd_scan_inactive(struct uvm_pmalloc *pma,
* is full, free any swap allocated to the page
* so that other pages can be paged out.
*/
- KASSERT(uvmexp.swpginuse <= uvmexp.swpages);
- if ((p->pg_flags & PQ_SWAPBACKED) &&
- uvmexp.swpginuse == uvmexp.swpages) {
+ if ((p->pg_flags & PQ_SWAPBACKED) && uvm_swapisfilled())
uvmpd_dropswap(p);
- }
/*
* the page we are looking at is dirty. we must
@@ -917,9 +914,7 @@ uvmpd_scan(struct uvm_pmalloc *pma, struct
uvm_constraint_range *constraint)
*/
free = uvmexp.free - BUFPAGES_DEFICIT;
swap_shortage = 0;
- if (free < uvmexp.freetarg &&
- uvmexp.swpginuse == uvmexp.swpages &&
- !uvm_swapisfull() &&
+ if (free < uvmexp.freetarg && uvm_swapisfilled() && !uvm_swapisfull() &&
pages_freed == 0) {
swap_shortage = uvmexp.freetarg - free;
}
diff --git sys/uvm/uvm_swap.c sys/uvm/uvm_swap.c
index 27963259eba..913b2366a7c 100644
--- sys/uvm/uvm_swap.c
+++ sys/uvm/uvm_swap.c
@@ -1516,8 +1516,30 @@ ReTry: /* XXXMRG */
}
/*
- * uvm_swapisfull: return true if all of available swap is allocated
- * and in use.
+ * uvm_swapisfilled: return true if the amount of free space in swap is
+ * smaller than the size of a cluster.
+ *
+ * As long as some swap slots are being used by pages currently in memory,
+ * it is possible to reuse them. Even if the swap space has been completely
+ * filled we do not consider it full.
+ */
+int
+uvm_swapisfilled(void)
+{
+ int result;
+
+ mtx_enter(&uvm_swap_data_lock);
+ KASSERT(uvmexp.swpginuse <= uvmexp.swpages);
+ result = (uvmexp.swpginuse + SWCLUSTPAGES) >= uvmexp.swpages;
+ mtx_leave(&uvm_swap_data_lock);
+
+ return result;
+}
+
+/*
+ * uvm_swapisfull: return true if the amount of pages only in swap
+ * accounts for more than 99% of the total swap space.
+ *
*/
int
uvm_swapisfull(void)
@@ -1526,7 +1548,7 @@ uvm_swapisfull(void)
mtx_enter(&uvm_swap_data_lock);
KASSERT(uvmexp.swpgonly <= uvmexp.swpages);
- result = (uvmexp.swpgonly == uvmexp.swpages);
+ result = (uvmexp.swpgonly >= (uvmexp.swpages * 99 / 100));
mtx_leave(&uvm_swap_data_lock);
return result;
diff --git sys/uvm/uvm_swap.h sys/uvm/uvm_swap.h
index 9904fe58cd7..f60237405ca 100644
--- sys/uvm/uvm_swap.h
+++ sys/uvm/uvm_swap.h
@@ -42,6 +42,7 @@ int uvm_swap_put(int, struct vm_page **,
int, int);
int uvm_swap_alloc(int *, boolean_t);
void uvm_swap_free(int, int);
void uvm_swap_markbad(int, int);
+int uvm_swapisfilled(void);
int uvm_swapisfull(void);
void uvm_swap_freepages(struct vm_page **, int);
#ifdef HIBERNATE
diff --git sys/uvm/uvmexp.h sys/uvm/uvmexp.h
index de5f5fa367c..144494b73ff 100644
--- sys/uvm/uvmexp.h
+++ sys/uvm/uvmexp.h
@@ -83,7 +83,7 @@ struct uvmexp {
/* swap */
int nswapdev; /* [S] number of configured swap devices in system */
int swpages; /* [S] number of PAGE_SIZE'ed swap pages */
- int swpginuse; /* [K] number of swap pages in use */
+ int swpginuse; /* [S] number of swap pages in use */
int swpgonly; /* [a] number of swap pages in use, not also in RAM */
int nswget; /* [a] number of swap pages moved from disk to RAM */
int nanon; /* XXX number total of anon's in system */