There's yet another bug: libpager just throws away clean precious pages (those previously offered with pager_offer_page (precious = 1)) upon receiving them from the kernel, since it mistakes them for evicted pages it's being notified about. This is obviously very problematic.
I'm attaching a patch that attempts to fix the issue, but I'm less sure about it. What do you think? Sergey -- >8 -- Subject: [PATCH] libpager: Do not throw away precious pages The kernel invokes memory_object_data_return () with dirty = 0 in two cases: if notification on eviction was requested, or when returning precious pages. Properly distinguish between the two cases, and only throw the clean page away in the former case. --- libpager/data-return.c | 69 ++++++++++++++++-------------------------- libpager/offer-page.c | 2 ++ libpager/priv.h | 1 + 3 files changed, 29 insertions(+), 43 deletions(-) diff --git a/libpager/data-return.c b/libpager/data-return.c index c0f5aaf7..59e3e956 100644 --- a/libpager/data-return.c +++ b/libpager/data-return.c @@ -38,12 +38,12 @@ _pager_do_write_request (struct pager *p, short *pm_entries; int npages, i; char *notified; + char *omit_data; error_t *pagerrs; struct lock_request *lr; struct lock_list {struct lock_request *lr; struct lock_list *next;} *lock_list, *ll; int wakeup; - int omitdata = 0; if (!p || p->port.class != _pager_class) @@ -78,6 +78,9 @@ _pager_do_write_request (struct pager *p, npages = length / __vm_page_size; pagerrs = alloca (npages * sizeof (error_t)); + omit_data = alloca (npages * (sizeof *omit_data)); + memset (omit_data, 0, npages * (sizeof *omit_data)); + notified = alloca (npages * (sizeof *notified)); #ifndef NDEBUG memset (notified, -1, npages * (sizeof *notified)); @@ -90,23 +93,6 @@ _pager_do_write_request (struct pager *p, pm_entries = &p->pagemap[offset / __vm_page_size]; - if (! dirty) - { - munmap ((void *) data, length); - if (!kcopy) { - /* Prepare notified array. */ - for (i = 0; i < npages; i++) - notified[i] = (p->notify_on_evict - && ! (pm_entries[i] & PM_PAGEINWAIT)); - - goto notify; - } - else { - _pager_allow_termination (p); - goto release_out; - } - } - /* Make sure there are no other in-progress writes for any of these pages before we begin. This imposes a little more serialization than we really have to require (because *all* future writes on @@ -115,27 +101,24 @@ _pager_do_write_request (struct pager *p, /* XXX: Is this still needed? */ retry: for (i = 0; i < npages; i++) - if (pm_entries[i] & PM_PAGINGOUT) - { - pm_entries[i] |= PM_WRITEWAIT; - pthread_cond_wait (&p->wakeup, &p->interlock); - goto retry; + { + if ((initializing && (pm_entries[i] & PM_INIT)) || + (!dirty && !(pm_entries[i] & PM_PRECIOUS))) + { + omit_data[i] = 1; + continue; + } + if (pm_entries[i] & PM_PAGINGOUT) + { + pm_entries[i] |= PM_WRITEWAIT; + pthread_cond_wait (&p->wakeup, &p->interlock); + goto retry; } + } /* Mark these pages as being paged out. */ - if (initializing) - { - assert_backtrace (npages <= 32); - for (i = 0; i < npages; i++) - { - if (pm_entries[i] & PM_INIT) - omitdata |= 1 << i; - else - pm_entries[i] |= PM_PAGINGOUT | PM_INIT; - } - } - else - for (i = 0; i < npages; i++) + for (i = 0; i < npages; i++) + if (!omit_data[i]) pm_entries[i] |= PM_PAGINGOUT | PM_INIT; /* If this write occurs while a lock is pending, record @@ -162,7 +145,7 @@ _pager_do_write_request (struct pager *p, but until the pager library interface is changed, this will have to do. */ for (i = 0; i < npages; i++) - if (!(omitdata & (1 << i))) + if (!omit_data[i]) pagerrs[i] = pager_write_page (p->upi, offset + (vm_page_size * i), data + (vm_page_size * i)); @@ -175,9 +158,11 @@ _pager_do_write_request (struct pager *p, wakeup = 0; for (i = 0; i < npages; i++) { - if (omitdata & (1 << i)) + if (omit_data[i]) { - notified[i] = 0; + notified[i] = (p->notify_on_evict + && !kcopy + && ! (pm_entries[i] & PM_PAGEINWAIT)); continue; } @@ -205,14 +190,13 @@ _pager_do_write_request (struct pager *p, } else { - munmap ((void *) (data + (vm_page_size * i)), - vm_page_size); notified[i] = (! kcopy && p->notify_on_evict); if (! kcopy) pm_entries[i] &= ~PM_INCORE; } - pm_entries[i] &= ~(PM_PAGINGOUT | PM_PAGEINWAIT | PM_WRITEWAIT); + pm_entries[i] &= ~(PM_PAGINGOUT | PM_PAGEINWAIT + | PM_WRITEWAIT | PM_PRECIOUS); } for (ll = lock_list; ll; ll = ll->next) @@ -222,7 +206,6 @@ _pager_do_write_request (struct pager *p, if (wakeup) pthread_cond_broadcast (&p->wakeup); - notify: _pager_allow_termination (p); pthread_mutex_unlock (&p->interlock); diff --git a/libpager/offer-page.c b/libpager/offer-page.c index 26f88ca3..392e83b8 100644 --- a/libpager/offer-page.c +++ b/libpager/offer-page.c @@ -38,6 +38,8 @@ pager_offer_page (struct pager *p, short *pm_entry = &p->pagemap[offset / vm_page_size]; *pm_entry |= PM_INCORE; + if (precious) + *pm_entry |= PM_PRECIOUS; err = memory_object_data_supply (p->memobjcntl, offset, buf, vm_page_size, 0, writelock ? VM_PROT_WRITE : VM_PROT_NONE, diff --git a/libpager/priv.h b/libpager/priv.h index d9d76965..a5e22f36 100644 --- a/libpager/priv.h +++ b/libpager/priv.h @@ -108,6 +108,7 @@ extern int _pager_page_errors[]; /* Pagemap format */ /* These are binary state bits */ +#define PM_PRECIOUS 0x0400 /* return even if not dirty */ #define PM_WRITEWAIT 0x0200 /* queue wakeup once write is done */ #define PM_INIT 0x0100 /* data has been written */ #define PM_INCORE 0x0080 /* kernel might have a copy */ -- 2.31.1