On Android mobile devices I observe that KSM is unlikely to merge new
pages from an area that has already been scanned twice, so continuing
to scan such areas at high frequency wastes power.
This patch introduces into KSM a defer mechanism borrowed from page
compaction. The defer mechanism automatically lowers the scan
frequency in the case described above.

Signed-off-by: Wenwei Tao <wenweitaowen...@gmail.com>
---
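Notes (not part of the commit message): below is a minimal, standalone
userspace sketch of the backoff behaviour this defer mechanism gives
ksmd. The helper names and KSM_MAX_DEFER_SHIFT mirror the patch, but
the harness, its counters and its output are illustrative assumptions
only; the real ksm_defered() additionally requires the active list to
be empty before deferring.

/*
 * Illustration only: every unproductive scan pass doubles the number
 * of ksmd wakeups that are skipped, up to 1 << KSM_MAX_DEFER_SHIFT;
 * any productive pass (a merge or a new unstable-tree node) resets
 * the backoff.
 */
#include <stdbool.h>
#include <stdio.h>

#define KSM_MAX_DEFER_SHIFT	6

static unsigned long considered;
static unsigned int defer_shift;

static void defer_ksm(void)		/* after a fruitless pass */
{
	considered = 0;
	if (++defer_shift > KSM_MAX_DEFER_SHIFT)
		defer_shift = KSM_MAX_DEFER_SHIFT;
}

static void reset_ksm_defer(void)	/* after a productive pass */
{
	considered = 0;
	defer_shift = 0;
}

static bool ksm_deferred(void)		/* skip this wakeup? */
{
	unsigned long limit = 1UL << defer_shift;

	if (++considered > limit)
		considered = limit;
	return considered < limit;
}

int main(void)
{
	int i;

	/* Simulate 20 ksmd wakeups during which nothing ever merges. */
	for (i = 0; i < 20; i++) {
		if (ksm_deferred()) {
			printf("wakeup %2d: skipped (shift=%u)\n",
			       i, defer_shift);
			continue;
		}
		printf("wakeup %2d: scanned, nothing merged\n", i);
		defer_ksm();
	}
	reset_ksm_defer();	/* a successful merge resets the backoff */
	return 0;
}

Running this shows the gaps between real scans growing as 1, 3, 7, ...
skipped wakeups, capped at 2^6 - 1 = 63, which is the power saving the
changelog describes.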
 mm/ksm.c |  230 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 203 insertions(+), 27 deletions(-)

diff --git a/mm/ksm.c b/mm/ksm.c
index 4162dce..54ffcb2 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -104,6 +104,7 @@ struct mm_slot {
        struct list_head mm_list;
        struct rmap_item *rmap_list;
        struct mm_struct *mm;
+       unsigned long seqnr;
 };
 
 /**
@@ -117,9 +118,12 @@ struct mm_slot {
  */
 struct ksm_scan {
        struct mm_slot *mm_slot;
+       struct mm_slot *active_slot;
        unsigned long address;
        struct rmap_item **rmap_list;
        unsigned long seqnr;
+       unsigned long ksm_considered;
+       unsigned int ksm_defer_shift;
 };
 
 /**
@@ -182,6 +186,11 @@ struct rmap_item {
 #define UNSTABLE_FLAG  0x100   /* is a node of the unstable tree */
 #define STABLE_FLAG    0x200   /* is listed from the stable tree */
 
+#define ACTIVE_SLOT_FLAG       0x100
+#define ACTIVE_SLOT_SEQNR      0x200
+#define KSM_MAX_DEFER_SHIFT    6
+
+
 /* The stable and unstable tree heads */
 static struct rb_root one_stable_tree[1] = { RB_ROOT };
 static struct rb_root one_unstable_tree[1] = { RB_ROOT };
@@ -197,14 +206,22 @@ static DEFINE_HASHTABLE(mm_slots_hash, MM_SLOTS_HASH_BITS);
 static struct mm_slot ksm_mm_head = {
        .mm_list = LIST_HEAD_INIT(ksm_mm_head.mm_list),
 };
+
+static struct mm_slot ksm_mm_active = {
+       .mm_list = LIST_HEAD_INIT(ksm_mm_active.mm_list),
+};
+
 static struct ksm_scan ksm_scan = {
        .mm_slot = &ksm_mm_head,
+       .active_slot = &ksm_mm_active,
 };
 
 static struct kmem_cache *rmap_item_cache;
 static struct kmem_cache *stable_node_cache;
 static struct kmem_cache *mm_slot_cache;
 
+static bool ksm_merged_or_unstable;
+
 /* The number of nodes in the stable tree */
 static unsigned long ksm_pages_shared;
 
@@ -336,6 +353,23 @@ static void insert_to_mm_slots_hash(struct mm_struct *mm,
        hash_add(mm_slots_hash, &mm_slot->link, (unsigned long)mm);
 }
 
+static void move_to_active_list(struct mm_slot *mm_slot)
+{
+       if (mm_slot && !(mm_slot->seqnr & ACTIVE_SLOT_FLAG)) {
+               if (ksm_run & KSM_RUN_UNMERGE && mm_slot->rmap_list)
+                       return;
+               if (mm_slot == ksm_scan.mm_slot) {
+                       if (ksm_scan.active_slot == &ksm_mm_active)
+                               return;
+                       ksm_scan.mm_slot = list_entry(mm_slot->mm_list.next,
+                                               struct mm_slot, mm_list);
+               }
+               list_move_tail(&mm_slot->mm_list,
+                       &ksm_scan.active_slot->mm_list);
+               mm_slot->seqnr |= (ACTIVE_SLOT_FLAG | ACTIVE_SLOT_SEQNR);
+       }
+}
+
 /*
  * ksmd, and unmerge_and_remove_all_rmap_items(), must not touch an mm's
  * page tables after it has passed through ksm_exit() - which, if necessary,
@@ -772,6 +806,15 @@ static int unmerge_and_remove_all_rmap_items(void)
        int err = 0;
 
        spin_lock(&ksm_mmlist_lock);
+       mm_slot = list_entry(ksm_mm_active.mm_list.next,
+                       struct mm_slot, mm_list);
+       while (mm_slot != &ksm_mm_active) {
+               list_move_tail(&mm_slot->mm_list, &ksm_mm_head.mm_list);
+               mm_slot->seqnr &= ~(ACTIVE_SLOT_FLAG | ACTIVE_SLOT_SEQNR);
+               mm_slot = list_entry(ksm_mm_active.mm_list.next,
+                               struct mm_slot, mm_list);
+       }
+       ksm_scan.active_slot = &ksm_mm_active;
        ksm_scan.mm_slot = list_entry(ksm_mm_head.mm_list.next,
                                                struct mm_slot, mm_list);
        spin_unlock(&ksm_mmlist_lock);
@@ -790,8 +833,8 @@ static int unmerge_and_remove_all_rmap_items(void)
                        if (err)
                                goto error;
                }
-
                remove_trailing_rmap_items(mm_slot, &mm_slot->rmap_list);
+               mm_slot->seqnr = 0;
 
                spin_lock(&ksm_mmlist_lock);
                ksm_scan.mm_slot = list_entry(mm_slot->mm_list.next,
@@ -806,6 +849,7 @@ static int unmerge_and_remove_all_rmap_items(void)
                        up_read(&mm->mmap_sem);
                        mmdrop(mm);
                } else {
+                       move_to_active_list(mm_slot);
                        spin_unlock(&ksm_mmlist_lock);
                        up_read(&mm->mmap_sem);
                }
@@ -1401,6 +1445,9 @@ static void stable_tree_append(struct rmap_item *rmap_item,
                ksm_pages_sharing++;
        else
                ksm_pages_shared++;
+
+       if (!ksm_merged_or_unstable)
+               ksm_merged_or_unstable = true;
 }
 
 /*
@@ -1468,6 +1515,9 @@ static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item)
        checksum = calc_checksum(page);
        if (rmap_item->oldchecksum != checksum) {
                rmap_item->oldchecksum = checksum;
+               if ((rmap_item->address & UNSTABLE_FLAG) &&
+                               !ksm_merged_or_unstable)
+                       ksm_merged_or_unstable = true;
                return;
        }
 
@@ -1504,6 +1554,31 @@ static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item)
        }
 }
 
+static bool skip_active_slot_vma(struct rmap_item ***rmap_list,
+               struct vm_area_struct *vma, struct mm_slot *mm_slot)
+{
+       unsigned char age;
+       struct rmap_item *rmap_item = **rmap_list;
+
+       age = (unsigned char)(ksm_scan.seqnr - mm_slot->seqnr);
+       if (age > 0)
+               return false;
+       if (!(vma->vm_flags & VM_HUGETLB)) {
+               while (rmap_item && rmap_item->address < vma->vm_end) {
+                       if (rmap_item->address < vma->vm_start) {
+                               **rmap_list = rmap_item->rmap_list;
+                               remove_rmap_item_from_tree(rmap_item);
+                               free_rmap_item(rmap_item);
+                               rmap_item = **rmap_list;
+                       } else {
+                               *rmap_list = &rmap_item->rmap_list;
+                               rmap_item = rmap_item->rmap_list;
+                       }
+               }
+               return true;
+       } else
+               return false;
+}
 static struct rmap_item *get_next_rmap_item(struct mm_slot *mm_slot,
                                            struct rmap_item **rmap_list,
                                            unsigned long addr)
@@ -1535,15 +1610,18 @@ static struct rmap_item *get_next_rmap_item(struct mm_slot *mm_slot,
 static struct rmap_item *scan_get_next_rmap_item(struct page **page)
 {
        struct mm_struct *mm;
-       struct mm_slot *slot;
+       struct mm_slot *slot, *next_slot;
        struct vm_area_struct *vma;
        struct rmap_item *rmap_item;
        int nid;
 
-       if (list_empty(&ksm_mm_head.mm_list))
+       if (list_empty(&ksm_mm_head.mm_list) &&
+               list_empty(&ksm_mm_active.mm_list))
                return NULL;
-
-       slot = ksm_scan.mm_slot;
+       if (ksm_scan.active_slot != &ksm_mm_active)
+               slot = ksm_scan.active_slot;
+       else
+               slot = ksm_scan.mm_slot;
        if (slot == &ksm_mm_head) {
                /*
                 * A number of pages can hang around indefinitely on per-cpu
@@ -1582,8 +1660,16 @@ static struct rmap_item *scan_get_next_rmap_item(struct page **page)
                        root_unstable_tree[nid] = RB_ROOT;
 
                spin_lock(&ksm_mmlist_lock);
-               slot = list_entry(slot->mm_list.next, struct mm_slot, mm_list);
-               ksm_scan.mm_slot = slot;
+               if (unlikely(ksm_scan.seqnr == 0 &&
+                       !list_empty(&ksm_mm_active.mm_list))) {
+                       slot = list_entry(ksm_mm_active.mm_list.next,
+                               struct mm_slot, mm_list);
+                       ksm_scan.active_slot = slot;
+               } else {
+                       slot = list_entry(slot->mm_list.next,
+                               struct mm_slot, mm_list);
+                       ksm_scan.mm_slot = slot;
+               }
                spin_unlock(&ksm_mmlist_lock);
                /*
                 * Although we tested list_empty() above, a racing __ksm_exit
@@ -1608,7 +1694,8 @@ next_mm:
                        continue;
                if (ksm_scan.address < vma->vm_start)
                        ksm_scan.address = vma->vm_start;
-               if (!vma->anon_vma)
+               if (!vma->anon_vma || (ksm_scan.active_slot == slot &&
+                       skip_active_slot_vma(&ksm_scan.rmap_list, vma, slot)))
                        ksm_scan.address = vma->vm_end;
 
                while (ksm_scan.address < vma->vm_end) {
@@ -1639,6 +1726,9 @@ next_mm:
                        ksm_scan.address += PAGE_SIZE;
                        cond_resched();
                }
+               if ((slot->seqnr & (ACTIVE_SLOT_FLAG | ACTIVE_SLOT_SEQNR)) ==
+                       ACTIVE_SLOT_FLAG && vma->vm_flags & VM_HUGETLB)
+                       vma->vm_flags &= ~VM_HUGETLB;
        }
 
        if (ksm_test_exit(mm)) {
@@ -1652,8 +1742,25 @@ next_mm:
        remove_trailing_rmap_items(slot, ksm_scan.rmap_list);
 
        spin_lock(&ksm_mmlist_lock);
-       ksm_scan.mm_slot = list_entry(slot->mm_list.next,
-                                               struct mm_slot, mm_list);
+       slot->seqnr &= ~SEQNR_MASK;
+       slot->seqnr |= (ksm_scan.seqnr & SEQNR_MASK);
+       next_slot = list_entry(slot->mm_list.next,
+               struct mm_slot, mm_list);
+       if (slot == ksm_scan.active_slot) {
+               if (slot->seqnr & ACTIVE_SLOT_SEQNR)
+                       slot->seqnr &= ~ACTIVE_SLOT_SEQNR;
+               else {
+                       slot->seqnr &= ~ACTIVE_SLOT_FLAG;
+                       list_move_tail(&slot->mm_list,
+                               &ksm_scan.mm_slot->mm_list);
+               }
+               ksm_scan.active_slot = next_slot;
+       } else
+               ksm_scan.mm_slot = next_slot;
+
+       if (ksm_scan.active_slot == &ksm_mm_active)
+               ksm_scan.active_slot = list_entry(ksm_mm_active.mm_list.next,
+                                       struct mm_slot, mm_list);
        if (ksm_scan.address == 0) {
                /*
                 * We've completed a full scan of all vmas, holding mmap_sem
@@ -1664,6 +1771,9 @@ next_mm:
                 * or when all VM_MERGEABLE areas have been unmapped (and
                 * mmap_sem then protects against race with MADV_MERGEABLE).
                 */
+               if (ksm_scan.active_slot == slot)
+                       ksm_scan.active_slot = list_entry(slot->mm_list.next,
+                                               struct mm_slot, mm_list);
                hash_del(&slot->link);
                list_del(&slot->mm_list);
                spin_unlock(&ksm_mmlist_lock);
@@ -1678,10 +1788,13 @@ next_mm:
        }
 
        /* Repeat until we've completed scanning the whole list */
-       slot = ksm_scan.mm_slot;
-       if (slot != &ksm_mm_head)
+       if (ksm_scan.active_slot != &ksm_mm_active) {
+               slot = ksm_scan.active_slot;
                goto next_mm;
-
+       } else if (ksm_scan.mm_slot != &ksm_mm_head) {
+               slot = ksm_scan.mm_slot;
+               goto next_mm;
+       }
        ksm_scan.seqnr++;
        return NULL;
 }
@@ -1705,9 +1818,40 @@ static void ksm_do_scan(unsigned int scan_npages)
        }
 }
 
+/* This is copied from page compaction */
+
+static inline void defer_ksm(void)
+{
+       ksm_scan.ksm_considered = 0;
+       if (++ksm_scan.ksm_defer_shift > KSM_MAX_DEFER_SHIFT)
+               ksm_scan.ksm_defer_shift = KSM_MAX_DEFER_SHIFT;
+
+}
+
+static inline bool ksm_defered(void)
+{
+       unsigned long defer_limit = 1UL << ksm_scan.ksm_defer_shift;
+
+       if (++ksm_scan.ksm_considered > defer_limit)
+               ksm_scan.ksm_considered = defer_limit;
+       return ksm_scan.ksm_considered < defer_limit &&
+                       list_empty(&ksm_mm_active.mm_list);
+}
+
+static inline void reset_ksm_defer(void)
+{
+       if (ksm_scan.ksm_defer_shift != 0) {
+               ksm_scan.ksm_considered = 0;
+               ksm_scan.ksm_defer_shift = 0;
+       }
+}
+
+
 static int ksmd_should_run(void)
 {
-       return (ksm_run & KSM_RUN_MERGE) && !list_empty(&ksm_mm_head.mm_list);
+       return (ksm_run & KSM_RUN_MERGE) &&
+               !(list_empty(&ksm_mm_head.mm_list) &&
+                       list_empty(&ksm_mm_active.mm_list));
 }
 
 static int ksm_scan_thread(void *nothing)
@@ -1718,8 +1862,14 @@ static int ksm_scan_thread(void *nothing)
        while (!kthread_should_stop()) {
                mutex_lock(&ksm_thread_mutex);
                wait_while_offlining();
-               if (ksmd_should_run())
+               if (ksmd_should_run() && !ksm_defered()) {
+                       ksm_merged_or_unstable = false;
                        ksm_do_scan(ksm_thread_pages_to_scan);
+                       if (ksm_merged_or_unstable)
+                               reset_ksm_defer();
+                       else
+                               defer_ksm();
+               }
                mutex_unlock(&ksm_thread_mutex);
 
                try_to_freeze();
@@ -1739,6 +1889,8 @@ int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
                unsigned long end, int advice, unsigned long *vm_flags)
 {
        struct mm_struct *mm = vma->vm_mm;
+       unsigned long vma_length =
+                       (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
        int err;
 
        switch (advice) {
@@ -1761,8 +1913,19 @@ int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
                        if (err)
                                return err;
                }
-
-               *vm_flags |= VM_MERGEABLE;
+               /*
+                * Since hugetlb vmas are not supported by KSM,
+                * use VM_HUGETLB to mark a newly mergeable vma.
+                */
+               *vm_flags |= VM_MERGEABLE | VM_HUGETLB;
+               if (vma_length > ksm_thread_pages_to_scan) {
+                       struct mm_slot *mm_slot;
+
+                       spin_lock(&ksm_mmlist_lock);
+                       mm_slot = get_mm_slot(mm);
+                       move_to_active_list(mm_slot);
+                       spin_unlock(&ksm_mmlist_lock);
+               }
                break;
 
        case MADV_UNMERGEABLE:
@@ -1775,7 +1938,7 @@ int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
                                return err;
                }
 
-               *vm_flags &= ~VM_MERGEABLE;
+               *vm_flags &= ~(VM_MERGEABLE | VM_HUGETLB);
                break;
        }
 
@@ -1792,7 +1955,8 @@ int __ksm_enter(struct mm_struct *mm)
                return -ENOMEM;
 
        /* Check ksm_run too?  Would need tighter locking */
-       needs_wakeup = list_empty(&ksm_mm_head.mm_list);
+       needs_wakeup = list_empty(&ksm_mm_head.mm_list) &&
+                       list_empty(&ksm_mm_active.mm_list);
 
        spin_lock(&ksm_mmlist_lock);
        insert_to_mm_slots_hash(mm, mm_slot);
@@ -1806,10 +1970,8 @@ int __ksm_enter(struct mm_struct *mm)
         * scanning cursor, otherwise KSM pages in newly forked mms will be
         * missed: then we might as well insert at the end of the list.
         */
-       if (ksm_run & KSM_RUN_UNMERGE)
-               list_add_tail(&mm_slot->mm_list, &ksm_mm_head.mm_list);
-       else
-               list_add_tail(&mm_slot->mm_list, &ksm_scan.mm_slot->mm_list);
+       list_add_tail(&mm_slot->mm_list, &ksm_scan.active_slot->mm_list);
+       mm_slot->seqnr |= (ACTIVE_SLOT_FLAG | ACTIVE_SLOT_SEQNR);
        spin_unlock(&ksm_mmlist_lock);
 
        set_bit(MMF_VM_MERGEABLE, &mm->flags);
@@ -1823,7 +1985,7 @@ int __ksm_enter(struct mm_struct *mm)
 
 void __ksm_exit(struct mm_struct *mm)
 {
-       struct mm_slot *mm_slot;
+       struct mm_slot *mm_slot, *current_slot;
        int easy_to_free = 0;
 
        /*
@@ -1837,14 +1999,28 @@ void __ksm_exit(struct mm_struct *mm)
 
        spin_lock(&ksm_mmlist_lock);
        mm_slot = get_mm_slot(mm);
-       if (mm_slot && ksm_scan.mm_slot != mm_slot) {
+       if (ksm_scan.active_slot != &ksm_mm_active)
+               current_slot = ksm_scan.active_slot;
+       else
+               current_slot = ksm_scan.mm_slot;
+       if (mm_slot && mm_slot != current_slot) {
                if (!mm_slot->rmap_list) {
                        hash_del(&mm_slot->link);
                        list_del(&mm_slot->mm_list);
                        easy_to_free = 1;
                } else {
-                       list_move(&mm_slot->mm_list,
-                                 &ksm_scan.mm_slot->mm_list);
+                       if (mm_slot == ksm_scan.mm_slot)
+                               ksm_scan.mm_slot =
+                                       list_entry(mm_slot->mm_list.next,
+                                               struct mm_slot, mm_list);
+                       if (ksm_run & KSM_RUN_UNMERGE)
+                               list_move(&mm_slot->mm_list,
+                                       &ksm_scan.mm_slot->mm_list);
+                       else {
+                               list_move(&mm_slot->mm_list,
+                                       &ksm_scan.active_slot->mm_list);
+                               mm_slot->seqnr |= ACTIVE_SLOT_FLAG;
+                       }
                }
        }
        spin_unlock(&ksm_mmlist_lock);
-- 
1.7.9.5
