Two new sysfs files are added to demote hugetlb pages.  These files are
both per-hugetlb page size and per node.  Files are:
  demote_size - The size in kB that pages are demoted to.
  demote - The number of huge pages to demote.

Writing a value to demote will result in an attempt to demote that
number of hugetlb pages to an appropriate number of demote_size pages.

This patch does not provide full demote functionality.  It only provides
the sysfs interfaces and uses existing code to free pages to the buddy
allocator if demote_size == PAGE_SIZE.

Signed-off-by: Mike Kravetz <mike.krav...@oracle.com>
---
 include/linux/hugetlb.h |   1 +
 mm/hugetlb.c            | 117 +++++++++++++++++++++++++++++++++++++++-
 2 files changed, 117 insertions(+), 1 deletion(-)

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index cccd1aab69dd..5e9d6c8ab411 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -558,6 +558,7 @@ struct hstate {
        int next_nid_to_alloc;
        int next_nid_to_free;
        unsigned int order;
+       unsigned int demote_order;
        unsigned long mask;
        unsigned long max_huge_pages;
        unsigned long nr_huge_pages;
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 8fb42c6dd74b..161732ba7aaf 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2492,7 +2492,7 @@ static void __init hugetlb_hstate_alloc_pages(struct 
hstate *h)
 
 static void __init hugetlb_init_hstates(void)
 {
-       struct hstate *h;
+       struct hstate *h, *h2;
 
        for_each_hstate(h) {
                if (minimum_order > huge_page_order(h))
@@ -2501,6 +2501,17 @@ static void __init hugetlb_init_hstates(void)
                /* oversize hugepages were init'ed in early boot */
                if (!hstate_is_gigantic(h))
                        hugetlb_hstate_alloc_pages(h);
+
+               /*
+                * Set demote order for each hstate.  Note that
+                * h->demote_order is initially 0.
+                */
+               for_each_hstate(h2) {
+                       if (h2 == h)
+                               continue;
+                       if (h2->order < h->order && h2->order > h->demote_order)
+                               h->demote_order = h2->order;
+               }
        }
        VM_BUG_ON(minimum_order == UINT_MAX);
 }
@@ -2710,6 +2721,20 @@ static int set_max_huge_pages(struct hstate *h, unsigned 
long count, int nid,
        return 0;
 }
 
+/*
+ * Demote a huge page of @h.  An hstate with no demote order hands the request
+ * to free_pool_huge_page() and returns its result; otherwise returns 0.
+ */
+static int demote_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed)
+{
+       /* No demote order: free the page to buddy instead. */
+       if (!h->demote_order)
+               return free_pool_huge_page(h, nodes_allowed, 0);
+
+       /* TODO - demote functionality will be added in a subsequent patch */
+       return 0;
+}
+
 #define HSTATE_ATTR_RO(_name) \
        static struct kobj_attribute _name##_attr = __ATTR_RO(_name)
 
@@ -2908,12 +2933,100 @@ static ssize_t surplus_hugepages_show(struct kobject 
*kobj,
 }
 HSTATE_ATTR_RO(surplus_hugepages);
 
+/* Reading the demote file always reports 0. */
+static ssize_t demote_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+       return sysfs_emit(buf, "0\n");
+}
+
+/*
+ * Worker for writes to the "demote" sysfs file.  Parses a decimal page count
+ * from @buf and demotes up to that many free, unreserved huge pages of the
+ * hstate that @kobj maps to (a single node when kobj_to_hstate() reports one).
+ * Returns @len, or the kstrtoul() error when @buf does not parse.
+ */
+static ssize_t demote_store_action(struct kobject *kobj, const char *buf,
+                                                       size_t len)
+{
+       unsigned long nr_demote;
+       unsigned long nr_free;
+       nodemask_t nodes_allowed, *n_mask;
+       struct hstate *h;
+       int err;
+       int nid;
+
+       err = kstrtoul(buf, 10, &nr_demote);
+       if (err)
+               return err;
+       h = kobj_to_hstate(kobj, &nid);
+
+       if (nid != NUMA_NO_NODE) {
+               init_nodemask_of_node(&nodes_allowed, nid);
+               n_mask = &nodes_allowed;
+       } else {
+               n_mask = &node_states[N_MEMORY];
+       }
+
+       spin_lock(&hugetlb_lock);
+       while (nr_demote) {
+               /*
+                * The lock may have been dropped by cond_resched_lock() or by
+                * the demote/free routines, so re-read the counts every pass.
+                * They are unsigned: compare before subtracting so that
+                * free <= reserved cannot wrap around to a huge value and
+                * let a reserved huge page be demoted.
+                */
+               if (nid != NUMA_NO_NODE)
+                       nr_free = h->free_huge_pages_node[nid];
+               else
+                       nr_free = h->free_huge_pages;
+               if (nr_free <= h->resv_huge_pages)
+                       break;
+               nr_demote = min(nr_free - h->resv_huge_pages, nr_demote);
+
+               /* Stop as soon as demote_pool_huge_page() returns 0. */
+               if (!demote_pool_huge_page(h, n_mask))
+                       break;
+               nr_demote--;
+
+               cond_resched_lock(&hugetlb_lock);
+       }
+       spin_unlock(&hugetlb_lock);
+
+       return len;
+}
+
+/* sysfs store hook for "demote"; @attr is unused. */
+static ssize_t demote_store(struct kobject *kobj,
+              struct kobj_attribute *attr, const char *buf, size_t len)
+{
+       return demote_store_action(kobj, buf, len);
+}
+HSTATE_ATTR(demote);
+
+/* Report the size, in kB, of the pages this hstate demotes to. */
+static ssize_t demote_size_show(struct kobject *kobj,
+                                       struct kobj_attribute *attr, char *buf)
+{
+       struct hstate *h;
+       unsigned long demote_size;
+       int nid;
+
+       /* demote_order 0 (no smaller hstate) reports the base page size. */
+       h = kobj_to_hstate(kobj, &nid);
+       demote_size = (unsigned long)(PAGE_SIZE << h->demote_order) / SZ_1K;
+       return sysfs_emit(buf, "%lukB\n", demote_size);
+}
+HSTATE_ATTR_RO(demote_size);
+
 static struct attribute *hstate_attrs[] = {
        &nr_hugepages_attr.attr,
        &nr_overcommit_hugepages_attr.attr,
        &free_hugepages_attr.attr,
        &resv_hugepages_attr.attr,
        &surplus_hugepages_attr.attr,
+       &demote_size_attr.attr,
+       &demote_attr.attr,
 #ifdef CONFIG_NUMA
        &nr_hugepages_mempolicy_attr.attr,
 #endif
@@ -2983,6 +3096,8 @@ static struct attribute *per_node_hstate_attrs[] = {
        &nr_hugepages_attr.attr,
        &free_hugepages_attr.attr,
        &surplus_hugepages_attr.attr,
+       &demote_size_attr.attr,
+       &demote_attr.attr,
        NULL,
 };
 
-- 
2.29.2

Reply via email to