This is the core of the patch series.
Add a /dev/mem_notify device that notifies user processes of low memory.

<usage example>

        fd = open("/dev/mem_notify", O_RDONLY);
        if (fd < 0) {
                exit(1);
        }
        pollfds.fd = fd;
        pollfds.events = POLLIN;
        pollfds.revents = 0;
        err = poll(&pollfds, 1, -1); // wake up at low memory

        ...
</usage example>

ChangeLog
     v5 -> v6:
         o improve the formula for the number of tasks to wake up when there are only a few tasks.



Signed-off-by: Marcelo Tosatti <[EMAIL PROTECTED]>
Signed-off-by: KOSAKI Motohiro <[EMAIL PROTECTED]>

---
 Documentation/devices.txt  |    1
 drivers/char/mem.c         |    5 +
 include/linux/mem_notify.h |   42 +++++++++++++++
 include/linux/mmzone.h     |    1
 mm/Makefile                |    2
 mm/mem_notify.c            |  123 +++++++++++++++++++++++++++++++++++++++++++++
 mm/page_alloc.c            |    1
 7 files changed, 174 insertions(+), 1 deletion(-)

Index: b/drivers/char/mem.c
===================================================================
--- a/drivers/char/mem.c        2008-02-03 20:59:43.000000000 +0900
+++ b/drivers/char/mem.c        2008-02-03 21:00:24.000000000 +0900
@@ -26,6 +26,7 @@
 #include <linux/bootmem.h>
 #include <linux/splice.h>
 #include <linux/pfn.h>
+#include <linux/mem_notify.h>

 #include <asm/uaccess.h>
 #include <asm/io.h>
@@ -869,6 +870,9 @@ static int memory_open(struct inode * in
                        filp->f_op = &oldmem_fops;
                        break;
 #endif
+               case 13:
+                       filp->f_op = &mem_notify_fops;
+                       break;
                default:
                        return -ENXIO;
        }
@@ -901,6 +905,7 @@ static const struct {
 #ifdef CONFIG_CRASH_DUMP
        {12,"oldmem",    S_IRUSR | S_IWUSR | S_IRGRP, &oldmem_fops},
 #endif
+       {13, "mem_notify", S_IRUGO, &mem_notify_fops},
 };

 static struct class *mem_class;
Index: b/include/linux/mem_notify.h
===================================================================
--- /dev/null   1970-01-01 00:00:00.000000000 +0000
+++ b/include/linux/mem_notify.h        2008-02-03 21:01:41.000000000 +0900
@@ -0,0 +1,42 @@
+/*
+ * Notify applications of memory pressure via /dev/mem_notify
+ *
+ * Copyright (C) 2008 Marcelo Tosatti <[EMAIL PROTECTED]>,
+ *                    KOSAKI Motohiro <[EMAIL PROTECTED]>
+ *
+ * Released under the GPL, see the file COPYING for details.
+ */
+
+#ifndef _LINUX_MEM_NOTIFY_H
+#define _LINUX_MEM_NOTIFY_H
+
+#define MEM_NOTIFY_FREQ (HZ/5)  /* pressure reports closer than this are dropped */
+/* Both objects are defined in mm/mem_notify.c. */
+extern atomic_long_t last_mem_notify;
+extern struct file_operations mem_notify_fops;
+/* Slow path called by the inline memory_pressure_notify() filter below. */
+extern void __memory_pressure_notify(struct zone *zone, int pressure);
+
+/*
+ * Forward to __memory_pressure_notify() unless the report is rate-limited,
+ * the zone has ample free pages, or a clear arrives for an unmarked zone.
+ */
+static inline void memory_pressure_notify(struct zone *zone, int pressure)
+{
+       unsigned long due, limit;
+
+       if (!pressure) {
+               if (likely(!zone->mem_notify_status))
+                       return;
+       } else {
+               due = atomic_long_read(&last_mem_notify) + MEM_NOTIFY_FREQ;
+               if (likely(time_before(jiffies, due)))
+                       return;
+               limit = zone->pages_high + zone->lowmem_reserve[MAX_NR_ZONES-1];
+               if (unlikely(zone_page_state(zone, NR_FREE_PAGES) > limit * 2))
+                       return;
+       }
+
+       __memory_pressure_notify(zone, pressure);
+}
+
+#endif /* _LINUX_MEM_NOTIFY_H */
Index: b/include/linux/mmzone.h
===================================================================
--- a/include/linux/mmzone.h    2008-02-03 20:59:43.000000000 +0900
+++ b/include/linux/mmzone.h    2008-02-03 20:59:46.000000000 +0900
@@ -283,6 +283,7 @@ struct zone {
         */
        int prev_priority;

+       int mem_notify_status;

        ZONE_PADDING(_pad2_)
        /* Rarely used or read-mostly fields */
Index: b/mm/Makefile
===================================================================
--- a/mm/Makefile       2008-02-03 20:59:43.000000000 +0900
+++ b/mm/Makefile       2008-02-03 20:59:46.000000000 +0900
@@ -11,7 +11,7 @@ obj-y                 := bootmem.o filemap.o mempool.o
                           page_alloc.o page-writeback.o pdflush.o \
                           readahead.o swap.o truncate.o vmscan.o \
                           prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \
-                          page_isolation.o $(mmu-y)
+                          page_isolation.o mem_notify.o $(mmu-y)

 obj-$(CONFIG_PROC_PAGE_MONITOR) += pagewalk.o
 obj-$(CONFIG_BOUNCE)   += bounce.o
Index: b/mm/mem_notify.c
===================================================================
--- /dev/null   1970-01-01 00:00:00.000000000 +0000
+++ b/mm/mem_notify.c   2008-02-03 21:02:30.000000000 +0900
@@ -0,0 +1,123 @@
+/*
+ * Notify applications of memory pressure via /dev/mem_notify
+ *
+ * Copyright (C) 2008 Marcelo Tosatti <[EMAIL PROTECTED]>,
+ *                    KOSAKI Motohiro <[EMAIL PROTECTED]>
+ *
+ * Released under the GPL, see the file COPYING for details.
+ */
+
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/wait.h>
+#include <linux/poll.h>
+#include <linux/timer.h>
+#include <linux/spinlock.h>
+#include <linux/mm.h>
+#include <linux/vmstat.h>
+#include <linux/percpu.h>
+#include <linux/timer.h>
+#include <linux/mem_notify.h>
+
+#include <asm/atomic.h>
+
+#define MAX_PROC_WAKEUP_GUARD  (10*HZ)  /* cap on the per-file poll guard time */
+#define MAX_WAKEUP_TASKS (100)          /* cap on tasks woken per notification */
+/* Per-open state, stored in file->private_data by mem_notify_open(). */
+struct mem_notify_file_info {
+       unsigned long last_proc_notify; /* jiffies of this file's last POLLIN */
+};
+/* Poll wait queue, number of zones flagged as pressured, number of open files. */
+static DECLARE_WAIT_QUEUE_HEAD(mem_wait);
+static atomic_long_t nr_under_memory_pressure_zones = ATOMIC_LONG_INIT(0);
+static atomic_t nr_watcher_task = ATOMIC_INIT(0);
+/* jiffies at the most recent pressure notification; read by the inline filter. */
+atomic_long_t last_mem_notify = ATOMIC_LONG_INIT(INITIAL_JIFFIES);
+
+/*
+ * Record @zone's new pressure state and, under pressure, ask
+ * wake_up_locked_nr() to wake (watchers / 16) + 1 waiters, capped at
+ * MAX_WAKEUP_TASKS.  The zone flag and the wakeup run under mem_wait.lock.
+ */
+void __memory_pressure_notify(struct zone *zone, int pressure)
+{
+       unsigned long flags;    /* spin_lock_irqsave() needs unsigned long */
+       int nr_watcher, nr_wakeup;
+
+       spin_lock_irqsave(&mem_wait.lock, flags);
+       /* On a state change, keep the count of pressured zones in step. */
+       if (pressure != zone->mem_notify_status) {
+               atomic_long_add(pressure ? 1 : -1,
+                               &nr_under_memory_pressure_zones);
+               zone->mem_notify_status = pressure;
+       }
+       if (pressure) {
+               nr_watcher = atomic_read(&nr_watcher_task);
+               atomic_long_set(&last_mem_notify, jiffies);
+               if (nr_watcher) {
+                       nr_wakeup = (nr_watcher >> 4) + 1;
+                       if (unlikely(nr_wakeup > MAX_WAKEUP_TASKS))
+                               nr_wakeup = MAX_WAKEUP_TASKS;
+                       wake_up_locked_nr(&mem_wait, nr_wakeup);
+               }
+       }
+       spin_unlock_irqrestore(&mem_wait.lock, flags);
+}
+
+/*
+ * open() handler: allocate the per-file poll state and count the opener in
+ * nr_watcher_task.  Returns 0, or -ENOMEM if the allocation fails.
+ */
+static int mem_notify_open(struct inode *inode, struct file *file)
+{
+       struct mem_notify_file_info *state;
+
+       state = kmalloc(sizeof(*state), GFP_KERNEL);
+       if (!state)
+               return -ENOMEM;
+
+       state->last_proc_notify = INITIAL_JIFFIES;
+       file->private_data = state;
+       atomic_inc(&nr_watcher_task);
+       return 0;
+}
+/* release() handler: undoes mem_notify_open(); always returns 0. */
+static int mem_notify_release(struct inode *inode, struct file *file)
+{
+       kfree(file->private_data);      /* state allocated in mem_notify_open() */
+       atomic_dec(&nr_watcher_task);   /* this file is no longer a watcher */
+       return 0;
+}
+
+/*
+ * poll() handler.  Registers on mem_wait, then reports POLLIN when at least
+ * one zone is counted as under pressure and this file's guard interval
+ * (MEM_NOTIFY_FREQ per watcher, at most MAX_PROC_WAKEUP_GUARD) has passed
+ * since it was last notified; otherwise returns 0.
+ */
+static unsigned int mem_notify_poll(struct file *file, poll_table *wait)
+{
+       struct mem_notify_file_info *state = file->private_data;
+       unsigned long start = jiffies;
+       unsigned long guard;
+
+       poll_wait_exclusive(file, &mem_wait, wait);
+
+       guard = min_t(unsigned long,
+                     MEM_NOTIFY_FREQ * atomic_read(&nr_watcher_task),
+                     MAX_PROC_WAKEUP_GUARD);
+       if (time_before(start, state->last_proc_notify + guard))
+               return 0;
+       if (atomic_long_read(&nr_under_memory_pressure_zones) == 0)
+               return 0;
+
+       state->last_proc_notify = start;
+       return POLLIN;
+}
+/* File operations that memory_open() installs for minor 13 (/dev/mem_notify). */
+struct file_operations mem_notify_fops = {
+       .open = mem_notify_open,
+       .release = mem_notify_release,
+       .poll = mem_notify_poll,
+};
+EXPORT_SYMBOL(mem_notify_fops);
Index: b/mm/page_alloc.c
===================================================================
--- a/mm/page_alloc.c   2008-02-03 20:59:43.000000000 +0900
+++ b/mm/page_alloc.c   2008-02-03 21:01:43.000000000 +0900
@@ -3458,6 +3458,7 @@ static void __meminit free_area_init_cor
                zone->zone_pgdat = pgdat;

                zone->prev_priority = DEF_PRIORITY;
+               zone->mem_notify_status = 0;

                zone_pcp_init(zone);
                INIT_LIST_HEAD(&zone->active_list);
Index: b/Documentation/devices.txt
===================================================================
--- a/Documentation/devices.txt 2008-02-03 20:59:43.000000000 +0900
+++ b/Documentation/devices.txt 2008-02-03 20:59:46.000000000 +0900
@@ -96,6 +96,7 @@ Your cooperation is appreciated.
                 11 = /dev/kmsg         Writes to this come out as printk's
                 12 = /dev/oldmem       Used by crashdump kernels to access
                                        the memory of the kernel that crashed.
+                13 = /dev/mem_notify   Low memory notification.

   1 block      RAM disk
                  0 = /dev/ram0         First RAM disk
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to