Adds another optional mode flag, MPOL_F_RELATIVE_NODES, that specifies that
nodemasks passed via set_mempolicy() or mbind() should be considered
relative to the current task's mems_allowed.

When the mempolicy is created, the passed nodemask is folded and mapped
onto the current task's mems_allowed.  For example, consider a task
using set_mempolicy() to pass MPOL_INTERLEAVE | MPOL_F_RELATIVE_NODES
with a nodemask of 1-3.  If current's mems_allowed is 4-7, the effective
nodemask is 5-7 (the second, third, and fourth node of mems_allowed).

If the same task is attached to a cpuset, the mempolicy nodemask is
rebound each time the mems are changed.  Some possible rebinds and
results are:

        mems                    result
        1-3                     1-3
        1-7                     2-4
        1,5-6                   1,5-6
        1,5-7                   5-7

Likewise, the zonelist built for MPOL_BIND acts on the set of zones
assigned to the resultant nodemask from the relative remap.

In the MPOL_PREFERRED case, the preferred node is remapped from the
currently effective nodemask to the relative nodemask.

This mempolicy mode flag was conceived of by Paul Jackson <[EMAIL PROTECTED]>.

Cc: Paul Jackson <[EMAIL PROTECTED]>
Cc: Christoph Lameter <[EMAIL PROTECTED]>
Cc: Lee Schermerhorn <[EMAIL PROTECTED]>
Cc: Andi Kleen <[EMAIL PROTECTED]>
Signed-off-by: David Rientjes <[EMAIL PROTECTED]>
---
 include/linux/mempolicy.h |    3 ++-
 mm/mempolicy.c            |   40 ++++++++++++++++++++++++++++++++++++++--
 mm/shmem.c                |    6 ++++++
 3 files changed, 46 insertions(+), 3 deletions(-)

diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -25,12 +25,13 @@ enum {
 
 /* Flags for set_mempolicy */
 #define MPOL_F_STATIC_NODES    (1 << 15)
+#define MPOL_F_RELATIVE_NODES  (1 << 14)
 
 /*
  * MPOL_MODE_FLAGS is the union of all possible optional mode flags passed to
  * either set_mempolicy() or mbind().
  */
-#define MPOL_MODE_FLAGS                (MPOL_F_STATIC_NODES)
+#define MPOL_MODE_FLAGS                (MPOL_F_STATIC_NODES | MPOL_F_RELATIVE_NODES)
 
 /* Flags for get_mempolicy */
 #define MPOL_F_NODE    (1<<0)  /* return next IL mode instead of node mask */
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -152,7 +152,18 @@ static struct zonelist *bind_zonelist(nodemask_t *nodes)
 
 static inline int mpol_store_user_nodemask(const struct mempolicy *pol)
 {
-       return !!(pol->flags & MPOL_F_STATIC_NODES);
+       return !!(pol->flags & (MPOL_F_STATIC_NODES | MPOL_F_RELATIVE_NODES));
+}
+
+static nodemask_t mpol_relative_nodemask(const nodemask_t *orig,
+                                        const nodemask_t *rel)
+{
+       nodemask_t ret;
+       nodemask_t tmp;
+
+       nodes_fold(tmp, *orig, nodes_weight(*rel));
+       nodes_onto(ret, tmp, *rel);
+       return ret;
 }
 
 /* Create a new policy */
@@ -173,7 +184,12 @@ static struct mempolicy *mpol_new(unsigned short mode, unsigned short flags,
                return ERR_PTR(-ENOMEM);
        atomic_set(&policy->refcnt, 1);
        cpuset_update_task_memory_state();
-       nodes_and(cpuset_context_nmask, *nodes, cpuset_current_mems_allowed);
+       if (flags & MPOL_F_RELATIVE_NODES)
+               cpuset_context_nmask = mpol_relative_nodemask(nodes,
+                                               &cpuset_current_mems_allowed);
+       else
+               nodes_and(cpuset_context_nmask, *nodes,
+                         cpuset_current_mems_allowed);
        switch (mode) {
        case MPOL_INTERLEAVE:
                if (nodes_empty(*nodes) || nodes_empty(cpuset_context_nmask))
@@ -898,6 +914,9 @@ asmlinkage long sys_mbind(unsigned long start, unsigned long len,
        mode &= ~MPOL_MODE_FLAGS;
        if (mode >= MPOL_MAX)
                return -EINVAL;
+       if ((mode_flags & MPOL_F_STATIC_NODES) &&
+           (mode_flags & MPOL_F_RELATIVE_NODES))
+               return -EINVAL;
        err = get_nodes(&nodes, nmask, maxnode);
        if (err)
                return err;
@@ -916,6 +935,8 @@ asmlinkage long sys_set_mempolicy(int mode, unsigned long __user *nmask,
        mode &= ~MPOL_MODE_FLAGS;
        if ((unsigned int)mode >= MPOL_MAX)
                return -EINVAL;
+       if ((flags & MPOL_F_STATIC_NODES) && (flags & MPOL_F_RELATIVE_NODES))
+               return -EINVAL;
        err = get_nodes(&nodes, nmask, maxnode);
        if (err)
                return err;
@@ -1747,10 +1768,12 @@ static void mpol_rebind_policy(struct mempolicy *pol,
 {
        nodemask_t tmp;
        int static_nodes;
+       int relative_nodes;
 
        if (!pol)
                return;
        static_nodes = pol->flags & MPOL_F_STATIC_NODES;
+       relative_nodes = pol->flags & MPOL_F_RELATIVE_NODES;
        if (!mpol_store_user_nodemask(pol) &&
            nodes_equal(pol->w.cpuset_mems_allowed, *newmask))
                return;
@@ -1761,6 +1784,9 @@ static void mpol_rebind_policy(struct mempolicy *pol,
        case MPOL_INTERLEAVE:
                if (static_nodes)
                        nodes_and(tmp, pol->w.user_nodemask, *newmask);
+               else if (relative_nodes)
+                       tmp = mpol_relative_nodemask(&pol->w.user_nodemask,
+                                                    newmask);
                else {
                        nodes_remap(tmp, pol->v.nodes,
                                    pol->w.cpuset_mems_allowed, *newmask);
@@ -1783,6 +1809,11 @@ static void mpol_rebind_policy(struct mempolicy *pol,
                                pol->v.preferred_node = node;
                        else
                                pol->v.preferred_node = -1;
+               } else if (relative_nodes) {
+                       tmp = mpol_relative_nodemask(&pol->w.user_nodemask,
+                                                    newmask);
+                       pol->v.preferred_node = node_remap(pol->v.preferred_node,
+                                       pol->w.cpuset_mems_allowed, tmp);
                } else {
                        pol->v.preferred_node = node_remap(pol->v.preferred_node,
                                        pol->w.cpuset_mems_allowed, *newmask);
@@ -1794,6 +1825,9 @@ static void mpol_rebind_policy(struct mempolicy *pol,
 
                if (static_nodes)
                        nodes_and(tmp, pol->w.user_nodemask, *newmask);
+               else if (relative_nodes)
+                       tmp = mpol_relative_nodemask(&pol->w.user_nodemask,
+                                                    newmask);
                else {
                        nodemask_t nodes;
                        struct zone **z;
@@ -1911,6 +1945,8 @@ static inline int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)
 
                if (flags & MPOL_F_STATIC_NODES)
                        p += sprintf(p, "%sstatic", need_bar++ ? "|" : "");
+               if (flags & MPOL_F_RELATIVE_NODES)
+                       p += sprintf(p, "%srelative", need_bar++ ? "|" : "");
        }
 
        if (!nodes_empty(nodes)) {
diff --git a/mm/shmem.c b/mm/shmem.c
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1131,6 +1131,12 @@ static int shmem_parse_mpol(char *value, unsigned short *policy,
        if (flags) {
                if (!strcmp(flags, "static"))
                        *mode_flags |= MPOL_F_STATIC_NODES;
+               if (!strcmp(flags, "relative"))
+                       *mode_flags |= MPOL_F_RELATIVE_NODES;
+
+               if ((*mode_flags & MPOL_F_STATIC_NODES) &&
+                   (*mode_flags & MPOL_F_RELATIVE_NODES))
+                       err = 1;
        }
 out:
        /* Restore string for error message */
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to