diff --git a/Makefile b/Makefile
index beb7e6f0803b..41e6e19fe2e9 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 3
 PATCHLEVEL = 14
-SUBLEVEL = 20
+SUBLEVEL = 21
 EXTRAVERSION =
 NAME = Remembering Coco
 
diff --git a/arch/unicore32/include/asm/mmu_context.h b/arch/unicore32/include/asm/mmu_context.h
index fb5e4c658f7a..ef470a7a3d0f 100644
--- a/arch/unicore32/include/asm/mmu_context.h
+++ b/arch/unicore32/include/asm/mmu_context.h
@@ -14,6 +14,8 @@
 
 #include <linux/compiler.h>
 #include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/vmacache.h>
 #include <linux/io.h>
 
 #include <asm/cacheflush.h>
@@ -73,7 +75,7 @@ do { \
                else \
                        mm->mmap = NULL; \
                rb_erase(&high_vma->vm_rb, &mm->mm_rb); \
-               mm->mmap_cache = NULL; \
+               vmacache_invalidate(mm); \
                mm->map_count--; \
                remove_vma(high_vma); \
        } \
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index c706d50a8b06..8c16c2f97026 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -525,6 +525,12 @@ void conn_try_outdate_peer_async(struct drbd_tconn *tconn)
        struct task_struct *opa;
 
        kref_get(&tconn->kref);
+       /* We may just have force_sig()'ed this thread
+        * to get it out of some blocking network function.
+        * Clear signals; otherwise kthread_run(), which internally uses
+        * wait_on_completion_killable(), will mistake our pending signal
+        * for a new fatal signal and fail. */
+       flush_signals(current);
        opa = kthread_run(_try_outdate_peer_async, tconn, "drbd_async_h");
        if (IS_ERR(opa)) {
                conn_err(tconn, "out of mem, failed to invoke fence-peer helper\n");
diff --git a/drivers/cpufreq/integrator-cpufreq.c b/drivers/cpufreq/integrator-cpufreq.c
index 0e27844e8c2d..8089dd2cd9d8 100644
--- a/drivers/cpufreq/integrator-cpufreq.c
+++ b/drivers/cpufreq/integrator-cpufreq.c
@@ -213,9 +213,9 @@ static int __init integrator_cpufreq_probe(struct platform_device *pdev)
        return cpufreq_register_driver(&integrator_driver);
 }
 
-static void __exit integrator_cpufreq_remove(struct platform_device *pdev)
+static int __exit integrator_cpufreq_remove(struct platform_device *pdev)
 {
-       cpufreq_unregister_driver(&integrator_driver);
+       return cpufreq_unregister_driver(&integrator_driver);
 }
 
 static const struct of_device_id integrator_cpufreq_match[] = {
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index d278be110805..1855cdca39cd 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -827,6 +827,16 @@ void i915_check_and_clear_faults(struct drm_device *dev)
        POSTING_READ(RING_FAULT_REG(&dev_priv->ring[RCS]));
 }
 
+static void i915_ggtt_flush(struct drm_i915_private *dev_priv)
+{
+       if (INTEL_INFO(dev_priv->dev)->gen < 6) {
+               intel_gtt_chipset_flush();
+       } else {
+               I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
+               POSTING_READ(GFX_FLSH_CNTL_GEN6);
+       }
+}
+
 void i915_gem_suspend_gtt_mappings(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
@@ -843,6 +853,8 @@ void i915_gem_suspend_gtt_mappings(struct drm_device *dev)
                                       dev_priv->gtt.base.start / PAGE_SIZE,
                                       dev_priv->gtt.base.total / PAGE_SIZE,
                                       true);
+
+       i915_ggtt_flush(dev_priv);
 }
 
 void i915_gem_restore_gtt_mappings(struct drm_device *dev)
@@ -863,7 +875,7 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev)
                i915_gem_gtt_bind_object(obj, obj->cache_level);
        }
 
-       i915_gem_chipset_flush(dev);
+       i915_ggtt_flush(dev_priv);
 }
 
 int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 18cda77b4f79..4913c0690872 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -64,6 +64,10 @@
 #define cpu_to_group(cpu) cpu_to_node(cpu)
 #define ANY_GROUP NUMA_NO_NODE
 
+static bool devices_handle_discard_safely = false;
+module_param(devices_handle_discard_safely, bool, 0644);
+MODULE_PARM_DESC(devices_handle_discard_safely,
+                "Set to Y if all devices in each array reliably return zeroes on reads from discarded regions");
 static struct workqueue_struct *raid5_wq;
 /*
  * Stripe cache
@@ -6117,7 +6121,7 @@ static int run(struct mddev *mddev)
                mddev->queue->limits.discard_granularity = stripe;
                /*
                 * unaligned part of discard request will be ignored, so can't
-                * guarantee discard_zerors_data
+                * guarantee discard_zeroes_data
                 */
                mddev->queue->limits.discard_zeroes_data = 0;
 
@@ -6142,6 +6146,18 @@ static int run(struct mddev *mddev)
                            !bdev_get_queue(rdev->bdev)->
                                                limits.discard_zeroes_data)
                                discard_supported = false;
+                       /* Unfortunately, discard_zeroes_data is not currently
+                        * a guarantee - just a hint.  So we only allow DISCARD
+                        * if the sysadmin has confirmed that only safe devices
+                        * are in use by setting a module parameter.
+                        */
+                       if (!devices_handle_discard_safely) {
+                               if (discard_supported) {
+                                       pr_info("md/raid456: discard support disabled due to uncertainty.\n");
+                                       pr_info("Set raid456.devices_handle_discard_safely=Y to override.\n");
+                               }
+                               discard_supported = false;
+                       }
                }
 
                if (discard_supported &&
diff --git a/drivers/media/v4l2-core/videobuf2-core.c b/drivers/media/v4l2-core/videobuf2-core.c
index a127925c9d61..06faea4d60ee 100644
--- a/drivers/media/v4l2-core/videobuf2-core.c
+++ b/drivers/media/v4l2-core/videobuf2-core.c
@@ -745,6 +745,7 @@ static int __reqbufs(struct vb2_queue *q, struct v4l2_requestbuffers *req)
         * to the userspace.
         */
        req->count = allocated_buffers;
+       q->waiting_for_buffers = !V4L2_TYPE_IS_OUTPUT(q->type);
 
        return 0;
 }
@@ -793,6 +794,7 @@ static int __create_bufs(struct vb2_queue *q, struct v4l2_create_buffers *create
                memset(q->plane_sizes, 0, sizeof(q->plane_sizes));
                memset(q->alloc_ctx, 0, sizeof(q->alloc_ctx));
                q->memory = create->memory;
+               q->waiting_for_buffers = !V4L2_TYPE_IS_OUTPUT(q->type);
        }
 
        num_buffers = min(create->count, VIDEO_MAX_FRAME - q->num_buffers);
@@ -1447,6 +1449,7 @@ static int vb2_internal_qbuf(struct vb2_queue *q, struct v4l2_buffer *b)
         * dequeued in dqbuf.
         */
        list_add_tail(&vb->queued_entry, &q->queued_list);
+       q->waiting_for_buffers = false;
        vb->state = VB2_BUF_STATE_QUEUED;
 
        /*
@@ -1841,6 +1844,7 @@ static int vb2_internal_streamoff(struct vb2_queue *q, enum v4l2_buf_type type)
         * and videobuf, effectively returning control over them to userspace.
         */
        __vb2_queue_cancel(q);
+       q->waiting_for_buffers = !V4L2_TYPE_IS_OUTPUT(q->type);
 
        dprintk(3, "Streamoff successful\n");
        return 0;
@@ -2150,9 +2154,16 @@ unsigned int vb2_poll(struct vb2_queue *q, struct file *file, poll_table *wait)
        }
 
        /*
-        * There is nothing to wait for if no buffers have already been queued.
+        * There is nothing to wait for if the queue isn't streaming.
         */
-       if (list_empty(&q->queued_list))
+       if (!vb2_is_streaming(q))
+               return res | POLLERR;
+       /*
+        * For compatibility with vb1: if QBUF hasn't been called yet, then
+        * return POLLERR as well. This only affects capture queues, output
+        * queues will always initialize waiting_for_buffers to false.
+        */
+       if (q->waiting_for_buffers)
                return res | POLLERR;
 
        if (list_empty(&q->done_list))
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index f15d4353f30f..5d12d69e2045 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -399,6 +399,8 @@ struct smb_version_operations {
                        const struct cifs_fid *, u32 *);
        int (*set_acl)(struct cifs_ntsd *, __u32, struct inode *, const char *,
                        int);
+       /* check if we need to issue closedir */
+       bool (*dir_needs_close)(struct cifsFileInfo *);
 };
 
 struct smb_version_values {
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 8175b18df819..d375322b6cec 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -762,7 +762,7 @@ int cifs_closedir(struct inode *inode, struct file *file)
 
        cifs_dbg(FYI, "Freeing private data in close dir\n");
        spin_lock(&cifs_file_list_lock);
-       if (!cfile->srch_inf.endOfSearch && !cfile->invalidHandle) {
+       if (server->ops->dir_needs_close(cfile)) {
                cfile->invalidHandle = true;
                spin_unlock(&cifs_file_list_lock);
                if (server->ops->close_dir)
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 2bbf11b09214..b334a89d6a66 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -593,7 +593,7 @@ find_cifs_entry(const unsigned int xid, struct cifs_tcon *tcon, loff_t pos,
                /* close and restart search */
                cifs_dbg(FYI, "search backing up - close and restart search\n");
                spin_lock(&cifs_file_list_lock);
-               if (!cfile->srch_inf.endOfSearch && !cfile->invalidHandle) {
+               if (server->ops->dir_needs_close(cfile)) {
                        cfile->invalidHandle = true;
                        spin_unlock(&cifs_file_list_lock);
                        if (server->ops->close_dir)
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index d1fdfa848703..e9ad8d37bb00 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -586,7 +586,7 @@ cifs_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
                tmprc = CIFS_open(xid, &oparms, &oplock, NULL);
                if (tmprc == -EOPNOTSUPP)
                        *symlink = true;
-               else
+               else if (tmprc == 0)
                        CIFSSMBClose(xid, tcon, fid.netfid);
        }
 
@@ -1009,6 +1009,12 @@ cifs_is_read_op(__u32 oplock)
        return oplock == OPLOCK_READ;
 }
 
+static bool
+cifs_dir_needs_close(struct cifsFileInfo *cfile)
+{
+       return !cfile->srch_inf.endOfSearch && !cfile->invalidHandle;
+}
+
 struct smb_version_operations smb1_operations = {
        .send_cancel = send_nt_cancel,
        .compare_fids = cifs_compare_fids,
@@ -1078,6 +1084,7 @@ struct smb_version_operations smb1_operations = {
        .query_mf_symlink = cifs_query_mf_symlink,
        .create_mf_symlink = cifs_create_mf_symlink,
        .is_read_op = cifs_is_read_op,
+       .dir_needs_close = cifs_dir_needs_close,
 #ifdef CONFIG_CIFS_XATTR
        .query_all_EAs = CIFSSMBQAllEAs,
        .set_EA = CIFSSMBSetEA,
diff --git a/fs/cifs/smb2maperror.c b/fs/cifs/smb2maperror.c
index e31a9dfdcd39..a491814cb2c0 100644
--- a/fs/cifs/smb2maperror.c
+++ b/fs/cifs/smb2maperror.c
@@ -214,7 +214,7 @@ static const struct status_to_posix_error smb2_error_map_table[] = {
        {STATUS_BREAKPOINT, -EIO, "STATUS_BREAKPOINT"},
        {STATUS_SINGLE_STEP, -EIO, "STATUS_SINGLE_STEP"},
        {STATUS_BUFFER_OVERFLOW, -EIO, "STATUS_BUFFER_OVERFLOW"},
-       {STATUS_NO_MORE_FILES, -EIO, "STATUS_NO_MORE_FILES"},
+       {STATUS_NO_MORE_FILES, -ENODATA, "STATUS_NO_MORE_FILES"},
        {STATUS_WAKE_SYSTEM_DEBUGGER, -EIO, "STATUS_WAKE_SYSTEM_DEBUGGER"},
        {STATUS_HANDLES_CLOSED, -EIO, "STATUS_HANDLES_CLOSED"},
        {STATUS_NO_INHERITANCE, -EIO, "STATUS_NO_INHERITANCE"},
@@ -256,6 +256,8 @@ static const struct status_to_posix_error smb2_error_map_table[] = {
        {STATUS_DLL_MIGHT_BE_INCOMPATIBLE, -EIO,
        "STATUS_DLL_MIGHT_BE_INCOMPATIBLE"},
        {STATUS_STOPPED_ON_SYMLINK, -EOPNOTSUPP, "STATUS_STOPPED_ON_SYMLINK"},
+       {STATUS_IO_REPARSE_TAG_NOT_HANDLED, -EOPNOTSUPP,
+       "STATUS_REPARSE_NOT_HANDLED"},
        {STATUS_DEVICE_REQUIRES_CLEANING, -EIO,
        "STATUS_DEVICE_REQUIRES_CLEANING"},
        {STATUS_DEVICE_DOOR_OPEN, -EIO, "STATUS_DEVICE_DOOR_OPEN"},
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index f8977b2d9187..34a17d425be6 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -1102,6 +1102,12 @@ smb3_parse_lease_buf(void *buf, unsigned int *epoch)
        return le32_to_cpu(lc->lcontext.LeaseState);
 }
 
+static bool
+smb2_dir_needs_close(struct cifsFileInfo *cfile)
+{
+       return !cfile->invalidHandle;
+}
+
 struct smb_version_operations smb20_operations = {
        .compare_fids = smb2_compare_fids,
        .setup_request = smb2_setup_request,
@@ -1175,6 +1181,7 @@ struct smb_version_operations smb20_operations = {
        .create_lease_buf = smb2_create_lease_buf,
        .parse_lease_buf = smb2_parse_lease_buf,
        .clone_range = smb2_clone_range,
+       .dir_needs_close = smb2_dir_needs_close,
 };
 
 struct smb_version_operations smb21_operations = {
@@ -1250,6 +1257,7 @@ struct smb_version_operations smb21_operations = {
        .create_lease_buf = smb2_create_lease_buf,
        .parse_lease_buf = smb2_parse_lease_buf,
        .clone_range = smb2_clone_range,
+       .dir_needs_close = smb2_dir_needs_close,
 };
 
 struct smb_version_operations smb30_operations = {
@@ -1328,6 +1336,7 @@ struct smb_version_operations smb30_operations = {
        .parse_lease_buf = smb3_parse_lease_buf,
        .clone_range = smb2_clone_range,
        .validate_negotiate = smb3_validate_negotiate,
+       .dir_needs_close = smb2_dir_needs_close,
 };
 
 struct smb_version_values smb20_values = {
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 9aab8fe0e508..348792911e1f 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -2136,6 +2136,10 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon,
        rsp = (struct smb2_query_directory_rsp *)iov[0].iov_base;
 
        if (rc) {
+               if (rc == -ENODATA && rsp->hdr.Status == STATUS_NO_MORE_FILES) {
+                       srch_inf->endOfSearch = true;
+                       rc = 0;
+               }
                cifs_stats_fail_inc(tcon, SMB2_QUERY_DIRECTORY_HE);
                goto qdir_exit;
        }
@@ -2173,11 +2177,6 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon,
        else
                cifs_dbg(VFS, "illegal search buffer type\n");
 
-       if (rsp->hdr.Status == STATUS_NO_MORE_FILES)
-               srch_inf->endOfSearch = 1;
-       else
-               srch_inf->endOfSearch = 0;
-
        return rc;
 
 qdir_exit:
diff --git a/fs/exec.c b/fs/exec.c
index 31e46b1b358b..ea4449d0536a 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -26,6 +26,7 @@
 #include <linux/file.h>
 #include <linux/fdtable.h>
 #include <linux/mm.h>
+#include <linux/vmacache.h>
 #include <linux/stat.h>
 #include <linux/fcntl.h>
 #include <linux/swap.h>
@@ -820,7 +821,7 @@ EXPORT_SYMBOL(read_code);
 static int exec_mmap(struct mm_struct *mm)
 {
        struct task_struct *tsk;
-       struct mm_struct * old_mm, *active_mm;
+       struct mm_struct *old_mm, *active_mm;
 
        /* Notify parent that we're no longer interested in the old VM */
        tsk = current;
@@ -846,6 +847,8 @@ static int exec_mmap(struct mm_struct *mm)
        tsk->mm = mm;
        tsk->active_mm = mm;
        activate_mm(active_mm, mm);
+       tsk->mm->vmacache_seqnum = 0;
+       vmacache_flush(tsk);
        task_unlock(tsk);
        if (old_mm) {
                up_read(&old_mm->mmap_sem);
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index d19b30ababf1..a4a8ed56e438 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -1017,6 +1017,11 @@ static int __init init_hugetlbfs_fs(void)
        int error;
        int i;
 
+       if (!hugepages_supported()) {
+               pr_info("hugetlbfs: disabling because there are no supported hugepage sizes\n");
+               return -ENOTSUPP;
+       }
+
        error = bdi_init(&hugetlbfs_backing_dev_info);
        if (error)
                return error;
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 8f788193e3d4..c4b2646b6d7c 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1,4 +1,5 @@
 #include <linux/mm.h>
+#include <linux/vmacache.h>
 #include <linux/hugetlb.h>
 #include <linux/huge_mm.h>
 #include <linux/mount.h>
@@ -152,7 +153,7 @@ static void *m_start(struct seq_file *m, loff_t *pos)
 
        /*
         * We remember last_addr rather than next_addr to hit with
-        * mmap_cache most of the time. We have zero last_addr at
+        * vmacache most of the time. We have zero last_addr at
         * the beginning and also after lseek. We will have -1 last_addr
         * after the end of the vmas.
         */
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 982ce05c87ed..287cd5f23421 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -1271,13 +1271,22 @@ update_time:
        return 0;
 }
 
+/*
+ * Maximum length of linked list formed by ICB hierarchy. The chosen number is
+ * arbitrary - just that we hopefully don't limit any real use of rewritten
+ * inode on write-once media but avoid looping for too long on corrupted media.
+ */
+#define UDF_MAX_ICB_NESTING 1024
+
 static void __udf_read_inode(struct inode *inode)
 {
        struct buffer_head *bh = NULL;
        struct fileEntry *fe;
        uint16_t ident;
        struct udf_inode_info *iinfo = UDF_I(inode);
+       unsigned int indirections = 0;
 
+reread:
        /*
         * Set defaults, but the inode is still incomplete!
         * Note: get_new_inode() sets the following on a new inode:
@@ -1314,28 +1323,26 @@ static void __udf_read_inode(struct inode *inode)
                ibh = udf_read_ptagged(inode->i_sb, &iinfo->i_location, 1,
                                        &ident);
                if (ident == TAG_IDENT_IE && ibh) {
-                       struct buffer_head *nbh = NULL;
                        struct kernel_lb_addr loc;
                        struct indirectEntry *ie;
 
                        ie = (struct indirectEntry *)ibh->b_data;
                        loc = lelb_to_cpu(ie->indirectICB.extLocation);
 
-                       if (ie->indirectICB.extLength &&
-                               (nbh = udf_read_ptagged(inode->i_sb, &loc, 0,
-                                                       &ident))) {
-                               if (ident == TAG_IDENT_FE ||
-                                       ident == TAG_IDENT_EFE) {
-                                       memcpy(&iinfo->i_location,
-                                               &loc,
-                                               sizeof(struct kernel_lb_addr));
-                                       brelse(bh);
-                                       brelse(ibh);
-                                       brelse(nbh);
-                                       __udf_read_inode(inode);
+                       if (ie->indirectICB.extLength) {
+                               brelse(bh);
+                               brelse(ibh);
+                               memcpy(&iinfo->i_location, &loc,
+                                      sizeof(struct kernel_lb_addr));
+                               if (++indirections > UDF_MAX_ICB_NESTING) {
+                                       udf_err(inode->i_sb,
+                                               "too many ICBs in ICB hierarchy"
+                                               " (max %d supported)\n",
+                                               UDF_MAX_ICB_NESTING);
+                                       make_bad_inode(inode);
                                        return;
                                }
-                               brelse(nbh);
+                               goto reread;
                        }
                }
                brelse(ibh);
diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index 3fe661fe96d1..b19d3dc2e651 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -87,25 +87,26 @@ extern void rebuild_sched_domains(void);
 extern void cpuset_print_task_mems_allowed(struct task_struct *p);
 
 /*
- * get_mems_allowed is required when making decisions involving mems_allowed
- * such as during page allocation. mems_allowed can be updated in parallel
- * and depending on the new value an operation can fail potentially causing
- * process failure. A retry loop with get_mems_allowed and put_mems_allowed
- * prevents these artificial failures.
+ * read_mems_allowed_begin is required when making decisions involving
+ * mems_allowed such as during page allocation. mems_allowed can be updated in
+ * parallel and depending on the new value an operation can fail potentially
+ * causing process failure. A retry loop with read_mems_allowed_begin and
+ * read_mems_allowed_retry prevents these artificial failures.
  */
-static inline unsigned int get_mems_allowed(void)
+static inline unsigned int read_mems_allowed_begin(void)
 {
        return read_seqcount_begin(&current->mems_allowed_seq);
 }
 
 /*
- * If this returns false, the operation that took place after get_mems_allowed
- * may have failed. It is up to the caller to retry the operation if
+ * If this returns true, the operation that took place after
+ * read_mems_allowed_begin may have failed artificially due to a concurrent
+ * update of mems_allowed. It is up to the caller to retry the operation if
  * appropriate.
  */
-static inline bool put_mems_allowed(unsigned int seq)
+static inline bool read_mems_allowed_retry(unsigned int seq)
 {
-       return !read_seqcount_retry(&current->mems_allowed_seq, seq);
+       return read_seqcount_retry(&current->mems_allowed_seq, seq);
 }
 
 static inline void set_mems_allowed(nodemask_t nodemask)
@@ -225,14 +226,14 @@ static inline void set_mems_allowed(nodemask_t nodemask)
 {
 }
 
-static inline unsigned int get_mems_allowed(void)
+static inline unsigned int read_mems_allowed_begin(void)
 {
        return 0;
 }
 
-static inline bool put_mems_allowed(unsigned int seq)
+static inline bool read_mems_allowed_retry(unsigned int seq)
 {
-       return true;
+       return false;
 }
 
 #endif /* !CONFIG_CPUSETS */
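
The new read_mems_allowed_begin()/read_mems_allowed_retry() pair is a plain seqcount read section. The callers converted later in this patch (__page_cache_alloc() in mm/filemap.c and dequeue_huge_page_vma() in mm/hugetlb.c) follow one shape; here is a minimal sketch of that pattern, where alloc_respecting_mems_allowed() is a made-up name and alloc_pages() stands in for whatever nodemask-dependent work the caller does:

/*
 * Sketch of the retry pattern used by the converted callers: snapshot the
 * mems_allowed sequence count, do the work, and retry only if the work
 * failed while a concurrent cpuset update was in flight.
 */
static struct page *alloc_respecting_mems_allowed(gfp_t gfp)
{
	unsigned int cpuset_mems_cookie;
	struct page *page;

	do {
		cpuset_mems_cookie = read_mems_allowed_begin();
		page = alloc_pages(gfp, 0);
	} while (!page && read_mems_allowed_retry(cpuset_mems_cookie));

	return page;
}
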
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index bd1e9bcec547..42b05c4c53e5 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -400,6 +400,16 @@ static inline spinlock_t *huge_pte_lockptr(struct hstate *h,
        return &mm->page_table_lock;
 }
 
+static inline bool hugepages_supported(void)
+{
+       /*
+        * Some platform decide whether they support huge pages at boot
+        * time. On these, such as powerpc, HPAGE_SHIFT is set to 0 when
+        * there is no such support
+        */
+       return HPAGE_SHIFT != 0;
+}
+
 #else  /* CONFIG_HUGETLB_PAGE */
 struct hstate {};
 #define alloc_huge_page_node(h, nid) NULL
diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h
index 1f44466c1e9d..c367cbdf73ab 100644
--- a/include/linux/jiffies.h
+++ b/include/linux/jiffies.h
@@ -258,23 +258,11 @@ extern unsigned long preset_lpj;
 #define SEC_JIFFIE_SC (32 - SHIFT_HZ)
 #endif
 #define NSEC_JIFFIE_SC (SEC_JIFFIE_SC + 29)
-#define USEC_JIFFIE_SC (SEC_JIFFIE_SC + 19)
 #define SEC_CONVERSION ((unsigned long)((((u64)NSEC_PER_SEC << SEC_JIFFIE_SC) +\
                                 TICK_NSEC -1) / (u64)TICK_NSEC))
 
 #define NSEC_CONVERSION ((unsigned long)((((u64)1 << NSEC_JIFFIE_SC) +\
                                         TICK_NSEC -1) / (u64)TICK_NSEC))
-#define USEC_CONVERSION  \
-                    ((unsigned long)((((u64)NSEC_PER_USEC << USEC_JIFFIE_SC) +\
-                                        TICK_NSEC -1) / (u64)TICK_NSEC))
-/*
- * USEC_ROUND is used in the timeval to jiffie conversion.  See there
- * for more details.  It is the scaled resolution rounding value.  Note
- * that it is a 64-bit value.  Since, when it is applied, we are already
- * in jiffies (albit scaled), it is nothing but the bits we will shift
- * off.
- */
-#define USEC_ROUND (u64)(((u64)1 << USEC_JIFFIE_SC) - 1)
 /*
  * The maximum jiffie value is (MAX_INT >> 1).  Here we translate that
  * into seconds.  The 64-bit case will overflow if we are not careful,
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 290901a8c1de..2b58d192ea24 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -342,9 +342,9 @@ struct mm_rss_stat {
 
 struct kioctx_table;
 struct mm_struct {
-       struct vm_area_struct * mmap;           /* list of VMAs */
+       struct vm_area_struct *mmap;            /* list of VMAs */
        struct rb_root mm_rb;
-       struct vm_area_struct * mmap_cache;     /* last find_vma result */
+       u32 vmacache_seqnum;                   /* per-thread vmacache */
 #ifdef CONFIG_MMU
        unsigned long (*get_unmapped_area) (struct file *filp,
                                unsigned long addr, unsigned long len,
diff --git a/include/linux/plist.h b/include/linux/plist.h
index aa0fb390bd29..8b6c970cff6c 100644
--- a/include/linux/plist.h
+++ b/include/linux/plist.h
@@ -98,6 +98,13 @@ struct plist_node {
 }
 
 /**
+ * PLIST_HEAD - declare and init plist_head
+ * @head:      name for struct plist_head variable
+ */
+#define PLIST_HEAD(head) \
+       struct plist_head head = PLIST_HEAD_INIT(head)
+
+/**
  * PLIST_NODE_INIT - static struct plist_node initializer
  * @node:      struct plist_node variable name
  * @__prio:    initial node priority
@@ -134,6 +141,8 @@ static inline void plist_node_init(struct plist_node *node, int prio)
 extern void plist_add(struct plist_node *node, struct plist_head *head);
 extern void plist_del(struct plist_node *node, struct plist_head *head);
 
+extern void plist_requeue(struct plist_node *node, struct plist_head *head);
+
 /**
  * plist_for_each - iterate over the plist
  * @pos:       the type * to use as a loop counter
@@ -143,6 +152,16 @@ extern void plist_del(struct plist_node *node, struct plist_head *head);
         list_for_each_entry(pos, &(head)->node_list, node_list)
 
 /**
+ * plist_for_each_continue - continue iteration over the plist
+ * @pos:       the type * to use as a loop cursor
+ * @head:      the head for your list
+ *
+ * Continue to iterate over plist, continuing after the current position.
+ */
+#define plist_for_each_continue(pos, head)     \
+        list_for_each_entry_continue(pos, &(head)->node_list, node_list)
+
+/**
  * plist_for_each_safe - iterate safely over a plist of given type
  * @pos:       the type * to use as a loop counter
  * @n: another type * to use as temporary storage
@@ -163,6 +182,18 @@ extern void plist_del(struct plist_node *node, struct plist_head *head);
         list_for_each_entry(pos, &(head)->node_list, mem.node_list)
 
 /**
+ * plist_for_each_entry_continue - continue iteration over list of given type
+ * @pos:       the type * to use as a loop cursor
+ * @head:      the head for your list
+ * @m:         the name of the list_struct within the struct
+ *
+ * Continue to iterate over list of given type, continuing after
+ * the current position.
+ */
+#define plist_for_each_entry_continue(pos, head, m)    \
+       list_for_each_entry_continue(pos, &(head)->node_list, m.node_list)
+
+/**
  * plist_for_each_entry_safe - iterate safely over list of given type
  * @pos:       the type * to use as a loop counter
  * @n:         another type * to use as temporary storage
@@ -229,6 +260,20 @@ static inline int plist_node_empty(const struct plist_node *node)
 #endif
 
 /**
+ * plist_next - get the next entry in list
+ * @pos:       the type * to cursor
+ */
+#define plist_next(pos) \
+       list_next_entry(pos, node_list)
+
+/**
+ * plist_prev - get the prev entry in list
+ * @pos:       the type * to cursor
+ */
+#define plist_prev(pos) \
+       list_prev_entry(pos, node_list)
+
+/**
  * plist_first - return the first node (and thus, highest priority)
  * @head:      the &struct plist_head pointer
  *
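
The plist additions above (PLIST_HEAD, plist_requeue() and the *_continue iterators) back the swap-list rework elsewhere in this release; plist_requeue() itself is implemented in lib/plist.c below. A minimal, self-contained usage sketch, where demo_head, demo_nodes and plist_demo() are made-up names for illustration:

#include <linux/plist.h>

static PLIST_HEAD(demo_head);		/* declare and initialise a plist head */
static struct plist_node demo_nodes[3];

static void plist_demo(void)
{
	int i;

	/* Two nodes at priority 0, one at priority 1. */
	for (i = 0; i < 3; i++) {
		plist_node_init(&demo_nodes[i], i / 2);
		plist_add(&demo_nodes[i], &demo_head);
	}

	/*
	 * plist_requeue() is an optimised plist_del() + plist_add():
	 * node 0 moves behind its same-priority peer.
	 */
	plist_requeue(&demo_nodes[0], &demo_head);
}
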
diff --git a/include/linux/sched.h b/include/linux/sched.h
index ccd0c6f24f2c..d7ca410ace93 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -59,6 +59,10 @@ struct sched_param {
 
 #define SCHED_ATTR_SIZE_VER0   48      /* sizeof first published struct */
 
+#define VMACACHE_BITS 2
+#define VMACACHE_SIZE (1U << VMACACHE_BITS)
+#define VMACACHE_MASK (VMACACHE_SIZE - 1)
+
 /*
  * Extended scheduling parameters data structure.
  *
@@ -1228,6 +1232,9 @@ struct task_struct {
 #ifdef CONFIG_COMPAT_BRK
        unsigned brk_randomized:1;
 #endif
+       /* per-thread vma caching */
+       u32 vmacache_seqnum;
+       struct vm_area_struct *vmacache[VMACACHE_SIZE];
 #if defined(SPLIT_RSS_COUNTING)
        struct task_rss_stat    rss_stat;
 #endif
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 46ba0c6c219f..789324976801 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -214,8 +214,9 @@ struct percpu_cluster {
 struct swap_info_struct {
        unsigned long   flags;          /* SWP_USED etc: see above */
        signed short    prio;           /* swap priority of this type */
+       struct plist_node list;         /* entry in swap_active_head */
+       struct plist_node avail_list;   /* entry in swap_avail_head */
        signed char     type;           /* strange name for an index */
-       signed char     next;           /* next type on the swap list */
        unsigned int    max;            /* extent of the swap_map */
        unsigned char *swap_map;        /* vmalloc'ed array of usage counts */
        struct swap_cluster_info *cluster_info; /* cluster info. Only for SSD */
@@ -255,11 +256,6 @@ struct swap_info_struct {
        struct swap_cluster_info discard_cluster_tail; /* list tail of discard clusters */
 };
 
-struct swap_list_t {
-       int head;       /* head of priority-ordered swapfile list */
-       int next;       /* swapfile to be used next */
-};
-
 /* linux/mm/page_alloc.c */
 extern unsigned long totalram_pages;
 extern unsigned long totalreserve_pages;
diff --git a/include/linux/swapfile.h b/include/linux/swapfile.h
index e282624e8c10..388293a91e8c 100644
--- a/include/linux/swapfile.h
+++ b/include/linux/swapfile.h
@@ -6,7 +6,7 @@
  * want to expose them to the dozens of source files that include swap.h
  */
 extern spinlock_t swap_lock;
-extern struct swap_list_t swap_list;
+extern struct plist_head swap_active_head;
 extern struct swap_info_struct *swap_info[];
 extern int try_to_unuse(unsigned int, bool, unsigned long);
 
diff --git a/include/linux/vmacache.h b/include/linux/vmacache.h
new file mode 100644
index 000000000000..c3fa0fd43949
--- /dev/null
+++ b/include/linux/vmacache.h
@@ -0,0 +1,38 @@
+#ifndef __LINUX_VMACACHE_H
+#define __LINUX_VMACACHE_H
+
+#include <linux/sched.h>
+#include <linux/mm.h>
+
+/*
+ * Hash based on the page number. Provides a good hit rate for
+ * workloads with good locality and those with random accesses as well.
+ */
+#define VMACACHE_HASH(addr) ((addr >> PAGE_SHIFT) & VMACACHE_MASK)
+
+static inline void vmacache_flush(struct task_struct *tsk)
+{
+       memset(tsk->vmacache, 0, sizeof(tsk->vmacache));
+}
+
+extern void vmacache_flush_all(struct mm_struct *mm);
+extern void vmacache_update(unsigned long addr, struct vm_area_struct *newvma);
+extern struct vm_area_struct *vmacache_find(struct mm_struct *mm,
+                                                   unsigned long addr);
+
+#ifndef CONFIG_MMU
+extern struct vm_area_struct *vmacache_find_exact(struct mm_struct *mm,
+                                                 unsigned long start,
+                                                 unsigned long end);
+#endif
+
+static inline void vmacache_invalidate(struct mm_struct *mm)
+{
+       mm->vmacache_seqnum++;
+
+       /* deal with overflows */
+       if (unlikely(mm->vmacache_seqnum == 0))
+               vmacache_flush_all(mm);
+}
+
+#endif /* __LINUX_VMACACHE_H */
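
The lookup side of this cache lives in mm/vmacache.c, which is added by the same series but not shown in this hunk set. The sketch below (vmacache_find_sketch() is a made-up name) only illustrates what the header above implies: compare the per-thread sequence number against the mm, then probe the small per-thread array; details such as resynchronising the seqnum are simplified relative to the real code.

static struct vm_area_struct *vmacache_find_sketch(struct mm_struct *mm,
						   unsigned long addr)
{
	int i;

	/* vmacache_invalidate() bumped the mm seqnum: every entry is stale. */
	if (current->vmacache_seqnum != mm->vmacache_seqnum)
		return NULL;

	for (i = 0; i < VMACACHE_SIZE; i++) {
		struct vm_area_struct *vma = current->vmacache[i];

		if (vma && vma->vm_start <= addr && vma->vm_end > addr)
			return vma;
	}

	return NULL;	/* caller falls back to the mm_rb rbtree walk */
}

On a miss, callers are expected to refresh the relevant slot via vmacache_update() once find_vma() has done the rbtree walk.
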
diff --git a/include/media/videobuf2-core.h b/include/media/videobuf2-core.h
index bef53ce555d2..b10682cb138c 100644
--- a/include/media/videobuf2-core.h
+++ b/include/media/videobuf2-core.h
@@ -329,6 +329,9 @@ struct v4l2_fh;
  * @retry_start_streaming: start_streaming() was called, but there were not enough
  *             buffers queued. If set, then retry calling start_streaming when
  *             queuing a new buffer.
+ * @waiting_for_buffers: used in poll() to check if vb2 is still waiting for
+ *             buffers. Only set for capture queues if qbuf has not yet been
+ *             called since poll() needs to return POLLERR in that situation.
  * @fileio:    file io emulator internal data, used only if emulator is active
  */
 struct vb2_queue {
@@ -362,6 +365,7 @@ struct vb2_queue {
 
        unsigned int                    streaming:1;
        unsigned int                    retry_start_streaming:1;
+       unsigned int                    waiting_for_buffers:1;
 
        struct vb2_fileio_data          *fileio;
 };
diff --git a/init/Kconfig b/init/Kconfig
index 93c5ef0c5210..8b9521a2d2c1 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1389,6 +1389,7 @@ config FUTEX
 
 config HAVE_FUTEX_CMPXCHG
        bool
+       depends on FUTEX
        help
          Architectures should select this if futex_atomic_cmpxchg_inatomic()
          is implemented and always working. This removes a couple of runtime
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 6b27e5c0cd86..15b3ea693225 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1022,7 +1022,7 @@ static void cpuset_change_task_nodemask(struct task_struct *tsk,
        task_lock(tsk);
        /*
         * Determine if a loop is necessary if another thread is doing
-        * get_mems_allowed().  If at least one node remains unchanged and
+        * read_mems_allowed_begin().  If at least one node remains unchanged and
         * tsk does not have a mempolicy, then an empty nodemask will not be
         * possible when mems_allowed is larger than a word.
         */
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index 334b3980ffc1..8865caec45fb 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -49,6 +49,7 @@
 #include <linux/pid.h>
 #include <linux/smp.h>
 #include <linux/mm.h>
+#include <linux/vmacache.h>
 #include <linux/rcupdate.h>
 
 #include <asm/cacheflush.h>
@@ -224,10 +225,17 @@ static void kgdb_flush_swbreak_addr(unsigned long addr)
        if (!CACHE_FLUSH_IS_SAFE)
                return;
 
-       if (current->mm && current->mm->mmap_cache) {
-               flush_cache_range(current->mm->mmap_cache,
-                                 addr, addr + BREAK_INSTR_SIZE);
+       if (current->mm) {
+               int i;
+
+               for (i = 0; i < VMACACHE_SIZE; i++) {
+                       if (!current->vmacache[i])
+                               continue;
+                       flush_cache_range(current->vmacache[i],
+                                         addr, addr + BREAK_INSTR_SIZE);
+               }
        }
+
        /* Force flush instruction cache if it was outside the mm */
        flush_icache_range(addr, addr + BREAK_INSTR_SIZE);
 }
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 3a140ca37777..4ced342f1ba9 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7836,8 +7836,10 @@ int perf_event_init_task(struct task_struct *child)
 
        for_each_task_context_nr(ctxn) {
                ret = perf_event_init_context(child, ctxn);
-               if (ret)
+               if (ret) {
+                       perf_event_free_task(child);
                        return ret;
+               }
        }
 
        return 0;
diff --git a/kernel/fork.c b/kernel/fork.c
index c44bff8097f5..e2c685396295 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -28,6 +28,8 @@
 #include <linux/mman.h>
 #include <linux/mmu_notifier.h>
 #include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/vmacache.h>
 #include <linux/nsproxy.h>
 #include <linux/capability.h>
 #include <linux/cpu.h>
@@ -363,7 +365,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
 
        mm->locked_vm = 0;
        mm->mmap = NULL;
-       mm->mmap_cache = NULL;
+       mm->vmacache_seqnum = 0;
        mm->map_count = 0;
        cpumask_clear(mm_cpumask(mm));
        mm->mm_rb = RB_ROOT;
@@ -876,6 +878,9 @@ static int copy_mm(unsigned long clone_flags, struct task_struct *tsk)
        if (!oldmm)
                return 0;
 
+       /* initialize the new vmacache entries */
+       vmacache_flush(tsk);
+
        if (clone_flags & CLONE_VM) {
                atomic_inc(&oldmm->mm_users);
                mm = oldmm;
@@ -1323,7 +1328,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
                goto bad_fork_cleanup_policy;
        retval = audit_alloc(p);
        if (retval)
-               goto bad_fork_cleanup_policy;
+               goto bad_fork_cleanup_perf;
        /* copy all the process information */
        retval = copy_semundo(clone_flags, p);
        if (retval)
@@ -1522,8 +1527,9 @@ bad_fork_cleanup_semundo:
        exit_sem(p);
 bad_fork_cleanup_audit:
        audit_free(p);
-bad_fork_cleanup_policy:
+bad_fork_cleanup_perf:
        perf_event_free_task(p);
+bad_fork_cleanup_policy:
 #ifdef CONFIG_NUMA
        mpol_put(p->mempolicy);
 bad_fork_cleanup_cgroup:
diff --git a/kernel/time.c b/kernel/time.c
index 7c7964c33ae7..3c49ab45f822 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -496,17 +496,20 @@ EXPORT_SYMBOL(usecs_to_jiffies);
  * that a remainder subtract here would not do the right thing as the
  * resolution values don't fall on second boundries.  I.e. the line:
  * nsec -= nsec % TICK_NSEC; is NOT a correct resolution rounding.
+ * Note that due to the small error in the multiplier here, this
+ * rounding is incorrect for sufficiently large values of tv_nsec, but
+ * well formed timespecs should have tv_nsec < NSEC_PER_SEC, so we're
+ * OK.
  *
  * Rather, we just shift the bits off the right.
  *
  * The >> (NSEC_JIFFIE_SC - SEC_JIFFIE_SC) converts the scaled nsec
  * value to a scaled second value.
  */
-unsigned long
-timespec_to_jiffies(const struct timespec *value)
+static unsigned long
+__timespec_to_jiffies(unsigned long sec, long nsec)
 {
-       unsigned long sec = value->tv_sec;
-       long nsec = value->tv_nsec + TICK_NSEC - 1;
+       nsec = nsec + TICK_NSEC - 1;
 
        if (sec >= MAX_SEC_IN_JIFFIES){
                sec = MAX_SEC_IN_JIFFIES;
@@ -517,6 +520,13 @@ timespec_to_jiffies(const struct timespec *value)
                 (NSEC_JIFFIE_SC - SEC_JIFFIE_SC))) >> SEC_JIFFIE_SC;
 
 }
+
+unsigned long
+timespec_to_jiffies(const struct timespec *value)
+{
+       return __timespec_to_jiffies(value->tv_sec, value->tv_nsec);
+}
+
 EXPORT_SYMBOL(timespec_to_jiffies);
 
 void
@@ -533,31 +543,27 @@ jiffies_to_timespec(const unsigned long jiffies, struct timespec *value)
 }
 EXPORT_SYMBOL(jiffies_to_timespec);
 
-/* Same for "timeval"
+/*
+ * We could use a similar algorithm to timespec_to_jiffies (with a
+ * different multiplier for usec instead of nsec). But this has a
+ * problem with rounding: we can't exactly add TICK_NSEC - 1 to the
+ * usec value, since it's not necessarily integral.
  *
- * Well, almost.  The problem here is that the real system resolution is
- * in nanoseconds and the value being converted is in micro seconds.
- * Also for some machines (those that use HZ = 1024, in-particular),
- * there is a LARGE error in the tick size in microseconds.
-
- * The solution we use is to do the rounding AFTER we convert the
- * microsecond part.  Thus the USEC_ROUND, the bits to be shifted off.
- * Instruction wise, this should cost only an additional add with carry
- * instruction above the way it was done above.
+ * We could instead round in the intermediate scaled representation
+ * (i.e. in units of 1/2^(large scale) jiffies) but that's also
+ * perilous: the scaling introduces a small positive error, which
+ * combined with a division-rounding-upward (i.e. adding 2^(scale) - 1
+ * units to the intermediate before shifting) leads to accidental
+ * overflow and overestimates.
+ *
+ * At the cost of one additional multiplication by a constant, just
+ * use the timespec implementation.
  */
 unsigned long
 timeval_to_jiffies(const struct timeval *value)
 {
-       unsigned long sec = value->tv_sec;
-       long usec = value->tv_usec;
-
-       if (sec >= MAX_SEC_IN_JIFFIES){
-               sec = MAX_SEC_IN_JIFFIES;
-               usec = 0;
-       }
-       return (((u64)sec * SEC_CONVERSION) +
-               (((u64)usec * USEC_CONVERSION + USEC_ROUND) >>
-                (USEC_JIFFIE_SC - SEC_JIFFIE_SC))) >> SEC_JIFFIE_SC;
+       return __timespec_to_jiffies(value->tv_sec,
+                                    value->tv_usec * NSEC_PER_USEC);
 }
 EXPORT_SYMBOL(timeval_to_jiffies);
 
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 773aba836e81..774a0807fe81 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -3372,7 +3372,7 @@ static void rb_iter_reset(struct ring_buffer_iter *iter)
        iter->head = cpu_buffer->reader_page->read;
 
        iter->cache_reader_page = iter->head_page;
-       iter->cache_read = iter->head;
+       iter->cache_read = cpu_buffer->read;
 
        if (iter->head)
                iter->read_stamp = cpu_buffer->read_stamp;
diff --git a/lib/plist.c b/lib/plist.c
index 1ebc95f7a46f..0f2084d30798 100644
--- a/lib/plist.c
+++ b/lib/plist.c
@@ -134,6 +134,46 @@ void plist_del(struct plist_node *node, struct plist_head *head)
        plist_check_head(head);
 }
 
+/**
+ * plist_requeue - Requeue @node at end of same-prio entries.
+ *
+ * This is essentially an optimized plist_del() followed by
+ * plist_add().  It moves an entry already in the plist to
+ * after any other same-priority entries.
+ *
+ * @node:      &struct plist_node pointer - entry to be moved
+ * @head:      &struct plist_head pointer - list head
+ */
+void plist_requeue(struct plist_node *node, struct plist_head *head)
+{
+       struct plist_node *iter;
+       struct list_head *node_next = &head->node_list;
+
+       plist_check_head(head);
+       BUG_ON(plist_head_empty(head));
+       BUG_ON(plist_node_empty(node));
+
+       if (node == plist_last(head))
+               return;
+
+       iter = plist_next(node);
+
+       if (node->prio != iter->prio)
+               return;
+
+       plist_del(node, head);
+
+       plist_for_each_continue(iter, head) {
+               if (node->prio != iter->prio) {
+                       node_next = &iter->node_list;
+                       break;
+               }
+       }
+       list_add_tail(&node->node_list, node_next);
+
+       plist_check_head(head);
+}
+
 #ifdef CONFIG_DEBUG_PI_LIST
 #include <linux/sched.h>
 #include <linux/module.h>
@@ -170,6 +210,14 @@ static void __init plist_test_check(int nr_expect)
        BUG_ON(prio_pos->prio_list.next != &first->prio_list);
 }
 
+static void __init plist_test_requeue(struct plist_node *node)
+{
+       plist_requeue(node, &test_head);
+
+       if (node != plist_last(&test_head))
+               BUG_ON(node->prio == plist_next(node)->prio);
+}
+
 static int  __init plist_test(void)
 {
        int nr_expect = 0, i, loop;
@@ -193,6 +241,10 @@ static int  __init plist_test(void)
                        nr_expect--;
                }
                plist_test_check(nr_expect);
+               if (!plist_node_empty(test_node + i)) {
+                       plist_test_requeue(test_node + i);
+                       plist_test_check(nr_expect);
+               }
        }
 
        for (i = 0; i < ARRAY_SIZE(test_node); i++) {
diff --git a/mm/Makefile b/mm/Makefile
index 310c90a09264..c561f1f6bca0 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -16,7 +16,7 @@ obj-y                 := filemap.o mempool.o oom_kill.o fadvise.o \
                           readahead.o swap.o truncate.o vmscan.o shmem.o \
                           util.o mmzone.o vmstat.o backing-dev.o \
                           mm_init.o mmu_context.o percpu.o slab_common.o \
-                          compaction.o balloon_compaction.o \
+                          compaction.o balloon_compaction.o vmacache.o \
                           interval_tree.o list_lru.o $(mmu-y)
 
 obj-y += init-mm.o
diff --git a/mm/compaction.c b/mm/compaction.c
index 5f702ef0a65f..5e38e5706f62 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -217,21 +217,12 @@ static inline bool compact_trylock_irqsave(spinlock_t *lock,
 /* Returns true if the page is within a block suitable for migration to */
 static bool suitable_migration_target(struct page *page)
 {
-       int migratetype = get_pageblock_migratetype(page);
-
-       /* Don't interfere with memory hot-remove or the min_free_kbytes blocks */
-       if (migratetype == MIGRATE_RESERVE)
-               return false;
-
-       if (is_migrate_isolate(migratetype))
-               return false;
-
-       /* If the page is a large free page, then allow migration */
+       /* If the page is a large free page, then disallow migration */
        if (PageBuddy(page) && page_order(page) >= pageblock_order)
-               return true;
+               return false;
 
        /* If the block is MIGRATE_MOVABLE or MIGRATE_CMA, allow migration */
-       if (migrate_async_suitable(migratetype))
+       if (migrate_async_suitable(get_pageblock_migratetype(page)))
                return true;
 
        /* Otherwise skip the block */
@@ -253,6 +244,7 @@ static unsigned long isolate_freepages_block(struct compact_control *cc,
        struct page *cursor, *valid_page = NULL;
        unsigned long flags;
        bool locked = false;
+       bool checked_pageblock = false;
 
        cursor = pfn_to_page(blockpfn);
 
@@ -284,8 +276,16 @@ static unsigned long isolate_freepages_block(struct compact_control *cc,
                        break;
 
                /* Recheck this is a suitable migration target under lock */
-               if (!strict && !suitable_migration_target(page))
-                       break;
+               if (!strict && !checked_pageblock) {
+                       /*
+                        * We need to check suitability of pageblock only once
+                        * and this isolate_freepages_block() is called with
+                        * pageblock range, so just check once is sufficient.
+                        */
+                       checked_pageblock = true;
+                       if (!suitable_migration_target(page))
+                               break;
+               }
 
                /* Recheck this is a buddy page under lock */
                if (!PageBuddy(page))
@@ -460,12 +460,13 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
        unsigned long last_pageblock_nr = 0, pageblock_nr;
        unsigned long nr_scanned = 0, nr_isolated = 0;
        struct list_head *migratelist = &cc->migratepages;
-       isolate_mode_t mode = 0;
        struct lruvec *lruvec;
        unsigned long flags;
        bool locked = false;
        struct page *page = NULL, *valid_page = NULL;
        bool skipped_async_unsuitable = false;
+       const isolate_mode_t mode = (!cc->sync ? ISOLATE_ASYNC_MIGRATE : 0) |
+                                   (unevictable ? ISOLATE_UNEVICTABLE : 0);
 
        /*
         * Ensure that there are not too many pages isolated from the LRU
@@ -487,7 +488,7 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
        cond_resched();
        for (; low_pfn < end_pfn; low_pfn++) {
                /* give a chance to irqs before checking need_resched() */
-               if (locked && !((low_pfn+1) % SWAP_CLUSTER_MAX)) {
+               if (locked && !(low_pfn % SWAP_CLUSTER_MAX)) {
                        if (should_release_lock(&zone->lru_lock)) {
                                spin_unlock_irqrestore(&zone->lru_lock, flags);
                                locked = false;
@@ -526,8 +527,25 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
 
                /* If isolation recently failed, do not retry */
                pageblock_nr = low_pfn >> pageblock_order;
-               if (!isolation_suitable(cc, page))
-                       goto next_pageblock;
+               if (last_pageblock_nr != pageblock_nr) {
+                       int mt;
+
+                       last_pageblock_nr = pageblock_nr;
+                       if (!isolation_suitable(cc, page))
+                               goto next_pageblock;
+
+                       /*
+                        * For async migration, also only scan in MOVABLE
+                        * blocks. Async migration is optimistic to see if
+                        * the minimum amount of work satisfies the allocation
+                        */
+                       mt = get_pageblock_migratetype(page);
+                       if (!cc->sync && !migrate_async_suitable(mt)) {
+                               cc->finished_update_migrate = true;
+                               skipped_async_unsuitable = true;
+                               goto next_pageblock;
+                       }
+               }
 
                /*
                 * Skip if free. page_order cannot be used without zone->lock
@@ -537,18 +555,6 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
                        continue;
 
                /*
-                * For async migration, also only scan in MOVABLE blocks. Async
-                * migration is optimistic to see if the minimum amount of work
-                * satisfies the allocation
-                */
-               if (!cc->sync && last_pageblock_nr != pageblock_nr &&
-                   !migrate_async_suitable(get_pageblock_migratetype(page))) {
-                       cc->finished_update_migrate = true;
-                       skipped_async_unsuitable = true;
-                       goto next_pageblock;
-               }
-
-               /*
                 * Check may be lockless but that's ok as we recheck later.
                 * It's possible to migrate LRU pages and balloon pages
                 * Skip any other type of page
@@ -557,11 +563,7 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
                        if (unlikely(balloon_page_movable(page))) {
                                if (locked && balloon_page_isolate(page)) {
                                        /* Successfully isolated */
-                                       cc->finished_update_migrate = true;
-                                       list_add(&page->lru, migratelist);
-                                       cc->nr_migratepages++;
-                                       nr_isolated++;
-                                       goto check_compact_cluster;
+                                       goto isolate_success;
                                }
                        }
                        continue;
@@ -584,6 +586,15 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
                        continue;
                }
 
+               /*
+                * Migration will fail if an anonymous page is pinned in memory,
+                * so avoid taking lru_lock and isolating it unnecessarily in an
+                * admittedly racy check.
+                */
+               if (!page_mapping(page) &&
+                   page_count(page) > page_mapcount(page))
+                       continue;
+
                /* Check if it is ok to still hold the lock */
                locked = compact_checklock_irqsave(&zone->lru_lock, &flags,
                                                                locked, cc);
@@ -598,12 +609,6 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
                        continue;
                }
 
-               if (!cc->sync)
-                       mode |= ISOLATE_ASYNC_MIGRATE;
-
-               if (unevictable)
-                       mode |= ISOLATE_UNEVICTABLE;
-
                lruvec = mem_cgroup_page_lruvec(page, zone);
 
                /* Try isolate the page */
@@ -613,13 +618,14 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
                VM_BUG_ON_PAGE(PageTransCompound(page), page);
 
                /* Successfully isolated */
-               cc->finished_update_migrate = true;
                del_page_from_lru_list(page, lruvec, page_lru(page));
+
+isolate_success:
+               cc->finished_update_migrate = true;
                list_add(&page->lru, migratelist);
                cc->nr_migratepages++;
                nr_isolated++;
 
-check_compact_cluster:
                /* Avoid isolating too much */
                if (cc->nr_migratepages == COMPACT_CLUSTER_MAX) {
                        ++low_pfn;
@@ -630,7 +636,6 @@ check_compact_cluster:
 
 next_pageblock:
                low_pfn = ALIGN(low_pfn + 1, pageblock_nr_pages) - 1;
-               last_pageblock_nr = pageblock_nr;
        }
 
        acct_isolated(zone, locked, cc);
@@ -1188,6 +1193,7 @@ static void compact_node(int nid)
        struct compact_control cc = {
                .order = -1,
                .sync = true,
+               .ignore_skip_hint = true,
        };
 
        __compact_pgdat(NODE_DATA(nid), &cc);
diff --git a/mm/filemap.c b/mm/filemap.c
index 7a13f6ac5421..c2cc7c95eff1 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -192,9 +192,11 @@ static int filemap_check_errors(struct address_space *mapping)
 {
        int ret = 0;
        /* Check for outstanding write errors */
-       if (test_and_clear_bit(AS_ENOSPC, &mapping->flags))
+       if (test_bit(AS_ENOSPC, &mapping->flags) &&
+           test_and_clear_bit(AS_ENOSPC, &mapping->flags))
                ret = -ENOSPC;
-       if (test_and_clear_bit(AS_EIO, &mapping->flags))
+       if (test_bit(AS_EIO, &mapping->flags) &&
+           test_and_clear_bit(AS_EIO, &mapping->flags))
                ret = -EIO;
        return ret;
 }
@@ -520,10 +522,10 @@ struct page *__page_cache_alloc(gfp_t gfp)
        if (cpuset_do_page_mem_spread()) {
                unsigned int cpuset_mems_cookie;
                do {
-                       cpuset_mems_cookie = get_mems_allowed();
+                       cpuset_mems_cookie = read_mems_allowed_begin();
                        n = cpuset_mem_spread_node();
                        page = alloc_pages_exact_node(n, gfp, 0);
-               } while (!put_mems_allowed(cpuset_mems_cookie) && !page);
+               } while (!page && read_mems_allowed_retry(cpuset_mems_cookie));
 
                return page;
        }
diff --git a/mm/frontswap.c b/mm/frontswap.c
index 1b24bdcb3197..c30eec536f03 100644
--- a/mm/frontswap.c
+++ b/mm/frontswap.c
@@ -327,15 +327,12 @@ EXPORT_SYMBOL(__frontswap_invalidate_area);
 
 static unsigned long __frontswap_curr_pages(void)
 {
-       int type;
        unsigned long totalpages = 0;
        struct swap_info_struct *si = NULL;
 
        assert_spin_locked(&swap_lock);
-       for (type = swap_list.head; type >= 0; type = si->next) {
-               si = swap_info[type];
+       plist_for_each_entry(si, &swap_active_head, list)
                totalpages += atomic_read(&si->frontswap_pages);
-       }
        return totalpages;
 }
 
@@ -347,11 +344,9 @@ static int __frontswap_unuse_pages(unsigned long total, unsigned long *unused,
        int si_frontswap_pages;
        unsigned long total_pages_to_unuse = total;
        unsigned long pages = 0, pages_to_unuse = 0;
-       int type;
 
        assert_spin_locked(&swap_lock);
-       for (type = swap_list.head; type >= 0; type = si->next) {
-               si = swap_info[type];
+       plist_for_each_entry(si, &swap_active_head, list) {
                si_frontswap_pages = atomic_read(&si->frontswap_pages);
                if (total_pages_to_unuse < si_frontswap_pages) {
                        pages = pages_to_unuse = total_pages_to_unuse;
@@ -366,7 +361,7 @@ static int __frontswap_unuse_pages(unsigned long total, unsigned long *unused,
                }
                vm_unacct_memory(pages);
                *unused = pages_to_unuse;
-               *swapid = type;
+               *swapid = si->type;
                ret = 0;
                break;
        }
@@ -413,7 +408,7 @@ void frontswap_shrink(unsigned long target_pages)
        /*
         * we don't want to hold swap_lock while doing a very
         * lengthy try_to_unuse, but swap_list may change
-        * so restart scan from swap_list.head each time
+        * so restart scan from swap_active_head each time
         */
        spin_lock(&swap_lock);
        ret = __frontswap_shrink(target_pages, &pages_to_unuse, &type);
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 1c42d0c36d0b..718bfa16a36f 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1819,21 +1819,24 @@ static int __split_huge_page_map(struct page *page,
        if (pmd) {
                pgtable = pgtable_trans_huge_withdraw(mm, pmd);
                pmd_populate(mm, &_pmd, pgtable);
+               if (pmd_write(*pmd))
+                       BUG_ON(page_mapcount(page) != 1);
 
                haddr = address;
                for (i = 0; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE) {
                        pte_t *pte, entry;
                        BUG_ON(PageCompound(page+i));
+                       /*
+                        * Note that pmd_numa is not transferred deliberately
+                        * to avoid any possibility that pte_numa leaks to
+                        * a PROT_NONE VMA by accident.
+                        */
                        entry = mk_pte(page + i, vma->vm_page_prot);
                        entry = maybe_mkwrite(pte_mkdirty(entry), vma);
                        if (!pmd_write(*pmd))
                                entry = pte_wrprotect(entry);
-                       else
-                               BUG_ON(page_mapcount(page) != 1);
                        if (!pmd_young(*pmd))
                                entry = pte_mkold(entry);
-                       if (pmd_numa(*pmd))
-                               entry = pte_mknuma(entry);
                        pte = pte_offset_map(&_pmd, haddr);
                        BUG_ON(!pte_none(*pte));
                        set_pte_at(mm, haddr, pte, entry);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 923f38e62bcf..67d0c175efcf 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -540,7 +540,7 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,
                goto err;
 
 retry_cpuset:
-       cpuset_mems_cookie = get_mems_allowed();
+       cpuset_mems_cookie = read_mems_allowed_begin();
        zonelist = huge_zonelist(vma, address,
                                        htlb_alloc_mask(h), &mpol, &nodemask);
 
@@ -562,7 +562,7 @@ retry_cpuset:
        }
 
        mpol_cond_put(mpol);
-       if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page))
+       if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie)))
                goto retry_cpuset;
        return page;
 
@@ -2071,6 +2071,9 @@ static int hugetlb_sysctl_handler_common(bool obey_mempolicy,
        unsigned long tmp;
        int ret;
 
+       if (!hugepages_supported())
+               return -ENOTSUPP;
+
        tmp = h->max_huge_pages;
 
        if (write && h->order >= MAX_ORDER)
@@ -2124,6 +2127,9 @@ int hugetlb_overcommit_handler(struct ctl_table *table, int write,
        unsigned long tmp;
        int ret;
 
+       if (!hugepages_supported())
+               return -ENOTSUPP;
+
        tmp = h->nr_overcommit_huge_pages;
 
        if (write && h->order >= MAX_ORDER)
@@ -2149,6 +2155,8 @@ out:
 void hugetlb_report_meminfo(struct seq_file *m)
 {
        struct hstate *h = &default_hstate;
+       if (!hugepages_supported())
+               return;
        seq_printf(m,
                        "HugePages_Total:   %5lu\n"
                        "HugePages_Free:    %5lu\n"
@@ -2165,6 +2173,8 @@ void hugetlb_report_meminfo(struct seq_file *m)
 int hugetlb_report_node_meminfo(int nid, char *buf)
 {
        struct hstate *h = &default_hstate;
+       if (!hugepages_supported())
+               return 0;
        return sprintf(buf,
                "Node %d HugePages_Total: %5u\n"
                "Node %d HugePages_Free:  %5u\n"
@@ -2179,6 +2189,9 @@ void hugetlb_show_meminfo(void)
        struct hstate *h;
        int nid;
 
+       if (!hugepages_supported())
+               return;
+
        for_each_node_state(nid, N_MEMORY)
                for_each_hstate(h)
                        pr_info("Node %d hugepages_total=%u hugepages_free=%u 
hugepages_surp=%u hugepages_size=%lukB\n",
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 15a8ea031526..796c7e6cf93b 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1897,7 +1897,7 @@ int node_random(const nodemask_t *maskp)
  * If the effective policy is 'BIND, returns a pointer to the mempolicy's
  * @nodemask for filtering the zonelist.
  *
- * Must be protected by get_mems_allowed()
+ * Must be protected by read_mems_allowed_begin()
  */
 struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr,
                                gfp_t gfp_flags, struct mempolicy **mpol,
@@ -2061,7 +2061,7 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
 
 retry_cpuset:
        pol = get_vma_policy(current, vma, addr);
-       cpuset_mems_cookie = get_mems_allowed();
+       cpuset_mems_cookie = read_mems_allowed_begin();
 
        if (unlikely(pol->mode == MPOL_INTERLEAVE)) {
                unsigned nid;
@@ -2069,7 +2069,7 @@ retry_cpuset:
                nid = interleave_nid(pol, vma, addr, PAGE_SHIFT + order);
                mpol_cond_put(pol);
                page = alloc_page_interleave(gfp, order, nid);
-               if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page))
+               if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie)))
                        goto retry_cpuset;
 
                return page;
@@ -2079,7 +2079,7 @@ retry_cpuset:
                                      policy_nodemask(gfp, pol));
        if (unlikely(mpol_needs_cond_ref(pol)))
                __mpol_put(pol);
-       if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page))
+       if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie)))
                goto retry_cpuset;
        return page;
 }
@@ -2113,7 +2113,7 @@ struct page *alloc_pages_current(gfp_t gfp, unsigned order)
                pol = &default_policy;
 
 retry_cpuset:
-       cpuset_mems_cookie = get_mems_allowed();
+       cpuset_mems_cookie = read_mems_allowed_begin();
 
        /*
         * No reference counting needed for current->mempolicy
@@ -2126,7 +2126,7 @@ retry_cpuset:
                                policy_zonelist(gfp, pol, numa_node_id()),
                                policy_nodemask(gfp, pol));
 
-       if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page))
+       if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie)))
                goto retry_cpuset;
 
        return page;
diff --git a/mm/migrate.c b/mm/migrate.c
index bed48809e5d0..13f47fbe3550 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -148,8 +148,11 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
        pte = pte_mkold(mk_pte(new, vma->vm_page_prot));
        if (pte_swp_soft_dirty(*ptep))
                pte = pte_mksoft_dirty(pte);
+
+       /* Recheck VMA as permissions can change since migration started  */
        if (is_write_migration_entry(entry))
-               pte = pte_mkwrite(pte);
+               pte = maybe_mkwrite(pte, vma);
+
 #ifdef CONFIG_HUGETLB_PAGE
        if (PageHuge(new)) {
                pte = pte_mkhuge(pte);
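
The fix above stops unconditionally marking the restored PTE writable: maybe_mkwrite() grants write permission only if the VMA still has VM_WRITE, since the mapping's permissions may have changed (e.g. via mprotect()) while the page was under migration. A toy model of that gate, with simplified stand-in types rather than the kernel definitions:

#include <stdio.h>

#define VM_WRITE        0x2UL
#define PTE_RW          0x2UL

struct pte { unsigned long val; };
struct vma { unsigned long vm_flags; };

static struct pte maybe_mkwrite(struct pte pte, const struct vma *vma)
{
        if (vma->vm_flags & VM_WRITE)
                pte.val |= PTE_RW;      /* the pte_mkwrite() step */
        return pte;
}

int main(void)
{
        struct vma ro = { .vm_flags = 0 };              /* now read-only */
        struct vma rw = { .vm_flags = VM_WRITE };
        struct pte p = { 0 };

        printf("ro: %#lx  rw: %#lx\n",
               maybe_mkwrite(p, &ro).val, maybe_mkwrite(p, &rw).val);
        return 0;
}
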
diff --git a/mm/mmap.c b/mm/mmap.c
index 20ff0c33274c..dfe90657a6db 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -10,6 +10,7 @@
 #include <linux/slab.h>
 #include <linux/backing-dev.h>
 #include <linux/mm.h>
+#include <linux/vmacache.h>
 #include <linux/shm.h>
 #include <linux/mman.h>
 #include <linux/pagemap.h>
@@ -681,8 +682,9 @@ __vma_unlink(struct mm_struct *mm, struct vm_area_struct *vma,
        prev->vm_next = next = vma->vm_next;
        if (next)
                next->vm_prev = prev;
-       if (mm->mmap_cache == vma)
-               mm->mmap_cache = prev;
+
+       /* Kill the cache */
+       vmacache_invalidate(mm);
 }
 
 /*
@@ -1989,34 +1991,33 @@ EXPORT_SYMBOL(get_unmapped_area);
 /* Look up the first VMA which satisfies  addr < vm_end,  NULL if none. */
 struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
 {
-       struct vm_area_struct *vma = NULL;
+       struct rb_node *rb_node;
+       struct vm_area_struct *vma;
 
        /* Check the cache first. */
-       /* (Cache hit rate is typically around 35%.) */
-       vma = ACCESS_ONCE(mm->mmap_cache);
-       if (!(vma && vma->vm_end > addr && vma->vm_start <= addr)) {
-               struct rb_node *rb_node;
+       vma = vmacache_find(mm, addr);
+       if (likely(vma))
+               return vma;
 
-               rb_node = mm->mm_rb.rb_node;
-               vma = NULL;
+       rb_node = mm->mm_rb.rb_node;
+       vma = NULL;
 
-               while (rb_node) {
-                       struct vm_area_struct *vma_tmp;
-
-                       vma_tmp = rb_entry(rb_node,
-                                          struct vm_area_struct, vm_rb);
-
-                       if (vma_tmp->vm_end > addr) {
-                               vma = vma_tmp;
-                               if (vma_tmp->vm_start <= addr)
-                                       break;
-                               rb_node = rb_node->rb_left;
-                       } else
-                               rb_node = rb_node->rb_right;
-               }
-               if (vma)
-                       mm->mmap_cache = vma;
+       while (rb_node) {
+               struct vm_area_struct *tmp;
+
+               tmp = rb_entry(rb_node, struct vm_area_struct, vm_rb);
+
+               if (tmp->vm_end > addr) {
+                       vma = tmp;
+                       if (tmp->vm_start <= addr)
+                               break;
+                       rb_node = rb_node->rb_left;
+               } else
+                       rb_node = rb_node->rb_right;
        }
+
+       if (vma)
+               vmacache_update(addr, vma);
        return vma;
 }
 
@@ -2388,7 +2389,9 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
        } else
                mm->highest_vm_end = prev ? prev->vm_end : 0;
        tail_vma->vm_next = NULL;
-       mm->mmap_cache = NULL;          /* Kill the cache. */
+
+       /* Kill the cache */
+       vmacache_invalidate(mm);
 }
 
 /*
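
The single mm->mmap_cache pointer is replaced here by a small per-task cache (current->vmacache[]) indexed by a hash of the address, and invalidated lazily by bumping a per-mm sequence number that each task compares against its own copy. A compressed userspace model of that lookup/update/invalidate flow; slot count, hash, and struct layouts below are illustrative, not the kernel's exact definitions:

#include <stdio.h>

#define CACHE_SLOTS     4U
#define PAGE_SHIFT      12

struct vma  { unsigned long start, end; };
struct mm   { unsigned int seq; };
struct task {
        unsigned int seq;
        struct vma *cache[CACHE_SLOTS];
};

static unsigned int slot(unsigned long addr)
{
        return (addr >> PAGE_SHIFT) & (CACHE_SLOTS - 1);
}

static void cache_invalidate(struct mm *mm)
{
        mm->seq++;                      /* lazy: readers resync on next lookup */
}

static struct vma *cache_find(struct task *t, struct mm *mm, unsigned long addr)
{
        unsigned int i;

        if (t->seq != mm->seq) {        /* stale: flush our slots and miss */
                t->seq = mm->seq;
                for (i = 0; i < CACHE_SLOTS; i++)
                        t->cache[i] = NULL;
                return NULL;
        }
        for (i = 0; i < CACHE_SLOTS; i++) {
                struct vma *v = t->cache[i];

                if (v && v->start <= addr && v->end > addr)
                        return v;
        }
        return NULL;
}

static void cache_update(struct task *t, unsigned long addr, struct vma *v)
{
        t->cache[slot(addr)] = v;
}

int main(void)
{
        struct mm mm = { 0 };
        struct task t = { 0 };
        struct vma v = { 0x1000, 0x3000 };

        cache_update(&t, 0x1800, &v);
        printf("hit:  %p\n", (void *)cache_find(&t, &mm, 0x1800));
        cache_invalidate(&mm);
        printf("miss: %p\n", (void *)cache_find(&t, &mm, 0x1800));
        return 0;
}
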
diff --git a/mm/nommu.c b/mm/nommu.c
index 8740213b1647..3ee4f74fbfbe 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -15,6 +15,7 @@
 
 #include <linux/export.h>
 #include <linux/mm.h>
+#include <linux/vmacache.h>
 #include <linux/mman.h>
 #include <linux/swap.h>
 #include <linux/file.h>
@@ -768,16 +769,23 @@ static void add_vma_to_mm(struct mm_struct *mm, struct vm_area_struct *vma)
  */
 static void delete_vma_from_mm(struct vm_area_struct *vma)
 {
+       int i;
        struct address_space *mapping;
        struct mm_struct *mm = vma->vm_mm;
+       struct task_struct *curr = current;
 
        kenter("%p", vma);
 
        protect_vma(vma, 0);
 
        mm->map_count--;
-       if (mm->mmap_cache == vma)
-               mm->mmap_cache = NULL;
+       for (i = 0; i < VMACACHE_SIZE; i++) {
+               /* if the vma is cached, invalidate the entire cache */
+               if (curr->vmacache[i] == vma) {
+                       vmacache_invalidate(curr->mm);
+                       break;
+               }
+       }
 
        /* remove the VMA from the mapping */
        if (vma->vm_file) {
@@ -825,8 +833,8 @@ struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
        struct vm_area_struct *vma;
 
        /* check the cache first */
-       vma = ACCESS_ONCE(mm->mmap_cache);
-       if (vma && vma->vm_start <= addr && vma->vm_end > addr)
+       vma = vmacache_find(mm, addr);
+       if (likely(vma))
                return vma;
 
        /* trawl the list (there may be multiple mappings in which addr
@@ -835,7 +843,7 @@ struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
                if (vma->vm_start > addr)
                        return NULL;
                if (vma->vm_end > addr) {
-                       mm->mmap_cache = vma;
+                       vmacache_update(addr, vma);
                        return vma;
                }
        }
@@ -874,8 +882,8 @@ static struct vm_area_struct *find_vma_exact(struct mm_struct *mm,
        unsigned long end = addr + len;
 
        /* check the cache first */
-       vma = mm->mmap_cache;
-       if (vma && vma->vm_start == addr && vma->vm_end == end)
+       vma = vmacache_find_exact(mm, addr, end);
+       if (vma)
                return vma;
 
        /* trawl the list (there may be multiple mappings in which addr
@@ -886,7 +894,7 @@ static struct vm_area_struct *find_vma_exact(struct mm_struct *mm,
                if (vma->vm_start > addr)
                        return NULL;
                if (vma->vm_end == end) {
-                       mm->mmap_cache = vma;
+                       vmacache_update(addr, vma);
                        return vma;
                }
        }
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 62e400d00e3f..ff0f6b13f32f 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1869,7 +1869,7 @@ static void __paginginit init_zone_allows_reclaim(int nid)
 {
        int i;
 
-       for_each_online_node(i)
+       for_each_node_state(i, N_MEMORY)
                if (node_distance(nid, i) <= RECLAIM_DISTANCE)
                        node_set(i, NODE_DATA(nid)->reclaim_nodes);
                else
@@ -2736,7 +2736,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
                return NULL;
 
 retry_cpuset:
-       cpuset_mems_cookie = get_mems_allowed();
+       cpuset_mems_cookie = read_mems_allowed_begin();
 
        /* The preferred zone is used for statistics later */
        first_zones_zonelist(zonelist, high_zoneidx,
@@ -2791,7 +2791,7 @@ out:
         * the mask is being updated. If a page allocation is about to fail,
         * check if the cpuset changed during allocation and if so, retry.
         */
-       if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page))
+       if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie)))
                goto retry_cpuset;
 
        memcg_kmem_commit_charge(page, memcg, order);
@@ -3059,9 +3059,9 @@ bool skip_free_areas_node(unsigned int flags, int nid)
                goto out;
 
        do {
-               cpuset_mems_cookie = get_mems_allowed();
+               cpuset_mems_cookie = read_mems_allowed_begin();
                ret = !node_isset(nid, cpuset_current_mems_allowed);
-       } while (!put_mems_allowed(cpuset_mems_cookie));
+       } while (read_mems_allowed_retry(cpuset_mems_cookie));
 out:
        return ret;
 }
@@ -4933,7 +4933,8 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
 
        pgdat->node_id = nid;
        pgdat->node_start_pfn = node_start_pfn;
-       init_zone_allows_reclaim(nid);
+       if (node_state(nid, N_MEMORY))
+               init_zone_allows_reclaim(nid);
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
        get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
 #endif
diff --git a/mm/readahead.c b/mm/readahead.c
index 0de2360d65f3..1fa0d6fca556 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -233,14 +233,14 @@ int force_page_cache_readahead(struct address_space *mapping, struct file *filp,
        return 0;
 }
 
+#define MAX_READAHEAD   ((512*4096)/PAGE_CACHE_SIZE)
 /*
  * Given a desired number of PAGE_CACHE_SIZE readahead pages, return a
  * sensible upper limit.
  */
 unsigned long max_sane_readahead(unsigned long nr)
 {
-       return min(nr, (node_page_state(numa_node_id(), NR_INACTIVE_FILE)
-               + node_page_state(numa_node_id(), NR_FREE_PAGES)) / 2);
+       return min(nr, MAX_READAHEAD);
 }
 
 /*
diff --git a/mm/slab.c b/mm/slab.c
index ea854eb2388c..0b1c2a58559d 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3122,7 +3122,7 @@ static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
        local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK);
 
 retry_cpuset:
-       cpuset_mems_cookie = get_mems_allowed();
+       cpuset_mems_cookie = read_mems_allowed_begin();
        zonelist = node_zonelist(slab_node(), flags);
 
 retry:
@@ -3180,7 +3180,7 @@ retry:
                }
        }
 
-       if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !obj))
+       if (unlikely(!obj && read_mems_allowed_retry(cpuset_mems_cookie)))
                goto retry_cpuset;
        return obj;
 }
diff --git a/mm/slub.c b/mm/slub.c
index 25f14ad8f817..7611f148ee81 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1684,7 +1684,7 @@ static void *get_any_partial(struct kmem_cache *s, gfp_t flags,
                return NULL;
 
        do {
-               cpuset_mems_cookie = get_mems_allowed();
+               cpuset_mems_cookie = read_mems_allowed_begin();
                zonelist = node_zonelist(slab_node(), flags);
                for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
                        struct kmem_cache_node *n;
@@ -1696,19 +1696,17 @@ static void *get_any_partial(struct kmem_cache *s, gfp_t flags,
                                object = get_partial_node(s, n, c, flags);
                                if (object) {
                                        /*
-                                        * Return the object even if
-                                        * put_mems_allowed indicated that
-                                        * the cpuset mems_allowed was
-                                        * updated in parallel. It's a
-                                        * harmless race between the alloc
-                                        * and the cpuset update.
+                                        * Don't check read_mems_allowed_retry()
+                                        * here - if mems_allowed was updated in
+                                        * parallel, that was a harmless race
+                                        * between allocation and the cpuset
+                                        * update
                                         */
-                                       put_mems_allowed(cpuset_mems_cookie);
                                        return object;
                                }
                        }
                }
-       } while (!put_mems_allowed(cpuset_mems_cookie));
+       } while (read_mems_allowed_retry(cpuset_mems_cookie));
 #endif
        return NULL;
 }
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 4a7f7e6992b6..beeeef8a1b2d 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -51,14 +51,32 @@ atomic_long_t nr_swap_pages;
 /* protected with swap_lock. reading in vm_swap_full() doesn't need lock */
 long total_swap_pages;
 static int least_priority;
-static atomic_t highest_priority_index = ATOMIC_INIT(-1);
 
 static const char Bad_file[] = "Bad swap file entry ";
 static const char Unused_file[] = "Unused swap file entry ";
 static const char Bad_offset[] = "Bad swap offset entry ";
 static const char Unused_offset[] = "Unused swap offset entry ";
 
-struct swap_list_t swap_list = {-1, -1};
+/*
+ * all active swap_info_structs
+ * protected with swap_lock, and ordered by priority.
+ */
+PLIST_HEAD(swap_active_head);
+
+/*
+ * all available (active, not full) swap_info_structs
+ * protected with swap_avail_lock, ordered by priority.
+ * This is used by get_swap_page() instead of swap_active_head
+ * because swap_active_head includes all swap_info_structs,
+ * but get_swap_page() doesn't need to look at full ones.
+ * This uses its own lock instead of swap_lock because when a
+ * swap_info_struct changes between not-full/full, it needs to
+ * add/remove itself to/from this list, but the swap_info_struct->lock
+ * is held and the locking order requires swap_lock to be taken
+ * before any swap_info_struct->lock.
+ */
+static PLIST_HEAD(swap_avail_head);
+static DEFINE_SPINLOCK(swap_avail_lock);
 
 struct swap_info_struct *swap_info[MAX_SWAPFILES];
 
@@ -591,6 +609,9 @@ checks:
        if (si->inuse_pages == si->pages) {
                si->lowest_bit = si->max;
                si->highest_bit = 0;
+               spin_lock(&swap_avail_lock);
+               plist_del(&si->avail_list, &swap_avail_head);
+               spin_unlock(&swap_avail_lock);
        }
        si->swap_map[offset] = usage;
        inc_cluster_info_page(si, si->cluster_info, offset);
@@ -640,71 +661,65 @@ no_page:
 
 swp_entry_t get_swap_page(void)
 {
-       struct swap_info_struct *si;
+       struct swap_info_struct *si, *next;
        pgoff_t offset;
-       int type, next;
-       int wrapped = 0;
-       int hp_index;
 
-       spin_lock(&swap_lock);
        if (atomic_long_read(&nr_swap_pages) <= 0)
                goto noswap;
        atomic_long_dec(&nr_swap_pages);
 
-       for (type = swap_list.next; type >= 0 && wrapped < 2; type = next) {
-               hp_index = atomic_xchg(&highest_priority_index, -1);
-               /*
-                * highest_priority_index records current highest priority swap
-                * type which just frees swap entries. If its priority is
-                * higher than that of swap_list.next swap type, we use it.  It
-                * isn't protected by swap_lock, so it can be an invalid value
-                * if the corresponding swap type is swapoff. We double check
-                * the flags here. It's even possible the swap type is swapoff
-                * and swapon again and its priority is changed. In such rare
-                * case, low prority swap type might be used, but eventually
-                * high priority swap will be used after several rounds of
-                * swap.
-                */
-               if (hp_index != -1 && hp_index != type &&
-                   swap_info[type]->prio < swap_info[hp_index]->prio &&
-                   (swap_info[hp_index]->flags & SWP_WRITEOK)) {
-                       type = hp_index;
-                       swap_list.next = type;
-               }
-
-               si = swap_info[type];
-               next = si->next;
-               if (next < 0 ||
-                   (!wrapped && si->prio != swap_info[next]->prio)) {
-                       next = swap_list.head;
-                       wrapped++;
-               }
+       spin_lock(&swap_avail_lock);
 
+start_over:
+       plist_for_each_entry_safe(si, next, &swap_avail_head, avail_list) {
+               /* requeue si to after same-priority siblings */
+               plist_requeue(&si->avail_list, &swap_avail_head);
+               spin_unlock(&swap_avail_lock);
                spin_lock(&si->lock);
-               if (!si->highest_bit) {
+               if (!si->highest_bit || !(si->flags & SWP_WRITEOK)) {
+                       spin_lock(&swap_avail_lock);
+                       if (plist_node_empty(&si->avail_list)) {
+                               spin_unlock(&si->lock);
+                               goto nextsi;
+                       }
+                       WARN(!si->highest_bit,
+                            "swap_info %d in list but !highest_bit\n",
+                            si->type);
+                       WARN(!(si->flags & SWP_WRITEOK),
+                            "swap_info %d in list but !SWP_WRITEOK\n",
+                            si->type);
+                       plist_del(&si->avail_list, &swap_avail_head);
                        spin_unlock(&si->lock);
-                       continue;
+                       goto nextsi;
                }
-               if (!(si->flags & SWP_WRITEOK)) {
-                       spin_unlock(&si->lock);
-                       continue;
-               }
-
-               swap_list.next = next;
 
-               spin_unlock(&swap_lock);
                /* This is called for allocating swap entry for cache */
                offset = scan_swap_map(si, SWAP_HAS_CACHE);
                spin_unlock(&si->lock);
                if (offset)
-                       return swp_entry(type, offset);
-               spin_lock(&swap_lock);
-               next = swap_list.next;
+                       return swp_entry(si->type, offset);
+               pr_debug("scan_swap_map of si %d failed to find offset\n",
+                      si->type);
+               spin_lock(&swap_avail_lock);
+nextsi:
+               /*
+                * if we got here, it's likely that si was almost full before,
+                * and since scan_swap_map() can drop the si->lock, multiple
+                * callers probably all tried to get a page from the same si
+                * and it filled up before we could get one; or, the si filled
+                * up between us dropping swap_avail_lock and taking si->lock.
+                * Since we dropped the swap_avail_lock, the swap_avail_head
+                * list may have been modified; so if next is still in the
+                * swap_avail_head list then try it, otherwise start over.
+                */
+               if (plist_node_empty(&next->avail_list))
+                       goto start_over;
        }
 
+       spin_unlock(&swap_avail_lock);
+
        atomic_long_inc(&nr_swap_pages);
 noswap:
-       spin_unlock(&swap_lock);
        return (swp_entry_t) {0};
 }
 
@@ -766,27 +781,6 @@ out:
        return NULL;
 }
 
-/*
- * This swap type frees swap entry, check if it is the highest priority swap
- * type which just frees swap entry. get_swap_page() uses
- * highest_priority_index to search highest priority swap type. The
- * swap_info_struct.lock can't protect us if there are multiple swap types
- * active, so we use atomic_cmpxchg.
- */
-static void set_highest_priority_index(int type)
-{
-       int old_hp_index, new_hp_index;
-
-       do {
-               old_hp_index = atomic_read(&highest_priority_index);
-               if (old_hp_index != -1 &&
-                       swap_info[old_hp_index]->prio >= swap_info[type]->prio)
-                       break;
-               new_hp_index = type;
-       } while (atomic_cmpxchg(&highest_priority_index,
-               old_hp_index, new_hp_index) != old_hp_index);
-}
-
 static unsigned char swap_entry_free(struct swap_info_struct *p,
                                     swp_entry_t entry, unsigned char usage)
 {
@@ -828,9 +822,18 @@ static unsigned char swap_entry_free(struct swap_info_struct *p,
                dec_cluster_info_page(p, p->cluster_info, offset);
                if (offset < p->lowest_bit)
                        p->lowest_bit = offset;
-               if (offset > p->highest_bit)
+               if (offset > p->highest_bit) {
+                       bool was_full = !p->highest_bit;
                        p->highest_bit = offset;
-               set_highest_priority_index(p->type);
+                       if (was_full && (p->flags & SWP_WRITEOK)) {
+                               spin_lock(&swap_avail_lock);
+                               WARN_ON(!plist_node_empty(&p->avail_list));
+                               if (plist_node_empty(&p->avail_list))
+                                       plist_add(&p->avail_list,
+                                                 &swap_avail_head);
+                               spin_unlock(&swap_avail_lock);
+                       }
+               }
                atomic_long_inc(&nr_swap_pages);
                p->inuse_pages--;
                frontswap_invalidate_page(p->type, offset);
@@ -1765,30 +1768,37 @@ static void _enable_swap_info(struct swap_info_struct *p, int prio,
                                unsigned char *swap_map,
                                struct swap_cluster_info *cluster_info)
 {
-       int i, prev;
-
        if (prio >= 0)
                p->prio = prio;
        else
                p->prio = --least_priority;
+       /*
+        * the plist prio is negated because plist ordering is
+        * low-to-high, while swap ordering is high-to-low
+        */
+       p->list.prio = -p->prio;
+       p->avail_list.prio = -p->prio;
        p->swap_map = swap_map;
        p->cluster_info = cluster_info;
        p->flags |= SWP_WRITEOK;
        atomic_long_add(p->pages, &nr_swap_pages);
        total_swap_pages += p->pages;
 
-       /* insert swap space into swap_list: */
-       prev = -1;
-       for (i = swap_list.head; i >= 0; i = swap_info[i]->next) {
-               if (p->prio >= swap_info[i]->prio)
-                       break;
-               prev = i;
-       }
-       p->next = i;
-       if (prev < 0)
-               swap_list.head = swap_list.next = p->type;
-       else
-               swap_info[prev]->next = p->type;
+       assert_spin_locked(&swap_lock);
+       /*
+        * both lists are plists, and thus priority ordered.
+        * swap_active_head needs to be priority ordered for swapoff(),
+        * which on removal of any swap_info_struct with an auto-assigned
+        * (i.e. negative) priority increments the auto-assigned priority
+        * of any lower-priority swap_info_structs.
+        * swap_avail_head needs to be priority ordered for get_swap_page(),
+        * which allocates swap pages from the highest available priority
+        * swap_info_struct.
+        */
+       plist_add(&p->list, &swap_active_head);
+       spin_lock(&swap_avail_lock);
+       plist_add(&p->avail_list, &swap_avail_head);
+       spin_unlock(&swap_avail_lock);
 }
 
 static void enable_swap_info(struct swap_info_struct *p, int prio,
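
As the comment in the hunk above notes, plists order low-to-high while swap areas are served highest-priority first, so each node's plist priority is the negated swap priority. A simplified userspace model of that ordering trick using a plain sorted insert (this is not the kernel's <linux/plist.h>; equal keys append after existing entries, roughly matching plist_add()):

#include <stdio.h>

struct swap_area {
        int prio;                       /* swap priority (higher = preferred) */
        int key;                        /* list key = -prio */
        struct swap_area *next;
};

static void add_sorted(struct swap_area **head, struct swap_area *sa)
{
        sa->key = -sa->prio;
        while (*head && (*head)->key <= sa->key)
                head = &(*head)->next;
        sa->next = *head;
        *head = sa;
}

int main(void)
{
        struct swap_area a = { .prio = -2 }, b = { .prio = 5 }, c = { .prio = 0 };
        struct swap_area *head = NULL, *sa;

        add_sorted(&head, &a);
        add_sorted(&head, &b);
        add_sorted(&head, &c);

        /* Walk order: prio 5, 0, -2 -- highest swap priority first. */
        for (sa = head; sa; sa = sa->next)
                printf("prio %d (key %d)\n", sa->prio, sa->key);
        return 0;
}
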
@@ -1823,8 +1833,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
        struct address_space *mapping;
        struct inode *inode;
        struct filename *pathname;
-       int i, type, prev;
-       int err;
+       int err, found = 0;
        unsigned int old_block_size;
 
        if (!capable(CAP_SYS_ADMIN))
@@ -1842,17 +1851,16 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
                goto out;
 
        mapping = victim->f_mapping;
-       prev = -1;
        spin_lock(&swap_lock);
-       for (type = swap_list.head; type >= 0; type = swap_info[type]->next) {
-               p = swap_info[type];
+       plist_for_each_entry(p, &swap_active_head, list) {
                if (p->flags & SWP_WRITEOK) {
-                       if (p->swap_file->f_mapping == mapping)
+                       if (p->swap_file->f_mapping == mapping) {
+                               found = 1;
                                break;
+                       }
                }
-               prev = type;
        }
-       if (type < 0) {
+       if (!found) {
                err = -EINVAL;
                spin_unlock(&swap_lock);
                goto out_dput;
@@ -1864,20 +1872,21 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
                spin_unlock(&swap_lock);
                goto out_dput;
        }
-       if (prev < 0)
-               swap_list.head = p->next;
-       else
-               swap_info[prev]->next = p->next;
-       if (type == swap_list.next) {
-               /* just pick something that's safe... */
-               swap_list.next = swap_list.head;
-       }
+       spin_lock(&swap_avail_lock);
+       plist_del(&p->avail_list, &swap_avail_head);
+       spin_unlock(&swap_avail_lock);
        spin_lock(&p->lock);
        if (p->prio < 0) {
-               for (i = p->next; i >= 0; i = swap_info[i]->next)
-                       swap_info[i]->prio = p->prio--;
+               struct swap_info_struct *si = p;
+
+               plist_for_each_entry_continue(si, &swap_active_head, list) {
+                       si->prio++;
+                       si->list.prio--;
+                       si->avail_list.prio--;
+               }
                least_priority++;
        }
+       plist_del(&p->list, &swap_active_head);
        atomic_long_sub(p->pages, &nr_swap_pages);
        total_swap_pages -= p->pages;
        p->flags &= ~SWP_WRITEOK;
@@ -1885,7 +1894,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
        spin_unlock(&swap_lock);
 
        set_current_oom_origin();
-       err = try_to_unuse(type, false, 0); /* force all pages to be unused */
+       err = try_to_unuse(p->type, false, 0); /* force unuse all pages */
        clear_current_oom_origin();
 
        if (err) {
@@ -1926,7 +1935,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
        frontswap_map = frontswap_map_get(p);
        spin_unlock(&p->lock);
        spin_unlock(&swap_lock);
-       frontswap_invalidate_area(type);
+       frontswap_invalidate_area(p->type);
        frontswap_map_set(p, NULL);
        mutex_unlock(&swapon_mutex);
        free_percpu(p->percpu_cluster);
@@ -1935,7 +1944,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
        vfree(cluster_info);
        vfree(frontswap_map);
        /* Destroy swap account information */
-       swap_cgroup_swapoff(type);
+       swap_cgroup_swapoff(p->type);
 
        inode = mapping->host;
        if (S_ISBLK(inode->i_mode)) {
@@ -2142,8 +2151,9 @@ static struct swap_info_struct *alloc_swap_info(void)
                 */
        }
        INIT_LIST_HEAD(&p->first_swap_extent.list);
+       plist_node_init(&p->list, 0);
+       plist_node_init(&p->avail_list, 0);
        p->flags = SWP_USED;
-       p->next = -1;
        spin_unlock(&swap_lock);
        spin_lock_init(&p->lock);
 
diff --git a/mm/vmacache.c b/mm/vmacache.c
new file mode 100644
index 000000000000..1037a3bab505
--- /dev/null
+++ b/mm/vmacache.c
@@ -0,0 +1,114 @@
+/*
+ * Copyright (C) 2014 Davidlohr Bueso.
+ */
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/vmacache.h>
+
+/*
+ * Flush vma caches for threads that share a given mm.
+ *
+ * The operation is safe because the caller holds the mmap_sem
+ * exclusively and other threads accessing the vma cache will
+ * have mmap_sem held at least for read, so no extra locking
+ * is required to maintain the vma cache.
+ */
+void vmacache_flush_all(struct mm_struct *mm)
+{
+       struct task_struct *g, *p;
+
+       rcu_read_lock();
+       for_each_process_thread(g, p) {
+               /*
+                * Only flush the vmacache pointers as the
+                * mm seqnum is already set and curr's will
+                * be set upon invalidation when the next
+                * lookup is done.
+                */
+               if (mm == p->mm)
+                       vmacache_flush(p);
+       }
+       rcu_read_unlock();
+}
+
+/*
+ * This task may be accessing a foreign mm via (for example)
+ * get_user_pages()->find_vma().  The vmacache is task-local and this
+ * task's vmacache pertains to a different mm (ie, its own).  There is
+ * nothing we can do here.
+ *
+ * Also handle the case where a kernel thread has adopted this mm via use_mm().
+ * That kernel thread's vmacache is not applicable to this mm.
+ */
+static bool vmacache_valid_mm(struct mm_struct *mm)
+{
+       return current->mm == mm && !(current->flags & PF_KTHREAD);
+}
+
+void vmacache_update(unsigned long addr, struct vm_area_struct *newvma)
+{
+       if (vmacache_valid_mm(newvma->vm_mm))
+               current->vmacache[VMACACHE_HASH(addr)] = newvma;
+}
+
+static bool vmacache_valid(struct mm_struct *mm)
+{
+       struct task_struct *curr;
+
+       if (!vmacache_valid_mm(mm))
+               return false;
+
+       curr = current;
+       if (mm->vmacache_seqnum != curr->vmacache_seqnum) {
+               /*
+                * First attempt will always be invalid, initialize
+                * the new cache for this task here.
+                */
+               curr->vmacache_seqnum = mm->vmacache_seqnum;
+               vmacache_flush(curr);
+               return false;
+       }
+       return true;
+}
+
+struct vm_area_struct *vmacache_find(struct mm_struct *mm, unsigned long addr)
+{
+       int i;
+
+       if (!vmacache_valid(mm))
+               return NULL;
+
+       for (i = 0; i < VMACACHE_SIZE; i++) {
+               struct vm_area_struct *vma = current->vmacache[i];
+
+               if (!vma)
+                       continue;
+               if (WARN_ON_ONCE(vma->vm_mm != mm))
+                       break;
+               if (vma->vm_start <= addr && vma->vm_end > addr)
+                       return vma;
+       }
+
+       return NULL;
+}
+
+#ifndef CONFIG_MMU
+struct vm_area_struct *vmacache_find_exact(struct mm_struct *mm,
+                                          unsigned long start,
+                                          unsigned long end)
+{
+       int i;
+
+       if (!vmacache_valid(mm))
+               return NULL;
+
+       for (i = 0; i < VMACACHE_SIZE; i++) {
+               struct vm_area_struct *vma = current->vmacache[i];
+
+               if (vma && vma->vm_start == start && vma->vm_end == end)
+                       return vma;
+       }
+
+       return NULL;
+}
+#endif
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 6ef484f0777f..0c0b36e5b4f8 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -224,15 +224,15 @@ shrink_slab_node(struct shrink_control *shrinkctl, struct shrinker *shrinker,
        unsigned long freed = 0;
        unsigned long long delta;
        long total_scan;
-       long max_pass;
+       long freeable;
        long nr;
        long new_nr;
        int nid = shrinkctl->nid;
        long batch_size = shrinker->batch ? shrinker->batch
                                          : SHRINK_BATCH;
 
-       max_pass = shrinker->count_objects(shrinker, shrinkctl);
-       if (max_pass == 0)
+       freeable = shrinker->count_objects(shrinker, shrinkctl);
+       if (freeable == 0)
                return 0;
 
        /*
@@ -244,14 +244,14 @@ shrink_slab_node(struct shrink_control *shrinkctl, struct shrinker *shrinker,
 
        total_scan = nr;
        delta = (4 * nr_pages_scanned) / shrinker->seeks;
-       delta *= max_pass;
+       delta *= freeable;
        do_div(delta, lru_pages + 1);
        total_scan += delta;
        if (total_scan < 0) {
                printk(KERN_ERR
                "shrink_slab: %pF negative objects to delete nr=%ld\n",
                       shrinker->scan_objects, total_scan);
-               total_scan = max_pass;
+               total_scan = freeable;
        }
 
        /*
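
The shrink_slab_node() hunks here only rename max_pass to freeable, but the scan-target arithmetic they touch is easier to follow with numbers: delta = (4 * nr_pages_scanned / seeks) * freeable / (lru_pages + 1), added to any deferred work, then clamped to at most twice freeable (and to half of freeable when delta is small). A tiny worked example with made-up inputs; it mirrors the formula, not the kernel data structures:

#include <stdio.h>

int main(void)
{
        unsigned long long nr_pages_scanned = 1024, lru_pages = 100000;
        unsigned long long freeable = 5000, seeks = 2, nr = 0;
        unsigned long long delta, total_scan;

        delta = (4 * nr_pages_scanned) / seeks;         /* 2048 */
        delta = delta * freeable / (lru_pages + 1);     /* ~102 objects */
        total_scan = nr + delta;

        if (delta < freeable / 4 && total_scan > freeable / 2)
                total_scan = freeable / 2;              /* small delta: cap at half */
        if (total_scan > freeable * 2)
                total_scan = freeable * 2;              /* never more than 2x freeable */

        printf("delta=%llu total_scan=%llu\n", delta, total_scan);
        return 0;
}
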
@@ -260,26 +260,26 @@ shrink_slab_node(struct shrink_control *shrinkctl, struct shrinker *shrinker,
         * shrinkers to return -1 all the time. This results in a large
         * nr being built up so when a shrink that can do some work
         * comes along it empties the entire cache due to nr >>>
-        * max_pass.  This is bad for sustaining a working set in
+        * freeable. This is bad for sustaining a working set in
         * memory.
         *
         * Hence only allow the shrinker to scan the entire cache when
         * a large delta change is calculated directly.
         */
-       if (delta < max_pass / 4)
-               total_scan = min(total_scan, max_pass / 2);
+       if (delta < freeable / 4)
+               total_scan = min(total_scan, freeable / 2);
 
        /*
         * Avoid risking looping forever due to too large nr value:
         * never try to free more than twice the estimate number of
         * freeable entries.
         */
-       if (total_scan > max_pass * 2)
-               total_scan = max_pass * 2;
+       if (total_scan > freeable * 2)
+               total_scan = freeable * 2;
 
        trace_mm_shrink_slab_start(shrinker, shrinkctl, nr,
                                nr_pages_scanned, lru_pages,
-                               max_pass, delta, total_scan);
+                               freeable, delta, total_scan);
 
        /*
         * Normally, we should not scan less than batch_size objects in one
@@ -292,12 +292,12 @@ shrink_slab_node(struct shrink_control *shrinkctl, struct shrinker *shrinker,
         *
         * We detect the "tight on memory" situations by looking at the total
         * number of objects we want to scan (total_scan). If it is greater
-        * than the total number of objects on slab (max_pass), we must be
+        * than the total number of objects on slab (freeable), we must be
         * scanning at high prio and therefore should try to reclaim as much as
         * possible.
         */
        while (total_scan >= batch_size ||
-              total_scan >= max_pass) {
+              total_scan >= freeable) {
                unsigned long ret;
                unsigned long nr_to_scan = min(batch_size, total_scan);
 
@@ -1144,7 +1144,7 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone,
                        TTU_UNMAP|TTU_IGNORE_ACCESS,
                        &dummy1, &dummy2, &dummy3, &dummy4, &dummy5, true);
        list_splice(&clean_pages, page_list);
-       __mod_zone_page_state(zone, NR_ISOLATED_FILE, -ret);
+       mod_zone_page_state(zone, NR_ISOLATED_FILE, -ret);
        return ret;
 }
 
@@ -2424,8 +2424,8 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
                        unsigned long lru_pages = 0;
 
                        nodes_clear(shrink->nodes_to_scan);
-                       for_each_zone_zonelist(zone, z, zonelist,
-                                       gfp_zone(sc->gfp_mask)) {
+                       for_each_zone_zonelist_nodemask(zone, z, zonelist,
+                                       gfp_zone(sc->gfp_mask), sc->nodemask) {
                                if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
                                        continue;
 