The file_ra_state.age value is node specific, so comparing ages
taken on two different nodes is a bug.

So add code to
- record the node id in file_ra_state.flags;
- ensure that we only compare ages taken on the same node.

Signed-off-by: Fengguang Wu <[EMAIL PROTECTED]>
---
 include/linux/fs.h |    2 +-
 mm/readahead.c     |   30 +++++++++++++++++++++++++-----
 2 files changed, 26 insertions(+), 6 deletions(-)

--- linux-2.6.21-rc3-mm2.orig/include/linux/fs.h
+++ linux-2.6.21-rc3-mm2/include/linux/fs.h
@@ -738,7 +738,7 @@ struct file_ra_state {
        unsigned long mmap_hit;         /* Cache hit stat for mmap accesses */
        unsigned long mmap_miss;        /* Cache miss stat for mmap accesses */
 
-       unsigned long flags;    /* RA_FLAG_xxx | ra_class_old | ra_class_new */
+       unsigned long flags;    /* RA_FLAG_xxx | node_id | class_old | class_new */
        unsigned long prev_page;        /* Cache last read() position */
        unsigned long ra_pages;         /* Maximum readahead window */
 };
--- linux-2.6.21-rc3-mm2.orig/mm/readahead.c
+++ linux-2.6.21-rc3-mm2/mm/readahead.c
@@ -52,11 +52,13 @@ EXPORT_SYMBOL_GPL(readahead_ratio);
 int readahead_hit_rate = 1;
 #endif /* CONFIG_ADAPTIVE_READAHEAD */
 
+#define RA_CLASS_SHIFT 4
+#define RA_CLASS_MASK  ((1 << RA_CLASS_SHIFT) - 1)
+#define RA_NODE_SHIFT  (2 * RA_CLASS_SHIFT)
+#define RA_NODE_MASK   ((MAX_NUMNODES-1) << RA_NODE_SHIFT)
 /*
  * Detailed classification of read-ahead behaviors.
  */
-#define RA_CLASS_SHIFT 4
-#define RA_CLASS_MASK  ((1 << RA_CLASS_SHIFT) - 1)
 enum ra_class {
        RA_CLASS_ALL,
        RA_CLASS_INITIAL,
@@ -802,6 +804,11 @@ static inline enum ra_class ra_class_old
        return (ra->flags >> RA_CLASS_SHIFT) & RA_CLASS_MASK;
 }
 
+static inline int ra_node_id(struct file_ra_state *ra)
+{
+       return (ra->flags & RA_NODE_MASK) >> RA_NODE_SHIFT; /* pre-shifted mask */
+}
+
 static unsigned long ra_readahead_size(struct file_ra_state *ra)
 {
        return ra->readahead_index - ra->ra_index;
@@ -864,6 +871,18 @@ static void ra_set_size(struct file_ra_s
 }
 
 /*
+ * Remember the current node id (in ra->flags) and that node's age (in ra->age).
+ */
+static void ra_save_node_age(struct file_ra_state *ra)
+{
+       int nid = numa_node_id();
+
+       ra->flags &= ~RA_NODE_MASK;             /* drop the old node id */
+       ra->flags |= nid << RA_NODE_SHIFT;      /* store the new node id */
+       ra->age = nr_scanned_pages_node(nid);   /* age sampled on that node */
+}
+
+/*
  * Submit IO for the read-ahead request in file_ra_state.
  */
 static unsigned long ra_submit(struct file_ra_state *ra,
@@ -901,7 +920,7 @@ static unsigned long ra_submit(struct fi
        }
 
        /* Take down the current read-ahead aging value. */
-       ra->age = nr_scanned_pages_node(numa_node_id());
+       ra_save_node_age(ra);
 
        ra_size = ra_readahead_size(ra);
        la_size = ra_lookahead_size(ra);
@@ -1025,9 +1044,10 @@ static unsigned long compute_thrashing_t
        unsigned long stream_shift;
        unsigned long ra_size;
        uint64_t ll;
+       int nid = ra_node_id(ra);
 
-       global_size = nr_free_inactive_pages_node(numa_node_id());
-       global_shift = nr_scanned_pages_node(numa_node_id()) - ra->age;
+       global_size = nr_free_inactive_pages_node(nid);
+       global_shift = nr_scanned_pages_node(nid) - ra->age;
        global_shift |= 1UL;
        stream_shift = ra_invoke_interval(ra);
 

--
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to