Vasilis Liaskovitis found that before we parse SRAT and populate numa_meminfo,
the nids of all the regions in memblock.reserved[] are MAX_NUMNODES. That is
because nids have not been mapped at that time.

When we arrange ZONE_MOVABLE in each node later, we need the nid in memblock.
So after we parse SRAT and populate numa_meminfo, synchronize the nid info to
memblock.reserved[] immediately.

Signed-off-by: Tang Chen <tangc...@cn.fujitsu.com>
Signed-off-by: Vasilis Liaskovitis <vasilis.liaskovi...@profitbricks.com>
---
 arch/x86/mm/numa.c |   50 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 50 insertions(+), 0 deletions(-)

diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 5013583..f2a3984 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -548,6 +548,48 @@ static void __init numa_init_array(void)
        }
 }
 
+/*
+ * early_numa_find_range_nid - Find nid for a memory range at early time.
+ * @start: start address of the memory range (physaddr)
+ * @size: size of the memory range in bytes
+ *
+ * Return nid of the numa_meminfo block fully containing the range (block
+ * ends are exclusive), or MAX_NUMNODES if none does. Since it reads
+ * numa_meminfo, call it only after numa_meminfo has been initialized.
+ */
+int __init early_numa_find_range_nid(u64 start, u64 size)
+{
+       int i;
+       struct numa_meminfo *mi = &numa_meminfo;
+
+       for (i = 0; i < mi->nr_blks; i++) {
+               if (start >= mi->blk[i].start && start < mi->blk[i].end &&
+                   size <= mi->blk[i].end - start)
+                       return mi->blk[i].nid;
+       }
+
+       return MAX_NUMNODES;
+}
+
+/*
+ * numa_sync_memblock_nid - Synchronize nid info from numa_meminfo to
+ *                          memblock.reserved[].
+ *
+ * Look up the node of every region in memblock.reserved[] in numa_meminfo
+ * and store it in the region (MAX_NUMNODES if no block contains it).
+ */
+static void __init numa_sync_memblock_nid(void)
+{
+       int i, nid;
+       struct memblock_type *res = &memblock.reserved;
+
+       for (i = 0; i < res->cnt; i++) {
+               nid = early_numa_find_range_nid(res->regions[i].base,
+                                               res->regions[i].size);
+               memblock_set_region_node(&res->regions[i], nid);
+       }
+}
+
 static int __init numa_init(int (*init_func)(void))
 {
        int i;
@@ -585,6 +627,14 @@ static int __init numa_init(int (*init_func)(void))
                        numa_clear_node(i);
        }
        numa_init_array();
+
+       /*
+        * Until numa_meminfo is populated, all regions allocated by memblock
+        * are reserved with nid MAX_NUMNODES because there is no numa node
+        * info at such an early time. Now, fill the correct nid into memblock.
+        */
+       numa_sync_memblock_nid();
+
        return 0;
 }
 
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to