According to the user-provided assignment, bind the respective part
of the guest's memory to the given host node. This uses Linux's
libnuma interface to perform the pinning right after the allocation.
Failures are not fatal, but produce a warning.

Signed-off-by: Andre Przywara <andre.przyw...@amd.com>
---
 hw/pc.c |   51 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 51 insertions(+), 0 deletions(-)

diff --git a/hw/pc.c b/hw/pc.c
index 1f61609..b6d4d7a 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -41,6 +41,11 @@
 #include "device-assignment.h"
 #include "kvm.h"
 
+#ifdef CONFIG_NUMA
+#include <numa.h>
+#include <numaif.h>
+#endif
+
 /* output Bochs bios info messages */
 //#define DEBUG_BIOS
 
@@ -874,6 +879,49 @@ void pc_cpus_init(const char *cpu_model)
     }
 }
 
+static void bind_numa(ram_addr_t ram_addr, ram_addr_t border_4g, /* bind per-node slices of one RAM block to host NUMA nodes */
+                      int below_4g) /* nonzero: handle the part below border_4g; zero: the part above it */
+{
+#ifdef CONFIG_NUMA /* without CONFIG_NUMA this function does nothing */
+    int i, skip;
+    char* ram_ptr;
+    nodemask_t nodemask;
+    ram_addr_t len, ram_offset;
+
+    ram_ptr = qemu_get_ram_ptr(ram_addr); /* presumably the host pointer backing ram_addr; confirm against qemu_get_ram_ptr */
+    /* ram_offset accumulates node_mem[] sizes; mbind ranges are taken at ram_ptr + ram_offset */
+    ram_offset = 0;
+    skip = !below_4g; /* above-border pass: skip nodes until the border is reached */
+    for (i = 0; i < nb_numa_nodes; i++) {
+        len = node_mem[i];
+        if (ram_offset <= border_4g && ram_offset + len > border_4g) { /* node i contains the border */
+            len = border_4g - ram_offset; /* size of the part below the border */
+               if (skip) {
+                ram_offset = 0; /* restart offsets at the beginning of ram_ptr */
+                len = node_mem[i] - len; /* remainder of node i past the border */
+                skip = 0; /* bind from this node onwards */
+            }
+        }
+        if (skip && ram_offset + len <= border_4g) { /* node ends at or below the border: only account for its size */
+            ram_offset += len;
+            continue;
+        }
+        if (!skip && node_pin[i] >= 0) { /* a negative node_pin[i] means no binding is applied */
+            nodemask_zero(&nodemask);
+            nodemask_set_compat(&nodemask, node_pin[i]); /* mask holds only host node node_pin[i] */
+               if (mbind(ram_ptr + ram_offset, len, MPOL_BIND,
+                   nodemask.n, NUMA_NUM_NODES, 0)) { /* NOTE(review): verify maxnode against mbind(2) */
+                       perror("mbind"); /* failure is reported; the loop carries on */
+            }
+        }
+        ram_offset += len;
+        if (below_4g && ram_offset >= border_4g) /* below-border pass ends once the border is reached */
+            return;
+    }
+#endif /* CONFIG_NUMA */
+    return;
+}
+
 void pc_memory_init(ram_addr_t ram_size,
                     const char *kernel_filename,
                     const char *kernel_cmdline,
@@ -906,6 +954,8 @@ void pc_memory_init(ram_addr_t ram_size,
                  below_4g_mem_size - 0x100000,
                  ram_addr + 0x100000);
 
+    bind_numa(ram_addr, below_4g_mem_size, 1);
+
     /* above 4giga memory allocation */
     if (above_4g_mem_size > 0) {
 #if TARGET_PHYS_ADDR_BITS == 32
@@ -915,6 +965,7 @@ void pc_memory_init(ram_addr_t ram_size,
         cpu_register_physical_memory(0x100000000ULL,
                                      above_4g_mem_size,
                                      ram_addr);
+        bind_numa(ram_addr, below_4g_mem_size, 0);
 #endif
     }
 
-- 
1.6.4


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to