numaX: 
cpus=<id[-id]>,memory=<mb>[[,hostnodes=<id[-id]>][,policy=<preferred|bind|interleave>]]

example:
-------
sockets:4
cores:2
memory:4096
numa: 1
numa0: cpus=0-1,memory=1024,hostnodes=0-1,policy=interleave
numa1: cpus=2-3,memory=3072,hostnodes=2,policy=bind

qemu command line
-----------------
-object memory-backend-ram,size=1024M,policy=interleave,host-nodes=0-1,id=ram-node0
-numa node,nodeid=0,cpus=0-1,memdev=ram-node0

-object memory-backend-ram,size=3072M,policy=bind,host-nodes=2,id=ram-node1
-numa node,nodeid=1,cpus=2-3,memdev=ram-node1

Signed-off-by: Alexandre Derumier <aderum...@odiso.com>
---
 PVE/QemuServer.pm |  114 +++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 107 insertions(+), 7 deletions(-)

diff --git a/PVE/QemuServer.pm b/PVE/QemuServer.pm
index 94fd523..b758016 100644
--- a/PVE/QemuServer.pm
+++ b/PVE/QemuServer.pm
@@ -489,6 +489,19 @@ my $MAX_UNUSED_DISKS = 8;
 my $MAX_HOSTPCI_DEVICES = 4;
 my $MAX_SERIAL_PORTS = 4;
 my $MAX_PARALLEL_PORTS = 3;
+my $MAX_NUMA = 8; # number of numaX config options (numa0 .. numa7)
+
+my $numadesc = {
+    optional => 1,
+    type => 'string', format => 'pve-qm-numanode',
+    typetext => "cpus=<id[-id]>,memory=<mb>[[,hostnodes=<id[-id]>][,policy=<preferred|bind|interleave>]]",
+    description => "numa topology",
+};
+PVE::JSONSchema::register_standard_option("pve-qm-numanode", $numadesc);
+
+for (my $i = 0; $i < $MAX_NUMA; $i++)  {
+    $confdesc->{"numa$i"} = $numadesc; # all numaX options share one schema
+}
 
 my $nic_model_list = ['rtl8139', 'ne2k_pci', 'e1000',  'pcnet',  'virtio',
                      'ne2k_isa', 'i82551', 'i82557b', 'i82559er', 'vmxnet3'];
@@ -1278,6 +1291,31 @@ sub drive_is_cdrom {
 
 }
 
+# Parse a numaX option string, e.g. "cpus=0-1,memory=1024,hostnodes=0-1,policy=bind".
+# Returns a hash ref (memory, policy, cpus/hostnodes as {start,end}), undef if invalid.
+sub parse_numa {
+    my ($data) = @_;
+
+    my $res = {};
+    foreach my $kvp (split(/,/, $data)) {
+       # memory is in MB and is used numerically later, so only accept digits
+       if ($kvp =~ m/^memory=(\d+)$/) {
+           $res->{memory} = $1;
+       } elsif ($kvp =~ m/^policy=(preferred|bind|interleave)$/) {
+           $res->{policy} = $1;
+       } elsif ($kvp =~ m/^cpus=(\d+)(-(\d+))?$/) {
+           $res->{cpus}->{start} = $1;
+           $res->{cpus}->{end} = $3; # undef when a single id is given
+       } elsif ($kvp =~ m/^hostnodes=(\d+)(-(\d+))?$/) {
+           $res->{hostnodes}->{start} = $1;
+           $res->{hostnodes}->{end} = $3; # undef when a single id is given
+       } else {
+           return undef;
+       }
+    }
+    return $res;
+}
+
 sub parse_hostpci {
     my ($value) = @_;
 
@@ -1458,6 +1496,17 @@ sub verify_bootdisk {
     die "invalid boot disk '$value'\n";
 }
 
+PVE::JSONSchema::register_format('pve-qm-numanode', \&verify_numa);
+# Format check for numaX: returns $value if it parses, else undef ($noerr) or dies.
+sub verify_numa {
+    my ($value, $noerr) = @_;
+    if (!parse_numa($value)) {
+       return undef if $noerr;
+       die "unable to parse numa options\n";
+    }
+    return $value;
+}
+
 PVE::JSONSchema::register_format('pve-qm-net', \&verify_net);
 sub verify_net {
     my ($value, $noerr) = @_;
@@ -2697,18 +2746,69 @@ sub config_to_command {
 
     if($conf->{numa}){
 
-       my $numa_memory = ($memory / $sockets)."M";
+       my $numa_totalmemory = undef;
+       for (my $i = 0; $i < $MAX_NUMA; $i++) {
+           next if !$conf->{"numa$i"};
+           my $numa = parse_numa($conf->{"numa$i"});
+           next if !$numa;
+           #memory
+           die "missing numa node$i memory value" if !$numa->{memory};
+           my $numa_memory = $numa->{memory};
+           $numa_totalmemory += $numa_memory;
+           my $numa_object = 
"memory-backend-ram,id=ram-node$i,size=$numa_memory"."M";
+
+           #cpus
+           my $cpus_start = $numa->{cpus}->{start};
+           die "missing numa node$i cpus" if !defined($cpus_start);
+           my $cpus_end = $numa->{cpus}->{end} if 
defined($numa->{cpus}->{end});
+           my $cpus = $cpus_start;
+           if (defined($cpus_end)) {
+               $cpus .= "-$cpus_end";
+               die "numa node$i :  cpu range $cpus is incorrect" if $cpus_end 
<= $cpus_start;
+           }
 
-       for (my $i = 0; $i < $sockets; $i++)  {
+           #hostnodes
+           my $hostnodes_start = $numa->{hostnodes}->{start};
+           if (defined($hostnodes_start)) {
+               my $hostnodes_end = $numa->{hostnodes}->{end} if 
defined($numa->{hostnodes}->{end});
+               my $hostnodes = $hostnodes_start;
+               if (defined($hostnodes_end)) {
+                   $hostnodes .= "-$hostnodes_end";
+                   die "host node $hostnodes range is incorrect" if 
$hostnodes_end <= $hostnodes_start;
+               }
 
-           my $cpustart = ($cores * $i);
-           my $cpuend = ($cpustart + $cores - 1) if $cores && $cores > 1;
-           my $cpus = $cpustart;
-           $cpus .= "-$cpuend" if $cpuend;
+               my $hostnodes_end_range = defined($hostnodes_end) ? 
$hostnodes_end : $hostnodes_start;
+               for (my $i = $hostnodes_start; $i <= $hostnodes_end_range; $i++ 
) {
+                   die "host numa node$i don't exist" if !(-d 
"/sys/devices/system/node/node$i/");
+               }
 
-           push @$cmd, '-object', 
"memory-backend-ram,size=$numa_memory,id=ram-node$i";
+               #policy
+               my $policy = $numa->{policy};
+               die "you need to define a policy for hostnode $hostnodes" if 
!$policy;
+               $numa_object .= ",host-nodes=$hostnodes,policy=$policy";        
+           }
+
+           push @$cmd, '-object', $numa_object;
            push @$cmd, '-numa', "node,nodeid=$i,cpus=$cpus,memdev=ram-node$i";
        }
+       die "total memory for NUMA nodes must be equal to vm memory" if 
$numa_totalmemory && $numa_totalmemory != $memory;
+
+       #if no custom topology, we split memory and cores across numa nodes
+       if(!$numa_totalmemory) {
+
+           my $numa_memory = ($memory / $sockets)."M";
+
+           for (my $i = 0; $i < $sockets; $i++)  {
+
+               my $cpustart = ($cores * $i);
+               my $cpuend = ($cpustart + $cores - 1) if $cores && $cores > 1;
+               my $cpus = $cpustart;
+               $cpus .= "-$cpuend" if $cpuend;
+
+               push @$cmd, '-object', 
"memory-backend-ram,size=$numa_memory,id=ram-node$i";
+               push @$cmd, '-numa', 
"node,nodeid=$i,cpus=$cpus,memdev=ram-node$i";
+           }
+       }
     }
 
     push @$cmd, '-S' if $conf->{freeze};
-- 
1.7.10.4

_______________________________________________
pve-devel mailing list
pve-devel@pve.proxmox.com
http://pve.proxmox.com/cgi-bin/mailman/listinfo/pve-devel

Reply via email to