Currently both of these sysctls make a copy to userspace for each index of
various query arrays. We should try to copy whole arrays instead.

This requires some changes in sysctl's public data structure, thus bump
interface version.

Report topology for all present (not just online) cpus.

Rename xen_sysctl_topologyinfo and XEN_SYSCTL_topologyinfo to reflect the fact
that these are used for CPU topology. Subsequent patch will add support for
PCI topology sysctl.

Clarify some somments in sysctl.h.

Signed-off-by: Boris Ostrovsky <boris.ostrov...@oracle.com>
---
 tools/libxc/include/xenctrl.h     |    4 +-
 tools/libxc/xc_misc.c             |   10 ++--
 tools/libxl/libxl.c               |   71 +++++++------------
 tools/misc/xenpm.c                |   69 +++++++-----------
 tools/python/xen/lowlevel/xc/xc.c |   77 ++++++++------------
 xen/common/sysctl.c               |  141 ++++++++++++++++++++++---------------
 xen/include/public/sysctl.h       |   75 ++++++++++++--------
 7 files changed, 221 insertions(+), 226 deletions(-)

diff --git a/tools/libxc/include/xenctrl.h b/tools/libxc/include/xenctrl.h
index 0ad8b8d..0cb6743 100644
--- a/tools/libxc/include/xenctrl.h
+++ b/tools/libxc/include/xenctrl.h
@@ -1226,7 +1226,7 @@ int xc_readconsolering(xc_interface *xch,
 int xc_send_debug_keys(xc_interface *xch, char *keys);
 
 typedef xen_sysctl_physinfo_t xc_physinfo_t;
-typedef xen_sysctl_topologyinfo_t xc_topologyinfo_t;
+typedef xen_sysctl_cputopoinfo_t xc_cputopoinfo_t;
 typedef xen_sysctl_numainfo_t xc_numainfo_t;
 
 typedef uint32_t xc_cpu_to_node_t;
@@ -1237,7 +1237,7 @@ typedef uint64_t xc_node_to_memfree_t;
 typedef uint32_t xc_node_to_node_dist_t;
 
 int xc_physinfo(xc_interface *xch, xc_physinfo_t *info);
-int xc_topologyinfo(xc_interface *xch, xc_topologyinfo_t *info);
+int xc_cputopoinfo(xc_interface *xch, xc_cputopoinfo_t *info);
 int xc_numainfo(xc_interface *xch, xc_numainfo_t *info);
 
 int xc_sched_id(xc_interface *xch,
diff --git a/tools/libxc/xc_misc.c b/tools/libxc/xc_misc.c
index e253a58..be68291 100644
--- a/tools/libxc/xc_misc.c
+++ b/tools/libxc/xc_misc.c
@@ -177,20 +177,20 @@ int xc_physinfo(xc_interface *xch,
     return 0;
 }
 
-int xc_topologyinfo(xc_interface *xch,
-                xc_topologyinfo_t *put_info)
+int xc_cputopoinfo(xc_interface *xch,
+                   xc_cputopoinfo_t *put_info)
 {
     int ret;
     DECLARE_SYSCTL;
 
-    sysctl.cmd = XEN_SYSCTL_topologyinfo;
+    sysctl.cmd = XEN_SYSCTL_cputopoinfo;
 
-    memcpy(&sysctl.u.topologyinfo, put_info, sizeof(*put_info));
+    memcpy(&sysctl.u.cputopoinfo, put_info, sizeof(*put_info));
 
     if ( (ret = do_sysctl(xch, &sysctl)) != 0 )
         return ret;
 
-    memcpy(put_info, &sysctl.u.topologyinfo, sizeof(*put_info));
+    memcpy(put_info, &sysctl.u.cputopoinfo, sizeof(*put_info));
 
     return 0;
 }
diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c
index 50a8928..b05c0bb 100644
--- a/tools/libxl/libxl.c
+++ b/tools/libxl/libxl.c
@@ -5072,10 +5072,8 @@ int libxl_get_physinfo(libxl_ctx *ctx, libxl_physinfo 
*physinfo)
 libxl_cputopology *libxl_get_cpu_topology(libxl_ctx *ctx, int *nb_cpu_out)
 {
     GC_INIT(ctx);
-    xc_topologyinfo_t tinfo;
-    DECLARE_HYPERCALL_BUFFER(xc_cpu_to_core_t, coremap);
-    DECLARE_HYPERCALL_BUFFER(xc_cpu_to_socket_t, socketmap);
-    DECLARE_HYPERCALL_BUFFER(xc_cpu_to_node_t, nodemap);
+    xc_cputopoinfo_t tinfo;
+    DECLARE_HYPERCALL_BUFFER(xen_sysctl_cputopo_t, cputopo);
     libxl_cputopology *ret = NULL;
     int i;
     int max_cpus;
@@ -5088,24 +5086,18 @@ libxl_cputopology *libxl_get_cpu_topology(libxl_ctx 
*ctx, int *nb_cpu_out)
         goto out;
     }
 
-    coremap = xc_hypercall_buffer_alloc
-        (ctx->xch, coremap, sizeof(*coremap) * max_cpus);
-    socketmap = xc_hypercall_buffer_alloc
-        (ctx->xch, socketmap, sizeof(*socketmap) * max_cpus);
-    nodemap = xc_hypercall_buffer_alloc
-        (ctx->xch, nodemap, sizeof(*nodemap) * max_cpus);
-    if ((coremap == NULL) || (socketmap == NULL) || (nodemap == NULL)) {
+    cputopo = xc_hypercall_buffer_alloc(ctx->xch, cputopo,
+                                        sizeof(*cputopo) * max_cpus);
+    if (cputopo == NULL) {
         LIBXL__LOG_ERRNOVAL(ctx, XTL_ERROR, ENOMEM,
                             "Unable to allocate hypercall arguments");
         goto fail;
     }
 
-    set_xen_guest_handle(tinfo.cpu_to_core, coremap);
-    set_xen_guest_handle(tinfo.cpu_to_socket, socketmap);
-    set_xen_guest_handle(tinfo.cpu_to_node, nodemap);
+    set_xen_guest_handle(tinfo.cputopo, cputopo);
     tinfo.max_cpu_index = max_cpus - 1;
-    if (xc_topologyinfo(ctx->xch, &tinfo) != 0) {
-        LIBXL__LOG_ERRNO(ctx, XTL_ERROR, "Topology info hypercall failed");
+    if (xc_cputopoinfo(ctx->xch, &tinfo) != 0) {
+        LIBXL__LOG_ERRNO(ctx, XTL_ERROR, "CPU topology info hypercall failed");
         goto fail;
     }
 
@@ -5115,18 +5107,16 @@ libxl_cputopology *libxl_get_cpu_topology(libxl_ctx 
*ctx, int *nb_cpu_out)
     ret = libxl__zalloc(NOGC, sizeof(libxl_cputopology) * max_cpus);
 
     for (i = 0; i < max_cpus; i++) {
-#define V(map, i) (map[i] == INVALID_TOPOLOGY_ID) ? \
-    LIBXL_CPUTOPOLOGY_INVALID_ENTRY : map[i]
-        ret[i].core = V(coremap, i);
-        ret[i].socket = V(socketmap, i);
-        ret[i].node = V(nodemap, i);
+#define V(map, i, invalid) ( cputopo[i].map == invalid) ? \
+    LIBXL_CPUTOPOLOGY_INVALID_ENTRY :  cputopo[i].map
+        ret[i].core = V(core, i, INVALID_CORE_ID);
+        ret[i].socket = V(socket, i, INVALID_SOCKET_ID);
+        ret[i].node = V(node, i, INVALID_NODE_ID);
 #undef V
     }
 
 fail:
-    xc_hypercall_buffer_free(ctx->xch, coremap);
-    xc_hypercall_buffer_free(ctx->xch, socketmap);
-    xc_hypercall_buffer_free(ctx->xch, nodemap);
+    xc_hypercall_buffer_free(ctx->xch, cputopo);
 
     if (ret)
         *nb_cpu_out = max_cpus;
@@ -5139,9 +5129,8 @@ libxl_numainfo *libxl_get_numainfo(libxl_ctx *ctx, int 
*nr)
 {
     GC_INIT(ctx);
     xc_numainfo_t ninfo;
-    DECLARE_HYPERCALL_BUFFER(xc_node_to_memsize_t, memsize);
-    DECLARE_HYPERCALL_BUFFER(xc_node_to_memfree_t, memfree);
-    DECLARE_HYPERCALL_BUFFER(uint32_t, node_dists);
+    DECLARE_HYPERCALL_BUFFER(xen_sysctl_meminfo_t, meminfo);
+    DECLARE_HYPERCALL_BUFFER(uint8_t, distance);
     libxl_numainfo *ret = NULL;
     int i, j, max_nodes;
 
@@ -5153,21 +5142,16 @@ libxl_numainfo *libxl_get_numainfo(libxl_ctx *ctx, int 
*nr)
         goto out;
     }
 
-    memsize = xc_hypercall_buffer_alloc
-        (ctx->xch, memsize, sizeof(*memsize) * max_nodes);
-    memfree = xc_hypercall_buffer_alloc
-        (ctx->xch, memfree, sizeof(*memfree) * max_nodes);
-    node_dists = xc_hypercall_buffer_alloc
-        (ctx->xch, node_dists, sizeof(*node_dists) * max_nodes * max_nodes);
-    if ((memsize == NULL) || (memfree == NULL) || (node_dists == NULL)) {
+    meminfo = xc_hypercall_buffer_alloc(ctx->xch, meminfo, sizeof(*meminfo) * 
max_nodes);
+    distance = xc_hypercall_buffer_alloc(ctx->xch, distance, sizeof(*distance) 
* max_nodes * max_nodes);
+    if ((meminfo == NULL) || (distance == NULL)) {
         LIBXL__LOG_ERRNOVAL(ctx, XTL_ERROR, ENOMEM,
                             "Unable to allocate hypercall arguments");
         goto fail;
     }
 
-    set_xen_guest_handle(ninfo.node_to_memsize, memsize);
-    set_xen_guest_handle(ninfo.node_to_memfree, memfree);
-    set_xen_guest_handle(ninfo.node_to_node_distance, node_dists);
+    set_xen_guest_handle(ninfo.meminfo, meminfo);
+    set_xen_guest_handle(ninfo.distance, distance);
     ninfo.max_node_index = max_nodes - 1;
     if (xc_numainfo(ctx->xch, &ninfo) != 0) {
         LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "getting numainfo");
@@ -5181,23 +5165,22 @@ libxl_numainfo *libxl_get_numainfo(libxl_ctx *ctx, int 
*nr)
 
     ret = libxl__zalloc(NOGC, sizeof(libxl_numainfo) * max_nodes);
     for (i = 0; i < max_nodes; i++)
-        ret[i].dists = libxl__calloc(NOGC, max_nodes, sizeof(*node_dists));
+        ret[i].dists = libxl__calloc(NOGC, max_nodes, sizeof(*distance));
 
     for (i = 0; i < max_nodes; i++) {
-#define V(mem, i) (mem[i] == INVALID_NUMAINFO_ID) ? \
-    LIBXL_NUMAINFO_INVALID_ENTRY : mem[i]
+#define V(mem, i) (meminfo[i].mem == INVALID_MEM_SZ) ? \
+    LIBXL_NUMAINFO_INVALID_ENTRY : meminfo[i].mem
         ret[i].size = V(memsize, i);
         ret[i].free = V(memfree, i);
         ret[i].num_dists = max_nodes;
         for (j = 0; j < ret[i].num_dists; j++)
-            ret[i].dists[j] = V(node_dists, i * max_nodes + j);
+            ret[i].dists[j] = distance[i*max_nodes + j];
 #undef V
     }
 
  fail:
-    xc_hypercall_buffer_free(ctx->xch, memsize);
-    xc_hypercall_buffer_free(ctx->xch, memfree);
-    xc_hypercall_buffer_free(ctx->xch, node_dists);
+    xc_hypercall_buffer_free(ctx->xch, meminfo);
+    xc_hypercall_buffer_free(ctx->xch, distance);
 
  out:
     GC_FREE;
diff --git a/tools/misc/xenpm.c b/tools/misc/xenpm.c
index e43924c..f7fe620 100644
--- a/tools/misc/xenpm.c
+++ b/tools/misc/xenpm.c
@@ -355,16 +355,13 @@ static void signal_int_handler(int signo)
     int i, j, k;
     struct timeval tv;
     int cx_cap = 0, px_cap = 0;
-    DECLARE_HYPERCALL_BUFFER(uint32_t, cpu_to_core);
-    DECLARE_HYPERCALL_BUFFER(uint32_t, cpu_to_socket);
-    DECLARE_HYPERCALL_BUFFER(uint32_t, cpu_to_node);
-    xc_topologyinfo_t info = { 0 };
+    DECLARE_HYPERCALL_BUFFER(xen_sysctl_cputopo_t, cputopo);
+    xc_cputopoinfo_t info = { 0 };
 
-    cpu_to_core = xc_hypercall_buffer_alloc(xc_handle, cpu_to_core, 
sizeof(*cpu_to_core) * MAX_NR_CPU);
-    cpu_to_socket = xc_hypercall_buffer_alloc(xc_handle, cpu_to_socket, 
sizeof(*cpu_to_socket) * MAX_NR_CPU);
-    cpu_to_node = xc_hypercall_buffer_alloc(xc_handle, cpu_to_node, 
sizeof(*cpu_to_node) * MAX_NR_CPU);
+    cputopo = xc_hypercall_buffer_alloc(xc_handle, cputopo,
+                                        sizeof(*cputopo) * MAX_NR_CPU);
 
-    if ( cpu_to_core == NULL || cpu_to_socket == NULL || cpu_to_node == NULL )
+    if ( cputopo == NULL )
     {
        fprintf(stderr, "failed to allocate hypercall buffers\n");
        goto out;
@@ -448,12 +445,10 @@ static void signal_int_handler(int signo)
             printf("  Avg freq\t%d\tKHz\n", avgfreq[i]);
     }
 
-    set_xen_guest_handle(info.cpu_to_core, cpu_to_core);
-    set_xen_guest_handle(info.cpu_to_socket, cpu_to_socket);
-    set_xen_guest_handle(info.cpu_to_node, cpu_to_node);
+    set_xen_guest_handle(info.cputopo, cputopo);
     info.max_cpu_index = MAX_NR_CPU - 1;
 
-    if ( cx_cap && !xc_topologyinfo(xc_handle, &info) )
+    if ( cx_cap && !xc_cputopoinfo(xc_handle, &info) )
     {
         uint32_t socket_ids[MAX_NR_CPU];
         uint32_t core_ids[MAX_NR_CPU];
@@ -465,8 +460,8 @@ static void signal_int_handler(int signo)
         /* check validity */
         for ( i = 0; i <= info.max_cpu_index; i++ )
         {
-            if ( cpu_to_core[i] == INVALID_TOPOLOGY_ID ||
-                 cpu_to_socket[i] == INVALID_TOPOLOGY_ID )
+            if ( cputopo[i].core == INVALID_CORE_ID ||
+                 cputopo[i].socket == INVALID_SOCKET_ID )
                 break;
         }
         if ( i > info.max_cpu_index )
@@ -475,20 +470,20 @@ static void signal_int_handler(int signo)
             for ( i = 0; i <= info.max_cpu_index; i++ )
             {
                 for ( j = 0; j < socket_nr; j++ )
-                    if ( cpu_to_socket[i] == socket_ids[j] )
+                    if ( cputopo[i].socket == socket_ids[j] )
                         break;
                 if ( j == socket_nr )
                 {
-                    socket_ids[j] = cpu_to_socket[i];
+                    socket_ids[j] = cputopo[i].socket;
                     socket_nr++;
                 }
 
                 for ( j = 0; j < core_nr; j++ )
-                    if ( cpu_to_core[i] == core_ids[j] )
+                    if ( cputopo[i].core == core_ids[j] )
                         break;
                 if ( j == core_nr )
                 {
-                    core_ids[j] = cpu_to_core[i];
+                    core_ids[j] = cputopo[i].core;
                     core_nr++;
                 }
             }
@@ -501,7 +496,7 @@ static void signal_int_handler(int signo)
 
                 for ( j = 0; j <= info.max_cpu_index; j++ )
                 {
-                    if ( cpu_to_socket[j] == socket_ids[i] )
+                    if ( cputopo[j].socket == socket_ids[i] )
                         break;
                 }
                 printf("\nSocket %d\n", socket_ids[i]);
@@ -520,8 +515,8 @@ static void signal_int_handler(int signo)
                 {
                     for ( j = 0; j <= info.max_cpu_index; j++ )
                     {
-                        if ( cpu_to_socket[j] == socket_ids[i] &&
-                             cpu_to_core[j] == core_ids[k] )
+                        if ( cputopo[j].socket == socket_ids[i] &&
+                             cputopo[j].core == core_ids[k] )
                             break;
                     }
                     printf("\t Core %d CPU %d\n", core_ids[k], j);
@@ -556,9 +551,7 @@ static void signal_int_handler(int signo)
     free(sum);
     free(avgfreq);
 out:
-    xc_hypercall_buffer_free(xc_handle, cpu_to_core);
-    xc_hypercall_buffer_free(xc_handle, cpu_to_socket);
-    xc_hypercall_buffer_free(xc_handle, cpu_to_node);
+    xc_hypercall_buffer_free(xc_handle, cputopo);
     xc_interface_close(xc_handle);
     exit(0);
 }
@@ -965,28 +958,22 @@ void scaling_governor_func(int argc, char *argv[])
 
 void cpu_topology_func(int argc, char *argv[])
 {
-    DECLARE_HYPERCALL_BUFFER(uint32_t, cpu_to_core);
-    DECLARE_HYPERCALL_BUFFER(uint32_t, cpu_to_socket);
-    DECLARE_HYPERCALL_BUFFER(uint32_t, cpu_to_node);
-    xc_topologyinfo_t info = { 0 };
+    DECLARE_HYPERCALL_BUFFER(xen_sysctl_cputopo_t, cputopo);
+    xc_cputopoinfo_t info = { 0 };
     int i, rc = ENOMEM;
 
-    cpu_to_core = xc_hypercall_buffer_alloc(xc_handle, cpu_to_core, 
sizeof(*cpu_to_core) * MAX_NR_CPU);
-    cpu_to_socket = xc_hypercall_buffer_alloc(xc_handle, cpu_to_socket, 
sizeof(*cpu_to_socket) * MAX_NR_CPU);
-    cpu_to_node = xc_hypercall_buffer_alloc(xc_handle, cpu_to_node, 
sizeof(*cpu_to_node) * MAX_NR_CPU);
-
-    if ( cpu_to_core == NULL || cpu_to_socket == NULL || cpu_to_node == NULL )
+    cputopo = xc_hypercall_buffer_alloc(xc_handle, cputopo,
+                                        sizeof(*cputopo) * MAX_NR_CPU);
+    if ( cputopo == NULL )
     {
        fprintf(stderr, "failed to allocate hypercall buffers\n");
        goto out;
     }
 
-    set_xen_guest_handle(info.cpu_to_core, cpu_to_core);
-    set_xen_guest_handle(info.cpu_to_socket, cpu_to_socket);
-    set_xen_guest_handle(info.cpu_to_node, cpu_to_node);
+    set_xen_guest_handle(info.cputopo, cputopo);
     info.max_cpu_index = MAX_NR_CPU-1;
 
-    if ( xc_topologyinfo(xc_handle, &info) )
+    if ( xc_cputopoinfo(xc_handle, &info) )
     {
         rc = errno;
         fprintf(stderr, "Cannot get Xen CPU topology (%d - %s)\n",
@@ -1000,16 +987,14 @@ void cpu_topology_func(int argc, char *argv[])
     printf("CPU\tcore\tsocket\tnode\n");
     for ( i = 0; i <= info.max_cpu_index; i++ )
     {
-        if ( cpu_to_core[i] == INVALID_TOPOLOGY_ID )
+        if ( cputopo[i].core == INVALID_CORE_ID )
             continue;
         printf("CPU%d\t %d\t %d\t %d\n",
-               i, cpu_to_core[i], cpu_to_socket[i], cpu_to_node[i]);
+               i, cputopo[i].core, cputopo[i].socket, cputopo[i].node);
     }
     rc = 0;
 out:
-    xc_hypercall_buffer_free(xc_handle, cpu_to_core);
-    xc_hypercall_buffer_free(xc_handle, cpu_to_socket);
-    xc_hypercall_buffer_free(xc_handle, cpu_to_node);
+    xc_hypercall_buffer_free(xc_handle, cputopo);
     if ( rc )
         exit(rc);
 }
diff --git a/tools/python/xen/lowlevel/xc/xc.c 
b/tools/python/xen/lowlevel/xc/xc.c
index 2aa0dc7..4275968 100644
--- a/tools/python/xen/lowlevel/xc/xc.c
+++ b/tools/python/xen/lowlevel/xc/xc.c
@@ -1221,30 +1221,20 @@ static PyObject *pyxc_getcpuinfo(XcObject *self, 
PyObject *args, PyObject *kwds)
 static PyObject *pyxc_topologyinfo(XcObject *self)
 {
 #define MAX_CPU_INDEX 255
-    xc_topologyinfo_t tinfo = { 0 };
+    xc_cputopoinfo_t tinfo = { 0 };
     int i, max_cpu_index;
     PyObject *ret_obj = NULL;
     PyObject *cpu_to_core_obj, *cpu_to_socket_obj, *cpu_to_node_obj;
-    DECLARE_HYPERCALL_BUFFER(xc_cpu_to_core_t, coremap);
-    DECLARE_HYPERCALL_BUFFER(xc_cpu_to_socket_t, socketmap);
-    DECLARE_HYPERCALL_BUFFER(xc_cpu_to_node_t, nodemap);
 
-    coremap = xc_hypercall_buffer_alloc(self->xc_handle, coremap, 
sizeof(*coremap) * (MAX_CPU_INDEX+1));
-    if ( coremap == NULL )
-        goto out;
-    socketmap = xc_hypercall_buffer_alloc(self->xc_handle, socketmap, 
sizeof(*socketmap) * (MAX_CPU_INDEX+1));
-    if ( socketmap == NULL  )
-        goto out;
-    nodemap = xc_hypercall_buffer_alloc(self->xc_handle, nodemap, 
sizeof(*nodemap) * (MAX_CPU_INDEX+1));
-    if ( nodemap == NULL )
-        goto out;
+    DECLARE_HYPERCALL_BUFFER(xen_sysctl_cputopo_t, cputopo);
 
-    set_xen_guest_handle(tinfo.cpu_to_core, coremap);
-    set_xen_guest_handle(tinfo.cpu_to_socket, socketmap);
-    set_xen_guest_handle(tinfo.cpu_to_node, nodemap);
+    cputopo = xc_hypercall_buffer_alloc(self->xc_handle, cputopo, 
sizeof(*cputopo) * (MAX_CPU_INDEX+1));
+    if ( cputopo == NULL )
+       goto out;
+    set_xen_guest_handle(tinfo.cputopo, cputopo);
     tinfo.max_cpu_index = MAX_CPU_INDEX;
 
-    if ( xc_topologyinfo(self->xc_handle, &tinfo) != 0 )
+    if ( xc_cputopoinfo(self->xc_handle, &tinfo) != 0 )
         goto out;
 
     max_cpu_index = tinfo.max_cpu_index;
@@ -1257,35 +1247,35 @@ static PyObject *pyxc_topologyinfo(XcObject *self)
     cpu_to_node_obj = PyList_New(0);
     for ( i = 0; i <= max_cpu_index; i++ )
     {
-        if ( coremap[i] == INVALID_TOPOLOGY_ID )
+        if ( cputopo[i].core == INVALID_CORE_ID )
         {
             PyList_Append(cpu_to_core_obj, Py_None);
         }
         else
         {
-            PyObject *pyint = PyInt_FromLong(coremap[i]);
+            PyObject *pyint = PyInt_FromLong(cputopo[i].core);
             PyList_Append(cpu_to_core_obj, pyint);
             Py_DECREF(pyint);
         }
 
-        if ( socketmap[i] == INVALID_TOPOLOGY_ID )
+        if ( cputopo[i].socket == INVALID_SOCKET_ID )
         {
             PyList_Append(cpu_to_socket_obj, Py_None);
         }
         else
         {
-            PyObject *pyint = PyInt_FromLong(socketmap[i]);
+            PyObject *pyint = PyInt_FromLong(cputopo[i].socket);
             PyList_Append(cpu_to_socket_obj, pyint);
             Py_DECREF(pyint);
         }
 
-        if ( nodemap[i] == INVALID_TOPOLOGY_ID )
+        if ( cputopo[i].node == INVALID_NODE_ID )
         {
             PyList_Append(cpu_to_node_obj, Py_None);
         }
         else
         {
-            PyObject *pyint = PyInt_FromLong(nodemap[i]);
+            PyObject *pyint = PyInt_FromLong(cputopo[i].node);
             PyList_Append(cpu_to_node_obj, pyint);
             Py_DECREF(pyint);
         }
@@ -1303,9 +1293,7 @@ static PyObject *pyxc_topologyinfo(XcObject *self)
     Py_DECREF(cpu_to_node_obj);
 
 out:
-    xc_hypercall_buffer_free(self->xc_handle, coremap);
-    xc_hypercall_buffer_free(self->xc_handle, socketmap);
-    xc_hypercall_buffer_free(self->xc_handle, nodemap);
+    xc_hypercall_buffer_free(self->xc_handle, cputopo);
     return ret_obj ? ret_obj : pyxc_error_to_exception(self->xc_handle);
 #undef MAX_CPU_INDEX
 }
@@ -1314,28 +1302,23 @@ static PyObject *pyxc_numainfo(XcObject *self)
 {
 #define MAX_NODE_INDEX 31
     xc_numainfo_t ninfo = { 0 };
-    int i, j, max_node_index;
+    int i, j, max_node_index, invalid_node;
     uint64_t free_heap;
     PyObject *ret_obj = NULL, *node_to_node_dist_list_obj;
     PyObject *node_to_memsize_obj, *node_to_memfree_obj;
     PyObject *node_to_dma32_mem_obj, *node_to_node_dist_obj;
-    DECLARE_HYPERCALL_BUFFER(xc_node_to_memsize_t, node_memsize);
-    DECLARE_HYPERCALL_BUFFER(xc_node_to_memfree_t, node_memfree);
-    DECLARE_HYPERCALL_BUFFER(xc_node_to_node_dist_t, nodes_dist);
+    DECLARE_HYPERCALL_BUFFER(xen_sysctl_meminfo_t, meminfo);
+    DECLARE_HYPERCALL_BUFFER(uint8_t, distance);
 
-    node_memsize = xc_hypercall_buffer_alloc(self->xc_handle, node_memsize, 
sizeof(*node_memsize)*(MAX_NODE_INDEX+1));
-    if ( node_memsize == NULL )
-        goto out;
-    node_memfree = xc_hypercall_buffer_alloc(self->xc_handle, node_memfree, 
sizeof(*node_memfree)*(MAX_NODE_INDEX+1));
-    if ( node_memfree == NULL )
+    meminfo = xc_hypercall_buffer_alloc(self->xc_handle, meminfo, 
sizeof(*meminfo) * (MAX_NODE_INDEX+1));
+    if ( meminfo == NULL )
         goto out;
-    nodes_dist = xc_hypercall_buffer_alloc(self->xc_handle, nodes_dist, 
sizeof(*nodes_dist)*(MAX_NODE_INDEX+1)*(MAX_NODE_INDEX+1));
-    if ( nodes_dist == NULL )
+    distance = xc_hypercall_buffer_alloc(self->xc_handle, distance, 
sizeof(*distance)*(MAX_NODE_INDEX+1)*(MAX_NODE_INDEX+1));
+    if ( distance == NULL )
         goto out;
 
-    set_xen_guest_handle(ninfo.node_to_memsize, node_memsize);
-    set_xen_guest_handle(ninfo.node_to_memfree, node_memfree);
-    set_xen_guest_handle(ninfo.node_to_node_distance, nodes_dist);
+    set_xen_guest_handle(ninfo.meminfo, meminfo);
+    set_xen_guest_handle(ninfo.distance, distance);
     ninfo.max_node_index = MAX_NODE_INDEX;
 
     if ( xc_numainfo(self->xc_handle, &ninfo) != 0 )
@@ -1355,12 +1338,12 @@ static PyObject *pyxc_numainfo(XcObject *self)
         PyObject *pyint;
 
         /* Total Memory */
-        pyint = PyInt_FromLong(node_memsize[i] >> 20); /* MB */
+        pyint = PyInt_FromLong(meminfo[i].memsize >> 20); /* MB */
         PyList_Append(node_to_memsize_obj, pyint);
         Py_DECREF(pyint);
 
         /* Free Memory */
-        pyint = PyInt_FromLong(node_memfree[i] >> 20); /* MB */
+        pyint = PyInt_FromLong(meminfo[i].memfree >> 20); /* MB */
         PyList_Append(node_to_memfree_obj, pyint);
         Py_DECREF(pyint);
 
@@ -1372,10 +1355,11 @@ static PyObject *pyxc_numainfo(XcObject *self)
 
         /* Node to Node Distance */
         node_to_node_dist_obj = PyList_New(0);
+        invalid_node = (meminfo[i].memsize == INVALID_MEM_SZ);
         for ( j = 0; j <= max_node_index; j++ )
         {
-            uint32_t dist = nodes_dist[i*(max_node_index+1) + j];
-            if ( dist == INVALID_TOPOLOGY_ID )
+            uint32_t dist = distance[i * (max_node_index + 1) + j];
+            if ( invalid_node )
             {
                 PyList_Append(node_to_node_dist_obj, Py_None);
             }
@@ -1406,9 +1390,8 @@ static PyObject *pyxc_numainfo(XcObject *self)
     Py_DECREF(node_to_node_dist_list_obj);
 
 out:
-    xc_hypercall_buffer_free(self->xc_handle, node_memsize);
-    xc_hypercall_buffer_free(self->xc_handle, node_memfree);
-    xc_hypercall_buffer_free(self->xc_handle, nodes_dist);
+    xc_hypercall_buffer_free(self->xc_handle, meminfo);
+    xc_hypercall_buffer_free(self->xc_handle, distance);
     return ret_obj ? ret_obj : pyxc_error_to_exception(self->xc_handle);
 #undef MAX_NODE_INDEX
 }
diff --git a/xen/common/sysctl.c b/xen/common/sysctl.c
index 0cb6ee1..30c5806 100644
--- a/xen/common/sysctl.c
+++ b/xen/common/sysctl.c
@@ -274,85 +274,114 @@ long do_sysctl(XEN_GUEST_HANDLE_PARAM(xen_sysctl_t) 
u_sysctl)
 
     case XEN_SYSCTL_numainfo:
     {
-        uint32_t i, j, max_node_index, last_online_node;
+        uint32_t i, j, num_nodes, last_online_node;
         xen_sysctl_numainfo_t *ni = &op->u.numainfo;
+        xen_sysctl_meminfo_t *meminfo;
+        uint8_t *distance;
 
         last_online_node = last_node(node_online_map);
-        max_node_index = min_t(uint32_t, ni->max_node_index, last_online_node);
-        ni->max_node_index = last_online_node;
+        num_nodes = min_t(uint32_t, ni->max_node_index, last_online_node) + 1;
 
-        for ( i = 0; i <= max_node_index; i++ )
+        if ( guest_handle_is_null(ni->meminfo) ||
+             guest_handle_is_null(ni->distance) )
         {
-            if ( !guest_handle_is_null(ni->node_to_memsize) )
-            {
-                uint64_t memsize = node_online(i) ?
-                                   node_spanned_pages(i) << PAGE_SHIFT : 0ul;
-                if ( copy_to_guest_offset(ni->node_to_memsize, i, &memsize, 1) 
)
-                    break;
-            }
-            if ( !guest_handle_is_null(ni->node_to_memfree) )
-            {
-                uint64_t memfree = node_online(i) ?
-                                   avail_node_heap_pages(i) << PAGE_SHIFT : 
0ul;
-                if ( copy_to_guest_offset(ni->node_to_memfree, i, &memfree, 1) 
)
-                    break;
-            }
+            ret = -EINVAL;
+            break;
+        }
 
-            if ( !guest_handle_is_null(ni->node_to_node_distance) )
+        meminfo = xmalloc_array(xen_sysctl_meminfo_t, num_nodes);
+        distance = xmalloc_array(uint8_t, num_nodes * num_nodes);
+        if ( !meminfo || !distance )
+        {
+            xfree(meminfo);
+            xfree(distance);
+            ret = -ENOMEM;
+            break;
+        }
+
+        for ( i = 0; i < num_nodes; i++ )
+        {
+            if ( node_online(i) )
             {
-                for ( j = 0; j <= max_node_index; j++)
-                {
-                    uint32_t distance = ~0u;
-                    if ( node_online(i) && node_online(j) )
-                        distance = __node_distance(i, j);
-                    if ( copy_to_guest_offset(
-                        ni->node_to_node_distance,
-                        i*(max_node_index+1) + j, &distance, 1) )
-                        break;
-                }
-                if ( j <= max_node_index )
-                    break;
+                meminfo[i].memsize = node_spanned_pages(i) << PAGE_SHIFT;
+                meminfo[i].memfree = avail_node_heap_pages(i) << PAGE_SHIFT;
+                for ( j = 0; j < num_nodes; j++ )
+                    distance[i * num_nodes + j] = __node_distance(i, j);
             }
+            else
+                meminfo[i].memsize = meminfo[i].memfree = INVALID_MEM_SZ;
+        }
+
+        if ( ni->max_node_index != last_online_node )
+        {
+            ni->max_node_index = last_online_node;
+            if ( __copy_field_to_guest(u_sysctl, op,
+                                       u.numainfo.max_node_index) )
+                ret = -EFAULT;
         }
 
-        ret = ((i <= max_node_index) || copy_to_guest(u_sysctl, op, 1))
-            ? -EFAULT : 0;
+        if ( !ret &&
+             (copy_to_guest_offset(ni->meminfo, 0, meminfo, num_nodes) ||
+              copy_to_guest_offset(ni->distance, 0, distance,
+                                   num_nodes * num_nodes)) )
+            ret = -EFAULT;
+
+        xfree(meminfo);
+        xfree(distance);
     }
     break;
 
-    case XEN_SYSCTL_topologyinfo:
+    case XEN_SYSCTL_cputopoinfo:
     {
-        uint32_t i, max_cpu_index, last_online_cpu;
-        xen_sysctl_topologyinfo_t *ti = &op->u.topologyinfo;
+        uint32_t i, num_cpus, last_present_cpu;
+        xen_sysctl_cputopoinfo_t *ti = &op->u.cputopoinfo;
+        struct xen_sysctl_cputopo *cputopo;
 
-        last_online_cpu = cpumask_last(&cpu_online_map);
-        max_cpu_index = min_t(uint32_t, ti->max_cpu_index, last_online_cpu);
-        ti->max_cpu_index = last_online_cpu;
+        if ( guest_handle_is_null(ti->cputopo) )
+        {
+            ret = -EINVAL;
+            break;
+        }
 
-        for ( i = 0; i <= max_cpu_index; i++ )
+        last_present_cpu = cpumask_last(&cpu_present_map);
+        num_cpus = min_t(uint32_t, ti->max_cpu_index, last_present_cpu) + 1;
+
+        cputopo = xmalloc_array(xen_sysctl_cputopo_t, num_cpus);
+        if ( !cputopo )
         {
-            if ( !guest_handle_is_null(ti->cpu_to_core) )
-            {
-                uint32_t core = cpu_online(i) ? cpu_to_core(i) : ~0u;
-                if ( copy_to_guest_offset(ti->cpu_to_core, i, &core, 1) )
-                    break;
-            }
-            if ( !guest_handle_is_null(ti->cpu_to_socket) )
+            ret = -ENOMEM;
+            break;
+        }
+
+        for ( i = 0; i < num_cpus; i++ )
+        {
+            if ( cpu_present(i) )
             {
-                uint32_t socket = cpu_online(i) ? cpu_to_socket(i) : ~0u;
-                if ( copy_to_guest_offset(ti->cpu_to_socket, i, &socket, 1) )
-                    break;
+                cputopo[i].core = cpu_to_core(i);
+                cputopo[i].socket = cpu_to_socket(i);
+                cputopo[i].node = cpu_to_node(i);
             }
-            if ( !guest_handle_is_null(ti->cpu_to_node) )
+            else
             {
-                uint32_t node = cpu_online(i) ? cpu_to_node(i) : ~0u;
-                if ( copy_to_guest_offset(ti->cpu_to_node, i, &node, 1) )
-                    break;
+                cputopo[i].core = INVALID_CORE_ID;
+                cputopo[i].socket = INVALID_SOCKET_ID;
+                cputopo[i].node = INVALID_NODE_ID;
             }
         }
 
-        ret = ((i <= max_cpu_index) || copy_to_guest(u_sysctl, op, 1))
-            ? -EFAULT : 0;
+        if ( ti->max_cpu_index != last_present_cpu )
+        {
+            ti->max_cpu_index = last_present_cpu;
+            if ( __copy_field_to_guest(u_sysctl, op,
+                                       u.cputopoinfo.max_cpu_index) )
+                ret = -EFAULT;
+        }
+
+
+        if ( !ret && copy_to_guest_offset(ti->cputopo, 0, cputopo,  num_cpus) )
+            ret = -EFAULT;
+
+        xfree(cputopo);
     }
     break;
 
diff --git a/xen/include/public/sysctl.h b/xen/include/public/sysctl.h
index b3713b3..7c78f81 100644
--- a/xen/include/public/sysctl.h
+++ b/xen/include/public/sysctl.h
@@ -34,7 +34,7 @@
 #include "xen.h"
 #include "domctl.h"
 
-#define XEN_SYSCTL_INTERFACE_VERSION 0x0000000B
+#define XEN_SYSCTL_INTERFACE_VERSION 0x0000000C
 
 /*
  * Read console content from Xen buffer ring.
@@ -462,52 +462,67 @@ struct xen_sysctl_lockprof_op {
 typedef struct xen_sysctl_lockprof_op xen_sysctl_lockprof_op_t;
 DEFINE_XEN_GUEST_HANDLE(xen_sysctl_lockprof_op_t);
 
-/* XEN_SYSCTL_topologyinfo */
-#define INVALID_TOPOLOGY_ID  (~0U)
-struct xen_sysctl_topologyinfo {
+/* XEN_SYSCTL_cputopoinfo */
+#define INVALID_CORE_ID  (~0U)
+#define INVALID_SOCKET_ID  (~0U)
+#define INVALID_NODE_ID  ((uint8_t)~0)
+
+struct xen_sysctl_cputopo {
+    uint32_t core;
+    uint32_t socket;
+    uint8_t node;
+};
+typedef struct xen_sysctl_cputopo xen_sysctl_cputopo_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_cputopo_t);
+
+struct xen_sysctl_cputopoinfo {
     /*
-     * IN: maximum addressable entry in the caller-provided arrays.
+     * IN: maximum addressable entry in the caller-provided cputopo arary
      * OUT: largest cpu identifier in the system.
-     * If OUT is greater than IN then the arrays are truncated!
-     * If OUT is leass than IN then the array tails are not written by sysctl.
      */
     uint32_t max_cpu_index;
 
     /*
-     * If not NULL, these arrays are filled with core/socket/node identifier
-     * for each cpu.
-     * If a cpu has no core/socket/node information (e.g., cpu not present) 
-     * then the sentinel value ~0u is written to each array.
-     * The number of array elements written by the sysctl is:
+     * OUT: core/socket/node identifier for each cpu.
+     * If information for a particular entry is not avalable it is set to
+     * INVALID_CORE/SOCKET/NODE_ID.
+     * The number of array elements for CPU topology written by the sysctl is:
      *   min(@max_cpu_index_IN,@max_cpu_index_OUT)+1
      */
-    XEN_GUEST_HANDLE_64(uint32) cpu_to_core;
-    XEN_GUEST_HANDLE_64(uint32) cpu_to_socket;
-    XEN_GUEST_HANDLE_64(uint32) cpu_to_node;
+    XEN_GUEST_HANDLE_64(xen_sysctl_cputopo_t) cputopo;
 };
-typedef struct xen_sysctl_topologyinfo xen_sysctl_topologyinfo_t;
-DEFINE_XEN_GUEST_HANDLE(xen_sysctl_topologyinfo_t);
+typedef struct xen_sysctl_cputopoinfo xen_sysctl_cputopoinfo_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_cputopoinfo_t);
 
 /* XEN_SYSCTL_numainfo */
-#define INVALID_NUMAINFO_ID (~0U)
+#define INVALID_MEM_SZ (~0U)
+
+struct xen_sysctl_meminfo {
+    uint64_t memsize;
+    uint64_t memfree;
+};
+typedef struct xen_sysctl_meminfo xen_sysctl_meminfo_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_meminfo_t);
+
 struct xen_sysctl_numainfo {
     /*
      * IN: maximum addressable entry in the caller-provided arrays.
      * OUT: largest node identifier in the system.
-     * If OUT is greater than IN then the arrays are truncated!
      */
     uint32_t max_node_index;
 
-    /* NB. Entries are 0 if node is not present. */
-    XEN_GUEST_HANDLE_64(uint64) node_to_memsize;
-    XEN_GUEST_HANDLE_64(uint64) node_to_memfree;
+    /*
+     * OUT: max_node_index-sized array. Entries are INVALID_MEM_SZ
+     * if node is not present.
+     */
+    XEN_GUEST_HANDLE_64(xen_sysctl_meminfo_t) meminfo;
 
     /*
-     * Array, of size (max_node_index+1)^2, listing memory access distances
-     * between nodes. If an entry has no node distance information (e.g., node 
-     * not present) then the value ~0u is written.
-     * 
-     * Note that the array rows must be indexed by multiplying by the minimum 
+     * OUT: Array, of size (max_node_index+1)^2, listing memory access
+     * distances between nodes. If an entry has no node distance information
+     * (e.g., node not present) then the value ~0u is written.
+     *
+     * Note that the array rows must be indexed by multiplying by the minimum
      * of the caller-provided max_node_index and the returned value of
      * max_node_index. That is, if the largest node index in the system is
      * smaller than the caller can handle, a smaller 2-d array is constructed
@@ -516,7 +531,7 @@ struct xen_sysctl_numainfo {
      * in the system is larger than the caller can handle, then a 2-d array of
      * the maximum size handleable by the caller is constructed.
      */
-    XEN_GUEST_HANDLE_64(uint32) node_to_node_distance;
+    XEN_GUEST_HANDLE_64(uint8) distance;
 };
 typedef struct xen_sysctl_numainfo xen_sysctl_numainfo_t;
 DEFINE_XEN_GUEST_HANDLE(xen_sysctl_numainfo_t);
@@ -671,7 +686,7 @@ struct xen_sysctl {
 #define XEN_SYSCTL_pm_op                         12
 #define XEN_SYSCTL_page_offline_op               14
 #define XEN_SYSCTL_lockprof_op                   15
-#define XEN_SYSCTL_topologyinfo                  16 
+#define XEN_SYSCTL_cputopoinfo                   16
 #define XEN_SYSCTL_numainfo                      17
 #define XEN_SYSCTL_cpupool_op                    18
 #define XEN_SYSCTL_scheduler_op                  19
@@ -682,7 +697,7 @@ struct xen_sysctl {
         struct xen_sysctl_readconsole       readconsole;
         struct xen_sysctl_tbuf_op           tbuf_op;
         struct xen_sysctl_physinfo          physinfo;
-        struct xen_sysctl_topologyinfo      topologyinfo;
+        struct xen_sysctl_cputopoinfo       cputopoinfo;
         struct xen_sysctl_numainfo          numainfo;
         struct xen_sysctl_sched_id          sched_id;
         struct xen_sysctl_perfc_op          perfc_op;
-- 
1.7.1


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

Reply via email to