Hello Terry,
Here's a patch that should help. It cleans up the code and makes all arrays
dynamic. I artificially set the initial array sizes to 4 in order to exercise
the resizing code on our 24-way T1 machine. I will raise them to 256 or so in the
final commit. Please let me know if it helps on your 1440-way machine.
Brice
diff --git a/include/hwloc/rename.h b/include/hwloc/rename.h
index a938811..129926c 100644
--- a/include/hwloc/rename.h
+++ b/include/hwloc/rename.h
@@ -489,7 +489,6 @@ extern "C" {
#define hwloc_bitmap_printf_value HWLOC_NAME(bitmap_printf_value)
#define hwloc_alloc_setup_object HWLOC_NAME(alloc_setup_object)
#define hwloc_free_unlinked_object HWLOC_NAME(free_unlinked_object)
-#define hwloc_setup_level HWLOC_NAME(setup_level)
#define hwloc_alloc_heap HWLOC_NAME(alloc_heap)
#define hwloc_alloc_mmap HWLOC_NAME(alloc_mmap)
diff --git a/include/private/private.h b/include/private/private.h
index 648058e..6767960 100644
--- a/include/private/private.h
+++ b/include/private/private.h
@@ -302,39 +302,6 @@ hwloc_alloc_setup_object(hwloc_obj_type_t type, signed idx)
}
extern void hwloc_free_unlinked_object(hwloc_obj_t obj);
-
-#define hwloc_object_cpuset_from_array(l, _value, _array, _max) do { \
- struct hwloc_obj *__l = (l); \
- unsigned int *__a = (_array); \
- int k; \
- __l->cpuset = hwloc_bitmap_alloc(); \
- for(k=0; k<_max; k++) \
- if (__a[k] == _value) \
- hwloc_bitmap_set(__l->cpuset, k); \
- } while (0)
-
-/* Configures an array of NUM objects of type TYPE with physical IDs OSPHYSIDS
- * and for which processors have ID PROC_PHYSIDS, and add them to the topology.
- * */
-static __hwloc_inline void
-hwloc_setup_level(int procid_max, unsigned num, unsigned *osphysids, unsigned *proc_physids, struct hwloc_topology *topology, hwloc_obj_type_t type)
-{
- struct hwloc_obj *obj;
- unsigned j;
-
- hwloc_debug("%d %s\n", num, hwloc_obj_type_string(type));
-
- for (j = 0; j < num; j++)
- {
- obj = hwloc_alloc_setup_object(type, osphysids[j]);
- hwloc_object_cpuset_from_array(obj, j, proc_physids, procid_max);
- hwloc_debug_2args_bitmap("%s %d has cpuset %s\n",
- hwloc_obj_type_string(type),
- j, obj->cpuset);
- hwloc_insert_object_by_cpuset(topology, obj);
- }
- hwloc_debug("%s", "\n");
-}
#endif
/* This can be used for the alloc field to get allocated data that can be freed by free() */
diff --git a/src/topology-solaris.c b/src/topology-solaris.c
index 9758955..c49bbf5 100644
--- a/src/topology-solaris.c
+++ b/src/topology-solaris.c
@@ -438,7 +438,6 @@ hwloc_look_lgrp(struct hwloc_topology *topology)
#ifdef HAVE_LIBKSTAT
#include <kstat.h>
-#define HWLOC_NBMAXCPUS 1024 /* FIXME: drop */
static int
hwloc_look_kstat(struct hwloc_topology *topology)
{
@@ -451,38 +450,48 @@ hwloc_look_kstat(struct hwloc_topology *topology)
kstat_named_t *stat;
unsigned look_cores = 1, look_chips = 1;
- unsigned numsockets = 0;
- unsigned proc_physids[HWLOC_NBMAXCPUS];
- unsigned proc_osphysids[HWLOC_NBMAXCPUS];
- unsigned osphysids[HWLOC_NBMAXCPUS];
-
- unsigned numcores = 0;
- unsigned proc_coreids[HWLOC_NBMAXCPUS];
- unsigned oscoreids[HWLOC_NBMAXCPUS];
-
- unsigned core_osphysids[HWLOC_NBMAXCPUS];
-
- unsigned numprocs = 0;
- unsigned proc_procids[HWLOC_NBMAXCPUS];
- unsigned osprocids[HWLOC_NBMAXCPUS];
-
- unsigned physid, coreid, cpuid;
- unsigned procid_max = 0;
+ unsigned Pproc_max = 0;
+ unsigned Pproc_alloc = 4;
+ struct hwloc_solaris_Pproc {
+ unsigned Lsock, Psock, Lcore, Lproc;
+ } * Pproc = malloc(Pproc_alloc * sizeof(*Pproc));
+
+ unsigned Lproc_num = 0;
+ unsigned Lproc_alloc = 4;
+ struct hwloc_solaris_Lproc {
+ unsigned Pproc;
+ } * Lproc = malloc(Lproc_alloc * sizeof(*Lproc));
+
+ unsigned Lcore_num = 0;
+ unsigned Lcore_alloc = 4;
+ struct hwloc_solaris_Lcore {
+ unsigned Pcore, Psock;
+ } * Lcore = malloc(Lcore_alloc * sizeof(*Lcore));
+
+ unsigned Lsock_num = 0;
+ unsigned Lsock_alloc = 4;
+ struct hwloc_solaris_Lsock {
+ unsigned Psock;
+ } * Lsock = malloc(Lsock_alloc * sizeof(*Lsock));
+
+ unsigned sockid, coreid, cpuid;
unsigned i;
- for (cpuid = 0; cpuid < HWLOC_NBMAXCPUS; cpuid++)
- {
- proc_procids[cpuid] = -1;
- proc_physids[cpuid] = -1;
- proc_osphysids[cpuid] = -1;
- proc_coreids[cpuid] = -1;
- }
+ for (i = 0; i < Pproc_alloc; i++) {
+ Pproc[i].Lproc = -1;
+ Pproc[i].Lsock = -1;
+ Pproc[i].Psock = -1;
+ Pproc[i].Lcore = -1;
+ }
- if (!kc)
- {
- hwloc_debug("kstat_open failed: %s\n", strerror(errno));
- return 0;
- }
+ if (!kc) {
+ hwloc_debug("kstat_open failed: %s\n", strerror(errno));
+ free(Pproc);
+ free(Lproc);
+ free(Lcore);
+ free(Lsock);
+ return 0;
+ }
for (ksp = kc->kc_chain; ksp; ksp = ksp->ks_next)
{
@@ -490,11 +499,6 @@ hwloc_look_kstat(struct hwloc_topology *topology)
continue;
cpuid = ksp->ks_instance;
- if (cpuid > HWLOC_NBMAXCPUS)
- {
- fprintf(stderr,"CPU id too big: %u\n", cpuid);
- continue;
- }
if (kstat_read(kc, ksp, NULL) == -1)
{
@@ -503,12 +507,28 @@ hwloc_look_kstat(struct hwloc_topology *topology)
}
hwloc_debug("cpu%u\n", cpuid);
- proc_procids[cpuid] = numprocs;
- osprocids[numprocs] = cpuid;
- numprocs++;
- if (cpuid >= procid_max)
- procid_max = cpuid + 1;
+ if (cpuid >= Pproc_alloc) {
+ Pproc_alloc *= 2;
+ Pproc = realloc(Pproc, Pproc_alloc * sizeof(*Pproc));
+ for(i = Pproc_alloc/2; i < Pproc_alloc; i++) {
+ Pproc[i].Lproc = -1;
+ Pproc[i].Lsock = -1;
+ Pproc[i].Psock = -1;
+ Pproc[i].Lcore = -1;
+ }
+ }
+ Pproc[cpuid].Lproc = Lproc_num;
+
+ if (Lproc_num >= Lproc_alloc) {
+ Lproc_alloc *= 2;
+ Lproc = realloc(Lproc, Lproc_alloc * sizeof(*Lproc));
+ }
+ Lproc[Lproc_num].Pproc = cpuid;
+ Lproc_num++;
+
+ if (cpuid >= Pproc_max)
+ Pproc_max = cpuid + 1;
stat = (kstat_named_t *) kstat_data_lookup(ksp, "state");
if (!stat)
@@ -528,7 +548,7 @@ hwloc_look_kstat(struct hwloc_topology *topology)
stat = (kstat_named_t *) kstat_data_lookup(ksp, "chip_id");
if (!stat)
{
- if (numsockets)
+ if (Lsock_num)
fprintf(stderr, "could not read socket id for CPU%u: %s\n", cpuid, strerror(errno));
else
hwloc_debug("could not read socket id for CPU%u: %s\n", cpuid, strerror(errno));
@@ -537,17 +557,17 @@ hwloc_look_kstat(struct hwloc_topology *topology)
}
switch (stat->data_type) {
case KSTAT_DATA_INT32:
- physid = stat->value.i32;
+ sockid = stat->value.i32;
break;
case KSTAT_DATA_UINT32:
- physid = stat->value.ui32;
+ sockid = stat->value.ui32;
break;
#ifdef _INT64_TYPE
case KSTAT_DATA_UINT64:
- physid = stat->value.ui64;
+ sockid = stat->value.ui64;
break;
case KSTAT_DATA_INT64:
- physid = stat->value.i64;
+ sockid = stat->value.i64;
break;
#endif
default:
@@ -555,14 +575,19 @@ hwloc_look_kstat(struct hwloc_topology *topology)
look_chips = 0;
continue;
}
- proc_osphysids[cpuid] = physid;
- for (i = 0; i < numsockets; i++)
- if (physid == osphysids[i])
+ Pproc[cpuid].Psock = sockid;
+ for (i = 0; i < Lsock_num; i++)
+ if (sockid == Lsock[i].Psock)
break;
- proc_physids[cpuid] = i;
- hwloc_debug("%u on socket %u (%u)\n", cpuid, i, physid);
- if (i == numsockets)
- osphysids[numsockets++] = physid;
+ Pproc[cpuid].Lsock = i;
+ hwloc_debug("%u on socket %u (%u)\n", cpuid, i, sockid);
+ if (i == Lsock_num) {
+ if (Lsock_num == Lsock_alloc) {
+ Lsock_alloc *= 2;
+ Lsock = realloc(Lsock, Lsock_alloc * sizeof(*Lsock));
+ }
+ Lsock[Lsock_num++].Psock = sockid;
+ }
} while(0);
if (look_cores) do {
@@ -570,7 +595,7 @@ hwloc_look_kstat(struct hwloc_topology *topology)
stat = (kstat_named_t *) kstat_data_lookup(ksp, "core_id");
if (!stat)
{
- if (numcores)
+ if (Lcore_num)
fprintf(stderr, "could not read core id for CPU%u: %s\n", cpuid, strerror(errno));
else
hwloc_debug("could not read core id for CPU%u: %s\n", cpuid, strerror(errno));
@@ -597,16 +622,19 @@ hwloc_look_kstat(struct hwloc_topology *topology)
look_cores = 0;
continue;
}
- for (i = 0; i < numcores; i++)
- if (coreid == oscoreids[i] && proc_osphysids[cpuid] == core_osphysids[i])
+ for (i = 0; i < Lcore_num; i++)
+ if (coreid == Lcore[i].Pcore && Pproc[cpuid].Psock == Lcore[i].Psock)
break;
- proc_coreids[cpuid] = i;
+ Pproc[cpuid].Lcore = i;
hwloc_debug("%u on core %u (%u)\n", cpuid, i, coreid);
- if (i == numcores)
- {
- core_osphysids[numcores] = proc_osphysids[cpuid];
- oscoreids[numcores++] = coreid;
+ if (i == Lcore_num) {
+ if (Lcore_num == Lcore_alloc) {
+ Lcore_alloc *= 2;
+ Lcore = realloc(Lcore, Lcore_alloc * sizeof(*Lcore));
}
+ Lcore[Lcore_num].Psock = Pproc[cpuid].Psock;
+ Lcore[Lcore_num++].Pcore = coreid;
+ }
} while(0);
/* Note: there is also clog_id for the Thread ID (not unique) and
@@ -616,30 +644,63 @@ hwloc_look_kstat(struct hwloc_topology *topology)
if (look_chips) {
struct hwloc_obj *obj;
- unsigned j;
- hwloc_debug("%d Sockets\n", numsockets);
- for (j = 0; j < numsockets; j++) {
- obj = hwloc_alloc_setup_object(HWLOC_OBJ_SOCKET, osphysids[j]);
+ unsigned j,k;
+ hwloc_debug("%d Sockets\n", Lsock_num);
+ for (j = 0; j < Lsock_num; j++) {
+ obj = hwloc_alloc_setup_object(HWLOC_OBJ_SOCKET, Lsock[j].Psock);
if (CPUType)
hwloc_obj_add_info(obj, "CPUType", CPUType);
if (CPUModel)
hwloc_obj_add_info(obj, "CPUModel", CPUModel);
- hwloc_object_cpuset_from_array(obj, j, proc_physids, procid_max);
+ obj->cpuset = hwloc_bitmap_alloc();
+ for(k=0; k<Pproc_max; k++)
+ if (Pproc[k].Lsock == j)
+ hwloc_bitmap_set(obj->cpuset, k);
hwloc_debug_1arg_bitmap("Socket %d has cpuset %s\n", j, obj->cpuset);
hwloc_insert_object_by_cpuset(topology, obj);
}
hwloc_debug("%s", "\n");
}
- if (look_cores)
- hwloc_setup_level(procid_max, numcores, oscoreids, proc_coreids, topology, HWLOC_OBJ_CORE);
-
- if (numprocs)
- hwloc_setup_level(procid_max, numprocs, osprocids, proc_procids, topology, HWLOC_OBJ_PU);
+ if (look_cores) {
+ struct hwloc_obj *obj;
+ unsigned j,k;
+ hwloc_debug("%d Cores\n", Lcore_num);
+ for (j = 0; j < Lcore_num; j++) {
+ obj = hwloc_alloc_setup_object(HWLOC_OBJ_CORE, Lcore[j].Pcore);
+ obj->cpuset = hwloc_bitmap_alloc();
+ for(k=0; k<Pproc_max; k++)
+ if (Pproc[k].Lcore == j)
+ hwloc_bitmap_set(obj->cpuset, k);
+ hwloc_debug_1arg_bitmap("Core %d has cpuset %s\n", j, obj->cpuset);
+ hwloc_insert_object_by_cpuset(topology, obj);
+ }
+ hwloc_debug("%s", "\n");
+ }
+ if (Lproc_num) {
+ struct hwloc_obj *obj;
+ unsigned j,k;
+ hwloc_debug("%d PUs\n", Lproc_num);
+ for (j = 0; j < Lproc_num; j++) {
+ obj = hwloc_alloc_setup_object(HWLOC_OBJ_PU, Lproc[j].Pproc);
+ obj->cpuset = hwloc_bitmap_alloc();
+ for(k=0; k<Pproc_max; k++)
+ if (Pproc[k].Lproc == j)
+ hwloc_bitmap_set(obj->cpuset, k);
+ hwloc_debug_1arg_bitmap("PU %d has cpuset %s\n", j, obj->cpuset);
+ hwloc_insert_object_by_cpuset(topology, obj);
+ }
+ hwloc_debug("%s", "\n");
+ }
kstat_close(kc);
- return numprocs > 0;
+ free(Pproc);
+ free(Lproc);
+ free(Lcore);
+ free(Lsock);
+
+ return Lproc_num > 0;
}
#endif /* LIBKSTAT */