Index: 3rdparty/cpuinfo/src/x86/openbsd/init.c
--- 3rdparty/cpuinfo/src/x86/openbsd/init.c.orig
+++ 3rdparty/cpuinfo/src/x86/openbsd/init.c
@@ -0,0 +1,986 @@
+/*
+ * BSD 3-Clause License
+ *
+ * Copyright (c) 2017, Nan Xiao
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice, this
+ *   list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright notice,
+ *   this list of conditions and the following disclaimer in the documentation
+ *   and/or other materials provided with the distribution.
+ *
+ * * Neither the name of the copyright holder nor the names of its
+ *   contributors may be used to endorse or promote products derived from
+ *   this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
#include <sys/param.h>
#include <sys/sysctl.h>

#include <err.h>
#include <errno.h>
#include <getopt.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <cpuinfo.h>
#include <cpuinfo/internal-api.h>
#include <cpuinfo/log.h>

#include <x86/api.h>
+
+#if defined(__amd64__) || defined(__i386__)
+#include <cpuid.h>
+#endif
+
+/* macro definitions */
+#define CACHE_SIZE_LEN  (8)
+
+#define ARRAY_LEN(array)    (sizeof(array) / sizeof(array[0]))
+
+#define CPUID_STANDARD_0_MASK   (0x00)
+#define CPUID_STANDARD_1_MASK   (0x01)
+#define CPUID_STANDARD_2_MASK   (0x02)
+#define CPUID_STANDARD_4_MASK   (0x04)
+#define CPUID_STANDARD_7_MASK   (0x07)
+#define CPUID_STANDARD_B_MASK   (0x0B)
+
+#define CPUID_EXTENDED_1_MASK   (0x01)
+#define CPUID_EXTENDED_5_MASK   (0x05)
+#define CPUID_EXTENDED_6_MASK   (0x06)
+#define CPUID_EXTENDED_8_MASK   (0x08)
+#define CPUID_EXTENDED_1E_MASK  (0x1E)
+
+#define CPUID_MAX_STANDARD_FUNCTION (0x17)
+#define CPUID_MAX_EXTENDED_FUNCTION (0x1E)
+
+/* struct definitions */
+typedef struct {
+	char arch[16];
+	int byte_order;
+	char model[128];
+	char vendor[32];
+	int active_cpu_num;
+	int total_cpu_num;
+	int speed;
+} gen_cpu_info;
+
+typedef struct {
+	int mib_code;
+	void *old;
+	size_t old_len;
+	char *err_msg;
+} sysctl_get_cpu_info;
+
+typedef struct {
+	int standard_mask;
+	int extended_mask;
+	int intel_use_leaf_4_get_cache;
+	char vendor[13];
+	unsigned char stepping;
+	unsigned char model;
+	unsigned short family;
+	int threads_per_core;
+	int cores_per_socket;
+	char *l1d_cache;
+	char *l1i_cache;
+	char *l2_cache;
+	char *l3_cache;
+} x86_cpu_info;
+
+/* function declarations */
+#if defined(__amd64__) || defined(__i386__)
+static int is_amd_cpu(char *vendor);
+static int is_intel_cpu(char *vendor);
+static void get_x86_cpu_info(x86_cpu_info *x86_info);
+#endif
+
+/* variables definitions */
+gen_cpu_info gen_info;
+x86_cpu_info x86_info;
+char intel_l1d_cache[CACHE_SIZE_LEN];
+char intel_l1i_cache[CACHE_SIZE_LEN];
+char intel_l2_cache[CACHE_SIZE_LEN];
+char intel_l3_cache[CACHE_SIZE_LEN];
+char amd_l1d_cache[CACHE_SIZE_LEN];
+char amd_l1i_cache[CACHE_SIZE_LEN];
+char amd_l2_cache[CACHE_SIZE_LEN];
+char amd_l3_cache[CACHE_SIZE_LEN];
+
+/* function definitions */
+#if defined(__amd64__) || defined(__i386__)
+static int
+is_amd_cpu(char *vendor)
+{
+	return (!strcmp(vendor, "AMDisbetter!") ||
+	    !strcmp(vendor, "AuthenticAMD"));
+}
+
+static int
+is_intel_cpu(char *vendor)
+{
+	return !strcmp(vendor, "GenuineIntel");
+}
+
+static int
+x86_cpu_support_standard_flag(int flag, int mask)
+{
+	return (flag & (1 << mask));
+}
+
+static void
+parse_intel_cache_value(x86_cpu_info *x86_info, unsigned char value)
+{
+	switch (value) {
+	case 0x06: {
+		x86_info->l1i_cache = "8K";
+		break;
+	}
+	case 0x08: {
+		x86_info->l1i_cache = "16K";
+		break;
+	}
+	case 0x09:
+	case 0x30: {
+		x86_info->l1i_cache = "32K";
+		break;
+	}
+	case 0x0A:
+	case 0x66: {
+		x86_info->l1d_cache = "8K";
+		break;
+	}
+	case 0x0C:
+	case 0x0D:
+	case 0x60:
+	case 0x67: {
+		x86_info->l1d_cache = "16K";
+		break;
+	}
+	case 0x68:
+	case 0x2C: {
+		x86_info->l1d_cache = "32K";
+		break;
+	}
+	case 0x39:
+	case 0x3B:
+	case 0x41:
+	case 0x79: {
+		x86_info->l2_cache = "128K";
+		break;
+	}
+	case 0x3A: {
+		x86_info->l2_cache = "192K";
+		break;
+	}
+	case 0x3C:
+	case 0x42:
+	case 0x7A:
+	case 0x82: {
+		x86_info->l2_cache = "256K";
+		break;
+	}
+	case 0x3D: {
+		x86_info->l2_cache = "384K";
+		break;
+	}
+	case 0x3E:
+	case 0x43:
+	case 0x7B:
+	case 0x7F:
+	case 0x83:
+	case 0x86: {
+		x86_info->l2_cache = "512K";
+		break;
+	}
+	case 0x44:
+	case 0x7C:
+	case 0x84:
+	case 0x87: {
+		x86_info->l2_cache = "1M";
+		break;
+	}
+	case 0x45:
+	case 0x7D:
+	case 0x85: {
+		x86_info->l2_cache = "2M";
+		break;
+	}
+	case 0x48: {
+		x86_info->l2_cache = "3M";
+		break;
+	}
+	case 0x4E: {
+		x86_info->l2_cache = "6M";
+		break;
+	}
+	case 0x49: {
+		x86_info->l3_cache = x86_info->l2_cache = "4M";
+		break;
+	}
+	case 0xD0: {
+		x86_info->l3_cache = "512K";
+		break;
+	}
+	case 0x23:
+	case 0xD1:
+	case 0xD6: {
+		x86_info->l3_cache = "1M";
+		break;
+	}
+	case 0xDC: {
+		x86_info->l3_cache = "1.5M";
+		break;
+	}
+	case 0xDD: {
+		x86_info->l3_cache = "3M";
+		break;
+	}
+	case 0x25:
+	case 0xD2:
+	case 0xD7:
+	case 0xE2: {
+		x86_info->l3_cache = "2M";
+		break;
+	}
+	case 0x29:
+	case 0x46:
+	case 0xD8:
+	case 0xE3: {
+		x86_info->l3_cache = "4M";
+		break;
+	}
+	case 0x4A:
+	case 0xDE: {
+		x86_info->l3_cache = "6M";
+		break;
+	}
+	case 0x47:
+	case 0x4B:
+	case 0xE4: {
+		x86_info->l3_cache = "8M";
+		break;
+	}
+	case 0x4C:
+	case 0xEA: {
+		x86_info->l3_cache = "12M";
+		break;
+	}
+	case 0x4D: {
+		x86_info->l3_cache = "16M";
+		break;
+	}
+	case 0xEB: {
+		x86_info->l3_cache = "18M";
+		break;
+	}
+	case 0xEC: {
+		x86_info->l3_cache = "24M";
+		break;
+	}
+	case 0xFF: {
+		x86_info->intel_use_leaf_4_get_cache = 1;
+		break;
+	}
+	default: {
+		break;
+	}
+	}
+	return;
+}
+
+static void
+get_x86_cpu_info(x86_cpu_info *x86_info)
+{
+	int i = 0, flag_len = 0;
+	uint32_t eax, ebx, ecx, edx;
+
+	__cpuid(0, eax, ebx, ecx, edx);
+	memcpy(x86_info->vendor, &ebx, sizeof(ebx));
+	memcpy(&(x86_info->vendor[4]), &edx, sizeof(edx));
+	memcpy(&(x86_info->vendor[8]), &ecx, sizeof(ecx));
+	for (i = 0; (i <= eax) && (i <= CPUID_MAX_STANDARD_FUNCTION); i++) {
+		x86_info->standard_mask |= (1 << i);
+	}
+
+	__cpuid(0x80000000, eax, ebx, ecx, edx);
+	eax &= ~0x80000000;
+	for (i = 0; (i <= eax) && (i <= CPUID_MAX_EXTENDED_FUNCTION); i++) {
+		x86_info->extended_mask |= (1 << i);
+	}
+
+	eax = CPUID_STANDARD_1_MASK;
+	if (x86_info->standard_mask & (1 << eax)) {
+		__cpuid(eax, eax, ebx, ecx, edx);
+		x86_info->stepping = eax & 0xF;
+		x86_info->family = (eax >> 8) & 0xF;
+		x86_info->model = (eax >> 4) & 0xF;
+		if ((x86_info->family == 6) || (x86_info->family == 15)) {
+			x86_info->model |= (eax >> 12) & 0xF0;
+			if (x86_info->family == 15) {
+				x86_info->family += (eax >> 20) & 0xFF;
+			}
+		}
+	}
+
+	eax = CPUID_STANDARD_2_MASK;
+	if ((is_intel_cpu(x86_info->vendor)) &&
+	    (x86_info->standard_mask & (1 << eax))) {
+		int i = 0, count = 0;
+		uint32_t cache[4]; /* eax, ebx, ecx, edx */
+
+		__cpuid(eax, cache[0], cache[1], cache[2], cache[3]);
+		count = cache[0] & 0xFF;
+		while (count--) {
+			for (i = 0; i < 4; i++) {
+				if (!(cache[i] & 0x80000000)) {
+					if (i) {
+						parse_intel_cache_value(x86_info,
+						    cache[i] & 0xFF);
+					}
+					parse_intel_cache_value(x86_info,
+					    (cache[i] >> 8) & (0xFF));
+					parse_intel_cache_value(x86_info,
+					    (cache[i] >> 16) & (0xFF));
+					parse_intel_cache_value(x86_info,
+					    (cache[i] >> 24) & (0xFF));
+				}
+			}
+		}
+	}
+
+	if ((is_intel_cpu(x86_info->vendor)) &&
+	    (x86_info->standard_mask & (1 << eax)) &&
+	    (x86_info->intel_use_leaf_4_get_cache)) {
+		int subleaf = 0;
+		for (subleaf = 0;; subleaf++) {
+			unsigned char cache_type = 0, cache_level = 0;
+			int cache_size;
+
+			__cpuid_count(CPUID_STANDARD_4_MASK, subleaf, eax, ebx,
+			    ecx, edx);
+
+			cache_type = eax & 0x1F;
+			if (!cache_type) {
+				break;
+			}
+
+			cache_size = (int)((((ebx >> 22) & 0x3FF) + 1) *
+			    (((ebx >> 10) & 0x3FF) + 1) * ((ebx & 0xFFF) + 1) *
+			    (ecx + 1) / 1024);
+			if (!cache_size) {
+				break;
+			}
+
+			cache_level = (eax >> 5) & 0x7;
+			switch (cache_level) {
+			case 1: {
+				if (cache_type == 1) {
+					snprintf(intel_l1d_cache,
+					    sizeof(intel_l1d_cache), "%dK",
+					    cache_size);
+					x86_info->l1d_cache = intel_l1d_cache;
+				} else if (cache_type == 2) {
+					snprintf(intel_l1i_cache,
+					    sizeof(intel_l1i_cache), "%dK",
+					    cache_size);
+					x86_info->l1i_cache = intel_l1i_cache;
+				}
+				break;
+			}
+			case 2: {
+				if (cache_type == 3) {
+					snprintf(intel_l2_cache,
+					    sizeof(intel_l2_cache), "%dK",
+					    cache_size);
+					x86_info->l2_cache = intel_l2_cache;
+				}
+				break;
+			}
+			case 3: {
+				if (cache_type == 3) {
+					int mega_size = cache_size / 1024;
+					if (mega_size) {
+						snprintf(intel_l3_cache,
+						    sizeof(intel_l3_cache),
+						    "%dM", cache_size / 1024);
+					} else {
+						snprintf(intel_l3_cache,
+						    sizeof(intel_l3_cache),
+						    "%dK", cache_size);
+					}
+					x86_info->l3_cache = intel_l3_cache;
+				}
+				break;
+			}
+			default: {
+				break;
+			}
+			}
+		}
+	}
+
+	eax = CPUID_STANDARD_7_MASK;
+	ecx = 0;
+	if (x86_info->standard_mask & (1 << eax)) {
+		__cpuid(eax, eax, ebx, ecx, edx);
+	}
+
+	if ((is_intel_cpu(x86_info->vendor)) &&
+	    (x86_info->standard_mask & (1 << CPUID_STANDARD_B_MASK))) {
+		int subleaf = 0;
+		for (subleaf = 0;; subleaf++) {
+			int level_type = 0;
+			__cpuid_count(CPUID_STANDARD_B_MASK, subleaf, eax, ebx,
+			    ecx, edx);
+
+			if (!eax && !ebx) {
+				break;
+			}
+
+			level_type = (ecx >> 8) & 0xFF;
+			if (level_type == 1) {
+				x86_info->threads_per_core = ebx;
+			} else if (level_type == 2) {
+				x86_info->cores_per_socket = ebx;
+			}
+		}
+
+		if (x86_info->threads_per_core) {
+			x86_info->cores_per_socket = x86_info->cores_per_socket /
+			    x86_info->threads_per_core;
+		}
+	}
+
+	if (x86_info->extended_mask & (1 << CPUID_EXTENDED_1_MASK)) {
+		__cpuid(0x80000000 | CPUID_EXTENDED_1_MASK, eax, ebx, ecx, edx);
+	}
+
+	if ((is_amd_cpu(x86_info->vendor)) &&
+	    (x86_info->extended_mask & (1 << CPUID_EXTENDED_5_MASK))) {
+		int kilo_size = 0;
+		__cpuid(0x80000000 | CPUID_EXTENDED_5_MASK, eax, ebx, ecx, edx);
+
+		kilo_size = (ecx >> 24) & 0xFF;
+		if (kilo_size) {
+			snprintf(amd_l1d_cache, sizeof(amd_l1d_cache), "%dK",
+			    kilo_size);
+			x86_info->l1d_cache = amd_l1d_cache;
+		}
+
+		kilo_size = (edx >> 24) & 0xFF;
+		if (kilo_size) {
+			snprintf(amd_l1i_cache, sizeof(amd_l1i_cache), "%dK",
+			    kilo_size);
+			x86_info->l1i_cache = amd_l1i_cache;
+		}
+	}
+
+	if ((is_amd_cpu(x86_info->vendor)) &&
+	    (x86_info->extended_mask & (1 << CPUID_EXTENDED_6_MASK))) {
+		int kilo_size = 0, mega_size = 0;
+
+		__cpuid(0x80000000 | CPUID_EXTENDED_6_MASK, eax, ebx, ecx, edx);
+
+		kilo_size = (ecx >> 16) & 0xFFFF;
+		if (kilo_size) {
+			snprintf(amd_l2_cache, sizeof(amd_l2_cache), "%dK",
+			    kilo_size);
+			x86_info->l2_cache = amd_l2_cache;
+		}
+
+		kilo_size = ((edx >> 18) & 0x3FFF) * 512;
+		if (kilo_size) {
+			mega_size = kilo_size / 1024;
+
+			if (mega_size) {
+				snprintf(amd_l3_cache, sizeof(amd_l3_cache),
+				    "%dM", mega_size);
+			} else {
+				snprintf(amd_l3_cache, sizeof(amd_l3_cache),
+				    "%dK", kilo_size);
+			}
+			x86_info->l3_cache = amd_l3_cache;
+		}
+	}
+
+	if (is_amd_cpu(x86_info->vendor)) {
+		if (x86_info->extended_mask & (1 << CPUID_EXTENDED_8_MASK)) {
+			__cpuid(0x80000000 | CPUID_EXTENDED_8_MASK, eax, ebx,
+			    ecx, edx);
+			x86_info->cores_per_socket = (ecx & 0xFF) + 1;
+		} else {
+	    /* fall back to standard CPUID leaf 1 on old processors */
+			__cpuid(0x00000001, eax, ebx, ecx, edx);
+			x86_info->cores_per_socket = (ebx >> 16) & 0xFF;
+		}
+
+		if (x86_info->extended_mask & (1 << CPUID_EXTENDED_1E_MASK)) {
+			__cpuid(0x80000000 | CPUID_EXTENDED_1E_MASK, eax, ebx,
+			    ecx, edx);
+			x86_info->threads_per_core = ((ebx >> 8) & 0xFF) + 1;
+
+			if (x86_info->threads_per_core) {
+				x86_info->cores_per_socket = x86_info->cores_per_socket /
+				    x86_info->threads_per_core;
+			}
+		}
+	}
+
+	return;
+}
+#endif
+
+static inline uint32_t
+max(uint32_t a, uint32_t b)
+{
+	return a > b ? a : b;
+}
+
+static inline uint32_t
+bit_mask(uint32_t bits)
+{
+	return (UINT32_C(1) << bits) - UINT32_C(1);
+}
+
+void
+cpuinfo_x86_openbsd_init(void)
+{
+	int mib[2], ch = 0, i = 0;
+	struct cpuinfo_processor *processors = NULL;
+	struct cpuinfo_core *cores = NULL;
+	struct cpuinfo_cluster *clusters = NULL;
+	struct cpuinfo_package *packages = NULL;
+	struct cpuinfo_cache *l1i = NULL;
+	struct cpuinfo_cache *l1d = NULL;
+	struct cpuinfo_cache *l2 = NULL;
+	struct cpuinfo_cache *l3 = NULL;
+	struct cpuinfo_cache *l4 = NULL;
+	int npackages, ncores, nthreads, nthreads_per_core;
+
+	sysctl_get_cpu_info sysctl_array[] = {
+		{HW_MACHINE, gen_info.arch, sizeof(gen_info.arch), "HW_MACHINE"},
+		{HW_BYTEORDER, &(gen_info.byte_order), sizeof(gen_info.byte_order), "HW_BYTEORDER"},
+		{HW_MODEL, gen_info.model, sizeof(gen_info.model), "HW_MODEL"},
+		{HW_NCPU, &(gen_info.active_cpu_num), sizeof(gen_info.active_cpu_num), "HW_NCPU"},
+		{HW_VENDOR, gen_info.vendor, sizeof(gen_info.vendor), "HW_VENDOR"},
+		{HW_NCPUFOUND, &(gen_info.total_cpu_num), sizeof(gen_info.total_cpu_num), "HW_NCPUFOUND"},
+		{HW_CPUSPEED, &(gen_info.speed), sizeof(gen_info.speed), "HW_CPUSPEED"},
+	};
+
+	for (i = 0; i < ARRAY_LEN(sysctl_array); i++) {
+		mib[0] = CTL_HW;
+		mib[1] = sysctl_array[i].mib_code;
+		if (sysctl(mib, ARRAY_LEN(mib), sysctl_array[i].old,
+		    &sysctl_array[i].old_len, NULL, 0) == -1) {
+			if (errno == EOPNOTSUPP) {
+				continue;
+			}
+			err(1, "%s", sysctl_array[i].err_msg);
+		}
+	}
+
+#if defined(__amd64__) || defined(__i386__)
+	get_x86_cpu_info(&x86_info);
+#endif
+
+	nthreads = gen_info.active_cpu_num;
+	nthreads_per_core = x86_info.threads_per_core;
+	ncores = nthreads / nthreads_per_core;
+	npackages = ncores; /* not sure how to find the true value yet */
+
+	processors = calloc(nthreads, sizeof(struct cpuinfo_processor));
+	if (processors == NULL) {
+		cpuinfo_log_error(
+		    "failed to allocate %zu bytes for descriptions of %" PRIu32 " logical processors",
+		    nthreads * sizeof(struct cpuinfo_processor),
+		    nthreads);
+		goto cleanup;
+	}
+
+	cores = calloc(ncores, sizeof(struct cpuinfo_core));
+	if (cores == NULL) {
+		cpuinfo_log_error(
+		    "failed to allocate %zu bytes for descriptions of %" PRIu32 " cores",
+		    ncores * sizeof(struct cpuinfo_core),
+		    ncores);
+		goto cleanup;
+	}
+
+	/* On x86 a cluster of cores is the biggest group of cores that shares a
+	 * cache. */
+	clusters = calloc(npackages, sizeof(struct cpuinfo_cluster));
+	if (clusters == NULL) {
+		cpuinfo_log_error(
+		    "failed to allocate %zu bytes for descriptions of %" PRIu32 " core clusters",
+		    npackages * sizeof(struct cpuinfo_cluster),
+		    npackages);
+		goto cleanup;
+	}
+	packages = calloc(npackages, sizeof(struct cpuinfo_package));
+	if (packages == NULL) {
+		cpuinfo_log_error(
+		    "failed to allocate %zu bytes for descriptions of %" PRIu32 " physical packages",
+		    npackages * sizeof(struct cpuinfo_package),
+		    npackages);
+		goto cleanup;
+	}
+
+	struct cpuinfo_x86_processor x86_processor;
+	memset(&x86_processor, 0, sizeof(x86_processor));
+	cpuinfo_x86_init_processor(&x86_processor);
+	char brand_string[48];
+	cpuinfo_x86_normalize_brand_string(x86_processor.brand_string,
+	    brand_string);
+
+	const uint32_t threads_per_core = nthreads_per_core;
+	const uint32_t threads_per_package = nthreads / npackages;
+	const uint32_t cores_per_package = ncores / npackages;
+	for (uint32_t i = 0; i < npackages; i++) {
+		clusters[i] = (struct cpuinfo_cluster){
+		    .processor_start = i * threads_per_package,
+		    .processor_count = threads_per_package,
+		    .core_start = i * cores_per_package,
+		    .core_count = cores_per_package,
+		    .cluster_id = 0,
+		    .package = packages + i,
+		    .vendor = x86_processor.vendor,
+		    .uarch = x86_processor.uarch,
+		    .cpuid = x86_processor.cpuid,
+		};
+		packages[i].processor_start = i * threads_per_package;
+		packages[i].processor_count = threads_per_package;
+		packages[i].core_start = i * cores_per_package;
+		packages[i].core_count = cores_per_package;
+		packages[i].cluster_start = i;
+		packages[i].cluster_count = 1;
+		cpuinfo_x86_format_package_name(x86_processor.vendor,
+		    brand_string, packages[i].name);
+	}
+	for (uint32_t i = 0; i < ncores; i++) {
+		cores[i] = (struct cpuinfo_core){
+		    .processor_start = i * threads_per_core,
+		    .processor_count = threads_per_core,
+		    .core_id = i % cores_per_package,
+		    .cluster = clusters + i / cores_per_package,
+		    .package = packages + i / cores_per_package,
+		    .vendor = x86_processor.vendor,
+		    .uarch = x86_processor.uarch,
+		    .cpuid = x86_processor.cpuid,
+		};
+	}
+	for (uint32_t i = 0; i < nthreads; i++) {
+		const uint32_t smt_id = i % threads_per_core;
+		const uint32_t core_id = i / threads_per_core;
+		const uint32_t package_id = i / threads_per_package;
+
+		/* Reconstruct APIC IDs from topology components */
+		const uint32_t thread_bits_mask = bit_mask(
+		    x86_processor.topology.thread_bits_length);
+		const uint32_t core_bits_mask = bit_mask(
+		    x86_processor.topology.core_bits_length);
+		const uint32_t package_bits_offset =
+		    max(x86_processor.topology.thread_bits_offset +
+		    x86_processor.topology.thread_bits_length,
+		    x86_processor.topology.core_bits_offset +
+		    x86_processor.topology.core_bits_length);
+		const uint32_t apic_id = ((smt_id & thread_bits_mask) <<
+		    x86_processor.topology.thread_bits_offset) |
+		    ((core_id & core_bits_mask) <<
+		     x86_processor.topology.core_bits_offset) |
+		    (package_id << package_bits_offset);
+		cpuinfo_log_debug("reconstructed APIC ID 0x%08"
+		    PRIx32 " for thread %" PRIu32,
+		    apic_id, i);
+
+		processors[i].smt_id = smt_id;
+		processors[i].core = cores + i / threads_per_core;
+		processors[i].cluster = clusters + i / threads_per_package;
+		processors[i].package = packages + i / threads_per_package;
+		processors[i].apic_id = apic_id;
+	}
+
+	uint32_t threads_per_l1 = 0, l1_count = 0;
+	if (x86_processor.cache.l1i.size != 0 ||
+	    x86_processor.cache.l1d.size != 0) {
+		/* Assume that threads on the same core share L1 */
+		threads_per_l1 = nthreads / ncores;
+		if (threads_per_l1 == 0) {
+			cpuinfo_log_error("failed to detect threads_per_l1");
+			goto cleanup;
+		}
+		cpuinfo_log_warning(
+		    "openbsd kernel did not report number of "
+		    "threads sharing L1 cache; assume %" PRIu32,
+		    threads_per_l1);
+		l1_count = nthreads / threads_per_l1;
+		cpuinfo_log_debug("detected %" PRIu32 " L1 caches", l1_count);
+	}
+
+	uint32_t threads_per_l2 = 0, l2_count = 0;
+	if (x86_processor.cache.l2.size != 0) {
+		if (x86_processor.cache.l3.size != 0) {
+			/* This is not a last-level cache; assume that threads
+			 * on the same core share L2 */
+			threads_per_l2 = nthreads / ncores;
+		} else {
+			/* This is a last-level cache; assume that threads on
+			 * the same package share L2 */
+			threads_per_l2 = nthreads / npackages;
+		}
+		if (threads_per_l2 == 0) {
+			cpuinfo_log_error("failed to detect threads_per_l1");
+			goto cleanup;
+		}
+		cpuinfo_log_warning(
+		    "openbsd kernel did not report number of "
+		    "threads sharing L2 cache; assume %" PRIu32,
+		    threads_per_l2);
+		l2_count = nthreads / threads_per_l2;
+		cpuinfo_log_debug("detected %" PRIu32 " L2 caches", l2_count);
+	}
+
+	uint32_t threads_per_l3 = 0, l3_count = 0;
+	if (x86_processor.cache.l3.size != 0) {
+		/*
+		 * Assume that threads on the same package share L3.
+		 * However, is it not necessarily the last-level cache (there
+		 * may be L4 cache as well)
+		 */
+		threads_per_l3 = nthreads / npackages;
+		if (threads_per_l3 == 0) {
+			cpuinfo_log_error("failed to detect threads_per_l3");
+			goto cleanup;
+		}
+		cpuinfo_log_warning(
+		    "openbsd kernel did not report number of "
+		    "threads sharing L3 cache; assume %" PRIu32,
+		    threads_per_l3);
+		l3_count = nthreads / threads_per_l3;
+		cpuinfo_log_debug("detected %" PRIu32 " L3 caches", l3_count);
+	}
+
+	uint32_t threads_per_l4 = 0, l4_count = 0;
+	if (x86_processor.cache.l4.size != 0) {
+		/*
+		 * Assume that all threads share this L4.
+		 * As of now, L4 cache exists only on notebook x86 CPUs, which
+		 * are single-package, but multi-socket systems could have
+		 * shared L4 (like on IBM POWER8).
+		 */
+		threads_per_l4 = nthreads;
+		if (threads_per_l4 == 0) {
+			cpuinfo_log_error("failed to detect threads_per_l4");
+			goto cleanup;
+		}
+		cpuinfo_log_warning(
+		    "openbsd kernel did not report number of "
+		    "threads sharing L4 cache; assume %" PRIu32,
+		    threads_per_l4);
+		l4_count = nthreads / threads_per_l4;
+		cpuinfo_log_debug("detected %" PRIu32 " L4 caches", l4_count);
+	}
+
+	if (x86_processor.cache.l1i.size != 0) {
+		l1i = calloc(l1_count, sizeof(struct cpuinfo_cache));
+		if (l1i == NULL) {
+			cpuinfo_log_error(
+			    "failed to allocate %zu bytes for descriptions of "
+			    "%" PRIu32 " L1I caches",
+			    l1_count * sizeof(struct cpuinfo_cache),
+			    l1_count);
+			goto cleanup;
+		}
+		for (uint32_t c = 0; c < l1_count; c++) {
+			l1i[c] = (struct cpuinfo_cache){
+			    .size = x86_processor.cache.l1i.size,
+			    .associativity = x86_processor.cache.l1i.associativity,
+			    .sets = x86_processor.cache.l1i.sets,
+			    .partitions = x86_processor.cache.l1i.partitions,
+			    .line_size = x86_processor.cache.l1i.line_size,
+			    .flags = x86_processor.cache.l1i.flags,
+			    .processor_start = c * threads_per_l1,
+			    .processor_count = threads_per_l1,
+			};
+		}
+		for (uint32_t t = 0; t < nthreads; t++) {
+			processors[t].cache.l1i = &l1i[t / threads_per_l1];
+		}
+	}
+
+	if (x86_processor.cache.l1d.size != 0) {
+		l1d = calloc(l1_count, sizeof(struct cpuinfo_cache));
+		if (l1d == NULL) {
+			cpuinfo_log_error(
+			    "failed to allocate %zu bytes for descriptions of "
+			    "%" PRIu32 " L1D caches",
+			    l1_count * sizeof(struct cpuinfo_cache),
+			    l1_count);
+			goto cleanup;
+		}
+		for (uint32_t c = 0; c < l1_count; c++) {
+			l1d[c] = (struct cpuinfo_cache){
+			    .size = x86_processor.cache.l1d.size,
+			    .associativity = x86_processor.cache.l1d.associativity,
+			    .sets = x86_processor.cache.l1d.sets,
+			    .partitions = x86_processor.cache.l1d.partitions,
+			    .line_size = x86_processor.cache.l1d.line_size,
+			    .flags = x86_processor.cache.l1d.flags,
+			    .processor_start = c * threads_per_l1,
+			    .processor_count = threads_per_l1,
+			};
+		}
+		for (uint32_t t = 0; t < nthreads; t++) {
+			processors[t].cache.l1d = &l1d[t / threads_per_l1];
+		}
+	}
+
+	if (l2_count != 0) {
+		l2 = calloc(l2_count, sizeof(struct cpuinfo_cache));
+		if (l2 == NULL) {
+			cpuinfo_log_error(
+			    "failed to allocate %zu bytes for descriptions of "
+			    "%" PRIu32 " L2 caches",
+			    l2_count * sizeof(struct cpuinfo_cache),
+			    l2_count);
+			goto cleanup;
+		}
+		for (uint32_t c = 0; c < l2_count; c++) {
+			l2[c] = (struct cpuinfo_cache){
+			    .size = x86_processor.cache.l2.size,
+			    .associativity = x86_processor.cache.l2.associativity,
+			    .sets = x86_processor.cache.l2.sets,
+			    .partitions = x86_processor.cache.l2.partitions,
+			    .line_size = x86_processor.cache.l2.line_size,
+			    .flags = x86_processor.cache.l2.flags,
+			    .processor_start = c * threads_per_l2,
+			    .processor_count = threads_per_l2,
+			};
+		}
+		for (uint32_t t = 0; t < nthreads; t++) {
+			processors[t].cache.l2 = &l2[t / threads_per_l2];
+		}
+	}
+
+	if (l3_count != 0) {
+		l3 = calloc(l3_count, sizeof(struct cpuinfo_cache));
+		if (l3 == NULL) {
+			cpuinfo_log_error(
+			    "failed to allocate %zu bytes for descriptions of "
+			    "%" PRIu32 " L3 caches",
+			    l3_count * sizeof(struct cpuinfo_cache),
+			    l3_count);
+			goto cleanup;
+		}
+		for (uint32_t c = 0; c < l3_count; c++) {
+			l3[c] = (struct cpuinfo_cache){
+			    .size = x86_processor.cache.l3.size,
+			    .associativity = x86_processor.cache.l3.associativity,
+			    .sets = x86_processor.cache.l3.sets,
+			    .partitions = x86_processor.cache.l3.partitions,
+			    .line_size = x86_processor.cache.l3.line_size,
+			    .flags = x86_processor.cache.l3.flags,
+			    .processor_start = c * threads_per_l3,
+			    .processor_count = threads_per_l3,
+			};
+		}
+		for (uint32_t t = 0; t < nthreads; t++) {
+			processors[t].cache.l3 = &l3[t / threads_per_l3];
+		}
+	}
+
+	if (l4_count != 0) {
+		l4 = calloc(l4_count, sizeof(struct cpuinfo_cache));
+		if (l4 == NULL) {
+			cpuinfo_log_error(
+			    "failed to allocate %zu bytes for descriptions of "
+			    "%" PRIu32 " L4 caches",
+			    l4_count * sizeof(struct cpuinfo_cache),
+			    l4_count);
+			goto cleanup;
+		}
+		for (uint32_t c = 0; c < l4_count; c++) {
+			l4[c] = (struct cpuinfo_cache){
+			    .size = x86_processor.cache.l4.size,
+			    .associativity = x86_processor.cache.l4.associativity,
+			    .sets = x86_processor.cache.l4.sets,
+			    .partitions = x86_processor.cache.l4.partitions,
+			    .line_size = x86_processor.cache.l4.line_size,
+			    .flags = x86_processor.cache.l4.flags,
+			    .processor_start = c * threads_per_l4,
+			    .processor_count = threads_per_l4,
+			};
+		}
+		for (uint32_t t = 0; t < nthreads; t++) {
+			processors[t].cache.l4 = &l4[t / threads_per_l4];
+		}
+	}
+
+	/* Commit changes */
+	cpuinfo_processors = processors;
+	cpuinfo_cores = cores;
+	cpuinfo_clusters = clusters;
+	cpuinfo_packages = packages;
+	cpuinfo_cache[cpuinfo_cache_level_1i] = l1i;
+	cpuinfo_cache[cpuinfo_cache_level_1d] = l1d;
+	cpuinfo_cache[cpuinfo_cache_level_2] = l2;
+	cpuinfo_cache[cpuinfo_cache_level_3] = l3;
+	cpuinfo_cache[cpuinfo_cache_level_4] = l4;
+
+	cpuinfo_processors_count = nthreads;
+	cpuinfo_cores_count = ncores;
+	cpuinfo_clusters_count = npackages;
+	cpuinfo_packages_count = npackages;
+	cpuinfo_cache_count[cpuinfo_cache_level_1i] = l1_count;
+	cpuinfo_cache_count[cpuinfo_cache_level_1d] = l1_count;
+	cpuinfo_cache_count[cpuinfo_cache_level_2] = l2_count;
+	cpuinfo_cache_count[cpuinfo_cache_level_3] = l3_count;
+	cpuinfo_cache_count[cpuinfo_cache_level_4] = l4_count;
+	cpuinfo_max_cache_size = cpuinfo_compute_max_cache_size(&processors[0]);
+
+	cpuinfo_global_uarch = (struct cpuinfo_uarch_info){
+	    .uarch = x86_processor.uarch,
+	    .cpuid = x86_processor.cpuid,
+	    .processor_count = nthreads,
+	    .core_count = ncores,
+	};
+
+	__sync_synchronize();
+
+	cpuinfo_is_initialized = true;
+
+	processors = NULL;
+	cores = NULL;
+	clusters = NULL;
+	packages = NULL;
+	l1i = l1d = l2 = l3 = l4 = NULL;
+
+cleanup:
+	free(processors);
+	free(cores);
+	free(clusters);
+	free(packages);
+	free(l1i);
+	free(l1d);
+	free(l2);
+	free(l3);
+	free(l4);
+}
