Module Name:    src
Committed By:   cegger
Date:           Wed Nov 25 13:17:06 UTC 2009

Modified Files:
        src/sys/dev/acpi: files.acpi
Added Files:
        src/sys/dev/acpi: acpi_srat.c acpi_srat.h

Log Message:
Add ACPI SRAT parser. This is a part of NUMA support.
Tested on 1-node, 2-node and 8-node machines.
Patch presented on tech-kern@, port-i386@ and port-amd64@.

No comments.


To generate a diff of this commit:
cvs rdiff -u -r0 -r1.1 src/sys/dev/acpi/acpi_srat.c \
    src/sys/dev/acpi/acpi_srat.h
cvs rdiff -u -r1.60 -r1.61 src/sys/dev/acpi/files.acpi

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/dev/acpi/files.acpi
diff -u src/sys/dev/acpi/files.acpi:1.60 src/sys/dev/acpi/files.acpi:1.61
--- src/sys/dev/acpi/files.acpi:1.60	Fri Oct  2 16:47:52 2009
+++ src/sys/dev/acpi/files.acpi	Wed Nov 25 13:17:06 2009
@@ -1,4 +1,4 @@
-#	$NetBSD: files.acpi,v 1.60 2009/10/02 16:47:52 jmcneill Exp $
+#	$NetBSD: files.acpi,v 1.61 2009/11/25 13:17:06 cegger Exp $
 
 include "dev/acpi/acpica/files.acpica"
 
@@ -20,6 +20,7 @@
 file	dev/acpi/acpi_quirks.c		acpi
 file	dev/acpi/acpi_timer.c		acpi
 file	dev/acpi/acpi_wakedev.c		acpi
+file	dev/acpi/acpi_srat.c		acpi
 
 # ACPI/apm emulation.
 attach  apm at acpiapmbus with acpiapm: sysmon_envsys

Added files:

Index: src/sys/dev/acpi/acpi_srat.c
diff -u /dev/null src/sys/dev/acpi/acpi_srat.c:1.1
--- /dev/null	Wed Nov 25 13:17:06 2009
+++ src/sys/dev/acpi/acpi_srat.c	Wed Nov 25 13:17:06 2009
@@ -0,0 +1,513 @@
+/* $NetBSD $ */
+
+/*
+ * Copyright (c) 2009 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Christoph Egger.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD $");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+
+#include <sys/kmem.h>
+
+#include <dev/acpi/acpica.h>
+#include <dev/acpi/acpivar.h>
+#include <dev/acpi/acpi_srat.h>
+
+static ACPI_TABLE_SRAT *srat;
+
+/*
+ * One NUMA node as assembled by acpisrat_refresh(): the node id plus
+ * arrays of pointers to the CPUs and memory ranges carrying that id.
+ * The pointers refer to elements of the file-scope cpu_array and
+ * mem_array; the pointer arrays themselves are released again by
+ * acpisrat_exit().
+ */
+struct acpisrat_node {
+	acpisrat_nodeid_t nodeid;
+	uint32_t ncpus; /* Number of cpus in this node */
+	struct acpisrat_cpu **cpu; /* Array of cpus */
+	uint32_t nmems; /* Number of memory ranges in this node */
+	struct acpisrat_mem **mem; /* Array of memory ranges */
+};
+
+/*
+ * Parsed SRAT contents.  All of these are (re)built by
+ * acpisrat_refresh() and released/zeroed by acpisrat_exit().
+ */
+static uint32_t nnodes; /* Number of NUMA nodes */
+static struct acpisrat_node *node_array; /* Array of NUMA nodes */
+static uint32_t ncpus; /* Number of CPUs */
+static struct acpisrat_cpu *cpu_array; /* Array of cpus */
+static uint32_t nmems; /* Number of Memory ranges */
+static struct acpisrat_mem *mem_array;
+
+
+/*
+ * Temporary list of CPU affinity entries collected while the SRAT is
+ * being parsed.  acpisrat_refresh() copies the entries into cpu_array
+ * and frees the list again.
+ */
+struct cpulist {
+	struct acpisrat_cpu cpu;
+	TAILQ_ENTRY(cpulist) entry;
+};
+
+static TAILQ_HEAD(, cpulist) cpulisthead;
+
+/*
+ * Helpers for the CPU list.  None of them carries a trailing
+ * semicolon, so each use reads (and nests under if/else) like an
+ * ordinary statement.
+ */
+#define CPU_INIT		TAILQ_INIT(&cpulisthead)
+#define CPU_FOREACH(cpu)	TAILQ_FOREACH(cpu, &cpulisthead, entry)
+#define CPU_ADD(cpu)		TAILQ_INSERT_TAIL(&cpulisthead, cpu, entry)
+#define CPU_REM(cpu)		TAILQ_REMOVE(&cpulisthead, cpu, entry)
+#define CPU_FIRST		TAILQ_FIRST(&cpulisthead)
+
+
+/*
+ * Temporary list of memory affinity entries collected while the SRAT
+ * is being parsed.  acpisrat_refresh() copies the entries into
+ * mem_array and frees the list again.
+ */
+struct memlist {
+	struct acpisrat_mem mem;
+	TAILQ_ENTRY(memlist) entry;
+};
+
+static TAILQ_HEAD(, memlist) memlisthead;
+
+/* Helpers for the memory list; thin wrappers around the TAILQ macros. */
+#define MEM_INIT		TAILQ_INIT(&memlisthead)
+#define MEM_FOREACH(mem)	TAILQ_FOREACH(mem, &memlisthead, entry)
+#define MEM_ADD(mem)		TAILQ_INSERT_TAIL(&memlisthead, mem, entry)
+#define MEM_ADD_BEFORE(mem, b)	TAILQ_INSERT_BEFORE(b, mem, entry)
+#define MEM_REM(mem)		TAILQ_REMOVE(&memlisthead, mem, entry)
+#define MEM_FIRST		TAILQ_FIRST(&memlisthead)
+
+
+/*
+ * Get a zero-filled CPU list entry without sleeping.  The result may
+ * be NULL; callers check for that.
+ */
+static struct cpulist *
+cpu_alloc(void)
+{
+	struct cpulist *entry;
+
+	entry = kmem_zalloc(sizeof(*entry), KM_NOSLEEP);
+	return entry;
+}
+
+/* Give a CPU list entry obtained from cpu_alloc() back to kmem. */
+static void
+cpu_free(struct cpulist *c)
+{
+	const size_t len = sizeof(*c);
+
+	kmem_free(c, len);
+}
+
+#if 0
+/*
+ * Return the first CPU list entry that belongs to the given node,
+ * or NULL if the list holds none.  Compiled out: nothing uses it.
+ */
+static struct cpulist *
+cpu_get(acpisrat_nodeid_t nodeid)
+{
+	struct cpulist *c;
+
+	CPU_FOREACH(c) {
+		if (c->cpu.nodeid != nodeid)
+			continue;
+		return c;
+	}
+
+	return NULL;
+}
+#endif
+
+/*
+ * Get a zero-filled memory list entry without sleeping.  The result
+ * may be NULL; callers check for that.
+ */
+static struct memlist *
+mem_alloc(void)
+{
+	struct memlist *entry;
+
+	entry = kmem_zalloc(sizeof(*entry), KM_NOSLEEP);
+	return entry;
+}
+
+/* Give a memory list entry obtained from mem_alloc() back to kmem. */
+static void
+mem_free(struct memlist *m)
+{
+	const size_t len = sizeof(*m);
+
+	kmem_free(m, len);
+}
+
+/*
+ * Return the first memory list entry that belongs to the given node,
+ * or NULL if the list holds none.
+ */
+static struct memlist *
+mem_get(acpisrat_nodeid_t nodeid)
+{
+	struct memlist *m;
+
+	MEM_FOREACH(m) {
+		if (m->mem.nodeid != nodeid)
+			continue;
+		return m;
+	}
+
+	return NULL;
+}
+
+
+/*
+ * Look the SRAT up through AcpiGetTable() and remember it in the
+ * file-scope srat pointer.  Returns false when the lookup fails,
+ * yields a NULL table, or the header length reads 0xffffffff;
+ * in those cases srat is left untouched.
+ */
+bool
+acpisrat_exist(void)
+{
+	ACPI_TABLE_HEADER *hdr;
+	ACPI_STATUS status;
+
+	status = AcpiGetTable(ACPI_SIG_SRAT, 1, &hdr);
+	if (ACPI_FAILURE(status) || hdr == NULL)
+		return false;
+
+	/* An all-ones length is treated as an invalid header. */
+	if (hdr->Length == 0xffffffff)
+		return false;
+
+	srat = (ACPI_TABLE_SRAT *)hdr;
+	return true;
+}
+
+/*
+ * Walk the SRAT subtables and record them on the temporary CPU and
+ * memory lists (which the caller must have initialised).
+ *
+ * - Local APIC/SAPIC affinity entries are collected until the first
+ *   x2APIC affinity entry shows up; from then on only x2APIC entries
+ *   count and the ones collected so far are discarded.
+ * - A subtable whose length is smaller than its own header or that
+ *   reaches beyond the end of the table stops the walk; entries seen
+ *   up to that point are kept.
+ * - A subtable too short for its type is skipped.
+ *
+ * Returns 0 on success.  Returns ENOMEM if a list entry cannot be
+ * allocated; both lists are emptied before returning in that case.
+ */
+static int
+acpisrat_parse(void)
+{
+	ACPI_SUBTABLE_HEADER *subtable;
+	ACPI_SRAT_CPU_AFFINITY *srat_cpu;
+	ACPI_SRAT_MEM_AFFINITY *srat_mem;
+	ACPI_SRAT_X2APIC_CPU_AFFINITY *srat_x2apic;
+
+	acpisrat_nodeid_t nodeid;
+	struct cpulist *cpuentry;
+	struct memlist *mementry;
+	uint32_t srat_pos, remaining;
+	bool ignore_cpu_affinity = false;
+
+	KASSERT(srat != NULL);
+
+	/* Content starts right after the header */
+	srat_pos = sizeof(ACPI_TABLE_SRAT);
+
+	while (srat_pos < srat->Header.Length) {
+		remaining = srat->Header.Length - srat_pos;
+		if (remaining < sizeof(ACPI_SUBTABLE_HEADER))
+			break;
+
+		subtable = (ACPI_SUBTABLE_HEADER *)((char *)srat + srat_pos);
+
+		/*
+		 * A zero length would never advance srat_pos and an
+		 * oversized one would make us read past the table.
+		 */
+		if (subtable->Length < sizeof(ACPI_SUBTABLE_HEADER) ||
+		    subtable->Length > remaining) {
+			printf("ACPI SRAT subtable malformed, length: 0x%x\n",
+				subtable->Length);
+			break;
+		}
+		srat_pos += subtable->Length;
+
+		switch (subtable->Type) {
+		case ACPI_SRAT_TYPE_CPU_AFFINITY:
+			if (ignore_cpu_affinity)
+				continue;
+			if (subtable->Length < sizeof(*srat_cpu))
+				break;
+
+			srat_cpu = (ACPI_SRAT_CPU_AFFINITY *)subtable;
+			/* Widen before shifting: the bytes promote to int. */
+			nodeid =
+			    ((uint32_t)srat_cpu->ProximityDomainHi[2] << 24) |
+			    ((uint32_t)srat_cpu->ProximityDomainHi[1] << 16) |
+			    ((uint32_t)srat_cpu->ProximityDomainHi[0] << 8) |
+			    (srat_cpu->ProximityDomainLo);
+
+			cpuentry = cpu_alloc();
+			if (cpuentry == NULL)
+				goto nomem;
+			CPU_ADD(cpuentry);
+
+			cpuentry->cpu.nodeid = nodeid;
+			cpuentry->cpu.apicid = srat_cpu->ApicId;
+			cpuentry->cpu.sapiceid = srat_cpu->LocalSapicEid;
+			cpuentry->cpu.flags = srat_cpu->Flags;
+			cpuentry->cpu.clockdomain = srat_cpu->ClockDomain;
+			break;
+
+		case ACPI_SRAT_TYPE_MEMORY_AFFINITY:
+			if (subtable->Length < sizeof(*srat_mem))
+				break;
+
+			srat_mem = (ACPI_SRAT_MEM_AFFINITY *)subtable;
+			nodeid = srat_mem->ProximityDomain;
+
+			mementry = mem_alloc();
+			if (mementry == NULL)
+				goto nomem;
+			MEM_ADD(mementry);
+
+			mementry->mem.nodeid = nodeid;
+			mementry->mem.baseaddress = srat_mem->BaseAddress;
+			mementry->mem.length = srat_mem->Length;
+			mementry->mem.flags = srat_mem->Flags;
+			break;
+
+		case ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY:
+			if (subtable->Length < sizeof(*srat_x2apic))
+				break;
+
+			srat_x2apic = (ACPI_SRAT_X2APIC_CPU_AFFINITY *)subtable;
+			nodeid = srat_x2apic->ProximityDomain;
+
+			/* This table entry overrides
+			 * ACPI_SRAT_TYPE_CPU_AFFINITY.
+			 */
+			if (!ignore_cpu_affinity) {
+				struct cpulist *citer;
+				while ((citer = CPU_FIRST) != NULL) {
+					CPU_REM(citer);
+					cpu_free(citer);
+				}
+				ignore_cpu_affinity = true;
+			}
+
+			cpuentry = cpu_alloc();
+			if (cpuentry == NULL)
+				goto nomem;
+			CPU_ADD(cpuentry);
+
+			cpuentry->cpu.nodeid = nodeid;
+			cpuentry->cpu.apicid = srat_x2apic->ApicId;
+			cpuentry->cpu.clockdomain = srat_x2apic->ClockDomain;
+			cpuentry->cpu.flags = srat_x2apic->Flags;
+			break;
+
+		case ACPI_SRAT_TYPE_RESERVED:
+			printf("ACPI SRAT subtable reserved, length: 0x%x\n",
+				subtable->Length);
+			break;
+
+		default:
+			/* Unknown subtable type: skip it. */
+			break;
+		}
+	}
+
+	return 0;
+
+nomem:
+	/* Do not leave half-built lists behind. */
+	while ((cpuentry = CPU_FIRST) != NULL) {
+		CPU_REM(cpuentry);
+		cpu_free(cpuentry);
+	}
+	while ((mementry = MEM_FIRST) != NULL) {
+		MEM_REM(mementry);
+		mem_free(mementry);
+	}
+	return ENOMEM;
+}
+
+/*
+ * Post-process the parsed lists.
+ *
+ * Returns 0 on success or ENOMEM if a placeholder entry cannot be
+ * allocated; the lists are left as they are in that case.
+ */
+static int
+acpisrat_quirks(void)
+{
+	struct cpulist *citer;
+	struct memlist *mem, *miter;
+
+	/* Some sanity checks. */
+
+	/* Deal with holes in the memory nodes.
+	 * BIOS doesn't enlist memory nodes which
+	 * don't have any memory modules plugged in.
+	 * This behaviour has been observed on AMD machines.
+	 *
+	 * Do that by searching for CPUs in NUMA nodes
+	 * which don't exist in the memory and then insert
+	 * a zero memory range for the missing node.
+	 */
+	CPU_FOREACH(citer) {
+		mem = mem_get(citer->cpu.nodeid);
+		if (mem != NULL)
+			continue;
+		mem = mem_alloc();
+		if (mem == NULL)
+			return ENOMEM;
+		mem->mem.nodeid = citer->cpu.nodeid;
+		/* all other fields are already zero filled */
+
+		/* Find the first entry that does not sort before us. */
+		MEM_FOREACH(miter) {
+			if (miter->mem.nodeid >= citer->cpu.nodeid)
+				break;
+		}
+
+		/*
+		 * miter is NULL when the list is empty or every entry
+		 * has a smaller node id; the placeholder then goes to
+		 * the tail instead of being dropped.
+		 */
+		if (miter != NULL)
+			MEM_ADD_BEFORE(mem, miter);
+		else
+			MEM_ADD(mem);
+	}
+
+	return 0;
+}
+
+/*
+ * Locate the SRAT and build the node/cpu/memory arrays from it.
+ * Returns EEXIST when acpisrat_exist() reports no usable table,
+ * otherwise whatever acpisrat_refresh() returns.
+ */
+int
+acpisrat_init(void)
+{
+	if (acpisrat_exist())
+		return acpisrat_refresh();
+
+	return EEXIST;
+}
+
+/*
+ * (Re)build node_array, cpu_array and mem_array from the SRAT.
+ *
+ * The table is first parsed into temporary lists; only once that has
+ * succeeded is the previously published data released and replaced.
+ * The temporary lists are always emptied before returning.
+ *
+ * Returns 0 on success or an errno value.  If an allocation fails
+ * after the old data has been released, the partially built arrays
+ * are freed again and acpisrat_nodes() reports 0.
+ */
+int
+acpisrat_refresh(void)
+{
+	int rc;
+	uint32_t i, j, k;
+	struct acpisrat_node *node;
+	struct cpulist *citer;
+	struct memlist *miter;
+	uint32_t cnodes = 0, mnodes = 0;
+
+	CPU_INIT;
+	MEM_INIT;
+
+	rc = acpisrat_parse();
+	if (rc)
+		goto out;
+
+	rc = acpisrat_quirks();
+	if (rc)
+		goto out;
+
+	/* cleanup resources */
+	rc = acpisrat_exit();
+	if (rc)
+		goto out;
+
+	nnodes = 0;
+	ncpus = 0;
+	CPU_FOREACH(citer) {
+		cnodes = MAX(citer->cpu.nodeid, cnodes);
+		ncpus++;
+	}
+
+	nmems = 0;
+	MEM_FOREACH(miter) {
+		mnodes = MAX(miter->mem.nodeid, mnodes);
+		nmems++;
+	}
+
+	/* Node ids index node_array directly, so size it by the largest. */
+	nnodes = MAX(cnodes, mnodes) + 1;
+
+	node_array = kmem_zalloc(nnodes * sizeof(struct acpisrat_node),
+	    KM_NOSLEEP);
+	if (node_array == NULL) {
+		rc = ENOMEM;
+		goto fail;
+	}
+
+	/* Never ask kmem for zero bytes; an empty array stays NULL. */
+	if (ncpus != 0) {
+		cpu_array = kmem_zalloc(ncpus * sizeof(struct acpisrat_cpu),
+		    KM_NOSLEEP);
+		if (cpu_array == NULL) {
+			rc = ENOMEM;
+			goto fail;
+		}
+	}
+
+	if (nmems != 0) {
+		mem_array = kmem_zalloc(nmems * sizeof(struct acpisrat_mem),
+		    KM_NOSLEEP);
+		if (mem_array == NULL) {
+			rc = ENOMEM;
+			goto fail;
+		}
+	}
+
+	/* Flatten the lists and count the entries per node. */
+	i = 0;
+	CPU_FOREACH(citer) {
+		memcpy(&cpu_array[i], &citer->cpu, sizeof(struct acpisrat_cpu));
+		i++;
+		node_array[citer->cpu.nodeid].ncpus++;
+	}
+
+	i = 0;
+	MEM_FOREACH(miter) {
+		memcpy(&mem_array[i], &miter->mem, sizeof(struct acpisrat_mem));
+		i++;
+		node_array[miter->mem.nodeid].nmems++;
+	}
+
+	/* Give every node its own pointer arrays into the flat arrays. */
+	for (i = 0; i < nnodes; i++) {
+		node = &node_array[i];
+		node->nodeid = i;
+
+		if (node->ncpus != 0) {
+			node->cpu = kmem_zalloc(node->ncpus *
+			    sizeof(struct acpisrat_cpu *), KM_NOSLEEP);
+			if (node->cpu == NULL) {
+				rc = ENOMEM;
+				goto fail;
+			}
+		}
+		if (node->nmems != 0) {
+			node->mem = kmem_zalloc(node->nmems *
+			    sizeof(struct acpisrat_mem *), KM_NOSLEEP);
+			if (node->mem == NULL) {
+				rc = ENOMEM;
+				goto fail;
+			}
+		}
+
+		k = 0;
+		for (j = 0; j < ncpus; j++) {
+			if (cpu_array[j].nodeid != i)
+				continue;
+			node->cpu[k] = &cpu_array[j];
+			k++;
+		}
+
+		k = 0;
+		for (j = 0; j < nmems; j++) {
+			if (mem_array[j].nodeid != i)
+				continue;
+			node->mem[k] = &mem_array[j];
+			k++;
+		}
+	}
+
+	rc = 0;
+	goto out;
+
+fail:
+	/* Drop the partially built arrays; this also zeroes the counters. */
+	(void)acpisrat_exit();
+
+out:
+	/* The temporary lists are not needed past this point. */
+	while ((citer = CPU_FIRST) != NULL) {
+		CPU_REM(citer);
+		cpu_free(citer);
+	}
+
+	while ((miter = MEM_FIRST) != NULL) {
+		MEM_REM(miter);
+		mem_free(miter);
+	}
+
+	return rc;
+}
+
+
+/*
+ * Release node_array (including the per-node pointer arrays),
+ * cpu_array and mem_array, and reset the counters to zero.
+ * Always returns 0.
+ */
+int
+acpisrat_exit(void)
+{
+	struct acpisrat_node *n;
+	int idx;
+
+	if (node_array != NULL) {
+		for (idx = 0; idx < nnodes; idx++) {
+			n = &node_array[idx];
+
+			/* Sizes must match what was allocated per node. */
+			if (n->cpu != NULL) {
+				kmem_free(n->cpu,
+				    n->ncpus * sizeof(struct acpisrat_cpu *));
+			}
+			if (n->mem != NULL) {
+				kmem_free(n->mem,
+				    n->nmems * sizeof(struct acpisrat_mem *));
+			}
+		}
+		kmem_free(node_array, nnodes * sizeof(struct acpisrat_node));
+		node_array = NULL;
+	}
+
+	if (cpu_array != NULL) {
+		kmem_free(cpu_array, ncpus * sizeof(struct acpisrat_cpu));
+		cpu_array = NULL;
+	}
+
+	if (mem_array != NULL) {
+		kmem_free(mem_array, nmems * sizeof(struct acpisrat_mem));
+		mem_array = NULL;
+	}
+
+	nnodes = 0;
+	ncpus = 0;
+	nmems = 0;
+
+	return 0;
+}
+
+
+/*
+ * Print the node count and, per node, every CPU and memory range
+ * through aprint_debug(), using only the public accessors.
+ */
+void
+acpisrat_dump(void)
+{
+	struct acpisrat_cpu cpuinfo;
+	struct acpisrat_mem meminfo;
+	uint32_t node, idx, count, nodecnt;
+
+	nodecnt = acpisrat_nodes();
+	aprint_debug("SRAT: %u NUMA nodes\n", nodecnt);
+
+	for (node = 0; node < nodecnt; node++) {
+		/* CPUs of this node. */
+		count = acpisrat_node_cpus(node);
+		for (idx = 0; idx < count; idx++) {
+			acpisrat_cpu(node, idx, &cpuinfo);
+			aprint_debug("SRAT: node %u cpu %u "
+			    "(apic %u, sapic %u, flags %u, clockdomain %u)\n",
+			    cpuinfo.nodeid, idx, cpuinfo.apicid,
+			    cpuinfo.sapiceid, cpuinfo.flags,
+			    cpuinfo.clockdomain);
+		}
+
+		/* Memory ranges of this node; the end printed is base + length. */
+		count = acpisrat_node_memoryranges(node);
+		for (idx = 0; idx < count; idx++) {
+			acpisrat_mem(node, idx, &meminfo);
+			aprint_debug("SRAT: node %u memory range %u (0x%"
+			    PRIx64" - 0x%"PRIx64" flags %u)\n",
+			    meminfo.nodeid, idx, meminfo.baseaddress,
+			    meminfo.baseaddress + meminfo.length,
+			    meminfo.flags);
+		}
+	}
+}
+
+/* Report how many entries node_array currently has. */
+uint32_t
+acpisrat_nodes(void)
+{
+	const uint32_t count = nnodes;
+
+	return count;
+}
+
+/*
+ * Number of CPUs recorded for the given node.  Returns 0 for a
+ * CPU-less node, for a node id outside the parsed range and when no
+ * data has been built (or it has been released again).
+ */
+uint32_t
+acpisrat_node_cpus(acpisrat_nodeid_t nodeid)
+{
+	if (node_array == NULL || nodeid >= nnodes)
+		return 0;
+
+	return node_array[nodeid].ncpus;
+}
+
+/*
+ * Number of memory ranges recorded for the given node.  Returns 0 for
+ * a node id outside the parsed range and when no data has been built
+ * (or it has been released again).
+ */
+uint32_t
+acpisrat_node_memoryranges(acpisrat_nodeid_t nodeid)
+{
+	if (node_array == NULL || nodeid >= nnodes)
+		return 0;
+
+	return node_array[nodeid].nmems;
+}
+
+/*
+ * Copy the description of CPU number cpunum of the given node into *c.
+ *
+ * nodeid must be below acpisrat_nodes() and cpunum below
+ * acpisrat_node_cpus(nodeid); the assertions catch violations of that
+ * contract before the arrays are indexed.
+ */
+void
+acpisrat_cpu(acpisrat_nodeid_t nodeid, uint32_t cpunum,
+    struct acpisrat_cpu *c)
+{
+	KASSERT(c != NULL);
+	KASSERT(node_array != NULL);
+	KASSERT(nodeid < nnodes);
+	KASSERT(cpunum < node_array[nodeid].ncpus);
+
+	memcpy(c, node_array[nodeid].cpu[cpunum],
+	    sizeof(struct acpisrat_cpu));
+}
+
+/*
+ * Copy the description of memory range number memrange of the given
+ * node into *mem.
+ *
+ * nodeid must be below acpisrat_nodes() and memrange below
+ * acpisrat_node_memoryranges(nodeid); the assertions catch violations
+ * of that contract before the arrays are indexed.
+ */
+void
+acpisrat_mem(acpisrat_nodeid_t nodeid, uint32_t memrange,
+    struct acpisrat_mem *mem)
+{
+	KASSERT(mem != NULL);
+	KASSERT(node_array != NULL);
+	KASSERT(nodeid < nnodes);
+	KASSERT(memrange < node_array[nodeid].nmems);
+
+	memcpy(mem, node_array[nodeid].mem[memrange],
+	    sizeof(struct acpisrat_mem));
+}
Index: src/sys/dev/acpi/acpi_srat.h
diff -u /dev/null src/sys/dev/acpi/acpi_srat.h:1.1
--- /dev/null	Wed Nov 25 13:17:06 2009
+++ src/sys/dev/acpi/acpi_srat.h	Wed Nov 25 13:17:06 2009
@@ -0,0 +1,98 @@
+/* $NetBSD $ */
+
+/*
+ * Copyright (c) 2009 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Christoph Egger.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef ACPI_SRAT_H
+#define ACPI_SRAT_H
+
+/* NUMA node id; taken from the SRAT proximity domain fields. */
+typedef uint32_t acpisrat_nodeid_t;
+
+/*
+ * One CPU as described by an SRAT CPU affinity or x2APIC affinity
+ * entry.  sapiceid is only filled in from the former and stays zero
+ * for x2APIC entries; flags is the raw Flags field of the entry.
+ */
+struct acpisrat_cpu {
+	acpisrat_nodeid_t nodeid;
+	uint32_t apicid;
+	uint32_t sapiceid;
+	uint32_t flags;
+
+	/* clockdomain has a meaningful value when the ACPI MADT table has
+	 * ACPI_MADT_TYPE_LOCAL_X2APIC and/or ACPI_MADT_TYPE_LOCAL_X2APIC_NMI
+	 * entries or ACPI CPU devices have a _CDM.
+	 */
+	uint32_t clockdomain;
+};
+
+/*
+ * One memory range as described by an SRAT memory affinity entry;
+ * flags is the raw Flags field of the entry.  A node that has CPUs
+ * but no memory entry in the table is given a single all-zero range
+ * (baseaddress, length and flags 0) by the parser.
+ */
+struct acpisrat_mem {
+	acpisrat_nodeid_t nodeid;
+	uint64_t baseaddress;
+	uint64_t length;
+	uint32_t flags;
+};
+
+/* Returns true if the ACPI SRAT table is available.
+ *
+ * If the table does not exist, all functions below
+ * have undefined behaviour.
+ */
+bool acpisrat_exist(void);
+
+/* Initializes the parser. Must be the first function
+ * being called when the table is available.
+ * Returns EEXIST if acpisrat_exist() fails, otherwise
+ * the result of acpisrat_refresh().
+ */
+int acpisrat_init(void);
+
+/* Re-parses the ACPI SRAT table. Useful after
+ * hotplugging cpu or RAM.
+ * Returns 0 on success or ENOMEM.
+ */
+int acpisrat_refresh(void);
+
+/* Frees allocated memory. Should be called
+ * when acpisrat is no longer of any use.
+ * Always returns 0.
+ */
+int acpisrat_exit(void);
+
+/* Prints all nodes with their cpus and memory
+ * ranges via aprint_debug().
+ */
+void acpisrat_dump(void);
+
+/* Get number of NUMA nodes */
+uint32_t acpisrat_nodes(void);
+
+/* Get number of cpus in the node.
+ * 0 means this is a cpu-less node.
+ */
+uint32_t acpisrat_node_cpus(acpisrat_nodeid_t);
+
+/* Get number of memory ranges in the node.
+ * 0 means this node has no RAM.
+ */
+uint32_t acpisrat_node_memoryranges(acpisrat_nodeid_t);
+
+/* Retrieve cpu and memory info. The result is copied
+ * into the structure supplied by the caller.
+ */
+void acpisrat_cpu(acpisrat_nodeid_t, uint32_t cpunum, struct acpisrat_cpu *);
+void acpisrat_mem(acpisrat_nodeid_t, uint32_t memrange, struct acpisrat_mem *);
+
+#endif /* ACPI_SRAT_H */

Reply via email to