Module Name: src
Committed By: cegger
Date: Wed Nov 25 13:17:06 UTC 2009
Modified Files:
src/sys/dev/acpi: files.acpi
Added Files:
src/sys/dev/acpi: acpi_srat.c acpi_srat.h
Log Message:
Add ACPI SRAT parser. This is a part of NUMA support.
Tested on 1-node, 2-node and 8-node machines.
Patch presented on tech-kern@, port-i386@ and port-amd64@.
No comments.
To generate a diff of this commit:
cvs rdiff -u -r0 -r1.1 src/sys/dev/acpi/acpi_srat.c \
src/sys/dev/acpi/acpi_srat.h
cvs rdiff -u -r1.60 -r1.61 src/sys/dev/acpi/files.acpi
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: src/sys/dev/acpi/files.acpi
diff -u src/sys/dev/acpi/files.acpi:1.60 src/sys/dev/acpi/files.acpi:1.61
--- src/sys/dev/acpi/files.acpi:1.60 Fri Oct 2 16:47:52 2009
+++ src/sys/dev/acpi/files.acpi Wed Nov 25 13:17:06 2009
@@ -1,4 +1,4 @@
-# $NetBSD: files.acpi,v 1.60 2009/10/02 16:47:52 jmcneill Exp $
+# $NetBSD: files.acpi,v 1.61 2009/11/25 13:17:06 cegger Exp $
include "dev/acpi/acpica/files.acpica"
@@ -20,6 +20,7 @@
file dev/acpi/acpi_quirks.c acpi
file dev/acpi/acpi_timer.c acpi
file dev/acpi/acpi_wakedev.c acpi
+file dev/acpi/acpi_srat.c acpi
# ACPI/apm emulation.
attach apm at acpiapmbus with acpiapm: sysmon_envsys
Added files:
Index: src/sys/dev/acpi/acpi_srat.c
diff -u /dev/null src/sys/dev/acpi/acpi_srat.c:1.1
--- /dev/null Wed Nov 25 13:17:06 2009
+++ src/sys/dev/acpi/acpi_srat.c Wed Nov 25 13:17:06 2009
@@ -0,0 +1,513 @@
+/* $NetBSD $ */
+
+/*
+ * Copyright (c) 2009 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Christoph Egger.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD $");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+
+#include <sys/kmem.h>
+
+#include <dev/acpi/acpica.h>
+#include <dev/acpi/acpivar.h>
+#include <dev/acpi/acpi_srat.h>
+
+static ACPI_TABLE_SRAT *srat;	/* SRAT obtained via AcpiGetTable(); set in acpisrat_exist() */
+
+struct acpisrat_node {	/* Per-node view; cpu[] and mem[] point into cpu_array and mem_array */
+ acpisrat_nodeid_t nodeid;	/* Node id; set to this entry's index in node_array */
+ uint32_t ncpus; /* Number of cpus in this node */
+ struct acpisrat_cpu **cpu; /* Array of ncpus pointers to this node's cpus */
+ uint32_t nmems; /* Number of memory ranges in this node */
+ struct acpisrat_mem **mem; /* Array of nmems pointers to this node's memory ranges */
+};
+
+static uint32_t nnodes; /* Number of NUMA nodes (highest node id seen + 1) */
+static struct acpisrat_node *node_array; /* Array of NUMA nodes, indexed by node id */
+static uint32_t ncpus; /* Number of CPUs */
+static struct acpisrat_cpu *cpu_array; /* Array of cpus */
+static uint32_t nmems; /* Number of Memory ranges */
+static struct acpisrat_mem *mem_array; /* Array of memory ranges */
+
+
+struct cpulist {	/* Element of the temporary CPU list used while (re)parsing */
+ struct acpisrat_cpu cpu;	/* CPU affinity data taken from one SRAT entry */
+ TAILQ_ENTRY(cpulist) entry;	/* Linkage on cpulisthead */
+};
+
+/* Head of the temporary list of CPU entries collected while parsing. */
+static TAILQ_HEAD(, cpulist) cpulisthead;
+
+/*
+ * Shorthands for the TAILQ operations on cpulisthead.
+ * None of them ends in a semicolon; the caller supplies it, so that
+ * "CPU_INIT;" expands to exactly one statement.
+ */
+#define CPU_INIT		TAILQ_INIT(&cpulisthead)
+#define CPU_FOREACH(cpu)	TAILQ_FOREACH(cpu, &cpulisthead, entry)
+#define CPU_ADD(cpu)		TAILQ_INSERT_TAIL(&cpulisthead, cpu, entry)
+#define CPU_REM(cpu)		TAILQ_REMOVE(&cpulisthead, cpu, entry)
+#define CPU_FIRST		TAILQ_FIRST(&cpulisthead)
+
+
+struct memlist {	/* Element of the temporary memory range list used while (re)parsing */
+ struct acpisrat_mem mem;	/* Memory affinity data taken from one SRAT entry */
+ TAILQ_ENTRY(memlist) entry;	/* Linkage on memlisthead */
+};
+
+static TAILQ_HEAD(, memlist) memlisthead;	/* Temporary list of memory ranges collected while parsing */
+
+#define MEM_INIT TAILQ_INIT(&memlisthead)	/* Initialize the list head */
+#define MEM_FOREACH(mem) TAILQ_FOREACH(mem, &memlisthead, entry)	/* Iterate over all elements */
+#define MEM_ADD(mem) TAILQ_INSERT_TAIL(&memlisthead, mem, entry)	/* Append mem at the tail */
+#define MEM_ADD_BEFORE(mem, b) TAILQ_INSERT_BEFORE(b, mem, entry)	/* Link mem in front of element b */
+#define MEM_REM(mem) TAILQ_REMOVE(&memlisthead, mem, entry)	/* Unlink mem from the list */
+#define MEM_FIRST TAILQ_FIRST(&memlisthead)	/* First element, NULL if the list is empty */
+
+
+/*
+ * Allocate a zero-filled CPU list element without sleeping.
+ * Callers check the result for NULL.
+ */
+static struct cpulist *
+cpu_alloc(void)
+{
+	struct cpulist *c;
+
+	c = kmem_zalloc(sizeof(*c), KM_NOSLEEP);
+	return c;
+}
+
+/* Give a CPU list element obtained from cpu_alloc() back to kmem. */
+static void
+cpu_free(struct cpulist *c)
+{
+	kmem_free(c, sizeof(*c));
+}
+
+#if 0
+/*
+ * Look up the first CPU list element that belongs to the given node.
+ * Returns NULL when the node has no element on the list.
+ * Compiled out; nothing in this file calls it.
+ */
+static struct cpulist *
+cpu_get(acpisrat_nodeid_t nodeid)
+{
+	struct cpulist *c = CPU_FIRST;
+
+	while (c != NULL && c->cpu.nodeid != nodeid)
+		c = TAILQ_NEXT(c, entry);
+
+	return c;
+}
+#endif
+
+/*
+ * Allocate a zero-filled memory range list element without sleeping.
+ * Callers check the result for NULL.
+ */
+static struct memlist *
+mem_alloc(void)
+{
+	struct memlist *m;
+
+	m = kmem_zalloc(sizeof(*m), KM_NOSLEEP);
+	return m;
+}
+
+/* Give a memory range list element obtained from mem_alloc() back to kmem. */
+static void
+mem_free(struct memlist *m)
+{
+	kmem_free(m, sizeof(*m));
+}
+
+/*
+ * Look up the first memory range list element that belongs to the
+ * given node.  Returns NULL when the node has no element on the list.
+ */
+static struct memlist *
+mem_get(acpisrat_nodeid_t nodeid)
+{
+	struct memlist *m = MEM_FIRST;
+
+	while (m != NULL && m->mem.nodeid != nodeid)
+		m = TAILQ_NEXT(m, entry);
+
+	return m;
+}
+
+
+/*
+ * Ask ACPICA for the first SRAT instance and remember it in `srat'.
+ *
+ * Returns false when the lookup fails, when no table pointer is handed
+ * back or when the header length is 0xffffffff; true otherwise.
+ */
+bool
+acpisrat_exist(void)
+{
+	ACPI_TABLE_HEADER *hdr;
+	ACPI_STATUS status;
+
+	status = AcpiGetTable(ACPI_SIG_SRAT, 1, &hdr);
+	if (ACPI_FAILURE(status))
+		return false;
+
+	/* Reject a missing header or one with an all-ones length. */
+	if (hdr == NULL || hdr->Length == 0xffffffff)
+		return false;
+
+	srat = (ACPI_TABLE_SRAT *)hdr;
+	return true;
+}
+
+/*
+ * Walk the subtables of the SRAT and append one element to the temporary
+ * CPU or memory list for every affinity entry found.
+ *
+ * X2APIC CPU affinity entries override plain CPU affinity entries: when
+ * the first X2APIC entry is seen, all CPU elements collected so far are
+ * dropped and plain CPU affinity entries that follow are ignored.
+ *
+ * Returns 0 on success, ENOMEM when a list element cannot be allocated
+ * and EINVAL when a subtable length does not fit the table.  On error
+ * the elements collected so far stay on the lists.
+ */
+static int
+acpisrat_parse(void)
+{
+	ACPI_SUBTABLE_HEADER *subtable;
+	ACPI_SRAT_CPU_AFFINITY *srat_cpu;
+	ACPI_SRAT_MEM_AFFINITY *srat_mem;
+	ACPI_SRAT_X2APIC_CPU_AFFINITY *srat_x2apic;
+
+	acpisrat_nodeid_t nodeid;
+	struct cpulist *cpuentry;
+	struct memlist *mementry;
+	uint32_t srat_pos, srat_len, remaining;
+	bool ignore_cpu_affinity = false;
+
+	KASSERT(srat != NULL);
+
+	/* Content starts right after the header */
+	srat_pos = sizeof(ACPI_TABLE_SRAT);
+	srat_len = srat->Header.Length;
+
+	while (srat_pos < srat_len) {
+		remaining = srat_len - srat_pos;
+		subtable = (ACPI_SUBTABLE_HEADER *)((char *)srat + srat_pos);
+
+		/*
+		 * A subtable shorter than its own header (in particular a
+		 * zero length) would keep us on the same position forever;
+		 * one longer than the rest of the table would make us read
+		 * beyond the table.
+		 */
+		if (remaining < sizeof(*subtable) ||
+		    subtable->Length < sizeof(*subtable) ||
+		    subtable->Length > remaining) {
+			printf("ACPI SRAT: malformed subtable at offset 0x%x\n",
+			    srat_pos);
+			return EINVAL;
+		}
+		srat_pos += subtable->Length;
+
+		switch (subtable->Type) {
+		case ACPI_SRAT_TYPE_CPU_AFFINITY:
+			if (ignore_cpu_affinity)
+				continue;
+
+			srat_cpu = (ACPI_SRAT_CPU_AFFINITY *)subtable;
+			/* Widen before shifting; the bytes promote to int. */
+			nodeid =
+			    ((uint32_t)srat_cpu->ProximityDomainHi[2] << 24) |
+			    ((uint32_t)srat_cpu->ProximityDomainHi[1] << 16) |
+			    ((uint32_t)srat_cpu->ProximityDomainHi[0] << 8) |
+			    (uint32_t)srat_cpu->ProximityDomainLo;
+
+			cpuentry = cpu_alloc();
+			if (cpuentry == NULL)
+				return ENOMEM;
+			CPU_ADD(cpuentry);
+
+			cpuentry->cpu.nodeid = nodeid;
+			cpuentry->cpu.apicid = srat_cpu->ApicId;
+			cpuentry->cpu.sapiceid = srat_cpu->LocalSapicEid;
+			cpuentry->cpu.flags = srat_cpu->Flags;
+			cpuentry->cpu.clockdomain = srat_cpu->ClockDomain;
+			break;
+
+		case ACPI_SRAT_TYPE_MEMORY_AFFINITY:
+			srat_mem = (ACPI_SRAT_MEM_AFFINITY *)subtable;
+			nodeid = srat_mem->ProximityDomain;
+
+			mementry = mem_alloc();
+			if (mementry == NULL)
+				return ENOMEM;
+			MEM_ADD(mementry);
+
+			mementry->mem.nodeid = nodeid;
+			mementry->mem.baseaddress = srat_mem->BaseAddress;
+			mementry->mem.length = srat_mem->Length;
+			mementry->mem.flags = srat_mem->Flags;
+			break;
+
+		case ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY:
+			srat_x2apic = (ACPI_SRAT_X2APIC_CPU_AFFINITY *)subtable;
+			nodeid = srat_x2apic->ProximityDomain;
+
+			/* This table entry overrides
+			 * ACPI_SRAT_TYPE_CPU_AFFINITY.
+			 */
+			if (!ignore_cpu_affinity) {
+				struct cpulist *citer;
+				while ((citer = CPU_FIRST) != NULL) {
+					CPU_REM(citer);
+					cpu_free(citer);
+				}
+				ignore_cpu_affinity = true;
+			}
+
+			cpuentry = cpu_alloc();
+			if (cpuentry == NULL)
+				return ENOMEM;
+			CPU_ADD(cpuentry);
+
+			cpuentry->cpu.nodeid = nodeid;
+			cpuentry->cpu.apicid = srat_x2apic->ApicId;
+			cpuentry->cpu.clockdomain = srat_x2apic->ClockDomain;
+			cpuentry->cpu.flags = srat_x2apic->Flags;
+			break;
+
+		case ACPI_SRAT_TYPE_RESERVED:
+			printf("ACPI SRAT subtable reserved, length: 0x%x\n",
+			    subtable->Length);
+			break;
+
+		default:
+			/* Unknown subtable types are skipped silently. */
+			break;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Post-process the temporary lists after parsing.
+ *
+ * Returns 0 on success or ENOMEM when a placeholder element cannot be
+ * allocated.
+ */
+static int
+acpisrat_quirks(void)
+{
+	struct cpulist *citer;
+	struct memlist *mem, *miter;
+
+	/* Some sanity checks. */
+
+	/* Deal with holes in the memory nodes.
+	 * BIOS doesn't enlist memory nodes which
+	 * don't have any memory modules plugged in.
+	 * This behaviour has been observed on AMD machines.
+	 *
+	 * Do that by searching for CPUs in NUMA nodes
+	 * which don't exist in the memory and then insert
+	 * a zero memory range for the missing node.
+	 */
+	CPU_FOREACH(citer) {
+		if (mem_get(citer->cpu.nodeid) != NULL)
+			continue;
+
+		mem = mem_alloc();
+		if (mem == NULL)
+			return ENOMEM;
+		mem->mem.nodeid = citer->cpu.nodeid;
+		/* all other fields are already zero filled */
+
+		/* Find the first range that belongs to a higher node. */
+		MEM_FOREACH(miter) {
+			if (miter->mem.nodeid >= citer->cpu.nodeid)
+				break;
+		}
+
+		/*
+		 * miter is NULL when the list is empty or every range
+		 * belongs to a lower node.  The placeholder then goes to
+		 * the tail; without this it would never be linked, would
+		 * leak, and the next cpu of the same node would allocate
+		 * yet another one.
+		 */
+		if (miter != NULL) {
+			MEM_ADD_BEFORE(mem, miter);
+		} else {
+			MEM_ADD(mem);
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Locate the SRAT and build the topology arrays from it.
+ * Returns EEXIST when acpisrat_exist() reports no table, otherwise
+ * whatever acpisrat_refresh() returns.
+ */
+int
+acpisrat_init(void)
+{
+	return acpisrat_exist() ? acpisrat_refresh() : EEXIST;
+}
+
+/*
+ * Release every element of the temporary CPU and memory lists.
+ */
+static void
+acpisrat_free_lists(void)
+{
+	struct cpulist *c;
+	struct memlist *m;
+
+	while ((c = CPU_FIRST) != NULL) {
+		CPU_REM(c);
+		cpu_free(c);
+	}
+
+	while ((m = MEM_FIRST) != NULL) {
+		MEM_REM(m);
+		mem_free(m);
+	}
+}
+
+/*
+ * (Re)build node_array, cpu_array and mem_array from the SRAT.
+ *
+ * The table is first parsed into temporary lists.  Only when that
+ * succeeded are the arrays of a previous run released and new ones
+ * built, so a parse failure leaves the previous arrays untouched.
+ * When building the new arrays fails, everything allocated so far is
+ * released again and the module is left in the state acpisrat_exit()
+ * establishes.  The temporary lists are emptied on every path.
+ *
+ * Returns 0 on success or an errno value (ENOMEM on allocation failure,
+ * otherwise whatever the parse, quirk or exit step returned).
+ */
+int
+acpisrat_refresh(void)
+{
+	int rc;
+	uint32_t i, j, k;
+	struct cpulist *citer;
+	struct memlist *miter;
+	struct acpisrat_node *node;
+	uint32_t cnodes = 0, mnodes = 0;
+
+	CPU_INIT;
+	MEM_INIT;
+
+	rc = acpisrat_parse();
+	if (rc == 0)
+		rc = acpisrat_quirks();
+	if (rc != 0) {
+		acpisrat_free_lists();
+		return rc;
+	}
+
+	/* cleanup resources */
+	rc = acpisrat_exit();
+	if (rc != 0) {
+		acpisrat_free_lists();
+		return rc;
+	}
+
+	nnodes = 0;
+	ncpus = 0;
+	CPU_FOREACH(citer) {
+		cnodes = MAX(citer->cpu.nodeid, cnodes);
+		ncpus++;
+	}
+
+	nmems = 0;
+	MEM_FOREACH(miter) {
+		mnodes = MAX(miter->mem.nodeid, mnodes);
+		nmems++;
+	}
+
+	nnodes = MAX(cnodes, mnodes) + 1;
+
+	node_array = kmem_zalloc(nnodes * sizeof(*node_array), KM_NOSLEEP);
+	if (node_array == NULL)
+		goto nomem;
+
+	/* Never ask kmem for zero bytes; an empty array stays NULL. */
+	if (ncpus != 0) {
+		cpu_array = kmem_zalloc(ncpus * sizeof(*cpu_array),
+		    KM_NOSLEEP);
+		if (cpu_array == NULL)
+			goto nomem;
+	}
+
+	if (nmems != 0) {
+		mem_array = kmem_zalloc(nmems * sizeof(*mem_array),
+		    KM_NOSLEEP);
+		if (mem_array == NULL)
+			goto nomem;
+	}
+
+	/* Flatten the lists and count the entries of each node. */
+	i = 0;
+	CPU_FOREACH(citer) {
+		memcpy(&cpu_array[i], &citer->cpu, sizeof(struct acpisrat_cpu));
+		i++;
+		node_array[citer->cpu.nodeid].ncpus++;
+	}
+
+	i = 0;
+	MEM_FOREACH(miter) {
+		memcpy(&mem_array[i], &miter->mem, sizeof(struct acpisrat_mem));
+		i++;
+		node_array[miter->mem.nodeid].nmems++;
+	}
+
+	/* Give every node pointers to its own cpus and memory ranges. */
+	for (i = 0; i < nnodes; i++) {
+		node = &node_array[i];
+		node->nodeid = i;
+
+		/* Nodes without cpus or without memory keep a NULL array. */
+		if (node->ncpus != 0) {
+			node->cpu = kmem_zalloc(node->ncpus *
+			    sizeof(struct acpisrat_cpu *), KM_NOSLEEP);
+			if (node->cpu == NULL)
+				goto nomem;
+		}
+		if (node->nmems != 0) {
+			node->mem = kmem_zalloc(node->nmems *
+			    sizeof(struct acpisrat_mem *), KM_NOSLEEP);
+			if (node->mem == NULL)
+				goto nomem;
+		}
+
+		k = 0;
+		for (j = 0; j < ncpus; j++) {
+			if (cpu_array[j].nodeid != i)
+				continue;
+			node->cpu[k] = &cpu_array[j];
+			k++;
+		}
+
+		k = 0;
+		for (j = 0; j < nmems; j++) {
+			if (mem_array[j].nodeid != i)
+				continue;
+			node->mem[k] = &mem_array[j];
+			k++;
+		}
+	}
+
+	acpisrat_free_lists();
+
+	return 0;
+
+nomem:
+	/*
+	 * The counters used by acpisrat_exit() to size its frees are
+	 * already in place, so it can undo a partially built state.
+	 */
+	acpisrat_free_lists();
+	(void)acpisrat_exit();
+	return ENOMEM;
+}
+
+
+/*
+ * Release node_array (including the per-node pointer arrays), cpu_array
+ * and mem_array, then reset the pointers and the three counters.
+ * Always returns 0.
+ */
+int
+acpisrat_exit(void)
+{
+	struct acpisrat_node *node;
+	int idx;
+
+	if (node_array != NULL) {
+		for (idx = 0; idx < nnodes; idx++) {
+			node = &node_array[idx];
+
+			if (node->cpu != NULL) {
+				kmem_free(node->cpu, node->ncpus *
+				    sizeof(struct acpisrat_cpu *));
+			}
+			if (node->mem != NULL) {
+				kmem_free(node->mem, node->nmems *
+				    sizeof(struct acpisrat_mem *));
+			}
+		}
+		kmem_free(node_array, nnodes * sizeof(struct acpisrat_node));
+		node_array = NULL;
+	}
+
+	if (cpu_array != NULL) {
+		kmem_free(cpu_array, ncpus * sizeof(struct acpisrat_cpu));
+		cpu_array = NULL;
+	}
+
+	if (mem_array != NULL) {
+		kmem_free(mem_array, nmems * sizeof(struct acpisrat_mem));
+		mem_array = NULL;
+	}
+
+	/* The sizes above depend on the counters, so clear them last. */
+	nnodes = ncpus = nmems = 0;
+
+	return 0;
+}
+
+
+/*
+ * Print the node count and, for every node, each of its cpus and memory
+ * ranges with aprint_debug().  Uses only the public accessors.
+ */
+void
+acpisrat_dump(void)
+{
+	struct acpisrat_cpu cpu;
+	struct acpisrat_mem range;
+	uint32_t node, idx, count, nodecnt;
+
+	nodecnt = acpisrat_nodes();
+	aprint_debug("SRAT: %u NUMA nodes\n", nodecnt);
+
+	for (node = 0; node < nodecnt; node++) {
+		/* cpus of this node */
+		count = acpisrat_node_cpus(node);
+		for (idx = 0; idx < count; idx++) {
+			acpisrat_cpu(node, idx, &cpu);
+			aprint_debug("SRAT: node %u cpu %u "
+			    "(apic %u, sapic %u, flags %u, clockdomain %u)\n",
+			    cpu.nodeid, idx, cpu.apicid, cpu.sapiceid,
+			    cpu.flags, cpu.clockdomain);
+		}
+
+		/* memory ranges of this node */
+		count = acpisrat_node_memoryranges(node);
+		for (idx = 0; idx < count; idx++) {
+			acpisrat_mem(node, idx, &range);
+			aprint_debug("SRAT: node %u memory range %u (0x%"
+			    PRIx64" - 0x%"PRIx64" flags %u)\n",
+			    range.nodeid, idx, range.baseaddress,
+			    range.baseaddress + range.length, range.flags);
+		}
+	}
+}
+
+uint32_t
+acpisrat_nodes(void)
+{
+ return nnodes;	/* node count computed by acpisrat_refresh(); reset to 0 by acpisrat_exit() */
+}
+
+/*
+ * Return the number of cpus in the given node.
+ * Yields 0 when the node arrays have not been built or when nodeid is
+ * not below the number of nodes, instead of indexing out of bounds.
+ */
+uint32_t
+acpisrat_node_cpus(acpisrat_nodeid_t nodeid)
+{
+	if (node_array == NULL || nodeid >= nnodes)
+		return 0;
+
+	return node_array[nodeid].ncpus;
+}
+
+/*
+ * Return the number of memory ranges in the given node.
+ * Yields 0 when the node arrays have not been built or when nodeid is
+ * not below the number of nodes, instead of indexing out of bounds.
+ */
+uint32_t
+acpisrat_node_memoryranges(acpisrat_nodeid_t nodeid)
+{
+	if (node_array == NULL || nodeid >= nnodes)
+		return 0;
+
+	return node_array[nodeid].nmems;
+}
+
+/*
+ * Copy the description of cpu number `cpunum' of node `nodeid' into *c.
+ *
+ * The caller must pass a node id below acpisrat_nodes() and a cpu
+ * number below the count of cpus in that node; this is asserted in
+ * kernels where KASSERT is active.
+ */
+void
+acpisrat_cpu(acpisrat_nodeid_t nodeid, uint32_t cpunum,
+    struct acpisrat_cpu *c)
+{
+	KASSERT(c != NULL);
+	KASSERT(node_array != NULL);
+	KASSERT(nodeid < nnodes);
+	KASSERT(cpunum < node_array[nodeid].ncpus);
+
+	memcpy(c, node_array[nodeid].cpu[cpunum],
+	    sizeof(struct acpisrat_cpu));
+}
+
+/*
+ * Copy the description of memory range number `memrange' of node
+ * `nodeid' into *mem.
+ *
+ * The caller must pass a node id below acpisrat_nodes() and a range
+ * number below the count of memory ranges in that node; this is
+ * asserted in kernels where KASSERT is active.
+ */
+void
+acpisrat_mem(acpisrat_nodeid_t nodeid, uint32_t memrange,
+    struct acpisrat_mem *mem)
+{
+	KASSERT(mem != NULL);
+	KASSERT(node_array != NULL);
+	KASSERT(nodeid < nnodes);
+	KASSERT(memrange < node_array[nodeid].nmems);
+
+	memcpy(mem, node_array[nodeid].mem[memrange],
+	    sizeof(struct acpisrat_mem));
+}
Index: src/sys/dev/acpi/acpi_srat.h
diff -u /dev/null src/sys/dev/acpi/acpi_srat.h:1.1
--- /dev/null Wed Nov 25 13:17:06 2009
+++ src/sys/dev/acpi/acpi_srat.h Wed Nov 25 13:17:06 2009
@@ -0,0 +1,98 @@
+/* $NetBSD $ */
+
+/*
+ * Copyright (c) 2009 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Christoph Egger.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef ACPI_SRAT_H
+#define ACPI_SRAT_H
+
+typedef uint32_t acpisrat_nodeid_t;	/* NUMA node id; taken from the SRAT proximity domain */
+
+struct acpisrat_cpu {	/* One CPU affinity entry taken from the SRAT */
+ acpisrat_nodeid_t nodeid;	/* Node (proximity domain) the cpu belongs to */
+ uint32_t apicid;	/* ApicId field of the SRAT entry */
+ uint32_t sapiceid;	/* LocalSapicEid field; left 0 for x2APIC entries */
+ uint32_t flags;	/* Flags field of the SRAT entry, copied unmodified */
+
+ /* clockdomain has a meaningful value when the ACPI MADT table has
+ * ACPI_MADT_TYPE_LOCAL_X2APIC and/or ACPI_MADT_TYPE_LOCAL_X2APIC_NMI
+ * entries or when the ACPI CPU device has a _CDM.
+ */
+ uint32_t clockdomain;
+};
+
+struct acpisrat_mem {	/* One memory affinity range taken from the SRAT */
+ acpisrat_nodeid_t nodeid;	/* Node (proximity domain) the range belongs to */
+ uint64_t baseaddress;	/* BaseAddress field of the SRAT entry */
+ uint64_t length;	/* Length field of the SRAT entry; 0 in a placeholder for a node without RAM */
+ uint32_t flags;	/* Flags field of the SRAT entry, copied unmodified */
+};
+
+/* Returns true if the ACPI SRAT table is available and
+ * remembers the table for the functions below.
+ *
+ * If the table does not exist, all functions below
+ * have undefined behaviour.
+ */
+bool acpisrat_exist(void);
+
+/* Initializes the parser. Must be the first function
+ * called when the table is available.
+ * Returns EEXIST when no SRAT is found, otherwise
+ * the result of acpisrat_refresh().
+ */
+int acpisrat_init(void);
+
+/* Re-parses the ACPI SRAT table. Useful after
+ * hotplugging a cpu or RAM.
+ * Returns 0 on success or an errno value such as ENOMEM.
+ */
+int acpisrat_refresh(void);
+
+/* Frees allocated memory. Should be called
+ * when acpisrat is no longer of any use.
+ * Always returns 0.
+ */
+int acpisrat_exit(void);
+
+void acpisrat_dump(void);	/* Print nodes, cpus and memory ranges with aprint_debug() */
+
+/* Get the number of NUMA nodes. Node ids run from 0 to this value - 1. */
+uint32_t acpisrat_nodes(void);
+
+/* Get the number of cpus in the node.
+ * 0 means that this is a cpu-less node.
+ */
+uint32_t acpisrat_node_cpus(acpisrat_nodeid_t);
+
+/* Get the number of memory ranges in the node.
+ * 0 means that this node has no RAM.
+ */
+uint32_t acpisrat_node_memoryranges(acpisrat_nodeid_t);
+
+/* Retrieve cpu and memory info: copy entry cpunum / memrange of the
+ * given node into the caller's structure. The node id and the index
+ * must be valid, i.e. below the counts reported by the functions above.
+ */
+void acpisrat_cpu(acpisrat_nodeid_t, uint32_t cpunum, struct acpisrat_cpu *);
+void acpisrat_mem(acpisrat_nodeid_t, uint32_t memrange, struct acpisrat_mem *);
+
+#endif /* ACPI_SRAT_H */