OK all, I fixed the comments about accessing PCI config space in an interrupt context.
Can someone please give me a thumbs up or down about checking this in? If it's thumbs up, I will do a final retest and mail back a Signed-off-by patch. -- Russell Leidich
diff -uprN /home/rml/linux-2.6.24-rc5.orig/arch/x86/kernel/cpu/mcheck/Makefile /home/rml/linux-2.6.24-rc5/arch/x86/kernel/cpu/mcheck/Makefile --- /home/rml/linux-2.6.24-rc5.orig/arch/x86/kernel/cpu/mcheck/Makefile 2007-12-11 14:30:53.533297000 -0800 +++ /home/rml/linux-2.6.24-rc5/arch/x86/kernel/cpu/mcheck/Makefile 2007-12-11 11:56:20.549185000 -0800 @@ -1,4 +1,4 @@ -obj-y = mce_$(BITS).o therm_throt.o +obj-y = mce_$(BITS).o therm_throt.o mce_thermal.o obj-$(CONFIG_X86_32) += k7.o p4.o p5.o p6.o winchip.o obj-$(CONFIG_X86_MCE_INTEL) += mce_intel_64.o diff -uprN /home/rml/linux-2.6.24-rc5.orig/arch/x86/kernel/cpu/mcheck/mce_amd_64.c /home/rml/linux-2.6.24-rc5/arch/x86/kernel/cpu/mcheck/mce_amd_64.c --- /home/rml/linux-2.6.24-rc5.orig/arch/x86/kernel/cpu/mcheck/mce_amd_64.c 2007-12-11 14:30:53.559298000 -0800 +++ /home/rml/linux-2.6.24-rc5/arch/x86/kernel/cpu/mcheck/mce_amd_64.c 2008-01-08 15:21:44.241310000 -0800 @@ -20,15 +20,18 @@ #include <linux/interrupt.h> #include <linux/kobject.h> #include <linux/notifier.h> +#include <linux/pci_ids.h> #include <linux/sched.h> #include <linux/smp.h> #include <linux/sysdev.h> #include <linux/sysfs.h> #include <asm/apic.h> -#include <asm/mce.h> #include <asm/msr.h> #include <asm/percpu.h> #include <asm/idle.h> +#include <asm/therm_throt.h> +#include <asm/k8.h> +#include "mce_thermal.h" #define PFX "mce_threshold: " #define VERSION "version 1.1.1" @@ -46,6 +49,16 @@ #define MASK_ERR_COUNT_HI 0x00000FFF #define MASK_BLKPTR_LO 0xFF000000 #define MCG_XBLK_ADDR 0xC0000400 +#define THERM_CTL_F3X64 0x64 +#define PSL_APIC_LO_EN 0x80 +#define PSL_APIC_HI_EN 0x40 +#define HTC_ACTIVE 0x10 +#define HTC_EN 1 +#define NB_PCI_DEV_BASE 0x18 + +static int smp_thermal_interrupt_init(void); +static int thermal_apic_init_allowed; +static void thermal_apic_init(void *unused); struct threshold_block { unsigned int block; @@ -657,6 +670,9 @@ static int threshold_cpu_callback(struct case CPU_ONLINE: case CPU_ONLINE_FROZEN: threshold_create_device(cpu); + if (thermal_apic_init_allowed) + smp_call_function_single(cpu, + &thermal_apic_init, NULL, 1, 0); break; case CPU_DEAD: case CPU_DEAD_FROZEN: @@ -665,7 +681,7 @@ static int threshold_cpu_callback(struct default: break; } - out: +out: return NOTIFY_OK; } @@ -688,3 +704,182 @@ static __init int threshold_init_device( } device_initcall(threshold_init_device); + +/* + * AMD-specific thermal interrupt handler. + */ +void amd_smp_thermal_interrupt(void) +{ + unsigned int cpu = smp_processor_id(); + + ack_APIC_irq(); + exit_idle(); + irq_enter(); + /* + * We're here because thermal throttling has just been activated -- not + * deactivated -- hence therm_throt_process(1). + */ + if (therm_throt_process(1)) + mce_log_therm_throt_event(cpu, 1); + /* + * We'll still get subsequent interrupts even if we don't clear the + * status bit in THERM_CTL_F3X64. Take advantage of this fact to avoid + * wasting time and icache by touching PCI config space. + */ + irq_exit(); +} + +/* + * Initialize each northbridge's thermal throttling logic. + */ +static void smp_thermal_northbridge_init(void) +{ + int nb_num; + u32 therm_ctl_f3x64; + + for (nb_num = 0; nb_num < num_k8_northbridges; nb_num++) { + /* + * Configure the thermal interrupt for this northbridge. + */ + pci_read_config_dword(k8_northbridges[nb_num], + THERM_CTL_F3X64, &therm_ctl_f3x64); + therm_ctl_f3x64 |= PSL_APIC_HI_EN | HTC_EN; + therm_ctl_f3x64 &= (~PSL_APIC_LO_EN); + pci_write_config_dword(k8_northbridges[nb_num], + THERM_CTL_F3X64, therm_ctl_f3x64); + printk(KERN_INFO "Northbridge at PCI device 0x%x: " + "thermal monitoring enabled.\n", NB_PCI_DEV_BASE + + nb_num); + } +} + +/* + * Enable the delivery of thermal interrupts via the local APIC. + */ +static void thermal_apic_init(void *unused) +{ + unsigned int apic_lv_therm; + + /* Set up APIC_LVTTHMR to issue THERMAL_APIC_VECTOR. */ + apic_lv_therm = apic_read(APIC_LVTTHMR); + /* + * See if some agent other than this routine has already initialized + * APIC_LVTTHMR, i.e. if it's unmasked, but not equal to the value that + * we would have programmed, had we been here before on this core. + */ + if ((!(apic_lv_therm & APIC_LVT_MASKED)) && ((apic_lv_therm & + (APIC_MODE_MASK | APIC_VECTOR_MASK)) != (APIC_DM_FIXED | + THERMAL_APIC_VECTOR))) { + unsigned int cpu = smp_processor_id(); + + printk(KERN_WARNING "CPU 0x%x: Thermal monitoring not " + "functional.\n", cpu); + if ((apic_lv_therm & APIC_MODE_MASK) == APIC_DM_SMI) + printk(KERN_DEBUG "Thermal interrupts already " + "handled by SMI according to (((local APIC " + "base) + 0x330) bit 0x9).\n"); + else + printk(KERN_DEBUG "Thermal interrupts unexpectedly " + "enabled at (((local APIC base) + 0x330) bit " + "0x10).\n"); + } else { + /* + * Configure the Local Thermal Vector Table Entry to issue + * issue thermal interrupts to THERMAL_APIC_VECTOR. + * + * Start by masking off Delivery Mode and Vector. + */ + apic_lv_therm &= ~(APIC_MODE_MASK | APIC_VECTOR_MASK); + /* Fixed interrupt, masked for now. */ + apic_lv_therm |= APIC_LVT_MASKED | APIC_DM_FIXED | + THERMAL_APIC_VECTOR; + apic_write(APIC_LVTTHMR, apic_lv_therm); + /* + * The Intel thermal kernel code implies that there may be a + * race involving the mask bit, so clear it only now, after + * the other bits have settled. + */ + apic_write(APIC_LVTTHMR, apic_lv_therm & ~APIC_LVT_MASKED); + } +} + +/* +* This function is intended to be called just after thermal throttling has + * been enabled. It warns the user if throttling is already active, which + * could indicate a failed cooling system. It may be the last chance to get + * a warning out before thermal shutdown occurs. + */ +static void smp_thermal_early_throttle_check(void) +{ + int nb_num; + u32 therm_ctl_f3x64; + + for (nb_num = 0; nb_num < num_k8_northbridges; nb_num++) { + /* + * Read back THERM_CTL_F3X64 to check whther HTC_ACTIVE is + * asserted, in which case, warn the user. + */ + pci_read_config_dword(k8_northbridges[nb_num], + THERM_CTL_F3X64, &therm_ctl_f3x64); + if (therm_ctl_f3x64 & HTC_ACTIVE) + printk(KERN_WARNING "High temperature on northbridge " + "at PCI device 0x%x. Throttling enabled.\n", + NB_PCI_DEV_BASE + nb_num); + } +} + +/* + * Determine whether or not the northbridges support thermal throttling + * interrupts. If so, initialize them for receiving the same, then perform + * corresponding APIC initialization on each core. + * + * Due to an erratum involving the thermal sensor, this code does not work + * on any CPU prior to Family 0x10 (AMD Quad Core). + */ +static int smp_thermal_interrupt_init(void) +{ + int nb_num; + + if (num_k8_northbridges == 0) + goto out; + /* + * If any of the northbridges has PCI ID 0x1103, then its thermal + * hardware suffers from an erratum which prevents this code from + * working, so abort. (This implies that it only works on Family + * 0x10 (AMD Quad Core).) + */ + for (nb_num = 0; nb_num < num_k8_northbridges; nb_num++) { + if ((k8_northbridges[nb_num]->device) == 0x1103) + goto out; + } + /* + * Assert that we should log thermal throttling events, whenever + * we eventually get around to enabling them. + */ + atomic_set(&therm_throt_en, 1); + /* + * Bind smp_thermal_interrupt() to amd_smp_thermal_interrupt(). + */ + smp_thermal_interrupt = amd_smp_thermal_interrupt; + smp_thermal_northbridge_init(); + /* + * We've now initialized sufficient fabric to permit the + * initialization of the thermal interrupt APIC vectors, such as + * when a core comes online and calls threshold_cpu_callback(). + */ + thermal_apic_init_allowed = 1; + /* + * Call thermal_apic_init() on each core. + */ + on_each_cpu(&thermal_apic_init, NULL, 1, 0); + smp_thermal_early_throttle_check(); +out: + return 0; +} + +/* + * Ensure that smp_thermal_interrupt_init() executes after num_k8_northbridges + * has been initialized. + */ +late_initcall(smp_thermal_interrupt_init); + diff -uprN /home/rml/linux-2.6.24-rc5.orig/arch/x86/kernel/cpu/mcheck/mce_intel_64.c /home/rml/linux-2.6.24-rc5/arch/x86/kernel/cpu/mcheck/mce_intel_64.c --- /home/rml/linux-2.6.24-rc5.orig/arch/x86/kernel/cpu/mcheck/mce_intel_64.c 2007-12-11 14:30:53.564303000 -0800 +++ /home/rml/linux-2.6.24-rc5/arch/x86/kernel/cpu/mcheck/mce_intel_64.c 2008-01-04 16:25:08.547999000 -0800 @@ -8,24 +8,21 @@ #include <linux/percpu.h> #include <asm/processor.h> #include <asm/msr.h> -#include <asm/mce.h> #include <asm/hw_irq.h> #include <asm/idle.h> #include <asm/therm_throt.h> +#include "mce_thermal.h" -asmlinkage void smp_thermal_interrupt(void) +void intel_smp_thermal_interrupt(void) { __u64 msr_val; ack_APIC_irq(); - exit_idle(); irq_enter(); - rdmsrl(MSR_IA32_THERM_STATUS, msr_val); if (therm_throt_process(msr_val & 1)) mce_log_therm_throt_event(smp_processor_id(), msr_val); - add_pda(irq_thermal_count, 1); irq_exit(); } @@ -75,6 +72,10 @@ static void __cpuinit intel_init_thermal wrmsr(MSR_IA32_MISC_ENABLE, l | (1 << 3), h); l = apic_read(APIC_LVTTHMR); + /* + * Bind smp_thermal_interrupt() to intel_smp_thermal_interrupt(). + */ + smp_thermal_interrupt = intel_smp_thermal_interrupt; apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n", cpu, tm2 ? "TM2" : "TM1"); diff -uprN /home/rml/linux-2.6.24-rc5.orig/arch/x86/kernel/cpu/mcheck/mce_thermal.c /home/rml/linux-2.6.24-rc5/arch/x86/kernel/cpu/mcheck/mce_thermal.c --- /home/rml/linux-2.6.24-rc5.orig/arch/x86/kernel/cpu/mcheck/mce_thermal.c 1969-12-31 16:00:00.000000000 -0800 +++ /home/rml/linux-2.6.24-rc5/arch/x86/kernel/cpu/mcheck/mce_thermal.c 2008-01-04 15:45:40.686775000 -0800 @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2007 Google Inc. + * + * Written by Mike Waychison <[EMAIL PROTECTED]> and Russell Leidich + * <[EMAIL PROTECTED]>. + * + * CPU-independent thermal interrupt handler. + */ + +#include <linux/init.h> +#include <linux/interrupt.h> +#include <asm/hw_irq.h> +#include <asm/idle.h> +#include "mce_thermal.h" + +static void default_smp_thermal_interrupt(void) +{ + if (printk_ratelimit()) + printk(KERN_DEBUG "Unexpected thermal throttling interrupt.\n"); +} + +smp_thermal_interrupt_callback_t smp_thermal_interrupt + = default_smp_thermal_interrupt; + diff -uprN /home/rml/linux-2.6.24-rc5.orig/arch/x86/kernel/cpu/mcheck/mce_thermal.h /home/rml/linux-2.6.24-rc5/arch/x86/kernel/cpu/mcheck/mce_thermal.h --- /home/rml/linux-2.6.24-rc5.orig/arch/x86/kernel/cpu/mcheck/mce_thermal.h 1969-12-31 16:00:00.000000000 -0800 +++ /home/rml/linux-2.6.24-rc5/arch/x86/kernel/cpu/mcheck/mce_thermal.h 2008-01-04 15:53:07.614658000 -0800 @@ -0,0 +1,8 @@ +#include <linux/init.h> +#include <asm/mce.h> + +typedef void (*smp_thermal_interrupt_callback_t)(void); +extern smp_thermal_interrupt_callback_t smp_thermal_interrupt; + +void mce_log_therm_throt_event(unsigned int cpu, __u64 status); + diff -uprN /home/rml/linux-2.6.24-rc5.orig/arch/x86/kernel/entry_64.S /home/rml/linux-2.6.24-rc5/arch/x86/kernel/entry_64.S --- /home/rml/linux-2.6.24-rc5.orig/arch/x86/kernel/entry_64.S 2007-12-11 14:30:53.756297000 -0800 +++ /home/rml/linux-2.6.24-rc5/arch/x86/kernel/entry_64.S 2008-01-04 15:34:16.761342000 -0800 @@ -649,7 +649,7 @@ END(common_interrupt) .endm ENTRY(thermal_interrupt) - apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt + apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt(%rip) END(thermal_interrupt) ENTRY(threshold_interrupt)