All,

Sorry, but the Google code review process has turned up a subtle issue
with my formerly submitted patch.  It is fixed in this newer patch
(attached).  Otherwise, the functional description is exactly as
explained below.

The issue is that there were 2 printk()s which printed the northbridge
PCI device number associated with throttling initialization and
detection.  These printk()s assumed that the northbridges were
enumerated in-order.  While this is highly likely to be the case, it
should not have been assumed.  In the event that it is _not_ the case,
then the PCI device numbers printed out would have been wrong.

Since I had to respin the patch, I took the opportunity to fix a few
minor issues as well: (1) Changed the contents of the aforementioned
printk()s to reveal the PCI bus, function, and register which relate
to the throttling message -- not just the device.  (2) Deleted all
mention of "AMD Quad Core", leaving only "Family 0x10".  (3) Fixed one
comment with incorrect grammar.  (4) Fixed punctuation in a printk().

I sincerely hope that this will be the last iteration.  Thanks to all
who contributed to the review, here and at Google!

On Jan 10, 2008 6:21 PM, Russell Leidich <[EMAIL PROTECTED]> wrote:
> All,
>
> Here's the hopefully-final version of the patch, which I have just
> tested on Intel and AMD.
>
> In my AMD test, I happened to discover that although MCEs were being
> logged, the MCE counter at
> /sys/devices/system/cpu/cpu(whatever)/thermal_throttle/count was not
> being updated.  I fixed this by (1) moving smp_thermal_init from
> late_initcall to device_initcall in mce_amd_64.c (2) moving
> thermal_throttle_init_device from device_initcall to late_initcall in
> therm_throt.c.
>
> Thanks to Andi for his notes on how to hack up an indirect call in
> AT&T-style X86-64.
>
> To reiterate my earlier general description of the patch:
>
> This patch adds thermal machine check event (MCE) support to AMD
> Barcelona (and probably later, if new PCI IDs are added to
> k8_northbridges[]), styled after the same in the Intel code.  The
> initialization consists of 3 parts: (1) northbridge, which enables the
> delivery of the interrupt to the local APIC, (2) APIC, which enables
> delivery to X86, and (3) hotplug support in threshold_cpu_callback(),
> which accomplishes #2 for CPUs which (re)enter the OS later.
>
> Whenever the temperature reaches the throttling threshold programmed
> into a northbridge register (by the BIOS -- my code doesn't change
> this value), a thermal interrupt is delivered.  The interrupt is
> delivered to the vector specified in the thermal APIC register at
> (APIC_BASE + 0x330), which is identical to Intel.  Because the vector
> register is the same between both architectures, and because I don't
> know which brand of CPU is present until runtime (if either),
> thermal_interrupt in entry_64.S will branch to smp_thermal_interrupt
> in mce_thermal.c.  In turn, smp_thermal_interrupt will branch to the
> CPU-specific code for handling the interrupt.  (Apart from the common
> vector location, AMD and Intel use radically different architectures
> for the purpose of reporting throttling events.)  At that point, an
> MCE is logged if and only if the temperature has made a low-to-high
> transition.  Rate limiting is employed so as not to spam the log.
>
> --
> Russell Leidich
>



-- 
Russell Leidich
Signed-off-by: Russell Leidich <[EMAIL PROTECTED]>
diff -uprN linux-2.6.24-rc5.orig/arch/x86/kernel/cpu/mcheck/Makefile 
linux-2.6.24-rc5/arch/x86/kernel/cpu/mcheck/Makefile
--- linux-2.6.24-rc5.orig/arch/x86/kernel/cpu/mcheck/Makefile   2007-12-11 
14:30:53.533297000 -0800
+++ linux-2.6.24-rc5/arch/x86/kernel/cpu/mcheck/Makefile        2007-12-11 
11:56:20.549185000 -0800
@@ -1,4 +1,4 @@
-obj-y                          =  mce_$(BITS).o therm_throt.o
+obj-y                          =  mce_$(BITS).o therm_throt.o mce_thermal.o
 
 obj-$(CONFIG_X86_32)           += k7.o p4.o p5.o p6.o winchip.o
 obj-$(CONFIG_X86_MCE_INTEL)    += mce_intel_64.o
diff -uprN linux-2.6.24-rc5.orig/arch/x86/kernel/cpu/mcheck/mce_amd_64.c 
linux-2.6.24-rc5/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
--- linux-2.6.24-rc5.orig/arch/x86/kernel/cpu/mcheck/mce_amd_64.c       
2007-12-11 14:30:53.559298000 -0800
+++ linux-2.6.24-rc5/arch/x86/kernel/cpu/mcheck/mce_amd_64.c    2008-01-17 
16:22:47.134632000 -0800
@@ -20,15 +20,18 @@
 #include <linux/interrupt.h>
 #include <linux/kobject.h>
 #include <linux/notifier.h>
+#include <linux/pci_ids.h>
 #include <linux/sched.h>
 #include <linux/smp.h>
 #include <linux/sysdev.h>
 #include <linux/sysfs.h>
 #include <asm/apic.h>
-#include <asm/mce.h>
 #include <asm/msr.h>
 #include <asm/percpu.h>
 #include <asm/idle.h>
+#include <asm/therm_throt.h>
+#include <asm/k8.h>
+#include "mce_thermal.h"
 
 #define PFX               "mce_threshold: "
 #define VERSION           "version 1.1.1"
@@ -46,6 +49,15 @@
 #define MASK_ERR_COUNT_HI 0x00000FFF
 #define MASK_BLKPTR_LO    0xFF000000
 #define MCG_XBLK_ADDR     0xC0000400
+#define THERM_CTL_F3X64   0x64
+#define PSL_APIC_LO_EN    0x80
+#define PSL_APIC_HI_EN    0x40
+#define HTC_ACTIVE        0x10
+#define HTC_EN            1
+
+static int smp_thermal_interrupt_init(void);
+static int thermal_apic_init_allowed;
+static void thermal_apic_init(void *unused);
 
 struct threshold_block {
        unsigned int block;
@@ -657,6 +669,9 @@ static int threshold_cpu_callback(struct
        case CPU_ONLINE:
        case CPU_ONLINE_FROZEN:
                threshold_create_device(cpu);
+               if (thermal_apic_init_allowed)
+                       smp_call_function_single(cpu,
+                                       &thermal_apic_init, NULL, 1, 0);
                break;
        case CPU_DEAD:
        case CPU_DEAD_FROZEN:
@@ -665,7 +680,7 @@ static int threshold_cpu_callback(struct
        default:
                break;
        }
-      out:
+out:
        return NOTIFY_OK;
 }
 
@@ -688,3 +703,184 @@ static __init int threshold_init_device(
 }
 
 device_initcall(threshold_init_device);
+
+/*
+ * AMD-specific thermal interrupt handler.
+ */
+void amd_smp_thermal_interrupt(void)
+{
+       unsigned int cpu = smp_processor_id();
+
+       ack_APIC_irq();
+       exit_idle();
+       irq_enter();
+       /*
+        * We're here because thermal throttling has just been activated -- not
+        * deactivated -- hence therm_throt_process(1).
+        */
+       if (therm_throt_process(1))
+               mce_log_therm_throt_event(cpu, 1);
+       /*
+        * We'll still get subsequent interrupts even if we don't clear the
+        * status bit in THERM_CTL_F3X64.  Take advantage of this fact to avoid
+        * wasting time and icache by touching PCI config space.
+        */
+       irq_exit();
+}
+
+/*
+ * Initialize each northbridge's thermal throttling logic.
+ */
+static void smp_thermal_northbridge_init(void)
+{
+       int nb_num;
+       u32 therm_ctl_f3x64;
+
+       for (nb_num = 0; nb_num < num_k8_northbridges; nb_num++) {
+               /*
+                * Configure the thermal interrupt for this northbridge.
+                */
+               pci_read_config_dword(k8_northbridges[nb_num],
+                                       THERM_CTL_F3X64, &therm_ctl_f3x64);
+               therm_ctl_f3x64 |= PSL_APIC_HI_EN | HTC_EN;
+               therm_ctl_f3x64 &= (~PSL_APIC_LO_EN);
+               pci_write_config_dword(k8_northbridges[nb_num],
+                                       THERM_CTL_F3X64, therm_ctl_f3x64);
+               printk(KERN_INFO "CPU thermal throttling interrupts enabled in "
+                       "PCI bus 0x0, device 0x%x, function 0x3, register 0x64."
+                       "\n", k8_northbridges[nb_num]->devfn >> 3);
+       }
+}
+
+/*
+ * Enable the delivery of thermal interrupts via the local APIC.
+ */
+static void thermal_apic_init(void *unused)
+{
+       unsigned int apic_lv_therm;
+
+       /* Set up APIC_LVTTHMR to issue THERMAL_APIC_VECTOR. */
+       apic_lv_therm = apic_read(APIC_LVTTHMR);
+       /*
+        * See if some agent other than this routine has already initialized
+        * APIC_LVTTHMR, i.e. if it's unmasked, but not equal to the value that
+        * we would have programmed, had we been here before on this core.
+        */
+       if ((!(apic_lv_therm & APIC_LVT_MASKED)) && ((apic_lv_therm &
+               (APIC_MODE_MASK | APIC_VECTOR_MASK)) != (APIC_DM_FIXED |
+               THERMAL_APIC_VECTOR))) {
+               unsigned int cpu = smp_processor_id();
+
+               printk(KERN_WARNING "CPU 0x%x: Thermal monitoring not "
+                       "functional:\n", cpu);
+               if ((apic_lv_therm & APIC_MODE_MASK) == APIC_DM_SMI)
+                       printk(KERN_DEBUG "Thermal interrupts already "
+                               "handled by SMI according to (((local APIC "
+                               "base) + 0x330) bit 0x9).\n");
+               else
+                       printk(KERN_DEBUG "Thermal interrupts unexpectedly "
+                               "enabled at (((local APIC base) + 0x330) bit "
+                               "0x10).\n");
+       } else {
+               /*
+                * Configure the Local Thermal Vector Table Entry to issue
+                * thermal interrupts to THERMAL_APIC_VECTOR.
+                *
+                * Start by masking off Delivery Mode and Vector.
+                */
+               apic_lv_therm &= ~(APIC_MODE_MASK | APIC_VECTOR_MASK);
+               /* Fixed interrupt, masked for now. */
+               apic_lv_therm |= APIC_LVT_MASKED | APIC_DM_FIXED |
+                                                       THERMAL_APIC_VECTOR;
+               apic_write(APIC_LVTTHMR, apic_lv_therm);
+               /*
+                * The Intel thermal kernel code implies that there may be a
+                * race involving the mask bit, so clear it only now, after
+                * the other bits have settled.
+                */
+               apic_write(APIC_LVTTHMR, apic_lv_therm & ~APIC_LVT_MASKED);
+       }
+}
+
+/*
+* This function is intended to be called just after thermal throttling has
+ * been enabled.  It warns the user if throttling is already active, which
+ * could indicate a failed cooling system.  It may be the last chance to get
+ * a warning out before thermal shutdown occurs.
+ */
+static void smp_thermal_early_throttle_check(void)
+{
+       int nb_num;
+       u32 therm_ctl_f3x64;
+
+       for (nb_num = 0; nb_num < num_k8_northbridges; nb_num++) {
+               /*
+                * Read back THERM_CTL_F3X64 to check whther HTC_ACTIVE is
+                * asserted, in which case, warn the user.
+                */
+               pci_read_config_dword(k8_northbridges[nb_num],
+                                       THERM_CTL_F3X64, &therm_ctl_f3x64);
+               if (therm_ctl_f3x64 & HTC_ACTIVE)
+                       printk(KERN_WARNING "High CPU package temperature "
+                               "indicated in PCI bus 0x0, device 0x%x, "
+                               "function 0x3, register 0x64.  Throttling "
+                               "enabled.\n", k8_northbridges[nb_num]->devfn >>
+                               3);
+       }
+}
+
+/*
+ * Determine whether or not the northbridges support thermal throttling
+ * interrupts.  If so, initialize them for receiving the same, then perform
+ * corresponding APIC initialization on each core.
+ *
+ * Due to an erratum involving the thermal sensor, this code does not work on
+ * any CPU prior to Family 0x10.
+ */
+static int smp_thermal_interrupt_init(void)
+{
+       int nb_num;
+
+       if (num_k8_northbridges == 0)
+               goto out;
+       /*
+        * If any of the northbridges has PCI ID 0x1103, then its thermal
+        * hardware suffers from an erratum which prevents this code from
+        * working, so abort.  (This implies that it only works on Family
+        * 0x10.)
+        */
+       for (nb_num = 0; nb_num < num_k8_northbridges; nb_num++) {
+               if ((k8_northbridges[nb_num]->device) == 0x1103)
+                       goto out;
+       }
+       /*
+        * Assert that we should log thermal throttling events, whenever
+        * we eventually get around to enabling them.
+        */
+       atomic_set(&therm_throt_en, 1);
+       /*
+        * Bind smp_thermal_interrupt() to amd_smp_thermal_interrupt().
+        */
+       smp_thermal_interrupt = amd_smp_thermal_interrupt;
+       smp_thermal_northbridge_init();
+       /*
+        * We've now initialized sufficient fabric to permit the
+        * initialization of the thermal interrupt APIC vectors, such as
+        * when a core comes online and calls threshold_cpu_callback().
+        */
+       thermal_apic_init_allowed = 1;
+       /*
+        * Call thermal_apic_init() on each core.
+        */
+       on_each_cpu(&thermal_apic_init, NULL, 1, 0);
+       smp_thermal_early_throttle_check();
+out:
+       return 0;
+}
+
+/*
+ * Ensure that smp_thermal_interrupt_init() does not execute before the PCI map
+ * contains all the northbridges.
+ */
+device_initcall(smp_thermal_interrupt_init);
+
diff -uprN linux-2.6.24-rc5.orig/arch/x86/kernel/cpu/mcheck/mce_intel_64.c 
linux-2.6.24-rc5/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
--- linux-2.6.24-rc5.orig/arch/x86/kernel/cpu/mcheck/mce_intel_64.c     
2007-12-11 14:30:53.564303000 -0800
+++ linux-2.6.24-rc5/arch/x86/kernel/cpu/mcheck/mce_intel_64.c  2008-01-09 
18:32:44.776753000 -0800
@@ -8,24 +8,21 @@
 #include <linux/percpu.h>
 #include <asm/processor.h>
 #include <asm/msr.h>
-#include <asm/mce.h>
 #include <asm/hw_irq.h>
 #include <asm/idle.h>
 #include <asm/therm_throt.h>
+#include "mce_thermal.h"
 
-asmlinkage void smp_thermal_interrupt(void)
+void intel_smp_thermal_interrupt(void)
 {
        __u64 msr_val;
 
        ack_APIC_irq();
-
        exit_idle();
        irq_enter();
-
        rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
        if (therm_throt_process(msr_val & 1))
                mce_log_therm_throt_event(smp_processor_id(), msr_val);
-
        add_pda(irq_thermal_count, 1);
        irq_exit();
 }
@@ -75,6 +72,10 @@ static void __cpuinit intel_init_thermal
        wrmsr(MSR_IA32_MISC_ENABLE, l | (1 << 3), h);
 
        l = apic_read(APIC_LVTTHMR);
+       /*
+        * Bind smp_thermal_interrupt() to intel_smp_thermal_interrupt().
+        */
+       smp_thermal_interrupt = intel_smp_thermal_interrupt;
        apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
        printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n",
                cpu, tm2 ? "TM2" : "TM1");
diff -uprN linux-2.6.24-rc5.orig/arch/x86/kernel/cpu/mcheck/mce_thermal.c 
linux-2.6.24-rc5/arch/x86/kernel/cpu/mcheck/mce_thermal.c
--- linux-2.6.24-rc5.orig/arch/x86/kernel/cpu/mcheck/mce_thermal.c      
1969-12-31 16:00:00.000000000 -0800
+++ linux-2.6.24-rc5/arch/x86/kernel/cpu/mcheck/mce_thermal.c   2008-01-04 
15:45:40.686775000 -0800
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2007 Google Inc.
+ *
+ * Written by Mike Waychison <[EMAIL PROTECTED]> and Russell Leidich
+ * <[EMAIL PROTECTED]>.
+ *
+ * CPU-independent thermal interrupt handler.
+ */
+
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <asm/hw_irq.h>
+#include <asm/idle.h>
+#include "mce_thermal.h"
+
+static void default_smp_thermal_interrupt(void)
+{
+       if (printk_ratelimit())
+               printk(KERN_DEBUG "Unexpected thermal throttling interrupt.\n");
+}
+
+smp_thermal_interrupt_callback_t smp_thermal_interrupt
+                                               = default_smp_thermal_interrupt;
+
diff -uprN linux-2.6.24-rc5.orig/arch/x86/kernel/cpu/mcheck/mce_thermal.h 
linux-2.6.24-rc5/arch/x86/kernel/cpu/mcheck/mce_thermal.h
--- linux-2.6.24-rc5.orig/arch/x86/kernel/cpu/mcheck/mce_thermal.h      
1969-12-31 16:00:00.000000000 -0800
+++ linux-2.6.24-rc5/arch/x86/kernel/cpu/mcheck/mce_thermal.h   2008-01-04 
15:53:07.614658000 -0800
@@ -0,0 +1,8 @@
+#include <linux/init.h>
+#include <asm/mce.h>
+
+typedef void (*smp_thermal_interrupt_callback_t)(void);
+extern smp_thermal_interrupt_callback_t        smp_thermal_interrupt;
+
+void mce_log_therm_throt_event(unsigned int cpu, __u64 status);
+
diff -uprN linux-2.6.24-rc5.orig/arch/x86/kernel/cpu/mcheck/therm_throt.c 
linux-2.6.24-rc5/arch/x86/kernel/cpu/mcheck/therm_throt.c
--- linux-2.6.24-rc5.orig/arch/x86/kernel/cpu/mcheck/therm_throt.c      
2007-12-11 14:30:53.607298000 -0800
+++ linux-2.6.24-rc5/arch/x86/kernel/cpu/mcheck/therm_throt.c   2008-01-10 
16:17:53.628335000 -0800
@@ -184,5 +184,10 @@ static __init int thermal_throttle_init_
        return 0;
 }
 
-device_initcall(thermal_throttle_init_device);
+/*
+ * thermal_throttle_init_device() must happen after the thermal throttling
+ * apparatus has been initialized on all supporting architectures, hence the
+ * following late_initcall().
+ */
+late_initcall(thermal_throttle_init_device);
 #endif /* CONFIG_SYSFS */
diff -uprN linux-2.6.24-rc5.orig/arch/x86/kernel/entry_64.S 
linux-2.6.24-rc5/arch/x86/kernel/entry_64.S
--- linux-2.6.24-rc5.orig/arch/x86/kernel/entry_64.S    2007-12-11 
14:30:53.756297000 -0800
+++ linux-2.6.24-rc5/arch/x86/kernel/entry_64.S 2008-01-09 10:30:56.356975000 
-0800
@@ -649,7 +649,7 @@ END(common_interrupt)
        .endm
 
 ENTRY(thermal_interrupt)
-       apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
+       apicinterrupt THERMAL_APIC_VECTOR,*smp_thermal_interrupt(%rip)
 END(thermal_interrupt)
 
 ENTRY(threshold_interrupt)

Reply via email to