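Register the HPET as a clock event source. If the number of logical CPUs does not exceed the number of HPET channels, the HPET provides a per-CPU event source (using as many timers as needed, i.e. one timer per CPU). Otherwise, it acts as a single global event source (using only one timer). Note that this version of the patch only takes the per-CPU path when there are at most two possible CPUs. The HPET timers are programmed in their standard interrupt mapping mode (not in legacy replacement mode).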
Signed-off-by: Venkatesh Pallipadi <[EMAIL PROTECTED]> Index: linux-2.6.21-rc-mm/arch/x86_64/kernel/apic.c =================================================================== --- linux-2.6.21-rc-mm.orig/arch/x86_64/kernel/apic.c +++ linux-2.6.21-rc-mm/arch/x86_64/kernel/apic.c @@ -794,17 +794,24 @@ static void lapic_timer_setup(enum clock unsigned long flags; local_irq_save(flags); - /* Turn off PIT interrupt if we use APIC timer as main timer. - Only works with the PM timer right now - TBD fix it for HPET too. */ - if ((pmtmr_ioport != 0) && - smp_processor_id() == boot_cpu_id && - apic_runs_main_timer == 1 && - !cpu_isset(boot_cpu_id, timer_interrupt_broadcast_ipi_mask)) { - stop_timer_interrupt(); - apic_runs_main_timer++; + if (mode == CLOCK_EVT_MODE_SHUTDOWN) { + unsigned long v; + v = apic_read(APIC_LVTT); + v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); + apic_write(APIC_LVTT, v); + } else { + /* Turn off IRQ 0 timer if we use APIC timer as main timer. */ + if ( smp_processor_id() == boot_cpu_id && + apic_runs_main_timer == 1 && + !cpu_isset(boot_cpu_id, + timer_interrupt_broadcast_ipi_mask)) { + stop_timer_interrupt(); + apic_runs_main_timer++; + } + __setup_APIC_LVTT(calibration_result, + mode != CLOCK_EVT_MODE_PERIODIC); } - __setup_APIC_LVTT(calibration_result, mode != CLOCK_EVT_MODE_PERIODIC); + local_irq_restore(flags); } Index: linux-2.6.21-rc-mm/arch/x86_64/kernel/hpet.c =================================================================== --- linux-2.6.21-rc-mm.orig/arch/x86_64/kernel/hpet.c +++ linux-2.6.21-rc-mm/arch/x86_64/kernel/hpet.c @@ -1,3 +1,4 @@ + #include <linux/kernel.h> #include <linux/sched.h> #include <linux/init.h> @@ -7,10 +8,14 @@ #include <linux/ioport.h> #include <linux/acpi.h> #include <linux/hpet.h> +#include <linux/clockchips.h> +#include <linux/delay.h> + #include <asm/pgtable.h> #include <asm/vsyscall.h> #include <asm/timex.h> #include <asm/hpet.h> +#include <asm/idle.h> int nohpet __initdata; @@ -71,7 +76,7 @@ static __init int 
late_hpet_init(void) fs_initcall(late_hpet_init); #endif -int hpet_timer_stop_set_go(unsigned long tick) +static int hpet_timer_stop_set_go(unsigned long tick) { unsigned int cfg; @@ -158,11 +163,6 @@ int __init hpet_arch_init(void) return hpet_timer_stop_set_go(hpet_tick); } -int hpet_reenable(void) -{ - return hpet_timer_stop_set_go(hpet_tick); -} - /* * calibrate_tsc() calibrates the processor TSC in a very simple way, comparing * it to the HPET timer of known frequency. @@ -468,6 +468,7 @@ static int __init nohpet_setup(char *s) __setup("nohpet", nohpet_setup); +/* Clocksource */ #define HPET_MASK 0xFFFFFFFF #define HPET_SHIFT 22 @@ -517,6 +518,454 @@ static int __init init_hpet_clocksource( return clocksource_register(&clocksource_hpet); } +/* Eventsource */ +static unsigned int num_timers_used; + +#define HPET_DEV_FLAG_ONESHOT 0x1 +#define HPET_DEV_FLAG_PERIODIC 0x2 +#define HPET_DEV_FLAG_TEST 0x10 + +/* HPET as clockevent */ +struct hpet_dev { + unsigned int num; + int cpu; + cpumask_t cpu_mask; + unsigned int irq; + unsigned int tick; + unsigned int count; + unsigned int flags; + char name[10]; +}; + +static struct hpet_dev *cpu_hpet_dev; /* per CPU ptr */ + +#define HPET_INTERRUPT_TEST_COUNT 5 + +static irqreturn_t hpet_normal_interrupt(int irq, void *data); + +static irqreturn_t hpet_normal_interrupt_test(int irq, void *data) +{ + unsigned int readin; + struct hpet_dev *dev = (struct hpet_dev *)data; + + dev->count++; + if (dev->count < HPET_INTERRUPT_TEST_COUNT) { + unsigned long flags; + + local_irq_save(flags); + readin = hpet_readl(HPET_COUNTER); + hpet_writel(readin + dev->tick, HPET_Tn_CMP(dev->num)); + local_irq_restore(flags); + } + return IRQ_HANDLED; +} + +static unsigned int cpu_hpet_irq[NR_CPUS] = {[0 ... 
NR_CPUS-1] = -1}; +static unsigned int assigned_irqs_map = 0; + +/* Called sequentially and hence no synchronization */ +static int hpet_assign_irq(struct hpet_dev *dev) +{ + unsigned int irq = -1; + unsigned int cpu = dev->cpu; + int ret = 0; + + dev->irq = -1; + /* + * Simple IRQ allocation policy : One IRQ per CPU and use + * first available for this timer. + */ + if (cpu > NR_CPUS) + return -1; + + if (cpu_hpet_irq[cpu] == -1) { + unsigned int possible_irqs_map; + unsigned int avail_irqs_map; + + /* Assign a new possible IRQ for this CPU */ + possible_irqs_map = hpet_readl(HPET_Tn_CFG(dev->num) + 4); + + /* + * TBD: Only use IOAPIC sharable interrupts + * IRQ 20 seems to have some problem on my platform. + * Using 21..24 for now. + */ + possible_irqs_map &= 0xe00000; + + avail_irqs_map = possible_irqs_map & (~assigned_irqs_map); + if (unlikely(!avail_irqs_map)) + return -1; + + irq = find_first_bit((void *)&avail_irqs_map, + sizeof(unsigned int)); + if (unlikely(irq > 31 || irq < 0)) + return -1; + + set_bit(irq, &assigned_irqs_map); + cpu_hpet_irq[cpu] = irq; + } + + dev->irq = cpu_hpet_irq[cpu]; + + if (dev->irq != -1) { + /* + * BIOS does not list these interrupts in tables. + * We set the routing here. + * HPET is connected to IOAPIC 0 and IRQ and GSI are same. 
+ */ + ret = io_apic_set_pci_routing(0, irq, irq, 1, 1); + } + + return ret; +} + +static void hpet_setup_cfg(struct hpet_dev *dev, int enable) +{ + if (enable) { + hpet_writel(HPET_TN_ENABLE | HPET_TN_32BIT | + HPET_TN_LEVEL | (dev->irq << HPET_TN_ROUTE_SHIFT), + HPET_Tn_CFG(dev->num)); + } else { + hpet_writel(HPET_TN_32BIT | + HPET_TN_LEVEL | (dev->irq << HPET_TN_ROUTE_SHIFT), + HPET_Tn_CFG(dev->num)); + } +} + +static int hpet_setup_timer_on(struct hpet_dev *dev) +{ + unsigned int readin; + cpumask_t mask; + + cpus_clear(mask); + cpu_set(dev->cpu, mask); + set_pending_irq(dev->irq, mask); + + if (request_irq(dev->irq, hpet_normal_interrupt, + IRQF_SHARED|IRQF_NOBALANCING, dev->name, dev)) { + return -1; + } + + readin = hpet_readl(HPET_COUNTER); + hpet_writel(readin + (unsigned int)hpet_tick, HPET_Tn_CMP(dev->num)); + hpet_setup_cfg(dev, 1); + return 0; +} + +static void hpet_timer_setup(enum clock_event_mode mode, + struct clock_event_device *evt); + +static int hpet_next_event(unsigned long delta, + struct clock_event_device *evt); + +static struct clock_event_device clockevent_hpet = { + .name = "hpet", + .features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_PERIODIC, + .mult = 0, /* set below */ + .shift = HPET_SHIFT, + .rating = 110, + .set_mode = hpet_timer_setup, + .set_next_event = hpet_next_event, +}; + +static DEFINE_PER_CPU(struct clock_event_device, hpet_events); + +static void hpet_timer_setup(enum clock_event_mode mode, + struct clock_event_device *evt) +{ + struct hpet_dev *hdev = per_cpu_ptr(cpu_hpet_dev, smp_processor_id()); + unsigned long flags; + unsigned int readin; + + local_irq_save(flags); + + hdev->flags &= (~(HPET_DEV_FLAG_ONESHOT | HPET_DEV_FLAG_PERIODIC)); + + switch (mode) { + case CLOCK_EVT_MODE_PERIODIC: + hdev->flags |= HPET_DEV_FLAG_PERIODIC; + readin = hpet_readl(HPET_COUNTER); + hpet_writel(readin + hdev->tick, HPET_Tn_CMP(hdev->num)); + readin = hpet_readl(HPET_Tn_CFG(hdev->num)); + hpet_writel(readin | HPET_TN_ENABLE, 
HPET_Tn_CFG(hdev->num)); + break; + case CLOCK_EVT_MODE_ONESHOT: + hdev->flags |= HPET_DEV_FLAG_ONESHOT; + readin = hpet_readl(HPET_COUNTER); + hpet_writel(readin + hdev->tick, HPET_Tn_CMP(hdev->num)); + readin = hpet_readl(HPET_Tn_CFG(hdev->num)); + hpet_writel(readin | HPET_TN_ENABLE, HPET_Tn_CFG(hdev->num)); + break; + case CLOCK_EVT_MODE_SHUTDOWN: + readin = hpet_readl(HPET_Tn_CFG(hdev->num)); + hpet_writel(readin & (~HPET_TN_ENABLE), HPET_Tn_CFG(hdev->num)); + break; + default: + break; + } + + local_irq_restore(flags); +} + +static int hpet_next_event(unsigned long delta, + struct clock_event_device *evt) +{ + struct hpet_dev *hdev = per_cpu_ptr(cpu_hpet_dev, smp_processor_id()); + unsigned long flags; + unsigned int readin; + + local_irq_save(flags); + readin = hpet_readl(HPET_COUNTER); + hpet_writel(readin + (unsigned int)delta, HPET_Tn_CMP(hdev->num)); + readin = hpet_readl(HPET_Tn_CFG(hdev->num)); + hpet_writel(readin | HPET_TN_ENABLE, HPET_Tn_CFG(hdev->num)); + hdev->count++; + local_irq_restore(flags); + return 0; +} + +static irqreturn_t hpet_normal_interrupt(int irq, void *data) +{ + unsigned int readin, status; + struct hpet_dev *dev = (struct hpet_dev *)data; + struct clock_event_device *hevt = &__get_cpu_var(hpet_events); + + status = hpet_readl(HPET_STATUS); + if (!(status & (0x1 << dev->num))) { + return IRQ_NONE; + } + hpet_writel((0x1 << dev->num), HPET_STATUS); + + if (unlikely(dev->flags & HPET_DEV_FLAG_TEST)) + return hpet_normal_interrupt_test(irq, data); + + /* TBD: Needed for NMI watchdog? 
*/ + //add_pda(apic_timer_irqs, 1); + + dev->count++; + if (dev->flags & HPET_DEV_FLAG_ONESHOT) { + readin = hpet_readl(HPET_Tn_CFG(dev->num)); + hpet_writel(readin & (~HPET_TN_ENABLE), HPET_Tn_CFG(dev->num)); + } else if (dev->flags & HPET_DEV_FLAG_PERIODIC) { + readin = hpet_readl(HPET_COUNTER); + hpet_writel(readin + dev->tick, HPET_Tn_CMP(dev->num)); + } + + if (!hevt->event_handler) { + printk("Spurious HPET timer interrupt on HPET timer %d\n", + dev->num); + return IRQ_HANDLED; + } + + exit_idle(); + irq_enter(); + hevt->event_handler(hevt); + irq_exit(); + return IRQ_HANDLED; +} + +#define HPET_PERCPU_EVENT 1 +#define HPET_GLOBAL_EVENT 2 + +static void __devinit setup_cpu_hpet_timer(void *param) +{ + struct clock_event_device *hevt = &__get_cpu_var(hpet_events); + + memcpy(hevt, &clockevent_hpet, sizeof(*hevt)); + if ((unsigned long)param == HPET_PERCPU_EVENT) + hevt->cpumask = cpumask_of_cpu(smp_processor_id()); + else if ((unsigned long)param == HPET_GLOBAL_EVENT) + hevt->cpumask = cpumask_of_cpu(0); + + clockevents_register_device(hevt); +} + +static int init_hpet_eventsource(void) +{ + unsigned int id, cfg; + int ret = 0; + struct hpet_dev **hdev; + unsigned long tmp; + unsigned long tmp1; + unsigned int num_timers; + unsigned int percpu_timer; + int i; + + + id = hpet_readl(HPET_ID); + num_timers = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT); + num_timers++; /* Value read out starts from 0 */ + if (num_possible_cpus() <= 2 && num_possible_cpus() <= num_timers) { + percpu_timer = 1; + num_timers_used = num_possible_cpus(); + } else { + percpu_timer = 0; + num_timers_used = 1; + } + + cpu_hpet_dev = alloc_percpu(struct hpet_dev); + if (cpu_hpet_dev == NULL) + return -1; + + printk("Dump percpu_timer %d, num_timers_used %d, num_timers %d\n", percpu_timer, num_timers_used, num_timers); + + hdev = kzalloc(sizeof(struct hpet_dev *) * num_timers_used, GFP_KERNEL); + if (!hdev) + return -1; + + for (i = 0; i < num_timers_used; i++) { + hdev[i] = 
per_cpu_ptr(cpu_hpet_dev, i); + hdev[i]->num = i; + hdev[i]->cpu = i; + + hdev[i]->tick = hpet_tick; + hdev[i]->count = 0; + hdev[i]->flags |= HPET_DEV_FLAG_TEST; + sprintf(hdev[i]->name, "hpet%d", i); + if (hpet_assign_irq(hdev[i]) || (hdev[i]->irq < 0)) { + printk(KERN_DEBUG "HPET IRQ allocation failed\n"); + goto free_exit; + } + } + + for (i = 0; i < num_timers_used; i++) { + if (hpet_setup_timer_on(hdev[i])) { + num_timers_used = i-1; + goto free_irq_exit; + } + } + + /* Start the timers and test events */ + cfg = hpet_readl(HPET_CFG); + cfg |= HPET_CFG_ENABLE; + hpet_writel(cfg, HPET_CFG); + + msleep(HPET_INTERRUPT_TEST_COUNT * 2 * 1000 / HZ); + + for (i = 0; i < num_timers_used; i++) { + hdev[i]->flags &= (~HPET_DEV_FLAG_TEST); + } + + for (i = 0; i < num_timers_used; i++) { + if (hdev[i]->count < HPET_INTERRUPT_TEST_COUNT) + goto free_irq_exit; + } + + /* + * Convert hpet_period to ticks/ns and then calculate multiplier with + * HPET_SHIFT + */ + tmp = (u64)hpet_period; + do_div(tmp, FSEC_PER_NSEC); + + tmp1 = 1 << HPET_SHIFT; + do_div(tmp1, (u32)tmp); + + clockevent_hpet.mult = (u32)tmp1; + + clockevent_hpet.min_delta_ns = + clockevent_delta2ns(hpet_tick, &clockevent_hpet); + clockevent_hpet.max_delta_ns = + clockevent_delta2ns(0x7FFFFFFF, &clockevent_hpet); + printk(KERN_DEBUG "hpet max_delta_ns: %ld\n", + clockevent_hpet.max_delta_ns); + printk(KERN_DEBUG "hpet min_delta_ns: %ld\n", + clockevent_hpet.min_delta_ns); + printk(KERN_DEBUG "hpet mult: %ld\n", clockevent_hpet.mult); + + if (percpu_timer) { + on_each_cpu(setup_cpu_hpet_timer, (void *)HPET_PERCPU_EVENT, + 0, 1); + } else { + /* Reduce the rating as event device is not per cpu */ + clockevent_hpet.rating -= 20; + setup_cpu_hpet_timer((void *)HPET_GLOBAL_EVENT); + } + + kfree(hdev); + return ret; + +free_irq_exit: + /* Cleanup on failure */ + hpet_timer_stop(); + for (i = 0; i < num_timers_used; i++) { + // TBD: free_irq does not work at this point + //free_irq(hdev[i]->irq, hdev[i]); + } + 
+free_exit: + if (!force_hpet_address) { + /* Switch back to legacy mode */ + hpet_timer_stop_set_go(hpet_tick); + num_timers_used = 2; + } else { + num_timers_used = 0; + } + kfree(hdev); + hdev = NULL; + return -1; +} + +/* Core Init */ +static int hpet_timer_reset(unsigned int resume) +{ + unsigned int cfg; + + /* + * Stop the timers and reset the main counter. + */ + cfg = hpet_readl(HPET_CFG); + cfg &= ~HPET_CFG_ENABLE; + hpet_writel(cfg, HPET_CFG); + hpet_writel(0, HPET_COUNTER); + hpet_writel(0, HPET_COUNTER + 4); + + if (!resume) { + cfg = hpet_readl(HPET_CFG); + cfg &= ~(HPET_CFG_ENABLE|HPET_CFG_LEGACY); + hpet_writel(cfg, HPET_CFG); + + /* Disable HPET timer 0 and 1 */ + cfg = hpet_readl(HPET_T0_CFG); + hpet_writel((~HPET_TN_ENABLE) & cfg, HPET_T0_CFG); + cfg = hpet_readl(HPET_T1_CFG); + hpet_writel((~HPET_TN_ENABLE) & cfg, HPET_T1_CFG); + return 0; + } + + /* + * Set up as many timers as enabled earlier, without enabling + * them. Timer subsystem should issue restart on them as they are + * in one shot mode + */ + if (cpu_hpet_dev) { + int i; + for (i = 0; i < num_timers_used; i++) { + struct hpet_dev *hdev; + hdev = per_cpu_ptr(cpu_hpet_dev, i); + hpet_setup_cfg(hdev, 0); + } + } + + /* Enable global counting */ + cfg |= HPET_CFG_ENABLE; + hpet_writel(cfg, HPET_CFG); + + return 0; +} + +/* Used to halt the timer from time.c */ +int hpet_timer_stop(void) +{ + return hpet_timer_reset(0); +} + +/* Used to restart the timer after resume from time.c */ +int hpet_timer_reenable(void) +{ + return hpet_timer_reset(1); +} + static int __init init_hpet_generic_timer(void) { int ret; @@ -524,8 +973,11 @@ static int __init init_hpet_generic_time if (nohpet) return -ENODEV; - if (!hpet_address) + if (!hpet_address) { hpet_address = force_hpet_address; + } else { + hpet_timer_stop(); + } if (!hpet_address) return -ENODEV; @@ -541,6 +993,13 @@ static int __init init_hpet_generic_time printk(KERN_DEBUG "HPET clocksource init failed\n"); } + ret |= 
init_hpet_eventsource(); + if (!ret) { + printk(KERN_DEBUG "Successfully registered HPET eventsource\n"); + } else { + printk(KERN_DEBUG "HPET eventsource init failed\n"); + } + if (ret) { unsigned int cfg; cfg = hpet_readl(HPET_CFG); Index: linux-2.6.21-rc-mm/arch/x86_64/kernel/time.c =================================================================== --- linux-2.6.21-rc-mm.orig/arch/x86_64/kernel/time.c +++ linux-2.6.21-rc-mm/arch/x86_64/kernel/time.c @@ -341,7 +341,7 @@ void __init stop_timer_interrupt(void) char *name; if (hpet_address) { name = "HPET"; - hpet_timer_stop_set_go(0); + hpet_timer_stop(); } else { name = "PIT"; pit_stop_interrupt(); @@ -417,7 +417,7 @@ static int timer_suspend(struct sys_devi static int timer_resume(struct sys_device *dev) { if (hpet_address) - hpet_reenable(); + hpet_timer_reenable(); else i8254_timer_resume(); Index: linux-2.6.21-rc-mm/include/asm-x86_64/hpet.h =================================================================== --- linux-2.6.21-rc-mm.orig/include/asm-x86_64/hpet.h +++ linux-2.6.21-rc-mm/include/asm-x86_64/hpet.h @@ -57,8 +57,8 @@ extern int is_hpet_enabled(void); extern int hpet_rtc_timer_init(void); extern int apic_is_clustered_box(void); extern int hpet_arch_init(void); -extern int hpet_timer_stop_set_go(unsigned long tick); -extern int hpet_reenable(void); +extern int hpet_timer_stop(void); +extern int hpet_timer_reenable(void); extern unsigned int hpet_calibrate_tsc(void); extern int hpet_use_timer; - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/