Module: xenomai-3
Branch: master
Commit: 80e3a077cc38321785fa6d47976696dc424a321e
URL:    http://git.xenomai.org/?p=xenomai-3.git;a=commit;h=80e3a077cc38321785fa6d47976696dc424a321e
Author: Philippe Gerum <r...@xenomai.org> Date: Tue Sep 15 11:24:05 2015 +0200 cobalt/powerpc: upgrade I-pipe support --- ...-1.patch => ipipe-core-3.18.20-powerpc-2.patch} | 2247 +++----------------- 1 file changed, 281 insertions(+), 1966 deletions(-) diff --git a/kernel/cobalt/arch/powerpc/patches/ipipe-core-3.18.12-powerpc-1.patch b/kernel/cobalt/arch/powerpc/patches/ipipe-core-3.18.20-powerpc-2.patch similarity index 89% rename from kernel/cobalt/arch/powerpc/patches/ipipe-core-3.18.12-powerpc-1.patch rename to kernel/cobalt/arch/powerpc/patches/ipipe-core-3.18.20-powerpc-2.patch index cf75e7e..d29f803 100644 --- a/kernel/cobalt/arch/powerpc/patches/ipipe-core-3.18.12-powerpc-1.patch +++ b/kernel/cobalt/arch/powerpc/patches/ipipe-core-3.18.20-powerpc-2.patch @@ -304,7 +304,7 @@ index b59ac27..9cc1d25 100644 * or should we not care like we do now ? --BenH. diff --git a/arch/powerpc/include/asm/ipipe.h b/arch/powerpc/include/asm/ipipe.h new file mode 100644 -index 0000000..824fa82 +index 0000000..ac0d5fb --- /dev/null +++ b/arch/powerpc/include/asm/ipipe.h @@ -0,0 +1,157 @@ @@ -349,7 +349,7 @@ index 0000000..824fa82 +#include <linux/cache.h> +#include <linux/threads.h> + -+#define IPIPE_CORE_RELEASE 1 ++#define IPIPE_CORE_RELEASE 2 + +struct ipipe_domain; + @@ -1375,6 +1375,45 @@ index 502cf69..0e29ef7 100644 obj-$(CONFIG_PPC_970_NAP) += idle_power4.o obj-$(CONFIG_PPC_P7_NAP) += idle_power7.o obj-$(CONFIG_PPC_OF) += of_platform.o prom_parse.o +diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c +index 34f5552..8ffb7f2 100644 +--- a/arch/powerpc/kernel/align.c ++++ b/arch/powerpc/kernel/align.c +@@ -739,7 +739,7 @@ int fix_alignment(struct pt_regs *regs) + unsigned int reg, areg; + unsigned int dsisr; + unsigned char __user *addr; +- unsigned long p, swiz; ++ unsigned long p, swiz, irqflags __maybe_unused; + int ret, i; + union data { + u64 ll; +@@ -957,10 +957,10 @@ int fix_alignment(struct pt_regs *regs) + if (flags & S) { + /* Single-precision FP store requires conversion... */ + #ifdef CONFIG_PPC_FPU +- preempt_disable(); ++ irqflags = hard_preempt_disable(); + enable_kernel_fp(); + cvt_df(&data.dd, (float *)&data.x32.low32); +- preempt_enable(); ++ hard_preempt_enable(irqflags); + #else + return 0; + #endif +@@ -997,10 +997,10 @@ int fix_alignment(struct pt_regs *regs) + /* Single-precision FP load requires conversion... 
*/ + case LD+F+S: + #ifdef CONFIG_PPC_FPU +- preempt_disable(); ++ irqflags = hard_preempt_disable(); + enable_kernel_fp(); + cvt_fd((float *)&data.x32.low32, &data.dd); +- preempt_enable(); ++ hard_preempt_enable(irqflags); + #else + return 0; + #endif diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 9d7dede..aad5e9d 100644 --- a/arch/powerpc/kernel/asm-offsets.c @@ -1919,7 +1958,7 @@ index 5e01984..6f68eff 100644 #ifdef CONFIG_PPC_DOORBELL STD_EXCEPTION_COMMON_ASYNC(0xa00, doorbell_super, doorbell_exception) diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S -index 9ad236e..65b8f7c 100644 +index 9ad236e..76482dc 100644 --- a/arch/powerpc/kernel/fpu.S +++ b/arch/powerpc/kernel/fpu.S @@ -195,7 +195,17 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) @@ -1927,12 +1966,12 @@ index 9ad236e..65b8f7c 100644 */ _GLOBAL(giveup_fpu) +#ifdef CONFIG_IPIPE -+ mfmsr r6 ++ mfmsr r10 +#ifdef CONFIG_PPC64 -+ rldicl r5,r6,48,1 /* clear MSR_EE */ ++ rldicl r5,r10,48,1 /* clear MSR_EE */ + rotldi r5,r5,16 +#else -+ rlwinm r5,r6,0,17,15 /* clear MSR_EE */ ++ rlwinm r5,r10,0,17,15 /* clear MSR_EE */ +#endif +#else mfmsr r5 @@ -1955,7 +1994,7 @@ index 9ad236e..65b8f7c 100644 #endif /* CONFIG_SMP */ +2: +#ifdef CONFIG_IPIPE /* restore interrupt state */ -+ andi. r6,r6,MSR_EE ++ andi. r10,r10,MSR_EE + beqlr + mfmsr r5 + ori r5,r5,MSR_EE @@ -2250,7 +2289,7 @@ index a620203..a67bb7c 100644 #define FP_UNAVAILABLE_EXCEPTION \ START_EXCEPTION(FloatingPointUnavailable) \ diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S -index fffd1f9..7d82b1f 100644 +index fffd1f9..66425e9 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -390,7 +390,11 @@ interrupt_base: @@ -2265,6 +2304,38 @@ index fffd1f9..7d82b1f 100644 /* Alignment Interrupt */ ALIGNMENT_EXCEPTION +@@ -1017,10 +1021,14 @@ _GLOBAL(__setup_ehv_ivors) + _GLOBAL(giveup_spe) + mfmsr r5 + oris r5,r5,MSR_SPE@h ++#ifdef CONFIG_IPIPE ++ mr r10,r5 ++ rlwinm r5,r5,0,17,15 /* clear MSR_EE */ ++#endif + mtmsr r5 /* enable use of SPE now */ + isync + cmpi 0,r3,0 +- beqlr- /* if no previous owner, done */ ++ beq- 2f /* if no previous owner, done */ + addi r3,r3,THREAD /* want THREAD of task */ + lwz r5,PT_REGS(r3) + cmpi 0,r5,0 +@@ -1040,6 +1048,15 @@ _GLOBAL(giveup_spe) + lis r4,last_task_used_spe@ha + stw r5,last_task_used_spe@l(r4) + #endif /* !CONFIG_SMP */ ++2: ++#ifdef CONFIG_IPIPE ++ andi. 
r5,r10,MSR_EE ++ beqlr- ++ mfmsr r5 ++ ori r5,r5,MSR_EE ++ mtmsr r5 ++ isync ++#endif + blr + #endif /* CONFIG_SPE */ + diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c index d7216c9..888dfd1 100644 --- a/arch/powerpc/kernel/idle.c @@ -2387,10 +2458,10 @@ index 283c603..c6d39bd 100644 li r0,0 diff --git a/arch/powerpc/kernel/ipipe.c b/arch/powerpc/kernel/ipipe.c new file mode 100644 -index 0000000..46333b6 +index 0000000..292fed5 --- /dev/null +++ b/arch/powerpc/kernel/ipipe.c -@@ -0,0 +1,387 @@ +@@ -0,0 +1,372 @@ +/* -*- linux-c -*- + * linux/arch/powerpc/kernel/ipipe.c + * @@ -2646,16 +2717,6 @@ index 0000000..46333b6 +} +EXPORT_SYMBOL_GPL(ipipe_get_sysinfo); + -+void ipipe_raise_irq(unsigned int irq) -+{ -+ unsigned long flags; -+ -+ flags = hard_local_irq_save(); -+ __ipipe_handle_irq(irq, NULL); -+ hard_local_irq_restore(flags); -+} -+EXPORT_SYMBOL_GPL(ipipe_raise_irq); -+ +static int __ipipe_exit_irq(struct pt_regs *regs) +{ + int root = __ipipe_root_p; @@ -2756,11 +2817,6 @@ index 0000000..46333b6 + return __ipipe_exit_irq(regs); +} + -+void __ipipe_pin_range_globally(unsigned long start, unsigned long end) -+{ -+ /* We don't support this. */ -+} -+ +EXPORT_SYMBOL_GPL(show_stack); +EXPORT_SYMBOL_GPL(_switch); +#ifndef CONFIG_SMP @@ -5616,10 +5672,10 @@ index e5c31ea..8dce875 100644 .probe = serial8250_probe, .remove = serial8250_remove, diff --git a/fs/exec.c b/fs/exec.c -index 7302b75..e6035e6 100644 +index b7a5f46..0582bd6 100644 --- a/fs/exec.c +++ b/fs/exec.c -@@ -820,6 +820,7 @@ static int exec_mmap(struct mm_struct *mm) +@@ -823,6 +823,7 @@ static int exec_mmap(struct mm_struct *mm) { struct task_struct *tsk; struct mm_struct *old_mm, *active_mm; @@ -5627,7 +5683,7 @@ index 7302b75..e6035e6 100644 /* Notify parent that we're no longer interested in the old VM */ tsk = current; -@@ -843,8 +844,10 @@ static int exec_mmap(struct mm_struct *mm) +@@ -846,8 +847,10 @@ static int exec_mmap(struct mm_struct *mm) task_lock(tsk); active_mm = tsk->active_mm; tsk->mm = mm; @@ -5929,18 +5985,6 @@ index abcafaa..a8440e4 100644 } ____cacheline_aligned; /* -diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h -index 662697b..6a9b6ad 100644 ---- a/include/linux/ftrace.h -+++ b/include/linux/ftrace.h -@@ -108,6 +108,7 @@ enum { - FTRACE_OPS_FL_ADDING = 1 << 9, - FTRACE_OPS_FL_REMOVING = 1 << 10, - FTRACE_OPS_FL_MODIFYING = 1 << 11, -+ FTRACE_OPS_FL_IPIPE_EXCLUSIVE = 1 << 12, - }; - - #ifdef CONFIG_DYNAMIC_FTRACE diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index cba442e..b513a46 100644 --- a/include/linux/hardirq.h @@ -5984,10 +6028,10 @@ index e6bb36a..898a91a 100644 diff --git a/include/linux/ipipe.h b/include/linux/ipipe.h new file mode 100644 -index 0000000..1d70705 +index 0000000..eafb7f3 --- /dev/null +++ b/include/linux/ipipe.h -@@ -0,0 +1,455 @@ +@@ -0,0 +1,457 @@ +/* -*- linux-c -*- + * include/linux/ipipe.h + * @@ -6168,6 +6212,8 @@ index 0000000..1d70705 +void ipipe_free_irq(struct ipipe_domain *ipd, + unsigned int irq); + ++void __ipipe_raise_irq(unsigned int irq); ++ +void ipipe_raise_irq(unsigned int irq); + +void ipipe_set_hooks(struct ipipe_domain *ipd, @@ -6445,10 +6491,10 @@ index 0000000..1d70705 +#endif /* !__LINUX_IPIPE_H */ diff --git a/include/linux/ipipe_base.h b/include/linux/ipipe_base.h new file mode 100644 -index 0000000..ea01dd6 +index 0000000..a37358c --- /dev/null +++ b/include/linux/ipipe_base.h -@@ -0,0 +1,356 @@ +@@ -0,0 +1,358 @@ +/* -*- linux-c -*- + * include/linux/ipipe_base.h + * @@ -6630,9 +6676,6 @@ index 
0000000..ea01dd6 + +void __ipipe_flush_printk(unsigned int irq, void *cookie); + -+void __ipipe_pin_range_globally(unsigned long start, -+ unsigned long end); -+ +#define __ipipe_get_cpu(flags) ({ (flags) = hard_preempt_disable(); ipipe_processor_id(); }) +#define __ipipe_put_cpu(flags) hard_preempt_enable(flags) + @@ -6765,10 +6808,6 @@ index 0000000..ea01dd6 + +static inline void __ipipe_init_taskinfo(struct task_struct *p) { } + -+static inline void __ipipe_pin_range_globally(unsigned long start, -+ unsigned long end) -+{ } -+ +#define hard_preempt_disable() ({ preempt_disable(); 0; }) +#define hard_preempt_enable(flags) ({ preempt_enable(); (void)(flags); }) + @@ -6795,6 +6834,15 @@ index 0000000..ea01dd6 + +#endif /* !CONFIG_IPIPE */ + ++#ifdef CONFIG_IPIPE_WANT_PTE_PINNING ++void __ipipe_pin_mapping_globally(unsigned long start, ++ unsigned long end); ++#else ++static inline void __ipipe_pin_mapping_globally(unsigned long start, ++ unsigned long end) ++{ } ++#endif ++ +static inline void ipipe_preempt_root_only(void) +{ +#if defined(CONFIG_IPIPE_DEBUG_CONTEXT) && \ @@ -8228,10 +8276,10 @@ index 3d770f55..0b21f46 100644 #ifdef CONFIG_DEBUG_ATOMIC_SLEEP diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h -index a6059bd..a4f80b6 100644 +index e4d8f70..22c313e 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h -@@ -232,6 +232,9 @@ struct kvm_vcpu { +@@ -233,6 +233,9 @@ struct kvm_vcpu { #ifdef CONFIG_PREEMPT_NOTIFIERS struct preempt_notifier preempt_notifier; #endif @@ -8995,10 +9043,10 @@ index 9b7d746..562489c 100644 trace_task_newtask(p, clone_flags); diff --git a/kernel/ipipe/Kconfig b/kernel/ipipe/Kconfig new file mode 100644 -index 0000000..da17b04 +index 0000000..218f51da --- /dev/null +++ b/kernel/ipipe/Kconfig -@@ -0,0 +1,62 @@ +@@ -0,0 +1,65 @@ +config IPIPE + bool "Interrupt pipeline" + default y @@ -9019,6 +9067,9 @@ index 0000000..da17b04 +config IPIPE_WANT_CLOCKSOURCE + bool + ++config IPIPE_WANT_PTE_PINNING ++ bool ++ +config IPIPE_CORE_APIREV + int + depends on IPIPE @@ -9452,10 +9503,10 @@ index 0000000..797a849 +} diff --git a/kernel/ipipe/core.c b/kernel/ipipe/core.c new file mode 100644 -index 0000000..b8eb85c +index 0000000..0320453 --- /dev/null +++ b/kernel/ipipe/core.c -@@ -0,0 +1,1890 @@ +@@ -0,0 +1,1917 @@ +/* -*- linux-c -*- + * linux/kernel/ipipe/core.c + * @@ -10710,7 +10761,7 @@ index 0000000..b8eb85c + * handling interrupts: + * + * a) the root domain is alone, no registered head domain -+ * => all interrupts are delivered via the fast dispatcher. ++ * => all interrupts go through the interrupt log + * b) a head domain is registered + * => head domain IRQs go through the fast dispatcher + * => root domain IRQs go through the interrupt log @@ -10818,6 +10869,33 @@ index 0000000..b8eb85c + __ipipe_sync_pipeline(ipipe_head_domain); +} + ++void ipipe_raise_irq(unsigned int irq) ++{ ++ struct ipipe_domain *ipd = ipipe_head_domain; ++ unsigned long flags, control; ++ ++ flags = hard_local_irq_save(); ++ ++ /* ++ * Fast path: raising a virtual IRQ handled by the head ++ * domain. ++ */ ++ if (likely(ipipe_virtual_irq_p(irq) && ipd != ipipe_root_domain)) { ++ control = ipd->irqs[irq].control; ++ if (likely(control & IPIPE_HANDLE_MASK)) { ++ dispatch_irq_head(irq); ++ goto out; ++ } ++ } ++ ++ /* Emulate regular device IRQ receipt. 
*/ ++ __ipipe_dispatch_irq(irq, IPIPE_IRQF_NOACK); ++out: ++ hard_local_irq_restore(flags); ++ ++} ++EXPORT_SYMBOL_GPL(ipipe_raise_irq); ++ +#ifdef CONFIG_PREEMPT + +void preempt_schedule_irq(void); @@ -11169,7 +11247,7 @@ index 0000000..b8eb85c + */ + if (this_domain == ipipe_root_domain) { + p = raw_cpu_ptr(&ipipe_percpu.root); -+ if (test_bit(IPIPE_STALL_FLAG, &p->status)) ++ if (test_bit(IPIPE_STALL_FLAG, &p->status) || preempt_count()) + goto out; + } + /* @@ -11850,7 +11928,7 @@ index 0000000..354bf29 +#endif /* CONFIG_IPIPE_HAVE_HOSTRT */ diff --git a/kernel/ipipe/tracer.c b/kernel/ipipe/tracer.c new file mode 100644 -index 0000000..5cce0bc +index 0000000..c8c1b97 --- /dev/null +++ b/kernel/ipipe/tracer.c @@ -0,0 +1,1447 @@ @@ -13175,7 +13253,7 @@ index 0000000..5cce0bc + +static struct ftrace_ops ipipe_trace_ops = { + .func = ipipe_trace_function, -+ .flags = FTRACE_OPS_FL_IPIPE_EXCLUSIVE, ++ .flags = FTRACE_OPS_FL_RECURSION_SAFE, +}; + +static ssize_t __ipipe_wr_enable(struct file *file, const char __user *buffer, @@ -13948,7 +14026,7 @@ index 4b082b5..67447fc 100644 * The __lock_function inlines are taken from * include/linux/spinlock_api_smp.h diff --git a/kernel/module.c b/kernel/module.c -index 88cec1d..54cfcbc 100644 +index c353707..79b3790 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -878,7 +878,7 @@ static inline void print_unload_info(struct seq_file *m, struct module *mod) @@ -14088,10 +14166,10 @@ index 1f35a34..4b3828b 100644 if (pm_wakeup_pending()) { error = -EAGAIN; diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c -index bf95fda..6f3e466 100644 +index 3b9f01b..343b6c2 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c -@@ -1807,6 +1807,43 @@ asmlinkage int printk_emit(int facility, int level, +@@ -1804,6 +1804,43 @@ asmlinkage int printk_emit(int facility, int level, } EXPORT_SYMBOL(printk_emit); @@ -14135,7 +14213,7 @@ index bf95fda..6f3e466 100644 /** * printk - print a kernel message * @fmt: format string -@@ -1830,6 +1867,59 @@ EXPORT_SYMBOL(printk_emit); +@@ -1827,6 +1864,59 @@ EXPORT_SYMBOL(printk_emit); */ asmlinkage __visible int printk(const char *fmt, ...) { @@ -14195,7 +14273,7 @@ index bf95fda..6f3e466 100644 va_list args; int r; -@@ -1847,6 +1937,8 @@ asmlinkage __visible int printk(const char *fmt, ...) +@@ -1844,6 +1934,8 @@ asmlinkage __visible int printk(const char *fmt, ...) return r; } @@ -14205,7 +14283,7 @@ index bf95fda..6f3e466 100644 #else /* CONFIG_PRINTK */ diff --git a/kernel/sched/core.c b/kernel/sched/core.c -index 9f5ed5e..5396d4b 100644 +index 6810e57..fdc4ae3 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1598,7 +1598,9 @@ void scheduler_ipi(void) @@ -14365,15 +14443,15 @@ index 9f5ed5e..5396d4b 100644 __preempt_count_sub(PREEMPT_ACTIVE); /* -@@ -3624,6 +3650,7 @@ change: +@@ -3628,6 +3654,7 @@ change: prev_class = p->sched_class; - __setscheduler(rq, p, attr); -+ __ipipe_report_setsched(p); + __setscheduler(rq, p, attr, true); ++ __ipipe_report_setsched(p); if (running) p->sched_class->set_curr_task(rq); -@@ -4743,10 +4770,13 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) +@@ -4747,10 +4774,13 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) do_set_cpus_allowed(p, new_mask); /* Can the task run on the task's current CPU? 
If so, we're done */ @@ -14388,7 +14466,7 @@ index 9f5ed5e..5396d4b 100644 if (task_running(rq, p) || p->state == TASK_WAKING) { struct migration_arg arg = { p, dest_cpu }; /* Need help from migration thread: drop lock and wait. */ -@@ -8265,3 +8295,42 @@ void dump_cpu_task(int cpu) +@@ -8269,3 +8299,42 @@ void dump_cpu_task(int cpu) pr_info("Task dump for CPU %d:\n", cpu); sched_show_task(cpu_curr(cpu)); } @@ -14715,1908 +14793,145 @@ index 3260ffd..76c9b92 100644 /* * This function runs timers and the timer-tq in bottom half context. */ -diff --git a/kernel/timer.c b/kernel/timer.c -new file mode 100644 -index 0000000..e3f1a90 ---- /dev/null -+++ b/kernel/timer.c -@@ -0,0 +1,1753 @@ -+/* -+ * linux/kernel/timer.c -+ * -+ * Kernel internal timers -+ * -+ * Copyright (C) 1991, 1992 Linus Torvalds -+ * -+ * 1997-01-28 Modified by Finn Arne Gangstad to make timers scale better. -+ * -+ * 1997-09-10 Updated NTP code according to technical memorandum Jan '96 -+ * "A Kernel Model for Precision Timekeeping" by Dave Mills -+ * 1998-12-24 Fixed a xtime SMP race (we need the xtime_lock rw spinlock to -+ * serialize accesses to xtime/lost_ticks). -+ * Copyright (C) 1998 Andrea Arcangeli -+ * 1999-03-10 Improved NTP compatibility by Ulrich Windl -+ * 2002-05-31 Move sys_sysinfo here and make its locking sane, Robert Love -+ * 2000-10-05 Implemented scalable SMP per-CPU timer handling. -+ * Copyright (C) 2000, 2001, 2002 Ingo Molnar -+ * Designed by David S. Miller, Alexey Kuznetsov and Ingo Molnar -+ */ -+ -+#include <linux/kernel_stat.h> -+#include <linux/export.h> -+#include <linux/interrupt.h> -+#include <linux/percpu.h> -+#include <linux/init.h> -+#include <linux/mm.h> -+#include <linux/swap.h> -+#include <linux/pid_namespace.h> -+#include <linux/notifier.h> -+#include <linux/thread_info.h> -+#include <linux/time.h> -+#include <linux/jiffies.h> -+#include <linux/posix-timers.h> -+#include <linux/cpu.h> -+#include <linux/syscalls.h> -+#include <linux/delay.h> -+#include <linux/tick.h> -+#include <linux/kallsyms.h> -+#include <linux/irq_work.h> -+#include <linux/sched.h> -+#include <linux/sched/sysctl.h> -+#include <linux/slab.h> -+#include <linux/compat.h> -+ -+#include <asm/uaccess.h> -+#include <asm/unistd.h> -+#include <asm/div64.h> -+#include <asm/timex.h> -+#include <asm/io.h> -+ -+#define CREATE_TRACE_POINTS -+#include <trace/events/timer.h> -+ -+__visible u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES; -+ -+EXPORT_SYMBOL(jiffies_64); -+ -+/* -+ * per-CPU timer vector definitions: -+ */ -+#define TVN_BITS (CONFIG_BASE_SMALL ? 4 : 6) -+#define TVR_BITS (CONFIG_BASE_SMALL ? 
6 : 8) -+#define TVN_SIZE (1 << TVN_BITS) -+#define TVR_SIZE (1 << TVR_BITS) -+#define TVN_MASK (TVN_SIZE - 1) -+#define TVR_MASK (TVR_SIZE - 1) -+#define MAX_TVAL ((unsigned long)((1ULL << (TVR_BITS + 4*TVN_BITS)) - 1)) -+ -+struct tvec { -+ struct list_head vec[TVN_SIZE]; -+}; -+ -+struct tvec_root { -+ struct list_head vec[TVR_SIZE]; -+}; -+ -+struct tvec_base { -+ spinlock_t lock; -+ struct timer_list *running_timer; -+ unsigned long timer_jiffies; -+ unsigned long next_timer; -+ unsigned long active_timers; -+ unsigned long all_timers; -+ struct tvec_root tv1; -+ struct tvec tv2; -+ struct tvec tv3; -+ struct tvec tv4; -+ struct tvec tv5; -+} ____cacheline_aligned; -+ -+struct tvec_base boot_tvec_bases; -+EXPORT_SYMBOL(boot_tvec_bases); -+static DEFINE_PER_CPU(struct tvec_base *, tvec_bases) = &boot_tvec_bases; -+ -+/* Functions below help us manage 'deferrable' flag */ -+static inline unsigned int tbase_get_deferrable(struct tvec_base *base) -+{ -+ return ((unsigned int)(unsigned long)base & TIMER_DEFERRABLE); -+} -+ -+static inline unsigned int tbase_get_irqsafe(struct tvec_base *base) -+{ -+ return ((unsigned int)(unsigned long)base & TIMER_IRQSAFE); -+} -+ -+static inline struct tvec_base *tbase_get_base(struct tvec_base *base) -+{ -+ return ((struct tvec_base *)((unsigned long)base & ~TIMER_FLAG_MASK)); -+} -+ -+static inline void -+timer_set_base(struct timer_list *timer, struct tvec_base *new_base) -+{ -+ unsigned long flags = (unsigned long)timer->base & TIMER_FLAG_MASK; -+ -+ timer->base = (struct tvec_base *)((unsigned long)(new_base) | flags); -+} -+ -+static unsigned long round_jiffies_common(unsigned long j, int cpu, -+ bool force_up) -+{ -+ int rem; -+ unsigned long original = j; -+ -+ /* -+ * We don't want all cpus firing their timers at once hitting the -+ * same lock or cachelines, so we skew each extra cpu with an extra -+ * 3 jiffies. This 3 jiffies came originally from the mm/ code which -+ * already did this. -+ * The skew is done by adding 3*cpunr, then round, then subtract this -+ * extra offset again. -+ */ -+ j += cpu * 3; -+ -+ rem = j % HZ; -+ -+ /* -+ * If the target jiffie is just after a whole second (which can happen -+ * due to delays of the timer irq, long irq off times etc etc) then -+ * we should round down to the whole second, not up. Use 1/4th second -+ * as cutoff for this rounding as an extreme upper bound for this. -+ * But never round down if @force_up is set. -+ */ -+ if (rem < HZ/4 && !force_up) /* round down */ -+ j = j - rem; -+ else /* round up */ -+ j = j - rem + HZ; -+ -+ /* now that we have rounded, subtract the extra skew again */ -+ j -= cpu * 3; -+ -+ /* -+ * Make sure j is still in the future. Otherwise return the -+ * unmodified value. -+ */ -+ return time_is_after_jiffies(j) ? j : original; -+} -+ -+/** -+ * __round_jiffies - function to round jiffies to a full second -+ * @j: the time in (absolute) jiffies that should be rounded -+ * @cpu: the processor number on which the timeout will happen -+ * -+ * __round_jiffies() rounds an absolute time in the future (in jiffies) -+ * up or down to (approximately) full seconds. This is useful for timers -+ * for which the exact time they fire does not matter too much, as long as -+ * they fire approximately every X seconds. -+ * -+ * By rounding these timers to whole seconds, all such timers will fire -+ * at the same time, rather than at various times spread out. The goal -+ * of this is to have the CPU wake up less, which saves power. 
-+ * -+ * The exact rounding is skewed for each processor to avoid all -+ * processors firing at the exact same time, which could lead -+ * to lock contention or spurious cache line bouncing. -+ * -+ * The return value is the rounded version of the @j parameter. -+ */ -+unsigned long __round_jiffies(unsigned long j, int cpu) -+{ -+ return round_jiffies_common(j, cpu, false); -+} -+EXPORT_SYMBOL_GPL(__round_jiffies); +diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig +index a5da09c..6650799 100644 +--- a/kernel/trace/Kconfig ++++ b/kernel/trace/Kconfig +@@ -439,6 +439,7 @@ config DYNAMIC_FTRACE + bool "enable/disable function tracing dynamically" + depends on FUNCTION_TRACER + depends on HAVE_DYNAMIC_FTRACE ++ depends on !IPIPE + default y + help + This option will modify all the calls to function tracing +diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c +index d1eff3d..2a324bc 100644 +--- a/kernel/trace/ftrace.c ++++ b/kernel/trace/ftrace.c +@@ -32,6 +32,7 @@ + #include <linux/list.h> + #include <linux/hash.h> + #include <linux/rcupdate.h> ++#include <linux/ipipe.h> + + #include <trace/events/sched.h> + +@@ -2298,6 +2299,9 @@ void __weak arch_ftrace_update_code(int command) + + static void ftrace_run_update_code(int command) + { ++#ifdef CONFIG_IPIPE ++ unsigned long flags; ++#endif /* CONFIG_IPIPE */ + int ret; + + ret = ftrace_arch_code_modify_prepare(); +@@ -2311,7 +2315,13 @@ static void ftrace_run_update_code(int command) + * is safe. The stop_machine() is the safest, but also + * produces the most overhead. + */ ++#ifdef CONFIG_IPIPE ++ flags = ipipe_critical_enter(NULL); ++ __ftrace_modify_code(&command); ++ ipipe_critical_exit(flags); ++#else /* !CONFIG_IPIPE */ + arch_ftrace_update_code(command); ++#endif /* !CONFIG_IPIPE */ + + ret = ftrace_arch_code_modify_post_process(); + FTRACE_WARN_ON(ret); +@@ -4621,10 +4631,10 @@ static int ftrace_process_locs(struct module *mod, + * reason to cause large interrupt latencies while we do it. + */ + if (!mod) +- local_irq_save(flags); ++ flags = hard_local_irq_save(); + ftrace_update_code(mod, start_pg); + if (!mod) +- local_irq_restore(flags); ++ hard_local_irq_restore(flags); + ret = 0; + out: + mutex_unlock(&ftrace_lock); +@@ -4723,9 +4733,11 @@ void __init ftrace_init(void) + unsigned long count, flags; + int ret; + +- local_irq_save(flags); ++ flags = hard_local_irq_save_notrace(); + ret = ftrace_dyn_arch_init(); +- local_irq_restore(flags); ++ hard_local_irq_restore_notrace(flags); + -+/** -+ * __round_jiffies_relative - function to round jiffies to a full second -+ * @j: the time in (relative) jiffies that should be rounded -+ * @cpu: the processor number on which the timeout will happen -+ * -+ * __round_jiffies_relative() rounds a time delta in the future (in jiffies) -+ * up or down to (approximately) full seconds. This is useful for timers -+ * for which the exact time they fire does not matter too much, as long as -+ * they fire approximately every X seconds. -+ * -+ * By rounding these timers to whole seconds, all such timers will fire -+ * at the same time, rather than at various times spread out. The goal -+ * of this is to have the CPU wake up less, which saves power. -+ * -+ * The exact rounding is skewed for each processor to avoid all -+ * processors firing at the exact same time, which could lead -+ * to lock contention or spurious cache line bouncing. -+ * -+ * The return value is the rounded version of the @j parameter. 
-+ */ -+unsigned long __round_jiffies_relative(unsigned long j, int cpu) -+{ -+ unsigned long j0 = jiffies; ++ /* ftrace_dyn_arch_init places the return code in addr */ + if (ret) + goto failed; + +@@ -4891,7 +4903,16 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, + } + } while_for_each_ftrace_op(op); + out: +- preempt_enable_notrace(); ++#ifdef CONFIG_IPIPE ++ if (hard_irqs_disabled() || !__ipipe_root_p) ++ /* ++ * Nothing urgent to schedule here. At latest the timer tick ++ * will pick up whatever the tracing functions kicked off. ++ */ ++ preempt_enable_no_resched_notrace(); ++ else ++#endif ++ preempt_enable_notrace(); + trace_clear_recursion(bit); + } + +diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c +index 0fc5cfe..33055b3 100644 +--- a/kernel/trace/ring_buffer.c ++++ b/kernel/trace/ring_buffer.c +@@ -2684,7 +2684,8 @@ static DEFINE_PER_CPU(unsigned int, current_context); + + static __always_inline int trace_recursive_lock(void) + { +- unsigned int val = __this_cpu_read(current_context); ++ unsigned long flags; ++ unsigned int val; + int bit; + + if (in_interrupt()) { +@@ -2697,21 +2698,34 @@ static __always_inline int trace_recursive_lock(void) + } else + bit = 3; + +- if (unlikely(val & (1 << bit))) ++ flags = hard_local_irq_save(); + -+ /* Use j0 because jiffies might change while we run */ -+ return round_jiffies_common(j + j0, cpu, false) - j0; -+} -+EXPORT_SYMBOL_GPL(__round_jiffies_relative); ++ val = __this_cpu_read(current_context); ++ if (unlikely(val & (1 << bit))) { ++ hard_local_irq_restore(flags); + return 1; ++ } + + val |= (1 << bit); + __this_cpu_write(current_context, val); + ++ hard_local_irq_restore(flags); + -+/** -+ * round_jiffies - function to round jiffies to a full second -+ * @j: the time in (absolute) jiffies that should be rounded -+ * -+ * round_jiffies() rounds an absolute time in the future (in jiffies) -+ * up or down to (approximately) full seconds. This is useful for timers -+ * for which the exact time they fire does not matter too much, as long as -+ * they fire approximately every X seconds. -+ * -+ * By rounding these timers to whole seconds, all such timers will fire -+ * at the same time, rather than at various times spread out. The goal -+ * of this is to have the CPU wake up less, which saves power. -+ * -+ * The return value is the rounded version of the @j parameter. -+ */ -+unsigned long round_jiffies(unsigned long j) -+{ -+ return round_jiffies_common(j, raw_smp_processor_id(), false); -+} -+EXPORT_SYMBOL_GPL(round_jiffies); -+ -+/** -+ * round_jiffies_relative - function to round jiffies to a full second -+ * @j: the time in (relative) jiffies that should be rounded -+ * -+ * round_jiffies_relative() rounds a time delta in the future (in jiffies) -+ * up or down to (approximately) full seconds. This is useful for timers -+ * for which the exact time they fire does not matter too much, as long as -+ * they fire approximately every X seconds. -+ * -+ * By rounding these timers to whole seconds, all such timers will fire -+ * at the same time, rather than at various times spread out. The goal -+ * of this is to have the CPU wake up less, which saves power. -+ * -+ * The return value is the rounded version of the @j parameter. 
-+ */ -+unsigned long round_jiffies_relative(unsigned long j) -+{ -+ return __round_jiffies_relative(j, raw_smp_processor_id()); -+} -+EXPORT_SYMBOL_GPL(round_jiffies_relative); -+ -+/** -+ * __round_jiffies_up - function to round jiffies up to a full second -+ * @j: the time in (absolute) jiffies that should be rounded -+ * @cpu: the processor number on which the timeout will happen -+ * -+ * This is the same as __round_jiffies() except that it will never -+ * round down. This is useful for timeouts for which the exact time -+ * of firing does not matter too much, as long as they don't fire too -+ * early. -+ */ -+unsigned long __round_jiffies_up(unsigned long j, int cpu) -+{ -+ return round_jiffies_common(j, cpu, true); -+} -+EXPORT_SYMBOL_GPL(__round_jiffies_up); -+ -+/** -+ * __round_jiffies_up_relative - function to round jiffies up to a full second -+ * @j: the time in (relative) jiffies that should be rounded -+ * @cpu: the processor number on which the timeout will happen -+ * -+ * This is the same as __round_jiffies_relative() except that it will never -+ * round down. This is useful for timeouts for which the exact time -+ * of firing does not matter too much, as long as they don't fire too -+ * early. -+ */ -+unsigned long __round_jiffies_up_relative(unsigned long j, int cpu) -+{ -+ unsigned long j0 = jiffies; -+ -+ /* Use j0 because jiffies might change while we run */ -+ return round_jiffies_common(j + j0, cpu, true) - j0; -+} -+EXPORT_SYMBOL_GPL(__round_jiffies_up_relative); -+ -+/** -+ * round_jiffies_up - function to round jiffies up to a full second -+ * @j: the time in (absolute) jiffies that should be rounded -+ * -+ * This is the same as round_jiffies() except that it will never -+ * round down. This is useful for timeouts for which the exact time -+ * of firing does not matter too much, as long as they don't fire too -+ * early. -+ */ -+unsigned long round_jiffies_up(unsigned long j) -+{ -+ return round_jiffies_common(j, raw_smp_processor_id(), true); -+} -+EXPORT_SYMBOL_GPL(round_jiffies_up); -+ -+/** -+ * round_jiffies_up_relative - function to round jiffies up to a full second -+ * @j: the time in (relative) jiffies that should be rounded -+ * -+ * This is the same as round_jiffies_relative() except that it will never -+ * round down. This is useful for timeouts for which the exact time -+ * of firing does not matter too much, as long as they don't fire too -+ * early. -+ */ -+unsigned long round_jiffies_up_relative(unsigned long j) -+{ -+ return __round_jiffies_up_relative(j, raw_smp_processor_id()); -+} -+EXPORT_SYMBOL_GPL(round_jiffies_up_relative); -+ -+/** -+ * set_timer_slack - set the allowed slack for a timer -+ * @timer: the timer to be modified -+ * @slack_hz: the amount of time (in jiffies) allowed for rounding -+ * -+ * Set the amount of time, in jiffies, that a certain timer has -+ * in terms of slack. By setting this value, the timer subsystem -+ * will schedule the actual timer somewhere between -+ * the time mod_timer() asks for, and that time plus the slack. -+ * -+ * By setting the slack to -1, a percentage of the delay is used -+ * instead. -+ */ -+void set_timer_slack(struct timer_list *timer, int slack_hz) -+{ -+ timer->slack = slack_hz; -+} -+EXPORT_SYMBOL_GPL(set_timer_slack); -+ -+/* -+ * If the list is empty, catch up ->timer_jiffies to the current time. -+ * The caller must hold the tvec_base lock. Returns true if the list -+ * was empty and therefore ->timer_jiffies was updated. 
-+ */ -+static bool catchup_timer_jiffies(struct tvec_base *base) -+{ -+ if (!base->all_timers) { -+ base->timer_jiffies = jiffies; -+ return true; -+ } -+ return false; -+} -+ -+static void -+__internal_add_timer(struct tvec_base *base, struct timer_list *timer) -+{ -+ unsigned long expires = timer->expires; -+ unsigned long idx = expires - base->timer_jiffies; -+ struct list_head *vec; -+ -+ if (idx < TVR_SIZE) { -+ int i = expires & TVR_MASK; -+ vec = base->tv1.vec + i; -+ } else if (idx < 1 << (TVR_BITS + TVN_BITS)) { -+ int i = (expires >> TVR_BITS) & TVN_MASK; -+ vec = base->tv2.vec + i; -+ } else if (idx < 1 << (TVR_BITS + 2 * TVN_BITS)) { -+ int i = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK; -+ vec = base->tv3.vec + i; -+ } else if (idx < 1 << (TVR_BITS + 3 * TVN_BITS)) { -+ int i = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK; -+ vec = base->tv4.vec + i; -+ } else if ((signed long) idx < 0) { -+ /* -+ * Can happen if you add a timer with expires == jiffies, -+ * or you set a timer to go off in the past -+ */ -+ vec = base->tv1.vec + (base->timer_jiffies & TVR_MASK); -+ } else { -+ int i; -+ /* If the timeout is larger than MAX_TVAL (on 64-bit -+ * architectures or with CONFIG_BASE_SMALL=1) then we -+ * use the maximum timeout. -+ */ -+ if (idx > MAX_TVAL) { -+ idx = MAX_TVAL; -+ expires = idx + base->timer_jiffies; -+ } -+ i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK; -+ vec = base->tv5.vec + i; -+ } -+ /* -+ * Timers are FIFO: -+ */ -+ list_add_tail(&timer->entry, vec); -+} -+ -+static void internal_add_timer(struct tvec_base *base, struct timer_list *timer) -+{ -+ (void)catchup_timer_jiffies(base); -+ __internal_add_timer(base, timer); -+ /* -+ * Update base->active_timers and base->next_timer -+ */ -+ if (!tbase_get_deferrable(timer->base)) { -+ if (!base->active_timers++ || -+ time_before(timer->expires, base->next_timer)) -+ base->next_timer = timer->expires; -+ } -+ base->all_timers++; -+} -+ -+#ifdef CONFIG_TIMER_STATS -+void __timer_stats_timer_set_start_info(struct timer_list *timer, void *addr) -+{ -+ if (timer->start_site) -+ return; -+ -+ timer->start_site = addr; -+ memcpy(timer->start_comm, current->comm, TASK_COMM_LEN); -+ timer->start_pid = current->pid; -+} -+ -+static void timer_stats_account_timer(struct timer_list *timer) -+{ -+ unsigned int flag = 0; -+ -+ if (likely(!timer->start_site)) -+ return; -+ if (unlikely(tbase_get_deferrable(timer->base))) -+ flag |= TIMER_STATS_FLAG_DEFERRABLE; -+ -+ timer_stats_update_stats(timer, timer->start_pid, timer->start_site, -+ timer->function, timer->start_comm, flag); -+} -+ -+#else -+static void timer_stats_account_timer(struct timer_list *timer) {} -+#endif -+ -+#ifdef CONFIG_DEBUG_OBJECTS_TIMERS -+ -+static struct debug_obj_descr timer_debug_descr; -+ -+static void *timer_debug_hint(void *addr) -+{ -+ return ((struct timer_list *) addr)->function; -+} -+ -+/* -+ * fixup_init is called when: -+ * - an active object is initialized -+ */ -+static int timer_fixup_init(void *addr, enum debug_obj_state state) -+{ -+ struct timer_list *timer = addr; -+ -+ switch (state) { -+ case ODEBUG_STATE_ACTIVE: -+ del_timer_sync(timer); -+ debug_object_init(timer, &timer_debug_descr); -+ return 1; -+ default: -+ return 0; -+ } -+} -+ -+/* Stub timer callback for improperly used timers. 
*/ -+static void stub_timer(unsigned long data) -+{ -+ WARN_ON(1); -+} -+ -+/* -+ * fixup_activate is called when: -+ * - an active object is activated -+ * - an unknown object is activated (might be a statically initialized object) -+ */ -+static int timer_fixup_activate(void *addr, enum debug_obj_state state) -+{ -+ struct timer_list *timer = addr; -+ -+ switch (state) { -+ -+ case ODEBUG_STATE_NOTAVAILABLE: -+ /* -+ * This is not really a fixup. The timer was -+ * statically initialized. We just make sure that it -+ * is tracked in the object tracker. -+ */ -+ if (timer->entry.next == NULL && -+ timer->entry.prev == TIMER_ENTRY_STATIC) { -+ debug_object_init(timer, &timer_debug_descr); -+ debug_object_activate(timer, &timer_debug_descr); -+ return 0; -+ } else { -+ setup_timer(timer, stub_timer, 0); -+ return 1; -+ } -+ return 0; -+ -+ case ODEBUG_STATE_ACTIVE: -+ WARN_ON(1); -+ -+ default: -+ return 0; -+ } -+} -+ -+/* -+ * fixup_free is called when: -+ * - an active object is freed -+ */ -+static int timer_fixup_free(void *addr, enum debug_obj_state state) -+{ -+ struct timer_list *timer = addr; -+ -+ switch (state) { -+ case ODEBUG_STATE_ACTIVE: -+ del_timer_sync(timer); -+ debug_object_free(timer, &timer_debug_descr); -+ return 1; -+ default: -+ return 0; -+ } -+} -+ -+/* -+ * fixup_assert_init is called when: -+ * - an untracked/uninit-ed object is found -+ */ -+static int timer_fixup_assert_init(void *addr, enum debug_obj_state state) -+{ -+ struct timer_list *timer = addr; -+ -+ switch (state) { -+ case ODEBUG_STATE_NOTAVAILABLE: -+ if (timer->entry.prev == TIMER_ENTRY_STATIC) { -+ /* -+ * This is not really a fixup. The timer was -+ * statically initialized. We just make sure that it -+ * is tracked in the object tracker. -+ */ -+ debug_object_init(timer, &timer_debug_descr); -+ return 0; -+ } else { -+ setup_timer(timer, stub_timer, 0); -+ return 1; -+ } -+ default: -+ return 0; -+ } -+} -+ -+static struct debug_obj_descr timer_debug_descr = { -+ .name = "timer_list", -+ .debug_hint = timer_debug_hint, -+ .fixup_init = timer_fixup_init, -+ .fixup_activate = timer_fixup_activate, -+ .fixup_free = timer_fixup_free, -+ .fixup_assert_init = timer_fixup_assert_init, -+}; -+ -+static inline void debug_timer_init(struct timer_list *timer) -+{ -+ debug_object_init(timer, &timer_debug_descr); -+} -+ -+static inline void debug_timer_activate(struct timer_list *timer) -+{ -+ debug_object_activate(timer, &timer_debug_descr); -+} -+ -+static inline void debug_timer_deactivate(struct timer_list *timer) -+{ -+ debug_object_deactivate(timer, &timer_debug_descr); -+} -+ -+static inline void debug_timer_free(struct timer_list *timer) -+{ -+ debug_object_free(timer, &timer_debug_descr); -+} -+ -+static inline void debug_timer_assert_init(struct timer_list *timer) -+{ -+ debug_object_assert_init(timer, &timer_debug_descr); -+} -+ -+static void do_init_timer(struct timer_list *timer, unsigned int flags, -+ const char *name, struct lock_class_key *key); -+ -+void init_timer_on_stack_key(struct timer_list *timer, unsigned int flags, -+ const char *name, struct lock_class_key *key) -+{ -+ debug_object_init_on_stack(timer, &timer_debug_descr); -+ do_init_timer(timer, flags, name, key); -+} -+EXPORT_SYMBOL_GPL(init_timer_on_stack_key); -+ -+void destroy_timer_on_stack(struct timer_list *timer) -+{ -+ debug_object_free(timer, &timer_debug_descr); -+} -+EXPORT_SYMBOL_GPL(destroy_timer_on_stack); -+ -+#else -+static inline void debug_timer_init(struct timer_list *timer) { } -+static inline void 
debug_timer_activate(struct timer_list *timer) { } -+static inline void debug_timer_deactivate(struct timer_list *timer) { } -+static inline void debug_timer_assert_init(struct timer_list *timer) { } -+#endif -+ -+static inline void debug_init(struct timer_list *timer) -+{ -+ debug_timer_init(timer); -+ trace_timer_init(timer); -+} -+ -+static inline void -+debug_activate(struct timer_list *timer, unsigned long expires) -+{ -+ debug_timer_activate(timer); -+ trace_timer_start(timer, expires); -+} -+ -+static inline void debug_deactivate(struct timer_list *timer) -+{ -+ debug_timer_deactivate(timer); -+ trace_timer_cancel(timer); -+} -+ -+static inline void debug_assert_init(struct timer_list *timer) -+{ -+ debug_timer_assert_init(timer); -+} -+ -+static void do_init_timer(struct timer_list *timer, unsigned int flags, -+ const char *name, struct lock_class_key *key) -+{ -+ struct tvec_base *base = __raw_get_cpu_var(tvec_bases); -+ -+ timer->entry.next = NULL; -+ timer->base = (void *)((unsigned long)base | flags); -+ timer->slack = -1; -+#ifdef CONFIG_TIMER_STATS -+ timer->start_site = NULL; -+ timer->start_pid = -1; -+ memset(timer->start_comm, 0, TASK_COMM_LEN); -+#endif -+ lockdep_init_map(&timer->lockdep_map, name, key, 0); -+} -+ -+/** -+ * init_timer_key - initialize a timer -+ * @timer: the timer to be initialized -+ * @flags: timer flags -+ * @name: name of the timer -+ * @key: lockdep class key of the fake lock used for tracking timer -+ * sync lock dependencies -+ * -+ * init_timer_key() must be done to a timer prior calling *any* of the -+ * other timer functions. -+ */ -+void init_timer_key(struct timer_list *timer, unsigned int flags, -+ const char *name, struct lock_class_key *key) -+{ -+ debug_init(timer); -+ do_init_timer(timer, flags, name, key); -+} -+EXPORT_SYMBOL(init_timer_key); -+ -+static inline void detach_timer(struct timer_list *timer, bool clear_pending) -+{ -+ struct list_head *entry = &timer->entry; -+ -+ debug_deactivate(timer); -+ -+ __list_del(entry->prev, entry->next); -+ if (clear_pending) -+ entry->next = NULL; -+ entry->prev = LIST_POISON2; -+} -+ -+static inline void -+detach_expired_timer(struct timer_list *timer, struct tvec_base *base) -+{ -+ detach_timer(timer, true); -+ if (!tbase_get_deferrable(timer->base)) -+ base->active_timers--; -+ base->all_timers--; -+ (void)catchup_timer_jiffies(base); -+} -+ -+static int detach_if_pending(struct timer_list *timer, struct tvec_base *base, -+ bool clear_pending) -+{ -+ if (!timer_pending(timer)) -+ return 0; -+ -+ detach_timer(timer, clear_pending); -+ if (!tbase_get_deferrable(timer->base)) { -+ base->active_timers--; -+ if (timer->expires == base->next_timer) -+ base->next_timer = base->timer_jiffies; -+ } -+ base->all_timers--; -+ (void)catchup_timer_jiffies(base); -+ return 1; -+} -+ -+/* -+ * We are using hashed locking: holding per_cpu(tvec_bases).lock -+ * means that all timers which are tied to this base via timer->base are -+ * locked, and the base itself is locked too. -+ * -+ * So __run_timers/migrate_timers can safely modify all timers which could -+ * be found on ->tvX lists. -+ * -+ * When the timer's base is locked, and the timer removed from list, it is -+ * possible to set timer->base = NULL and drop the lock: the timer remains -+ * locked. 
-+ */ -+static struct tvec_base *lock_timer_base(struct timer_list *timer, -+ unsigned long *flags) -+ __acquires(timer->base->lock) -+{ -+ struct tvec_base *base; -+ -+ for (;;) { -+ struct tvec_base *prelock_base = timer->base; -+ base = tbase_get_base(prelock_base); -+ if (likely(base != NULL)) { -+ spin_lock_irqsave(&base->lock, *flags); -+ if (likely(prelock_base == timer->base)) -+ return base; -+ /* The timer has migrated to another CPU */ -+ spin_unlock_irqrestore(&base->lock, *flags); -+ } -+ cpu_relax(); -+ } -+} -+ -+static inline int -+__mod_timer(struct timer_list *timer, unsigned long expires, -+ bool pending_only, int pinned) -+{ -+ struct tvec_base *base, *new_base; -+ unsigned long flags; -+ int ret = 0 , cpu; -+ -+ timer_stats_timer_set_start_info(timer); -+ BUG_ON(!timer->function); -+ -+ base = lock_timer_base(timer, &flags); -+ -+ ret = detach_if_pending(timer, base, false); -+ if (!ret && pending_only) -+ goto out_unlock; -+ -+ debug_activate(timer, expires); -+ -+ cpu = get_nohz_timer_target(pinned); -+ new_base = per_cpu(tvec_bases, cpu); -+ -+ if (base != new_base) { -+ /* -+ * We are trying to schedule the timer on the local CPU. -+ * However we can't change timer's base while it is running, -+ * otherwise del_timer_sync() can't detect that the timer's -+ * handler yet has not finished. This also guarantees that -+ * the timer is serialized wrt itself. -+ */ -+ if (likely(base->running_timer != timer)) { -+ /* See the comment in lock_timer_base() */ -+ timer_set_base(timer, NULL); -+ spin_unlock(&base->lock); -+ base = new_base; -+ spin_lock(&base->lock); -+ timer_set_base(timer, base); -+ } -+ } -+ -+ timer->expires = expires; -+ internal_add_timer(base, timer); -+ -+out_unlock: -+ spin_unlock_irqrestore(&base->lock, flags); -+ -+ return ret; -+} -+ -+/** -+ * mod_timer_pending - modify a pending timer's timeout -+ * @timer: the pending timer to be modified -+ * @expires: new timeout in jiffies -+ * -+ * mod_timer_pending() is the same for pending timers as mod_timer(), -+ * but will not re-activate and modify already deleted timers. -+ * -+ * It is useful for unserialized use of timers. 
-+ */ -+int mod_timer_pending(struct timer_list *timer, unsigned long expires) -+{ -+ return __mod_timer(timer, expires, true, TIMER_NOT_PINNED); -+} -+EXPORT_SYMBOL(mod_timer_pending); -+ -+/* -+ * Decide where to put the timer while taking the slack into account -+ * -+ * Algorithm: -+ * 1) calculate the maximum (absolute) time -+ * 2) calculate the highest bit where the expires and new max are different -+ * 3) use this bit to make a mask -+ * 4) use the bitmask to round down the maximum time, so that all last -+ * bits are zeros -+ */ -+static inline -+unsigned long apply_slack(struct timer_list *timer, unsigned long expires) -+{ -+ unsigned long expires_limit, mask; -+ int bit; -+ -+ if (timer->slack >= 0) { -+ expires_limit = expires + timer->slack; -+ } else { -+ long delta = expires - jiffies; -+ -+ if (delta < 256) -+ return expires; -+ -+ expires_limit = expires + delta / 256; -+ } -+ mask = expires ^ expires_limit; -+ if (mask == 0) -+ return expires; -+ -+ bit = find_last_bit(&mask, BITS_PER_LONG); -+ -+ mask = (1UL << bit) - 1; -+ -+ expires_limit = expires_limit & ~(mask); -+ -+ return expires_limit; -+} -+ -+/** -+ * mod_timer - modify a timer's timeout -+ * @timer: the timer to be modified -+ * @expires: new timeout in jiffies -+ * -+ * mod_timer() is a more efficient way to update the expire field of an -+ * active timer (if the timer is inactive it will be activated) -+ * -+ * mod_timer(timer, expires) is equivalent to: -+ * -+ * del_timer(timer); timer->expires = expires; add_timer(timer); -+ * -+ * Note that if there are multiple unserialized concurrent users of the -+ * same timer, then mod_timer() is the only safe way to modify the timeout, -+ * since add_timer() cannot modify an already running timer. -+ * -+ * The function returns whether it has modified a pending timer or not. -+ * (ie. mod_timer() of an inactive timer returns 0, mod_timer() of an -+ * active timer returns 1.) -+ */ -+int mod_timer(struct timer_list *timer, unsigned long expires) -+{ -+ expires = apply_slack(timer, expires); -+ -+ /* -+ * This is a common optimization triggered by the -+ * networking code - if the timer is re-modified -+ * to be the same thing then just return: -+ */ -+ if (timer_pending(timer) && timer->expires == expires) -+ return 1; -+ -+ return __mod_timer(timer, expires, false, TIMER_NOT_PINNED); -+} -+EXPORT_SYMBOL(mod_timer); -+ -+/** -+ * mod_timer_pinned - modify a timer's timeout -+ * @timer: the timer to be modified -+ * @expires: new timeout in jiffies -+ * -+ * mod_timer_pinned() is a way to update the expire field of an -+ * active timer (if the timer is inactive it will be activated) -+ * and to ensure that the timer is scheduled on the current CPU. -+ * -+ * Note that this does not prevent the timer from being migrated -+ * when the current CPU goes offline. If this is a problem for -+ * you, use CPU-hotplug notifiers to handle it correctly, for -+ * example, cancelling the timer when the corresponding CPU goes -+ * offline. 
-+ * -+ * mod_timer_pinned(timer, expires) is equivalent to: -+ * -+ * del_timer(timer); timer->expires = expires; add_timer(timer); -+ */ -+int mod_timer_pinned(struct timer_list *timer, unsigned long expires) -+{ -+ if (timer->expires == expires && timer_pending(timer)) -+ return 1; -+ -+ return __mod_timer(timer, expires, false, TIMER_PINNED); -+} -+EXPORT_SYMBOL(mod_timer_pinned); -+ -+/** -+ * add_timer - start a timer -+ * @timer: the timer to be added -+ * -+ * The kernel will do a ->function(->data) callback from the -+ * timer interrupt at the ->expires point in the future. The -+ * current time is 'jiffies'. -+ * -+ * The timer's ->expires, ->function (and if the handler uses it, ->data) -+ * fields must be set prior calling this function. -+ * -+ * Timers with an ->expires field in the past will be executed in the next -+ * timer tick. -+ */ -+void add_timer(struct timer_list *timer) -+{ -+ BUG_ON(timer_pending(timer)); -+ mod_timer(timer, timer->expires); -+} -+EXPORT_SYMBOL(add_timer); -+ -+/** -+ * add_timer_on - start a timer on a particular CPU -+ * @timer: the timer to be added -+ * @cpu: the CPU to start it on -+ * -+ * This is not very scalable on SMP. Double adds are not possible. -+ */ -+void add_timer_on(struct timer_list *timer, int cpu) -+{ -+ struct tvec_base *base = per_cpu(tvec_bases, cpu); -+ unsigned long flags; -+ -+ timer_stats_timer_set_start_info(timer); -+ BUG_ON(timer_pending(timer) || !timer->function); -+ spin_lock_irqsave(&base->lock, flags); -+ timer_set_base(timer, base); -+ debug_activate(timer, timer->expires); -+ internal_add_timer(base, timer); -+ /* -+ * Check whether the other CPU is in dynticks mode and needs -+ * to be triggered to reevaluate the timer wheel. -+ * We are protected against the other CPU fiddling -+ * with the timer by holding the timer base lock. This also -+ * makes sure that a CPU on the way to stop its tick can not -+ * evaluate the timer wheel. -+ * -+ * Spare the IPI for deferrable timers on idle targets though. -+ * The next busy ticks will take care of it. Except full dynticks -+ * require special care against races with idle_cpu(), lets deal -+ * with that later. -+ */ -+ if (!tbase_get_deferrable(timer->base) || tick_nohz_full_cpu(cpu)) -+ wake_up_nohz_cpu(cpu); -+ -+ spin_unlock_irqrestore(&base->lock, flags); -+} -+EXPORT_SYMBOL_GPL(add_timer_on); -+ -+/** -+ * del_timer - deactive a timer. -+ * @timer: the timer to be deactivated -+ * -+ * del_timer() deactivates a timer - this works on both active and inactive -+ * timers. -+ * -+ * The function returns whether it has deactivated a pending timer or not. -+ * (ie. del_timer() of an inactive timer returns 0, del_timer() of an -+ * active timer returns 1.) -+ */ -+int del_timer(struct timer_list *timer) -+{ -+ struct tvec_base *base; -+ unsigned long flags; -+ int ret = 0; -+ -+ debug_assert_init(timer); -+ -+ timer_stats_timer_clear_start_info(timer); -+ if (timer_pending(timer)) { -+ base = lock_timer_base(timer, &flags); -+ ret = detach_if_pending(timer, base, true); -+ spin_unlock_irqrestore(&base->lock, flags); -+ } -+ -+ return ret; -+} -+EXPORT_SYMBOL(del_timer); -+ -+/** -+ * try_to_del_timer_sync - Try to deactivate a timer -+ * @timer: timer do del -+ * -+ * This function tries to deactivate a timer. Upon successful (ret >= 0) -+ * exit the timer is not queued and the handler is not running on any CPU. 
-+ */ -+int try_to_del_timer_sync(struct timer_list *timer) -+{ -+ struct tvec_base *base; -+ unsigned long flags; -+ int ret = -1; -+ -+ debug_assert_init(timer); -+ -+ base = lock_timer_base(timer, &flags); -+ -+ if (base->running_timer != timer) { -+ timer_stats_timer_clear_start_info(timer); -+ ret = detach_if_pending(timer, base, true); -+ } -+ spin_unlock_irqrestore(&base->lock, flags); -+ -+ return ret; -+} -+EXPORT_SYMBOL(try_to_del_timer_sync); -+ -+#ifdef CONFIG_SMP -+/** -+ * del_timer_sync - deactivate a timer and wait for the handler to finish. -+ * @timer: the timer to be deactivated -+ * -+ * This function only differs from del_timer() on SMP: besides deactivating -+ * the timer it also makes sure the handler has finished executing on other -+ * CPUs. -+ * -+ * Synchronization rules: Callers must prevent restarting of the timer, -+ * otherwise this function is meaningless. It must not be called from -+ * interrupt contexts unless the timer is an irqsafe one. The caller must -+ * not hold locks which would prevent completion of the timer's -+ * handler. The timer's handler must not call add_timer_on(). Upon exit the -+ * timer is not queued and the handler is not running on any CPU. -+ * -+ * Note: For !irqsafe timers, you must not hold locks that are held in -+ * interrupt context while calling this function. Even if the lock has -+ * nothing to do with the timer in question. Here's why: -+ * -+ * CPU0 CPU1 -+ * ---- ---- -+ * <SOFTIRQ> -+ * call_timer_fn(); -+ * base->running_timer = mytimer; -+ * spin_lock_irq(somelock); -+ * <IRQ> -+ * spin_lock(somelock); -+ * del_timer_sync(mytimer); -+ * while (base->running_timer == mytimer); -+ * -+ * Now del_timer_sync() will never return and never release somelock. -+ * The interrupt on the other CPU is waiting to grab somelock but -+ * it has interrupted the softirq that CPU0 is waiting to finish. -+ * -+ * The function returns whether it has deactivated a pending timer or not. -+ */ -+int del_timer_sync(struct timer_list *timer) -+{ -+#ifdef CONFIG_LOCKDEP -+ unsigned long flags; -+ -+ /* -+ * If lockdep gives a backtrace here, please reference -+ * the synchronization rules above. -+ */ -+ local_irq_save(flags); -+ lock_map_acquire(&timer->lockdep_map); -+ lock_map_release(&timer->lockdep_map); -+ local_irq_restore(flags); -+#endif -+ /* -+ * don't use it in hardirq context, because it -+ * could lead to deadlock. -+ */ -+ WARN_ON(in_irq() && !tbase_get_irqsafe(timer->base)); -+ for (;;) { -+ int ret = try_to_del_timer_sync(timer); -+ if (ret >= 0) -+ return ret; -+ cpu_relax(); -+ } -+} -+EXPORT_SYMBOL(del_timer_sync); -+#endif -+ -+static int cascade(struct tvec_base *base, struct tvec *tv, int index) -+{ -+ /* cascade all the timers from tv up one level */ -+ struct timer_list *timer, *tmp; -+ struct list_head tv_list; -+ -+ list_replace_init(tv->vec + index, &tv_list); -+ -+ /* -+ * We are removing _all_ timers from the list, so we -+ * don't have to detach them individually. -+ */ -+ list_for_each_entry_safe(timer, tmp, &tv_list, entry) { -+ BUG_ON(tbase_get_base(timer->base) != base); -+ /* No accounting, while moving them */ -+ __internal_add_timer(base, timer); -+ } -+ -+ return index; -+} -+ -+static void call_timer_fn(struct timer_list *timer, void (*fn)(unsigned long), -+ unsigned long data) -+{ -+ int count = preempt_count(); -+ -+#ifdef CONFIG_LOCKDEP -+ /* -+ * It is permissible to free the timer from inside the -+ * function that is called from it, this we need to take into -+ * account for lockdep too. 
To avoid bogus "held lock freed" -+ * warnings as well as problems when looking into -+ * timer->lockdep_map, make a copy and use that here. -+ */ -+ struct lockdep_map lockdep_map; -+ -+ lockdep_copy_map(&lockdep_map, &timer->lockdep_map); -+#endif -+ /* -+ * Couple the lock chain with the lock chain at -+ * del_timer_sync() by acquiring the lock_map around the fn() -+ * call here and in del_timer_sync(). -+ */ -+ lock_map_acquire(&lockdep_map); -+ -+ trace_timer_expire_entry(timer); -+ fn(data); -+ trace_timer_expire_exit(timer); -+ -+ lock_map_release(&lockdep_map); -+ -+ if (count != preempt_count()) { -+ WARN_ONCE(1, "timer: %pF preempt leak: %08x -> %08x\n", -+ fn, count, preempt_count()); -+ /* -+ * Restore the preempt count. That gives us a decent -+ * chance to survive and extract information. If the -+ * callback kept a lock held, bad luck, but not worse -+ * than the BUG() we had. -+ */ -+ preempt_count_set(count); -+ } -+} -+ -+#define INDEX(N) ((base->timer_jiffies >> (TVR_BITS + (N) * TVN_BITS)) & TVN_MASK) -+ -+/** -+ * __run_timers - run all expired timers (if any) on this CPU. -+ * @base: the timer vector to be processed. -+ * -+ * This function cascades all vectors and executes all expired timer -+ * vectors. -+ */ -+static inline void __run_timers(struct tvec_base *base) -+{ -+ struct timer_list *timer; -+ -+ spin_lock_irq(&base->lock); -+ if (catchup_timer_jiffies(base)) { -+ spin_unlock_irq(&base->lock); -+ return; -+ } -+ while (time_after_eq(jiffies, base->timer_jiffies)) { -+ struct list_head work_list; -+ struct list_head *head = &work_list; -+ int index = base->timer_jiffies & TVR_MASK; -+ -+ /* -+ * Cascade timers: -+ */ -+ if (!index && -+ (!cascade(base, &base->tv2, INDEX(0))) && -+ (!cascade(base, &base->tv3, INDEX(1))) && -+ !cascade(base, &base->tv4, INDEX(2))) -+ cascade(base, &base->tv5, INDEX(3)); -+ ++base->timer_jiffies; -+ list_replace_init(base->tv1.vec + index, head); -+ while (!list_empty(head)) { -+ void (*fn)(unsigned long); -+ unsigned long data; -+ bool irqsafe; -+ -+ timer = list_first_entry(head, struct timer_list,entry); -+ fn = timer->function; -+ data = timer->data; -+ irqsafe = tbase_get_irqsafe(timer->base); -+ -+ timer_stats_account_timer(timer); -+ -+ base->running_timer = timer; -+ detach_expired_timer(timer, base); -+ -+ if (irqsafe) { -+ spin_unlock(&base->lock); -+ call_timer_fn(timer, fn, data); -+ spin_lock(&base->lock); -+ } else { -+ spin_unlock_irq(&base->lock); -+ call_timer_fn(timer, fn, data); -+ spin_lock_irq(&base->lock); -+ } -+ } -+ } -+ base->running_timer = NULL; -+ spin_unlock_irq(&base->lock); -+} -+ -+#ifdef CONFIG_NO_HZ_COMMON -+/* -+ * Find out when the next timer event is due to happen. This -+ * is used on S/390 to stop all activity when a CPU is idle. -+ * This function needs to be called with interrupts disabled. -+ */ -+static unsigned long __next_timer_interrupt(struct tvec_base *base) -+{ -+ unsigned long timer_jiffies = base->timer_jiffies; -+ unsigned long expires = timer_jiffies + NEXT_TIMER_MAX_DELTA; -+ int index, slot, array, found = 0; -+ struct timer_list *nte; -+ struct tvec *varray[4]; -+ -+ /* Look for timer events in tv1. */ -+ index = slot = timer_jiffies & TVR_MASK; -+ do { -+ list_for_each_entry(nte, base->tv1.vec + slot, entry) { -+ if (tbase_get_deferrable(nte->base)) -+ continue; -+ -+ found = 1; -+ expires = nte->expires; -+ /* Look at the cascade bucket(s)? 
-+#ifdef CONFIG_NO_HZ_COMMON
-+/*
-+ * Find out when the next timer event is due to happen. This
-+ * is used on S/390 to stop all activity when a CPU is idle.
-+ * This function needs to be called with interrupts disabled.
-+ */
-+static unsigned long __next_timer_interrupt(struct tvec_base *base)
-+{
-+        unsigned long timer_jiffies = base->timer_jiffies;
-+        unsigned long expires = timer_jiffies + NEXT_TIMER_MAX_DELTA;
-+        int index, slot, array, found = 0;
-+        struct timer_list *nte;
-+        struct tvec *varray[4];
-+
-+        /* Look for timer events in tv1. */
-+        index = slot = timer_jiffies & TVR_MASK;
-+        do {
-+                list_for_each_entry(nte, base->tv1.vec + slot, entry) {
-+                        if (tbase_get_deferrable(nte->base))
-+                                continue;
-+
-+                        found = 1;
-+                        expires = nte->expires;
-+                        /* Look at the cascade bucket(s)? */
-+                        if (!index || slot < index)
-+                                goto cascade;
-+                        return expires;
-+                }
-+                slot = (slot + 1) & TVR_MASK;
-+        } while (slot != index);
-+
-+cascade:
-+        /* Calculate the next cascade event */
-+        if (index)
-+                timer_jiffies += TVR_SIZE - index;
-+        timer_jiffies >>= TVR_BITS;
-+
-+        /* Check tv2-tv5. */
-+        varray[0] = &base->tv2;
-+        varray[1] = &base->tv3;
-+        varray[2] = &base->tv4;
-+        varray[3] = &base->tv5;
-+
-+        for (array = 0; array < 4; array++) {
-+                struct tvec *varp = varray[array];
-+
-+                index = slot = timer_jiffies & TVN_MASK;
-+                do {
-+                        list_for_each_entry(nte, varp->vec + slot, entry) {
-+                                if (tbase_get_deferrable(nte->base))
-+                                        continue;
-+
-+                                found = 1;
-+                                if (time_before(nte->expires, expires))
-+                                        expires = nte->expires;
-+                        }
-+                        /*
-+                         * Do we still search for the first timer or are
-+                         * we looking up the cascade buckets ?
-+                         */
-+                        if (found) {
-+                                /* Look at the cascade bucket(s)? */
-+                                if (!index || slot < index)
-+                                        break;
-+                                return expires;
-+                        }
-+                        slot = (slot + 1) & TVN_MASK;
-+                } while (slot != index);
-+
-+                if (index)
-+                        timer_jiffies += TVN_SIZE - index;
-+                timer_jiffies >>= TVN_BITS;
-+        }
-+        return expires;
-+}
-+
-+/*
-+ * Check, if the next hrtimer event is before the next timer wheel
-+ * event:
-+ */
-+static unsigned long cmp_next_hrtimer_event(unsigned long now,
-+                                            unsigned long expires)
-+{
-+        ktime_t hr_delta = hrtimer_get_next_event();
-+        struct timespec tsdelta;
-+        unsigned long delta;
-+
-+        if (hr_delta.tv64 == KTIME_MAX)
-+                return expires;
-+
-+        /*
-+         * Expired timer available, let it expire in the next tick
-+         */
-+        if (hr_delta.tv64 <= 0)
-+                return now + 1;
-+
-+        tsdelta = ktime_to_timespec(hr_delta);
-+        delta = timespec_to_jiffies(&tsdelta);
-+
-+        /*
-+         * Limit the delta to the max value, which is checked in
-+         * tick_nohz_stop_sched_tick():
-+         */
-+        if (delta > NEXT_TIMER_MAX_DELTA)
-+                delta = NEXT_TIMER_MAX_DELTA;
-+
-+        /*
-+         * Take rounding errors in to account and make sure, that it
-+         * expires in the next tick. Otherwise we go into an endless
-+         * ping pong due to tick_nohz_stop_sched_tick() retriggering
-+         * the timer softirq
-+         */
-+        if (delta < 1)
-+                delta = 1;
-+        now += delta;
-+        if (time_before(now, expires))
-+                return now;
-+        return expires;
-+}
-+
-+/**
-+ * get_next_timer_interrupt - return the jiffy of the next pending timer
-+ * @now: current time (in jiffies)
-+ */
-+unsigned long get_next_timer_interrupt(unsigned long now)
-+{
-+        struct tvec_base *base = __this_cpu_read(tvec_bases);
-+        unsigned long expires = now + NEXT_TIMER_MAX_DELTA;
-+
-+        /*
-+         * Pretend that there is no timer pending if the cpu is offline.
-+         * Possible pending timers will be migrated later to an active cpu.
-+         */
-+        if (cpu_is_offline(smp_processor_id()))
-+                return expires;
-+
-+        spin_lock(&base->lock);
-+        if (base->active_timers) {
-+                if (time_before_eq(base->next_timer, base->timer_jiffies))
-+                        base->next_timer = __next_timer_interrupt(base);
-+                expires = base->next_timer;
-+        }
-+        spin_unlock(&base->lock);
-+
-+        if (time_before_eq(expires, now))
-+                return now;
-+
-+        return cmp_next_hrtimer_event(now, expires);
-+}
-+#endif
-+
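Every deadline comparison in this block uses time_before()/time_after_eq() instead of raw relational operators, because jiffies wraps. The idiom boils down to a signed subtraction; a self-contained illustration (the macros here mirror include/linux/jiffies.h):

    #include <stdio.h>
    #include <limits.h>

    #define time_after_eq(a, b)  ((long)((a) - (b)) >= 0)
    #define time_before(a, b)    ((long)((a) - (b)) < 0)

    int main(void)
    {
            unsigned long now = ULONG_MAX - 2;      /* about to wrap */
            unsigned long expires = now + 10;       /* wraps past zero */

            /* a naive 'now >= expires' would claim the timer expired */
            printf("expired: %d\n", time_after_eq(now, expires));  /* 0 */
            printf("pending: %d\n", time_before(now, expires));    /* 1 */
            return 0;
    }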
-+/*
-+ * Called from the timer interrupt handler to charge one tick to the current
-+ * process. user_tick is 1 if the tick is user time, 0 for system.
-+ */
-+void update_process_times(int user_tick)
-+{
-+        struct task_struct *p = current;
-+        int cpu = smp_processor_id();
-+
-+        /* Note: this timer irq context must be accounted for as well. */
-+        account_process_tick(p, user_tick);
-+        run_local_timers();
-+        rcu_check_callbacks(cpu, user_tick);
-+#ifdef CONFIG_IRQ_WORK
-+        if (in_irq())
-+                irq_work_run();
-+#endif
-+        scheduler_tick();
-+        run_posix_cpu_timers(p);
-+}
-+
-+#ifdef CONFIG_IPIPE
-+
-+void update_root_process_times(struct pt_regs *regs)
-+{
-+        int cpu, user_tick = user_mode(regs);
-+
-+        if (__ipipe_root_tick_p(regs)) {
-+                update_process_times(user_tick);
-+                return;
-+        }
-+
-+        run_local_timers();
-+        cpu = smp_processor_id();
-+        rcu_check_callbacks(cpu, user_tick);
-+        run_posix_cpu_timers(current);
-+}
-+
-+#endif
-+
-+/*
-+ * This function runs timers and the timer-tq in bottom half context.
-+ */
-+static void run_timer_softirq(struct softirq_action *h)
-+{
-+        struct tvec_base *base = __this_cpu_read(tvec_bases);
-+
-+        hrtimer_run_pending();
-+
-+        if (time_after_eq(jiffies, base->timer_jiffies))
-+                __run_timers(base);
-+}
-+
-+/*
-+ * Called by the local, per-CPU timer interrupt on SMP.
-+ */
-+void run_local_timers(void)
-+{
-+        hrtimer_run_queues();
-+        raise_softirq(TIMER_SOFTIRQ);
-+}
-+
-+#ifdef __ARCH_WANT_SYS_ALARM
-+
-+/*
-+ * For backwards compatibility?  This can be done in libc so Alpha
-+ * and all newer ports shouldn't need it.
-+ */
-+SYSCALL_DEFINE1(alarm, unsigned int, seconds)
-+{
-+        return alarm_setitimer(seconds);
-+}
-+
-+#endif
-+
-+static void process_timeout(unsigned long __data)
-+{
-+        wake_up_process((struct task_struct *)__data);
-+}
-+
-+/**
-+ * schedule_timeout - sleep until timeout
-+ * @timeout: timeout value in jiffies
-+ *
-+ * Make the current task sleep until @timeout jiffies have
-+ * elapsed. The routine will return immediately unless
-+ * the current task state has been set (see set_current_state()).
-+ *
-+ * You can set the task state as follows -
-+ *
-+ * %TASK_UNINTERRUPTIBLE - at least @timeout jiffies are guaranteed to
-+ * pass before the routine returns. The routine will return 0
-+ *
-+ * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
-+ * delivered to the current task. In this case the remaining time
-+ * in jiffies will be returned, or 0 if the timer expired in time
-+ *
-+ * The current task state is guaranteed to be TASK_RUNNING when this
-+ * routine returns.
-+ *
-+ * Specifying a @timeout value of %MAX_SCHEDULE_TIMEOUT will schedule
-+ * the CPU away without a bound on the timeout. In this case the return
-+ * value will be %MAX_SCHEDULE_TIMEOUT.
-+ *
-+ * In all cases the return value is guaranteed to be non-negative.
-+ */
-+signed long __sched schedule_timeout(signed long timeout)
-+{
-+        struct timer_list timer;
-+        unsigned long expire;
-+
-+        switch (timeout)
-+        {
-+        case MAX_SCHEDULE_TIMEOUT:
-+                /*
-+                 * These two special cases are useful to be comfortable
-+                 * in the caller. Nothing more. We could take
-+                 * MAX_SCHEDULE_TIMEOUT from one of the negative value
-+                 * but I' d like to return a valid offset (>=0) to allow
-+                 * the caller to do everything it want with the retval.
-+                 */
-+                schedule();
-+                goto out;
-+        default:
-+                /*
-+                 * Another bit of PARANOID. Note that the retval will be
-+                 * 0 since no piece of kernel is supposed to do a check
-+                 * for a negative retval of schedule_timeout() (since it
-+                 * should never happens anyway). You just have the printk()
-+                 * that will tell you if something is gone wrong and where.
-+                 */
-+                if (timeout < 0) {
-+                        printk(KERN_ERR "schedule_timeout: wrong timeout "
-+                                "value %lx\n", timeout);
-+                        dump_stack();
-+                        current->state = TASK_RUNNING;
-+                        goto out;
-+                }
-+        }
-+
-+        expire = timeout + jiffies;
-+
-+        setup_timer_on_stack(&timer, process_timeout, (unsigned long)current);
-+        __mod_timer(&timer, expire, false, TIMER_NOT_PINNED);
-+        schedule();
-+        del_singleshot_timer_sync(&timer);
-+
-+        /* Remove the timer from the object tracker */
-+        destroy_timer_on_stack(&timer);
-+
-+        timeout = expire - jiffies;
-+
-+ out:
-+        return timeout < 0 ? 0 : timeout;
-+}
-+EXPORT_SYMBOL(schedule_timeout);
-+
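The set_current_state() requirement described above is the whole contract: without it, schedule_timeout() returns immediately. Typical use (a sketch; wait_a_bit() is a made-up helper):

    #include <linux/sched.h>
    #include <linux/jiffies.h>

    /* Sleep for up to two seconds, waking early if a signal arrives. */
    static long wait_a_bit(void)
    {
            long remaining;

            set_current_state(TASK_INTERRUPTIBLE);
            remaining = schedule_timeout(2 * HZ);
            /* Back in TASK_RUNNING here; remaining > 0 means we woke early. */
            return remaining;
    }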
-+ */ -+ if (timeout < 0) { -+ printk(KERN_ERR "schedule_timeout: wrong timeout " -+ "value %lx\n", timeout); -+ dump_stack(); -+ current->state = TASK_RUNNING; -+ goto out; -+ } -+ } -+ -+ expire = timeout + jiffies; -+ -+ setup_timer_on_stack(&timer, process_timeout, (unsigned long)current); -+ __mod_timer(&timer, expire, false, TIMER_NOT_PINNED); -+ schedule(); -+ del_singleshot_timer_sync(&timer); -+ -+ /* Remove the timer from the object tracker */ -+ destroy_timer_on_stack(&timer); -+ -+ timeout = expire - jiffies; -+ -+ out: -+ return timeout < 0 ? 0 : timeout; -+} -+EXPORT_SYMBOL(schedule_timeout); -+ -+/* -+ * We can use __set_current_state() here because schedule_timeout() calls -+ * schedule() unconditionally. -+ */ -+signed long __sched schedule_timeout_interruptible(signed long timeout) -+{ -+ __set_current_state(TASK_INTERRUPTIBLE); -+ return schedule_timeout(timeout); -+} -+EXPORT_SYMBOL(schedule_timeout_interruptible); -+ -+signed long __sched schedule_timeout_killable(signed long timeout) -+{ -+ __set_current_state(TASK_KILLABLE); -+ return schedule_timeout(timeout); -+} -+EXPORT_SYMBOL(schedule_timeout_killable); -+ -+signed long __sched schedule_timeout_uninterruptible(signed long timeout) -+{ -+ __set_current_state(TASK_UNINTERRUPTIBLE); -+ return schedule_timeout(timeout); -+} -+EXPORT_SYMBOL(schedule_timeout_uninterruptible); -+ -+static int init_timers_cpu(int cpu) -+{ -+ int j; -+ struct tvec_base *base; -+ static char tvec_base_done[NR_CPUS]; -+ -+ if (!tvec_base_done[cpu]) { -+ static char boot_done; -+ -+ if (boot_done) { -+ /* -+ * The APs use this path later in boot -+ */ -+ base = kzalloc_node(sizeof(*base), GFP_KERNEL, -+ cpu_to_node(cpu)); -+ if (!base) -+ return -ENOMEM; -+ -+ /* Make sure tvec_base has TIMER_FLAG_MASK bits free */ -+ if (WARN_ON(base != tbase_get_base(base))) { -+ kfree(base); -+ return -ENOMEM; -+ } -+ per_cpu(tvec_bases, cpu) = base; -+ } else { -+ /* -+ * This is for the boot CPU - we use compile-time -+ * static initialisation because per-cpu memory isn't -+ * ready yet and because the memory allocators are not -+ * initialised either. -+ */ -+ boot_done = 1; -+ base = &boot_tvec_bases; -+ } -+ spin_lock_init(&base->lock); -+ tvec_base_done[cpu] = 1; -+ } else { -+ base = per_cpu(tvec_bases, cpu); -+ } -+ -+ -+ for (j = 0; j < TVN_SIZE; j++) { -+ INIT_LIST_HEAD(base->tv5.vec + j); -+ INIT_LIST_HEAD(base->tv4.vec + j); -+ INIT_LIST_HEAD(base->tv3.vec + j); -+ INIT_LIST_HEAD(base->tv2.vec + j); -+ } -+ for (j = 0; j < TVR_SIZE; j++) -+ INIT_LIST_HEAD(base->tv1.vec + j); -+ -+ base->timer_jiffies = jiffies; -+ base->next_timer = base->timer_jiffies; -+ base->active_timers = 0; -+ base->all_timers = 0; -+ return 0; -+} -+ -+#ifdef CONFIG_HOTPLUG_CPU -+static void migrate_timer_list(struct tvec_base *new_base, struct list_head *head) -+{ -+ struct timer_list *timer; -+ -+ while (!list_empty(head)) { -+ timer = list_first_entry(head, struct timer_list, entry); -+ /* We ignore the accounting on the dying cpu */ -+ detach_timer(timer, false); -+ timer_set_base(timer, new_base); -+ internal_add_timer(new_base, timer); -+ } -+} -+ -+static void migrate_timers(int cpu) -+{ -+ struct tvec_base *old_base; -+ struct tvec_base *new_base; -+ int i; -+ -+ BUG_ON(cpu_online(cpu)); -+ old_base = per_cpu(tvec_bases, cpu); -+ new_base = get_cpu_var(tvec_bases); -+ /* -+ * The caller is globally serialized and nobody else -+ * takes two locks at once, deadlock is not possible. 
-+ */ -+ spin_lock_irq(&new_base->lock); -+ spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING); -+ -+ BUG_ON(old_base->running_timer); -+ -+ for (i = 0; i < TVR_SIZE; i++) -+ migrate_timer_list(new_base, old_base->tv1.vec + i); -+ for (i = 0; i < TVN_SIZE; i++) { -+ migrate_timer_list(new_base, old_base->tv2.vec + i); -+ migrate_timer_list(new_base, old_base->tv3.vec + i); -+ migrate_timer_list(new_base, old_base->tv4.vec + i); -+ migrate_timer_list(new_base, old_base->tv5.vec + i); -+ } -+ -+ spin_unlock(&old_base->lock); -+ spin_unlock_irq(&new_base->lock); -+ put_cpu_var(tvec_bases); -+} -+#endif /* CONFIG_HOTPLUG_CPU */ -+ -+static int timer_cpu_notify(struct notifier_block *self, -+ unsigned long action, void *hcpu) -+{ -+ long cpu = (long)hcpu; -+ int err; -+ -+ switch(action) { -+ case CPU_UP_PREPARE: -+ case CPU_UP_PREPARE_FROZEN: -+ err = init_timers_cpu(cpu); -+ if (err < 0) -+ return notifier_from_errno(err); -+ break; -+#ifdef CONFIG_HOTPLUG_CPU -+ case CPU_DEAD: -+ case CPU_DEAD_FROZEN: -+ migrate_timers(cpu); -+ break; -+#endif -+ default: -+ break; -+ } -+ return NOTIFY_OK; -+} -+ -+static struct notifier_block timers_nb = { -+ .notifier_call = timer_cpu_notify, -+}; -+ -+ -+void __init init_timers(void) -+{ -+ int err; -+ -+ /* ensure there are enough low bits for flags in timer->base pointer */ -+ BUILD_BUG_ON(__alignof__(struct tvec_base) & TIMER_FLAG_MASK); -+ -+ err = timer_cpu_notify(&timers_nb, (unsigned long)CPU_UP_PREPARE, -+ (void *)(long)smp_processor_id()); -+ BUG_ON(err != NOTIFY_OK); -+ -+ init_timer_stats(); -+ register_cpu_notifier(&timers_nb); -+ open_softirq(TIMER_SOFTIRQ, run_timer_softirq); -+} -+ -+/** -+ * msleep - sleep safely even with waitqueue interruptions -+ * @msecs: Time in milliseconds to sleep for -+ */ -+void msleep(unsigned int msecs) -+{ -+ unsigned long timeout = msecs_to_jiffies(msecs) + 1; -+ -+ while (timeout) -+ timeout = schedule_timeout_uninterruptible(timeout); -+} -+ -+EXPORT_SYMBOL(msleep); -+ -+/** -+ * msleep_interruptible - sleep waiting for signals -+ * @msecs: Time in milliseconds to sleep for -+ */ -+unsigned long msleep_interruptible(unsigned int msecs) -+{ -+ unsigned long timeout = msecs_to_jiffies(msecs) + 1; -+ -+ while (timeout && !signal_pending(current)) -+ timeout = schedule_timeout_interruptible(timeout); -+ return jiffies_to_msecs(timeout); -+} -+ -+EXPORT_SYMBOL(msleep_interruptible); -+ -+static int __sched do_usleep_range(unsigned long min, unsigned long max) -+{ -+ ktime_t kmin; -+ unsigned long delta; -+ -+ kmin = ktime_set(0, min * NSEC_PER_USEC); -+ delta = (max - min) * NSEC_PER_USEC; -+ return schedule_hrtimeout_range(&kmin, delta, HRTIMER_MODE_REL); -+} -+ -+/** -+ * usleep_range - Drop in replacement for udelay where wakeup is flexible -+ * @min: Minimum time in usecs to sleep -+ * @max: Maximum time in usecs to sleep -+ */ -+void usleep_range(unsigned long min, unsigned long max) -+{ -+ __set_current_state(TASK_UNINTERRUPTIBLE); -+ do_usleep_range(min, max); -+} -+EXPORT_SYMBOL(usleep_range); -diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig -index a5da09c..4f6d522 100644 ---- a/kernel/trace/Kconfig -+++ b/kernel/trace/Kconfig -@@ -439,6 +439,7 @@ config DYNAMIC_FTRACE - bool "enable/disable function tracing dynamically" - depends on FUNCTION_TRACER - depends on HAVE_DYNAMIC_FTRACE -+ depends on !IPIPE_TRACE_MCOUNT - default y - help - This option will modify all the calls to function tracing -diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c -index d1eff3d..ac1979d 100644 ---- 
-diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
-index a5da09c..4f6d522 100644
---- a/kernel/trace/Kconfig
-+++ b/kernel/trace/Kconfig
-@@ -439,6 +439,7 @@ config DYNAMIC_FTRACE
- 	bool "enable/disable function tracing dynamically"
- 	depends on FUNCTION_TRACER
- 	depends on HAVE_DYNAMIC_FTRACE
-+	depends on !IPIPE_TRACE_MCOUNT
- 	default y
- 	help
- 	  This option will modify all the calls to function tracing
-diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
-index d1eff3d..ac1979d 100644
---- a/kernel/trace/ftrace.c
-+++ b/kernel/trace/ftrace.c
-@@ -32,6 +32,7 @@
- #include <linux/list.h>
- #include <linux/hash.h>
- #include <linux/rcupdate.h>
-+#include <linux/ipipe.h>
- 
- #include <trace/events/sched.h>
- 
-@@ -251,8 +252,17 @@ static inline void update_function_graph_func(void) { }
- 
- static void update_ftrace_function(void)
- {
-+	struct ftrace_ops *ops;
- 	ftrace_func_t func;
- 
-+	for (ops = ftrace_ops_list;
-+	     ops != &ftrace_list_end; ops = ops->next)
-+		if (ops->flags & FTRACE_OPS_FL_IPIPE_EXCLUSIVE) {
-+			function_trace_op = ops;
-+			ftrace_trace_function = ops->func;
-+			return;
-+		}
-+
- 	/*
- 	 * Prepare the ftrace_ops that the arch callback will use.
- 	 * If there's only one ftrace_ops registered, the ftrace_ops_list
-@@ -2298,6 +2308,9 @@ void __weak arch_ftrace_update_code(int command)
- 
- static void ftrace_run_update_code(int command)
- {
-+#ifdef CONFIG_IPIPE
-+	unsigned long flags;
-+#endif /* CONFIG_IPIPE */
- 	int ret;
- 
- 	ret = ftrace_arch_code_modify_prepare();
-@@ -2311,7 +2324,13 @@ static void ftrace_run_update_code(int command)
- 	 * is safe. The stop_machine() is the safest, but also
- 	 * produces the most overhead.
- 	 */
-+#ifdef CONFIG_IPIPE
-+	flags = ipipe_critical_enter(NULL);
-+	__ftrace_modify_code(&command);
-+	ipipe_critical_exit(flags);
-+#else /* !CONFIG_IPIPE */
- 	arch_ftrace_update_code(command);
-+#endif /* !CONFIG_IPIPE */
- 
- 	ret = ftrace_arch_code_modify_post_process();
- 	FTRACE_WARN_ON(ret);
-@@ -4621,10 +4640,10 @@ static int ftrace_process_locs(struct module *mod,
- 	 * reason to cause large interrupt latencies while we do it.
- 	 */
- 	if (!mod)
--		local_irq_save(flags);
-+		flags = hard_local_irq_save();
- 	ftrace_update_code(mod, start_pg);
- 	if (!mod)
--		local_irq_restore(flags);
-+		hard_local_irq_restore(flags);
- 	ret = 0;
-  out:
- 	mutex_unlock(&ftrace_lock);
-@@ -4723,9 +4742,11 @@ void __init ftrace_init(void)
- 	unsigned long count, flags;
- 	int ret;
- 
--	local_irq_save(flags);
-+	flags = hard_local_irq_save_notrace();
- 	ret = ftrace_dyn_arch_init();
--	local_irq_restore(flags);
-+	hard_local_irq_restore_notrace(flags);
-+
-+	/* ftrace_dyn_arch_init places the return code in addr */
- 	if (ret)
- 		goto failed;
- 
-diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
-index f4fbbfc..fc316d2 100644
---- a/kernel/trace/ring_buffer.c
-+++ b/kernel/trace/ring_buffer.c
-@@ -2684,7 +2684,8 @@ static DEFINE_PER_CPU(unsigned int, current_context);
- 
- static __always_inline int trace_recursive_lock(void)
- {
--	unsigned int val = this_cpu_read(current_context);
-+	unsigned long flags;
-+	unsigned int val;
- 	int bit;
- 
- 	if (in_interrupt()) {
-@@ -2697,22 +2698,35 @@ static __always_inline int trace_recursive_lock(void)
- 	} else
- 		bit = 3;
- 
--	if (unlikely(val & (1 << bit)))
-+	flags = hard_local_irq_save();
-+
-+	val = __this_cpu_read(current_context);
-+	if (unlikely(val & (1 << bit))) {
-+		hard_local_irq_restore(flags);
- 		return 1;
-+	}
- 
- 	val |= (1 << bit);
--	this_cpu_write(current_context, val);
-+	__this_cpu_write(current_context, val);
-+
-+	hard_local_irq_restore(flags);
- 	return 0;
 }
 
 static __always_inline void trace_recursive_unlock(void)
 {
--	unsigned int val = this_cpu_read(current_context);
+-	unsigned int val = __this_cpu_read(current_context);
+	unsigned long flags;
+	unsigned int val;
+
+	flags = hard_local_irq_save();
+	val = __this_cpu_read(current_context);
-	val--;
--	val &= this_cpu_read(current_context);
--	this_cpu_write(current_context, val);
-+	val &= __this_cpu_read(current_context);
-+	__this_cpu_write(current_context, val);
+	val &= val & (val - 1);
+	__this_cpu_write(current_context, val);
+
+	hard_local_irq_restore(flags);
 }
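The substitution running through these hunks, local_irq_save() becoming hard_local_irq_save(), is the core I-pipe idiom: the plain macros only mask the root domain's virtualized interrupt state, while the hard_ variants really disable interrupts in the CPU, which is what code sharing state with the head (real-time) domain needs. The pattern, as a sketch (assumes an I-pipe enabled kernel providing these helpers; shared_counter is a made-up example variable):

    #include <linux/ipipe.h>

    static unsigned int shared_counter;

    static void touch_shared_state(void)
    {
            unsigned long flags;

            /* Hold off head-domain IRQs too, not just Linux ones. */
            flags = hard_local_irq_save();
            shared_counter++;
            hard_local_irq_restore(flags);
    }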
@@ -16699,7 +15014,7 @@ index 57f0ec9..80437ac 100644
 static struct tracer_opt func_opts[] = {
 diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
-index f0a0c98..310dd0a 100644
+index 2964333..7b15e42 100644
 --- a/kernel/trace/trace_functions_graph.c
 +++ b/kernel/trace/trace_functions_graph.c
 @@ -336,7 +336,7 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
@@ -16893,7 +15208,7 @@ index f8e0e53..02175aa3 100644
 	wake_up_klogd();
 }
 diff --git a/lib/ioremap.c b/lib/ioremap.c
-index 0c9216c..1575d3e 100644
+index 0c9216c..00a9a30 100644
 --- a/lib/ioremap.c
 +++ b/lib/ioremap.c
 @@ -10,6 +10,7 @@
@@ -16912,7 +15227,7 @@ index 0c9216c..1575d3e 100644
 +	/* APEI may invoke this for temporarily remapping pages in interrupt
 +	 * context - nothing we can and need to propagate globally. */
 +	if (!in_interrupt()) {
-+		__ipipe_pin_range_globally(start, end);
++		__ipipe_pin_mapping_globally(start, end);
 +		flush_cache_vmap(start, end);
 +	}
@@ -17189,7 +15504,7 @@ index 73cf098..6928e67 100644
 +}
 +#endif
 diff --git a/mm/mmap.c b/mm/mmap.c
-index 3c83bec..3247e83 100644
+index f88b4f9..e9b401a 100644
 --- a/mm/mmap.c
 +++ b/mm/mmap.c
 @@ -49,6 +49,10 @@
@@ -17279,14 +15594,14 @@ index ace9345..6b56407 100644
 	return pages;
 }
 diff --git a/mm/vmalloc.c b/mm/vmalloc.c
-index 90520af..ebbf9e2 100644
+index 90520af..dadc22d 100644
 --- a/mm/vmalloc.c
 +++ b/mm/vmalloc.c
 @@ -193,6 +193,8 @@ static int vmap_page_range_noflush(unsigned long start, unsigned long end,
 		return err;
 	} while (pgd++, addr = next, addr != end);
 
-+	__ipipe_pin_range_globally(start, end);
++	__ipipe_pin_mapping_globally(start, end);
 +
 	return nr;
 }
_______________________________________________
Xenomai-git mailing list
Xenomai-git@xenomai.org
http://xenomai.org/mailman/listinfo/xenomai-git