[tip:irq/core] genirq: Force interrupt threading on RT

2019-08-19 Thread tip-bot for Thomas Gleixner
Commit-ID:  b6a32bbd8735def2d0d696ba59205d1874b7800f
Gitweb: https://git.kernel.org/tip/b6a32bbd8735def2d0d696ba59205d1874b7800f
Author: Thomas Gleixner 
AuthorDate: Fri, 16 Aug 2019 18:09:23 +0200
Committer:  Thomas Gleixner 
CommitDate: Mon, 19 Aug 2019 15:45:48 +0200

genirq: Force interrupt threading on RT

Switch force_irqthreads from a boot-time modifiable variable to a compile-time
constant when CONFIG_PREEMPT_RT is enabled.

Signed-off-by: Thomas Gleixner 
Signed-off-by: Sebastian Andrzej Siewior 
Signed-off-by: Thomas Gleixner 
Link: https://lkml.kernel.org/r/20190816160923.12855-1-bige...@linutronix.de

---
 include/linux/interrupt.h | 4 
 kernel/irq/manage.c   | 2 +-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 5b8328a99b2a..07b527dca996 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -472,7 +472,11 @@ extern int irq_set_irqchip_state(unsigned int irq, enum irqchip_irq_state which,
 bool state);
 
 #ifdef CONFIG_IRQ_FORCED_THREADING
+# ifdef CONFIG_PREEMPT_RT
+#  define force_irqthreads (true)
+# else
 extern bool force_irqthreads;
+# endif
 #else
 #define force_irqthreads   (0)
 #endif
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index e8f7f179bf77..97de1b7d43af 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -23,7 +23,7 @@
 
 #include "internals.h"
 
-#ifdef CONFIG_IRQ_FORCED_THREADING
+#if defined(CONFIG_IRQ_FORCED_THREADING) && !defined(CONFIG_PREEMPT_RT)
 __read_mostly bool force_irqthreads;
 EXPORT_SYMBOL_GPL(force_irqthreads);
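
For illustration only (not part of the patch): a minimal user-space sketch of
why the RT variant matters. With force_irqthreads defined as a compile-time
constant the compiler can discard the non-threaded branch entirely; the
CONFIG_* macro handling and the main() harness below are assumptions made for
the sketch, not kernel code.

    #include <stdbool.h>
    #include <stdio.h>

    #ifdef CONFIG_PREEMPT_RT
    # define force_irqthreads (true)       /* constant on RT, as in the patch */
    #else
    static bool force_irqthreads;          /* boot-time modifiable otherwise */
    #endif

    int main(void)
    {
            /* On RT the condition is a constant and the else branch is dead code. */
            if (force_irqthreads)
                    puts("interrupt handlers are forced into threads");
            else
                    puts("handlers may run in hard interrupt context");
            return 0;
    }

Build with -DCONFIG_PREEMPT_RT to see the constant-folded variant.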
 


[tip:x86/urgent] x86/apic: Handle missing global clockevent gracefully

2019-08-19 Thread tip-bot for Thomas Gleixner
Commit-ID:  f897e60a12f0b9146357780d317879bce2a877dc
Gitweb: https://git.kernel.org/tip/f897e60a12f0b9146357780d317879bce2a877dc
Author: Thomas Gleixner 
AuthorDate: Fri, 9 Aug 2019 14:54:07 +0200
Committer:  Thomas Gleixner 
CommitDate: Mon, 19 Aug 2019 12:34:07 +0200

x86/apic: Handle missing global clockevent gracefully

Some newer machines do not advertise legacy timers. The kernel can handle
that situation if the TSC and the CPU frequency are enumerated by CPUID or
MSRs and the CPU supports TSC deadline timer. If the CPU does not support
TSC deadline timer the local APIC timer frequency has to be known as well.

Some Ryzen machines do not advertise legacy timers, but there is no
reliable way to determine the bus frequency which feeds the local APIC
timer when the machine allows overclocking of that frequency.

As there is no legacy timer, the local APIC timer calibration crashes with a
NULL pointer dereference when it accesses the global clock event device,
which is not installed.

Switch the calibration loop to a non-interrupt-based one which polls either
the TSC (if the frequency is known) or jiffies. The latter requires a global
clockevent, but as the machines which lack a global clockevent have a known
TSC frequency this is a non-issue. For older machines where the TSC frequency
is not known, there is no known case where the legacy timers are missing, as
that would have been reported long ago.

Reported-by: Daniel Drake 
Reported-by: Jiri Slaby 
Signed-off-by: Thomas Gleixner 
Tested-by: Daniel Drake 
Cc: sta...@vger.kernel.org
Link: https://lkml.kernel.org/r/alpine.deb.2.21.1908091443030.21...@nanos.tec.linutronix.de
Link: http://bugzilla.opensuse.org/show_bug.cgi?id=1142926#c12
---
 arch/x86/kernel/apic/apic.c | 68 +++--
 1 file changed, 53 insertions(+), 15 deletions(-)

diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index f5291362da1a..aa5495d0f478 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -722,7 +722,7 @@ static __initdata unsigned long lapic_cal_pm1, lapic_cal_pm2;
 static __initdata unsigned long lapic_cal_j1, lapic_cal_j2;
 
 /*
- * Temporary interrupt handler.
+ * Temporary interrupt handler and polled calibration function.
  */
 static void __init lapic_cal_handler(struct clock_event_device *dev)
 {
@@ -851,7 +851,8 @@ bool __init apic_needs_pit(void)
 static int __init calibrate_APIC_clock(void)
 {
struct clock_event_device *levt = this_cpu_ptr(&lapic_events);
-   void (*real_handler)(struct clock_event_device *dev);
+   u64 tsc_perj = 0, tsc_start = 0;
+   unsigned long jif_start;
unsigned long deltaj;
long delta, deltatsc;
int pm_referenced = 0;
@@ -878,28 +879,64 @@ static int __init calibrate_APIC_clock(void)
apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"
"calibrating APIC timer ...\n");
 
+   /*
+* There are platforms w/o global clockevent devices. Instead of
+* making the calibration conditional on that, use a polling based
+* approach everywhere.
+*/
local_irq_disable();
 
-   /* Replace the global interrupt handler */
-   real_handler = global_clock_event->event_handler;
-   global_clock_event->event_handler = lapic_cal_handler;
-
/*
 * Setup the APIC counter to maximum. There is no way the lapic
 * can underflow in the 100ms detection time frame
 */
__setup_APIC_LVTT(0xffffffff, 0, 0);
 
-   /* Let the interrupts run */
+   /*
+* Methods to terminate the calibration loop:
+*  1) Global clockevent if available (jiffies)
+*  2) TSC if available and frequency is known
+*/
+   jif_start = READ_ONCE(jiffies);
+
+   if (tsc_khz) {
+   tsc_start = rdtsc();
+   tsc_perj = div_u64((u64)tsc_khz * 1000, HZ);
+   }
+
+   /*
+* Enable interrupts so the tick can fire, if a global
+* clockevent device is available
+*/
local_irq_enable();
 
-   while (lapic_cal_loops <= LAPIC_CAL_LOOPS)
-   cpu_relax();
+   while (lapic_cal_loops <= LAPIC_CAL_LOOPS) {
+   /* Wait for a tick to elapse */
+   while (1) {
+   if (tsc_khz) {
+   u64 tsc_now = rdtsc();
+   if ((tsc_now - tsc_start) >= tsc_perj) {
+   tsc_start += tsc_perj;
+   break;
+   }
+   } else {
+   unsigned long jif_now = READ_ONCE(jiffies);
 
-   local_irq_disable();
+   if (time_after(jif_now, jif_start)) {
+   jif_start = jif_now;
+   break;
+   }
+  
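
For reference, a self-contained sketch (user space; assumptions: GCC/Clang on
x86 with <x86intrin.h>, and HZ is a made-up tick rate) of the polled "wait for
one tick" idea used above: advance a TSC deadline by tsc_khz * 1000 / HZ
cycles instead of waiting for a timer interrupt.

    #include <stdint.h>
    #include <stdio.h>
    #include <x86intrin.h>

    #define HZ 250                                    /* assumed tick rate */

    /* Busy-wait until one jiffy worth of TSC cycles has elapsed, then
     * advance the deadline, mirroring the calibration loop structure. */
    static void wait_one_tick(uint64_t tsc_khz, uint64_t *tsc_deadline)
    {
            uint64_t tsc_perj = tsc_khz * 1000 / HZ;  /* TSC cycles per jiffy */

            while (__rdtsc() - *tsc_deadline < tsc_perj)
                    ;                                 /* poll, no interrupt needed */
            *tsc_deadline += tsc_perj;
    }

    int main(void)
    {
            uint64_t start = __rdtsc();
            wait_one_tick(3000000, &start);           /* pretend tsc_khz = 3 GHz */
            printf("one simulated jiffy elapsed\n");
            return 0;
    }
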

[tip:timers/core] posix-timers: Prepare for PREEMPT_RT

2019-08-01 Thread tip-bot for Thomas Gleixner
Commit-ID:  08a3c192c93f4359a94bf47971e55b0324b72b8b
Gitweb: https://git.kernel.org/tip/08a3c192c93f4359a94bf47971e55b0324b72b8b
Author: Thomas Gleixner 
AuthorDate: Wed, 31 Jul 2019 00:33:55 +0200
Committer:  Thomas Gleixner 
CommitDate: Thu, 1 Aug 2019 20:51:25 +0200

posix-timers: Prepare for PREEMPT_RT

Posix timer delete retry loops are affected by the same priority inversion
and live lock issues as the other timers.

Provide an RT specific synchronization function which keeps a reference to
the timer by holding the RCU read lock so the timer cannot be freed, drops
the timer lock and invokes the timer specific wait function.

This does not yet cover posix CPU timers because they need more special
treatment on PREEMPT_RT.

Originally-by: Anna-Maria Gleixner 
Signed-off-by: Thomas Gleixner 
Acked-by: Peter Zijlstra (Intel) 
Link: https://lkml.kernel.org/r/20190730223829.058247...@linutronix.de


---
 kernel/time/posix-timers.c | 24 
 1 file changed, 24 insertions(+)

diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index 3e663f982c82..a71c1aab071c 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -805,6 +805,29 @@ static int common_hrtimer_try_to_cancel(struct k_itimer *timr)
return hrtimer_try_to_cancel(&timr->it.real.timer);
 }
 
+#ifdef CONFIG_PREEMPT_RT
+static struct k_itimer *timer_wait_running(struct k_itimer *timer,
+  unsigned long *flags)
+{
+   const struct k_clock *kc = READ_ONCE(timer->kclock);
+   timer_t timer_id = READ_ONCE(timer->it_id);
+
+   /* Prevent kfree(timer) after dropping the lock */
+   rcu_read_lock();
+   unlock_timer(timer, *flags);
+
+   if (kc->timer_arm == common_hrtimer_arm)
+   hrtimer_cancel_wait_running(&timer->it.real.timer);
+   else if (kc == &alarm_clock)
+   hrtimer_cancel_wait_running(&timer->it.alarm.alarmtimer.timer);
+   else
+   WARN_ON_ONCE(1);
+   rcu_read_unlock();
+
+   /* Relock the timer. It might be not longer hashed. */
+   return lock_timer(timer_id, flags);
+}
+#else
 static struct k_itimer *timer_wait_running(struct k_itimer *timer,
   unsigned long *flags)
 {
@@ -815,6 +838,7 @@ static struct k_itimer *timer_wait_running(struct k_itimer *timer,
/* Relock the timer. It might be not longer hashed. */
return lock_timer(timer_id, flags);
 }
+#endif
 
 /* Set a POSIX.1b interval timer. */
 int common_timer_set(struct k_itimer *timr, int flags,
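
An illustrative user-space analog of the pattern above (assumptions: a pthread
mutex and condition variable stand in for the timer lock and the running
callback; none of these names exist in the kernel patch). The point is the
ordering: with the lock held, drop it, wait for the callback to finish, then
take it again.

    #include <pthread.h>
    #include <stdbool.h>

    struct fake_timer {
            pthread_mutex_t lock;      /* stands in for the per-timer lock */
            pthread_cond_t  cb_done;   /* signalled when the callback returns */
            bool            cb_running;
    };

    /* Called with t->lock held, like timer_wait_running(). pthread_cond_wait()
     * drops the lock while sleeping and reacquires it before returning, the
     * same drop/wait/relock dance. The kernel additionally takes
     * rcu_read_lock() first because the timer may be freed once unlocked;
     * that part has no direct user-space equivalent here. */
    static void fake_timer_wait_running(struct fake_timer *t)
    {
            while (t->cb_running)
                    pthread_cond_wait(&t->cb_done, &t->lock);
    }

The non-RT variant in the patch keeps the simple behaviour of dropping the
lock, calling cpu_relax() and relocking, which this sketch does not model.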


[tip:timers/core] posix-timers: Rework cancel retry loops

2019-08-01 Thread tip-bot for Thomas Gleixner
Commit-ID:  6945e5c2abe008302b20266248d6de95575311a8
Gitweb: https://git.kernel.org/tip/6945e5c2abe008302b20266248d6de95575311a8
Author: Thomas Gleixner 
AuthorDate: Wed, 31 Jul 2019 00:33:53 +0200
Committer:  Thomas Gleixner 
CommitDate: Thu, 1 Aug 2019 20:51:24 +0200

posix-timers: Rework cancel retry loops

As a preparatory step for adding the PREEMPT RT specific synchronization
mechanism to wait for a running timer callback, rework the timer cancel
retry loops so they call a common function. This allows trivial
substitution in one place.

Originally-by: Anna-Maria Gleixner 
Signed-off-by: Thomas Gleixner 
Acked-by: Peter Zijlstra (Intel) 
Link: https://lkml.kernel.org/r/20190730223828.874901...@linutronix.de


---
 kernel/time/posix-timers.c | 29 +++--
 1 file changed, 23 insertions(+), 6 deletions(-)

diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index f5aedd2f60df..bbe8f9686a70 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -805,6 +805,17 @@ static int common_hrtimer_try_to_cancel(struct k_itimer *timr)
return hrtimer_try_to_cancel(&timr->it.real.timer);
 }
 
+static struct k_itimer *timer_wait_running(struct k_itimer *timer,
+  unsigned long *flags)
+{
+   timer_t timer_id = READ_ONCE(timer->it_id);
+
+   unlock_timer(timer, *flags);
+   cpu_relax();
+   /* Relock the timer. It might be not longer hashed. */
+   return lock_timer(timer_id, flags);
+}
+
 /* Set a POSIX.1b interval timer. */
 int common_timer_set(struct k_itimer *timr, int flags,
 struct itimerspec64 *new_setting,
@@ -859,8 +870,9 @@ static int do_timer_settime(timer_t timer_id, int tmr_flags,
 
if (old_spec64)
memset(old_spec64, 0, sizeof(*old_spec64));
-retry:
+
timr = lock_timer(timer_id, &flags);
+retry:
if (!timr)
return -EINVAL;
 
@@ -870,11 +882,14 @@ retry:
else
error = kc->timer_set(timr, tmr_flags, new_spec64, old_spec64);
 
-   unlock_timer(timr, flags);
if (error == TIMER_RETRY) {
-   old_spec64 = NULL;  // We already got the old time...
+   // We already got the old time...
+   old_spec64 = NULL;
+   /* Unlocks and relocks the timer if it still exists */
+   timr = timer_wait_running(timr, &flags);
goto retry;
}
+   unlock_timer(timr, flags);
 
return error;
 }
@@ -951,13 +966,15 @@ SYSCALL_DEFINE1(timer_delete, timer_t, timer_id)
struct k_itimer *timer;
unsigned long flags;
 
-retry_delete:
timer = lock_timer(timer_id, &flags);
+
+retry_delete:
if (!timer)
return -EINVAL;
 
-   if (timer_delete_hook(timer) == TIMER_RETRY) {
-   unlock_timer(timer, flags);
+   if (unlikely(timer_delete_hook(timer) == TIMER_RETRY)) {
+   /* Unlocks and relocks the timer if it still exists */
+   timer = timer_wait_running(timer, &flags);
goto retry_delete;
}
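
A compact sketch (user-space simulation, all names invented) of the retry
shape this change establishes: lock once up front, and on TIMER_RETRY let a
single helper do the unlock/wait/relock before jumping back.

    #include <stdio.h>

    #define TIMER_RETRY 1

    struct obj { int id; int busy; };
    static struct obj the_obj = { .id = 42, .busy = 2 };

    /* Stubs standing in for lock_timer()/unlock_timer()/timer_delete_hook()/
     * timer_wait_running(); the real ones take a spinlock and IRQ flags. */
    static struct obj *lookup_and_lock(int id, unsigned long *flags)
    {
            (void)flags;
            return id == the_obj.id ? &the_obj : NULL;
    }
    static void unlock_obj(struct obj *o, unsigned long flags) { (void)o; (void)flags; }
    static int try_cancel(struct obj *o) { return o->busy ? TIMER_RETRY : 0; }
    static struct obj *wait_running(struct obj *o, unsigned long *flags)
    {
            (void)flags;
            o->busy--;               /* pretend the running callback finished */
            return o;                /* relocked; could be NULL if it vanished */
    }

    static int cancel_with_retry(int id)
    {
            unsigned long flags = 0;
            struct obj *o = lookup_and_lock(id, &flags);
    retry:
            if (!o)
                    return -1;
            if (try_cancel(o) == TIMER_RETRY) {
                    /* unlock, wait for the running callback, relock by id */
                    o = wait_running(o, &flags);
                    goto retry;
            }
            unlock_obj(o, flags);
            return 0;
    }

    int main(void)
    {
            printf("cancel: %d\n", cancel_with_retry(42));
            return 0;
    }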
 


[tip:timers/core] posix-timers: Cleanup the flag/flags confusion

2019-08-01 Thread tip-bot for Thomas Gleixner
Commit-ID:  21670ee44f1e3565030bcabc62178b8e5eb2fce7
Gitweb: https://git.kernel.org/tip/21670ee44f1e3565030bcabc62178b8e5eb2fce7
Author: Thomas Gleixner 
AuthorDate: Wed, 31 Jul 2019 00:33:52 +0200
Committer:  Thomas Gleixner 
CommitDate: Thu, 1 Aug 2019 20:51:24 +0200

posix-timers: Cleanup the flag/flags confusion

do_timer_settime() has a 'flags' argument and uses 'flag' for the interrupt
flags, which is confusing at best.

Rename the argument so 'flags' can be used for interrupt flags as usual.

Signed-off-by: Thomas Gleixner 
Acked-by: Peter Zijlstra (Intel) 
Link: https://lkml.kernel.org/r/20190730223828.782664...@linutronix.de


---
 kernel/time/posix-timers.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index d7f2d91acdac..f5aedd2f60df 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -844,13 +844,13 @@ int common_timer_set(struct k_itimer *timr, int flags,
return 0;
 }
 
-static int do_timer_settime(timer_t timer_id, int flags,
+static int do_timer_settime(timer_t timer_id, int tmr_flags,
struct itimerspec64 *new_spec64,
struct itimerspec64 *old_spec64)
 {
const struct k_clock *kc;
struct k_itimer *timr;
-   unsigned long flag;
+   unsigned long flags;
int error = 0;
 
if (!timespec64_valid(&new_spec64->it_interval) ||
@@ -860,7 +860,7 @@ static int do_timer_settime(timer_t timer_id, int flags,
if (old_spec64)
memset(old_spec64, 0, sizeof(*old_spec64));
 retry:
-   timr = lock_timer(timer_id, &flag);
+   timr = lock_timer(timer_id, &flags);
if (!timr)
return -EINVAL;
 
@@ -868,9 +868,9 @@ retry:
if (WARN_ON_ONCE(!kc || !kc->timer_set))
error = -EINVAL;
else
-   error = kc->timer_set(timr, flags, new_spec64, old_spec64);
+   error = kc->timer_set(timr, tmr_flags, new_spec64, old_spec64);
 
-   unlock_timer(timr, flag);
+   unlock_timer(timr, flags);
if (error == TIMER_RETRY) {
old_spec64 = NULL;  // We already got the old time...
goto retry;


[tip:timers/core] posix-timers: Prepare for PREEMPT_RT

2019-08-01 Thread tip-bot for Thomas Gleixner
Commit-ID:  3a839db3eaeeef31520de45f3b078204d068e3d0
Gitweb: https://git.kernel.org/tip/3a839db3eaeeef31520de45f3b078204d068e3d0
Author: Thomas Gleixner 
AuthorDate: Wed, 31 Jul 2019 00:33:55 +0200
Committer:  Thomas Gleixner 
CommitDate: Thu, 1 Aug 2019 17:46:43 +0200

posix-timers: Prepare for PREEMPT_RT

Posix timer delete retry loops are affected by the same priority inversion
and live lock issues as the other timers.

Provide an RT specific synchronization function which keeps a reference to
the timer by holding the RCU read lock so the timer cannot be freed, drops
the timer lock and invokes the timer specific wait function.

This does not yet cover posix CPU timers because they need more special
treatment on PREEMPT_RT.

Originally-by: Anna-Maria Gleixner 
Signed-off-by: Thomas Gleixner 
Acked-by: Peter Zijlstra (Intel) 
Link: https://lkml.kernel.org/r/20190730223829.058247...@linutronix.de

---
 kernel/time/posix-timers.c | 24 
 1 file changed, 24 insertions(+)

diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index 3e663f982c82..a71c1aab071c 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -805,6 +805,29 @@ static int common_hrtimer_try_to_cancel(struct k_itimer *timr)
return hrtimer_try_to_cancel(&timr->it.real.timer);
 }
 
+#ifdef CONFIG_PREEMPT_RT
+static struct k_itimer *timer_wait_running(struct k_itimer *timer,
+  unsigned long *flags)
+{
+   const struct k_clock *kc = READ_ONCE(timer->kclock);
+   timer_t timer_id = READ_ONCE(timer->it_id);
+
+   /* Prevent kfree(timer) after dropping the lock */
+   rcu_read_lock();
+   unlock_timer(timer, *flags);
+
+   if (kc->timer_arm == common_hrtimer_arm)
+   hrtimer_cancel_wait_running(&timer->it.real.timer);
+   else if (kc == &alarm_clock)
+   hrtimer_cancel_wait_running(&timer->it.alarm.alarmtimer.timer);
+   else
+   WARN_ON_ONCE(1);
+   rcu_read_unlock();
+
+   /* Relock the timer. It might be not longer hashed. */
+   return lock_timer(timer_id, flags);
+}
+#else
 static struct k_itimer *timer_wait_running(struct k_itimer *timer,
   unsigned long *flags)
 {
@@ -815,6 +838,7 @@ static struct k_itimer *timer_wait_running(struct k_itimer *timer,
/* Relock the timer. It might be not longer hashed. */
return lock_timer(timer_id, flags);
 }
+#endif
 
 /* Set a POSIX.1b interval timer. */
 int common_timer_set(struct k_itimer *timr, int flags,


[tip:timers/core] posix-timers: Rework cancel retry loops

2019-08-01 Thread tip-bot for Thomas Gleixner
Commit-ID:  f8d1b0549263354b8d8854fefc521ac536be70ff
Gitweb: https://git.kernel.org/tip/f8d1b0549263354b8d8854fefc521ac536be70ff
Author: Thomas Gleixner 
AuthorDate: Wed, 31 Jul 2019 00:33:53 +0200
Committer:  Thomas Gleixner 
CommitDate: Thu, 1 Aug 2019 17:46:42 +0200

posix-timers: Rework cancel retry loops

As a preparatory step for adding the PREEMPT RT specific synchronization
mechanism to wait for a running timer callback, rework the timer cancel
retry loops so they call a common function. This allows trivial
substitution in one place.

Originally-by: Anna-Maria Gleixner 
Signed-off-by: Thomas Gleixner 
Acked-by: Peter Zijlstra (Intel) 
Link: https://lkml.kernel.org/r/20190730223828.874901...@linutronix.de

---
 kernel/time/posix-timers.c | 29 +++--
 1 file changed, 23 insertions(+), 6 deletions(-)

diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index f5aedd2f60df..bbe8f9686a70 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -805,6 +805,17 @@ static int common_hrtimer_try_to_cancel(struct k_itimer *timr)
return hrtimer_try_to_cancel(&timr->it.real.timer);
 }
 
+static struct k_itimer *timer_wait_running(struct k_itimer *timer,
+  unsigned long *flags)
+{
+   timer_t timer_id = READ_ONCE(timer->it_id);
+
+   unlock_timer(timer, *flags);
+   cpu_relax();
+   /* Relock the timer. It might be not longer hashed. */
+   return lock_timer(timer_id, flags);
+}
+
 /* Set a POSIX.1b interval timer. */
 int common_timer_set(struct k_itimer *timr, int flags,
 struct itimerspec64 *new_setting,
@@ -859,8 +870,9 @@ static int do_timer_settime(timer_t timer_id, int tmr_flags,
 
if (old_spec64)
memset(old_spec64, 0, sizeof(*old_spec64));
-retry:
+
timr = lock_timer(timer_id, &flags);
+retry:
if (!timr)
return -EINVAL;
 
@@ -870,11 +882,14 @@ retry:
else
error = kc->timer_set(timr, tmr_flags, new_spec64, old_spec64);
 
-   unlock_timer(timr, flags);
if (error == TIMER_RETRY) {
-   old_spec64 = NULL;  // We already got the old time...
+   // We already got the old time...
+   old_spec64 = NULL;
+   /* Unlocks and relocks the timer if it still exists */
+   timr = timer_wait_running(timr, &flags);
goto retry;
}
+   unlock_timer(timr, flags);
 
return error;
 }
@@ -951,13 +966,15 @@ SYSCALL_DEFINE1(timer_delete, timer_t, timer_id)
struct k_itimer *timer;
unsigned long flags;
 
-retry_delete:
timer = lock_timer(timer_id, &flags);
+
+retry_delete:
if (!timer)
return -EINVAL;
 
-   if (timer_delete_hook(timer) == TIMER_RETRY) {
-   unlock_timer(timer, flags);
+   if (unlikely(timer_delete_hook(timer) == TIMER_RETRY)) {
+   /* Unlocks and relocks the timer if it still exists */
+   timer = timer_wait_running(timer, &flags);
goto retry_delete;
}
 


[tip:sched/rt] x86/kvm: Use CONFIG_PREEMPTION

2019-07-31 Thread tip-bot for Thomas Gleixner
Commit-ID:  09c7e8b21d67c3c78ab9701dbc0fb1e9f14a0ba5
Gitweb: https://git.kernel.org/tip/09c7e8b21d67c3c78ab9701dbc0fb1e9f14a0ba5
Author: Thomas Gleixner 
AuthorDate: Fri, 26 Jul 2019 23:19:44 +0200
Committer:  Ingo Molnar 
CommitDate: Wed, 31 Jul 2019 19:03:36 +0200

x86/kvm: Use CONFIG_PREEMPTION

CONFIG_PREEMPTION is selected by CONFIG_PREEMPT and by
CONFIG_PREEMPT_RT. Both PREEMPT and PREEMPT_RT require the same
functionality which today depends on CONFIG_PREEMPT.

Switch the conditional for async pagefaults to use CONFIG_PREEMPTION.

Signed-off-by: Thomas Gleixner 
Acked-by: Peter Zijlstra (Intel) 
Cc: Linus Torvalds 
Cc: Masami Hiramatsu 
Cc: Paolo Bonzini 
Cc: Paul E. McKenney 
Cc: Peter Zijlstra 
Cc: Steven Rostedt 
Link: http://lkml.kernel.org/r/20190726212124.789755...@linutronix.de
Signed-off-by: Ingo Molnar 
---
 arch/x86/kernel/kvm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index b7f34fe2171e..3d07f84c4846 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -314,7 +314,7 @@ static void kvm_guest_cpu_init(void)
if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF) && kvmapf) {
u64 pa = slow_virt_to_phys(this_cpu_ptr(&apf_reason));
 
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
pa |= KVM_ASYNC_PF_SEND_ALWAYS;
 #endif
pa |= KVM_ASYNC_PF_ENABLED;


[tip:sched/rt] x86/dumpstack: Indicate PREEMPT_RT in dumps

2019-07-31 Thread tip-bot for Thomas Gleixner
Commit-ID:  cb376c26971ff54f25980ec1f0ae2f06d6a69df0
Gitweb: https://git.kernel.org/tip/cb376c26971ff54f25980ec1f0ae2f06d6a69df0
Author: Thomas Gleixner 
AuthorDate: Fri, 26 Jul 2019 23:19:43 +0200
Committer:  Ingo Molnar 
CommitDate: Wed, 31 Jul 2019 19:03:36 +0200

x86/dumpstack: Indicate PREEMPT_RT in dumps

Stack dumps print whether the kernel has preemption enabled or not. Extend
it so a PREEMPT_RT enabled kernel can be identified.

Signed-off-by: Thomas Gleixner 
Acked-by: Peter Zijlstra (Intel) 
Cc: Linus Torvalds 
Cc: Masami Hiramatsu 
Cc: Paolo Bonzini 
Cc: Paul E. McKenney 
Cc: Peter Zijlstra 
Cc: Steven Rostedt 
Link: http://lkml.kernel.org/r/20190726212124.699136...@linutronix.de
Signed-off-by: Ingo Molnar 
---
 arch/x86/kernel/dumpstack.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 2b5886401e5f..e07424e19274 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -367,13 +367,18 @@ NOKPROBE_SYMBOL(oops_end);
 
 int __die(const char *str, struct pt_regs *regs, long err)
 {
+   const char *pr = "";
+
/* Save the regs of the first oops for the executive summary later. */
if (!die_counter)
exec_summary_regs = *regs;
 
+   if (IS_ENABLED(CONFIG_PREEMPTION))
+   pr = IS_ENABLED(CONFIG_PREEMPT_RT) ? " PREEMPT_RT" : " PREEMPT";
+
printk(KERN_DEFAULT
   "%s: %04lx [#%d]%s%s%s%s%s\n", str, err & 0x, ++die_counter,
-  IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : "",
+  pr,
   IS_ENABLED(CONFIG_SMP) ? " SMP" : "",
   debug_pagealloc_enabled()  ? " DEBUG_PAGEALLOC" : "",
   IS_ENABLED(CONFIG_KASAN)   ? " KASAN"   : "",
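
A self-contained sketch (user space; the IS_ENABLED() stand-ins below are
plain macros, an assumption for the example) of how the tag selection above
behaves: the PREEMPT_RT tag wins over PREEMPT when both preemption options
are configured.

    #include <stdio.h>

    /* Crude stand-ins for the kernel's IS_ENABLED(); define the CONFIG_*
     * macros on the compiler command line to simulate a configuration. */
    #ifdef CONFIG_PREEMPT_RT
    # define PREEMPT_RT_ENABLED 1
    #else
    # define PREEMPT_RT_ENABLED 0
    #endif
    #ifdef CONFIG_PREEMPTION
    # define PREEMPTION_ENABLED 1
    #else
    # define PREEMPTION_ENABLED 0
    #endif

    int main(void)
    {
            const char *pr = "";

            if (PREEMPTION_ENABLED)
                    pr = PREEMPT_RT_ENABLED ? " PREEMPT_RT" : " PREEMPT";

            printf("oops header tag:%s\n", pr);
            return 0;
    }

Built with -DCONFIG_PREEMPTION -DCONFIG_PREEMPT_RT the tag reads " PREEMPT_RT",
with only -DCONFIG_PREEMPTION it reads " PREEMPT", and without either it stays
empty, matching the intent of the patch.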


[tip:sched/rt] x86: Use CONFIG_PREEMPTION

2019-07-31 Thread tip-bot for Thomas Gleixner
Commit-ID:  48593975aeee548f25e256c515fd1d1e3fb2cc20
Gitweb: https://git.kernel.org/tip/48593975aeee548f25e256c515fd1d1e3fb2cc20
Author: Thomas Gleixner 
AuthorDate: Fri, 26 Jul 2019 23:19:42 +0200
Committer:  Ingo Molnar 
CommitDate: Wed, 31 Jul 2019 19:03:35 +0200

x86: Use CONFIG_PREEMPTION

CONFIG_PREEMPTION is selected by CONFIG_PREEMPT and by
CONFIG_PREEMPT_RT. Both PREEMPT and PREEMPT_RT require the same
functionality which today depends on CONFIG_PREEMPT.

Switch the entry code, preempt and kprobes conditionals over to
CONFIG_PREEMPTION.

Signed-off-by: Thomas Gleixner 
Acked-by: Peter Zijlstra (Intel) 
Cc: Linus Torvalds 
Cc: Masami Hiramatsu 
Cc: Paolo Bonzini 
Cc: Paul E. McKenney 
Cc: Peter Zijlstra 
Cc: Steven Rostedt 
Link: http://lkml.kernel.org/r/20190726212124.608488...@linutronix.de
Signed-off-by: Ingo Molnar 
---
 arch/x86/entry/entry_32.S  | 6 +++---
 arch/x86/entry/entry_64.S  | 4 ++--
 arch/x86/entry/thunk_32.S  | 2 +-
 arch/x86/entry/thunk_64.S  | 4 ++--
 arch/x86/include/asm/preempt.h | 2 +-
 arch/x86/kernel/kprobes/core.c | 2 +-
 6 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 4f86928246e7..f83ca5aa8b77 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -63,7 +63,7 @@
  * enough to patch inline, increasing performance.
  */
 
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
# define preempt_stop(clobbers)	DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
 #else
 # define preempt_stop(clobbers)
@@ -1084,7 +1084,7 @@ restore_all:
INTERRUPT_RETURN
 
 restore_all_kernel:
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
DISABLE_INTERRUPTS(CLBR_ANY)
cmpl	$0, PER_CPU_VAR(__preempt_count)
jnz .Lno_preempt
@@ -1364,7 +1364,7 @@ ENTRY(xen_hypervisor_callback)
 ENTRY(xen_do_upcall)
 1: mov %esp, %eax
call	xen_evtchn_do_upcall
-#ifndef CONFIG_PREEMPT
+#ifndef CONFIG_PREEMPTION
call	xen_maybe_preempt_hcall
 #endif
jmp ret_from_intr
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 3f5a978a02a7..9701464341e4 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -662,7 +662,7 @@ GLOBAL(swapgs_restore_regs_and_return_to_usermode)
 
 /* Returning to kernel space */
 retint_kernel:
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
/* Interrupts are off */
/* Check if we need preemption */
btl $9, EFLAGS(%rsp)/* were interrupts off? */
@@ -1113,7 +1113,7 @@ ENTRY(xen_do_hypervisor_callback) /* do_hypervisor_callback(struct *pt_regs) */
call	xen_evtchn_do_upcall
LEAVE_IRQ_STACK
 
-#ifndef CONFIG_PREEMPT
+#ifndef CONFIG_PREEMPTION
call	xen_maybe_preempt_hcall
 #endif
jmp error_exit
diff --git a/arch/x86/entry/thunk_32.S b/arch/x86/entry/thunk_32.S
index cb3464525b37..2713490611a3 100644
--- a/arch/x86/entry/thunk_32.S
+++ b/arch/x86/entry/thunk_32.S
@@ -34,7 +34,7 @@
THUNK trace_hardirqs_off_thunk,trace_hardirqs_off_caller,1
 #endif
 
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
THUNK ___preempt_schedule, preempt_schedule
THUNK ___preempt_schedule_notrace, preempt_schedule_notrace
EXPORT_SYMBOL(___preempt_schedule)
diff --git a/arch/x86/entry/thunk_64.S b/arch/x86/entry/thunk_64.S
index cc20465b2867..ea5c4167086c 100644
--- a/arch/x86/entry/thunk_64.S
+++ b/arch/x86/entry/thunk_64.S
@@ -46,7 +46,7 @@
THUNK lockdep_sys_exit_thunk,lockdep_sys_exit
 #endif
 
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
THUNK ___preempt_schedule, preempt_schedule
THUNK ___preempt_schedule_notrace, preempt_schedule_notrace
EXPORT_SYMBOL(___preempt_schedule)
@@ -55,7 +55,7 @@
 
 #if defined(CONFIG_TRACE_IRQFLAGS) \
  || defined(CONFIG_DEBUG_LOCK_ALLOC) \
- || defined(CONFIG_PREEMPT)
+ || defined(CONFIG_PREEMPTION)
 .L_restore:
popq %r11
popq %r10
diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h
index 99a7fa9ab0a3..3d4cb83a8828 100644
--- a/arch/x86/include/asm/preempt.h
+++ b/arch/x86/include/asm/preempt.h
@@ -102,7 +102,7 @@ static __always_inline bool should_resched(int preempt_offset)
return unlikely(raw_cpu_read_4(__preempt_count) == preempt_offset);
 }
 
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
   extern asmlinkage void ___preempt_schedule(void);
 # define __preempt_schedule() \
asm volatile ("call ___preempt_schedule" : ASM_CALL_CONSTRAINT)
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 0e0b08008b5a..43fc13c831af 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -580,7 +580,7 @@ static void setup_singlestep(struct kprobe *p, struct pt_regs *regs,
if (setup_detour_execution(p, regs, reenter))
return;
 
-#if !defined(CONFIG_PREEMPT)
+#if 

[tip:sched/rt] kprobes: Use CONFIG_PREEMPTION

2019-07-31 Thread tip-bot for Thomas Gleixner
Commit-ID:  92616606368ee01f1163fcfc986116c810cd48ba
Gitweb: https://git.kernel.org/tip/92616606368ee01f1163fcfc986116c810cd48ba
Author: Thomas Gleixner 
AuthorDate: Fri, 26 Jul 2019 23:19:41 +0200
Committer:  Ingo Molnar 
CommitDate: Wed, 31 Jul 2019 19:03:35 +0200

kprobes: Use CONFIG_PREEMPTION

CONFIG_PREEMPTION is selected by CONFIG_PREEMPT and by
CONFIG_PREEMPT_RT. Both PREEMPT and PREEMPT_RT require the same
functionality which today depends on CONFIG_PREEMPT.

Switch kprobes conditional over to CONFIG_PREEMPTION.

Signed-off-by: Thomas Gleixner 
Acked-by: Peter Zijlstra (Intel) 
Cc: Linus Torvalds 
Cc: Masami Hiramatsu 
Cc: Paolo Bonzini 
Cc: Paul E. McKenney 
Cc: Peter Zijlstra 
Cc: Steven Rostedt 
Link: http://lkml.kernel.org/r/20190726212124.516286...@linutronix.de
Signed-off-by: Ingo Molnar 
---
 kernel/kprobes.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 9873fc627d61..8bc5f1ffd68e 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1906,7 +1906,7 @@ int register_kretprobe(struct kretprobe *rp)
 
/* Pre-allocate memory for max kretprobe instances */
if (rp->maxactive <= 0) {
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
rp->maxactive = max_t(unsigned int, 10, 2*num_possible_cpus());
 #else
rp->maxactive = num_possible_cpus();


[tip:sched/rt] tracing: Use CONFIG_PREEMPTION

2019-07-31 Thread tip-bot for Thomas Gleixner
Commit-ID:  30c937043b2db09ae3408f5534824f9ececdb581
Gitweb: https://git.kernel.org/tip/30c937043b2db09ae3408f5534824f9ececdb581
Author: Thomas Gleixner 
AuthorDate: Fri, 26 Jul 2019 23:19:40 +0200
Committer:  Ingo Molnar 
CommitDate: Wed, 31 Jul 2019 19:03:35 +0200

tracing: Use CONFIG_PREEMPTION

CONFIG_PREEMPTION is selected by CONFIG_PREEMPT and by
CONFIG_PREEMPT_RT. Both PREEMPT and PREEMPT_RT require the same
functionality which today depends on CONFIG_PREEMPT.

Switch the conditionals in the tracer over to CONFIG_PREEMPTION.

This is the first step to make the tracer work on RT. The other small
tweaks are submitted separately.

Signed-off-by: Thomas Gleixner 
Acked-by: Peter Zijlstra (Intel) 
Cc: Linus Torvalds 
Cc: Masami Hiramatsu 
Cc: Paolo Bonzini 
Cc: Paul E. McKenney 
Cc: Peter Zijlstra 
Cc: Steven Rostedt 
Link: http://lkml.kernel.org/r/20190726212124.409766...@linutronix.de
Signed-off-by: Ingo Molnar 
---
 kernel/trace/Kconfig | 4 ++--
 kernel/trace/ftrace.c| 2 +-
 kernel/trace/ring_buffer_benchmark.c | 2 +-
 kernel/trace/trace_events.c  | 4 ++--
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index d2c1fe0b451d..6a64d7772870 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -179,7 +179,7 @@ config TRACE_PREEMPT_TOGGLE
 config PREEMPTIRQ_EVENTS
bool "Enable trace events for preempt and irq disable/enable"
select TRACE_IRQFLAGS
-   select TRACE_PREEMPT_TOGGLE if PREEMPT
+   select TRACE_PREEMPT_TOGGLE if PREEMPTION
select GENERIC_TRACER
default n
help
@@ -214,7 +214,7 @@ config PREEMPT_TRACER
bool "Preemption-off Latency Tracer"
default n
depends on !ARCH_USES_GETTIMEOFFSET
-   depends on PREEMPT
+   depends on PREEMPTION
select GENERIC_TRACER
select TRACER_MAX_TRACE
select RING_BUFFER_ALLOW_SWAP
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index eca34503f178..a800e867c1a3 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -2814,7 +2814,7 @@ int ftrace_shutdown(struct ftrace_ops *ops, int command)
 * synchornize_rcu_tasks() will wait for those tasks to
 * execute and either schedule voluntarily or enter user space.
 */
-   if (IS_ENABLED(CONFIG_PREEMPT))
+   if (IS_ENABLED(CONFIG_PREEMPTION))
synchronize_rcu_tasks();
 
  free_ops:
diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c
index 0564f6db0561..09b0b49f346e 100644
--- a/kernel/trace/ring_buffer_benchmark.c
+++ b/kernel/trace/ring_buffer_benchmark.c
@@ -267,7 +267,7 @@ static void ring_buffer_producer(void)
if (consumer && !(cnt % wakeup_interval))
wake_up_process(consumer);
 
-#ifndef CONFIG_PREEMPT
+#ifndef CONFIG_PREEMPTION
/*
 * If we are a non preempt kernel, the 10 second run will
 * stop everything while it runs. Instead, we will call
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index c7506bc81b75..5a189fb8ec23 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -255,12 +255,12 @@ void *trace_event_buffer_reserve(struct trace_event_buffer *fbuffer,
local_save_flags(fbuffer->flags);
fbuffer->pc = preempt_count();
/*
-* If CONFIG_PREEMPT is enabled, then the tracepoint itself disables
+* If CONFIG_PREEMPTION is enabled, then the tracepoint itself disables
 * preemption (adding one to the preempt_count). Since we are
 * interested in the preempt_count at the time the tracepoint was
 * hit, we need to subtract one to offset the increment.
 */
-   if (IS_ENABLED(CONFIG_PREEMPT))
+   if (IS_ENABLED(CONFIG_PREEMPTION))
fbuffer->pc--;
fbuffer->trace_file = trace_file;
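
A tiny worked example (user-space simulation, names invented) of the
off-by-one correction the comment above describes: the instrumentation bumps
the count before the handler runs, so the handler subtracts one to report the
count seen at the instrumentation point.

    #include <stdio.h>

    static int preempt_count_sim;   /* pretend per-CPU preempt_count */

    /* Simulated tracepoint: disabling preemption adds one, so the recorded
     * value must be the current count minus one to reflect the caller. */
    static int record_event(void)
    {
            preempt_count_sim++;                    /* tracepoint disables preemption */
            int pc = preempt_count_sim - 1;         /* count at the time of the hit */
            preempt_count_sim--;                    /* re-enable */
            return pc;
    }

    int main(void)
    {
            printf("recorded preempt count: %d\n", record_event());  /* prints 0 */
            return 0;
    }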
 


[tip:sched/rt] locking/spinlocks: Use CONFIG_PREEMPTION

2019-07-31 Thread tip-bot for Thomas Gleixner
Commit-ID:  27972765bd0410fc2ef5e86a41de17c71440a2dd
Gitweb: https://git.kernel.org/tip/27972765bd0410fc2ef5e86a41de17c71440a2dd
Author: Thomas Gleixner 
AuthorDate: Fri, 26 Jul 2019 23:19:39 +0200
Committer:  Ingo Molnar 
CommitDate: Wed, 31 Jul 2019 19:03:35 +0200

locking/spinlocks: Use CONFIG_PREEMPTION

CONFIG_PREEMPTION is selected by CONFIG_PREEMPT and by
CONFIG_PREEMPT_RT. Both PREEMPT and PREEMPT_RT require the same
functionality which today depends on CONFIG_PREEMPT.

Adjust the comments in the locking code.

Signed-off-by: Thomas Gleixner 
Acked-by: Peter Zijlstra (Intel) 
Cc: Linus Torvalds 
Cc: Masami Hiramatsu 
Cc: Paolo Bonzini 
Cc: Paul E. McKenney 
Cc: Peter Zijlstra 
Cc: Steven Rostedt 
Link: http://lkml.kernel.org/r/20190726212124.302995...@linutronix.de
Signed-off-by: Ingo Molnar 
---
 include/linux/spinlock.h | 2 +-
 include/linux/spinlock_api_smp.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index ed7c4d6b8235..031ce8617df8 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -214,7 +214,7 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock)
 
 /*
  * Define the various spin_lock methods.  Note we define these
- * regardless of whether CONFIG_SMP or CONFIG_PREEMPT are set. The
+ * regardless of whether CONFIG_SMP or CONFIG_PREEMPTION are set. The
  * various methods are defined as nops in the case they are not
  * required.
  */
diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h
index 42dfab89e740..b762eaba4cdf 100644
--- a/include/linux/spinlock_api_smp.h
+++ b/include/linux/spinlock_api_smp.h
@@ -96,7 +96,7 @@ static inline int __raw_spin_trylock(raw_spinlock_t *lock)
 
 /*
  * If lockdep is enabled then we use the non-preemption spin-ops
- * even on CONFIG_PREEMPT, because lockdep assumes that interrupts are
+ * even on CONFIG_PREEMPTION, because lockdep assumes that interrupts are
  * not re-enabled during lock-acquire (which the preempt-spin-ops do):
  */
 #if !defined(CONFIG_GENERIC_LOCKBREAK) || defined(CONFIG_DEBUG_LOCK_ALLOC)


[tip:sched/rt] rcu: Use CONFIG_PREEMPTION

2019-07-31 Thread tip-bot for Thomas Gleixner
Commit-ID:  01b1d88b09824bea1a75b0bac04dcf50d9893875
Gitweb: https://git.kernel.org/tip/01b1d88b09824bea1a75b0bac04dcf50d9893875
Author: Thomas Gleixner 
AuthorDate: Fri, 26 Jul 2019 23:19:38 +0200
Committer:  Ingo Molnar 
CommitDate: Wed, 31 Jul 2019 19:03:35 +0200

rcu: Use CONFIG_PREEMPTION

CONFIG_PREEMPTION is selected by CONFIG_PREEMPT and by
CONFIG_PREEMPT_RT. Both PREEMPT and PREEMPT_RT require the same
functionality which today depends on CONFIG_PREEMPT.

Switch the conditionals in RCU to use CONFIG_PREEMPTION.

That's the first step towards RCU on RT. The further tweaks are work in
progress. This does not touch the selftest bits, which need a closer look
by Paul.

Signed-off-by: Thomas Gleixner 
Acked-by: Peter Zijlstra (Intel) 
Cc: Linus Torvalds 
Cc: Masami Hiramatsu 
Cc: Paolo Bonzini 
Cc: Paul E. McKenney 
Cc: Peter Zijlstra 
Cc: Steven Rostedt 
Link: http://lkml.kernel.org/r/20190726212124.210156...@linutronix.de
Signed-off-by: Ingo Molnar 
---
 arch/Kconfig | 2 +-
 include/linux/rcupdate.h | 2 +-
 include/linux/rcutree.h  | 2 +-
 include/linux/torture.h  | 2 +-
 kernel/rcu/Kconfig   | 8 
 kernel/rcu/tree.c| 6 +++---
 kernel/rcu/tree_stall.h  | 6 +++---
 kernel/trace/Kconfig | 2 +-
 8 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index a7b57dd42c26..c7efbc018f4f 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -103,7 +103,7 @@ config STATIC_KEYS_SELFTEST
 config OPTPROBES
def_bool y
depends on KPROBES && HAVE_OPTPROBES
-   select TASKS_RCU if PREEMPT
+   select TASKS_RCU if PREEMPTION
 
 config KPROBES_ON_FTRACE
def_bool y
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 8f7167478c1d..c4f76a310443 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -578,7 +578,7 @@ do {								      \
  *
  * In non-preemptible RCU implementations (TREE_RCU and TINY_RCU),
  * it is illegal to block while in an RCU read-side critical section.
- * In preemptible RCU implementations (PREEMPT_RCU) in CONFIG_PREEMPT
+ * In preemptible RCU implementations (PREEMPT_RCU) in CONFIG_PREEMPTION
  * kernel builds, RCU read-side critical sections may be preempted,
  * but explicit blocking is illegal.  Finally, in preemptible RCU
  * implementations in real-time (with -rt patchset) kernel builds, RCU
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 735601ac27d3..18b1ed9864b0 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -53,7 +53,7 @@ void rcu_scheduler_starting(void);
 extern int rcu_scheduler_active __read_mostly;
 void rcu_end_inkernel_boot(void);
 bool rcu_is_watching(void);
-#ifndef CONFIG_PREEMPT
+#ifndef CONFIG_PREEMPTION
 void rcu_all_qs(void);
 #endif
 
diff --git a/include/linux/torture.h b/include/linux/torture.h
index a620118385bb..6241f59e2d6f 100644
--- a/include/linux/torture.h
+++ b/include/linux/torture.h
@@ -86,7 +86,7 @@ void _torture_stop_kthread(char *m, struct task_struct **tp);
 #define torture_stop_kthread(n, tp) \
_torture_stop_kthread("Stopping " #n " task", &(tp))
 
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
 #define torture_preempt_schedule() preempt_schedule()
 #else
 #define torture_preempt_schedule()
diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig
index 480edf328b51..7644eda17d62 100644
--- a/kernel/rcu/Kconfig
+++ b/kernel/rcu/Kconfig
@@ -7,7 +7,7 @@ menu "RCU Subsystem"
 
 config TREE_RCU
bool
-   default y if !PREEMPT && SMP
+   default y if !PREEMPTION && SMP
help
  This option selects the RCU implementation that is
  designed for very large SMP system with hundreds or
@@ -16,7 +16,7 @@ config TREE_RCU
 
 config PREEMPT_RCU
bool
-   default y if PREEMPT
+   default y if PREEMPTION
help
  This option selects the RCU implementation that is
  designed for very large SMP systems with hundreds or
@@ -28,7 +28,7 @@ config PREEMPT_RCU
 
 config TINY_RCU
bool
-   default y if !PREEMPT && !SMP
+   default y if !PREEMPTION && !SMP
help
  This option selects the RCU implementation that is
  designed for UP systems from which real-time response
@@ -70,7 +70,7 @@ config TREE_SRCU
  This option selects the full-fledged version of SRCU.
 
 config TASKS_RCU
-   def_bool PREEMPT
+   def_bool PREEMPTION
select SRCU
help
  This option enables a task-based RCU implementation that uses
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index a14e5fbbea46..5962636502bc 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1881,7 +1881,7 @@ rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
struct rcu_node *rnp_p;
 
raw_lockdep_assert_held_rcu_node(rnp);
-   if (WARN_ON_ONCE(!IS_ENABLED(CONFIG_PREEMPT)) ||
+   if 

[tip:sched/rt] sched/preempt: Use CONFIG_PREEMPTION where appropriate

2019-07-31 Thread tip-bot for Thomas Gleixner
Commit-ID:  c1a280b68d4e6b6db4a65aa7865c22d8789ddf09
Gitweb: https://git.kernel.org/tip/c1a280b68d4e6b6db4a65aa7865c22d8789ddf09
Author: Thomas Gleixner 
AuthorDate: Fri, 26 Jul 2019 23:19:37 +0200
Committer:  Ingo Molnar 
CommitDate: Wed, 31 Jul 2019 19:03:34 +0200

sched/preempt: Use CONFIG_PREEMPTION where appropriate

CONFIG_PREEMPTION is selected by CONFIG_PREEMPT and by
CONFIG_PREEMPT_RT. Both PREEMPT and PREEMPT_RT require the same
functionality which today depends on CONFIG_PREEMPT.

Switch the preemption code, scheduler and init task over to use
CONFIG_PREEMPTION.

That's the first step towards RT in that area. The more complex changes are
coming separately.

Signed-off-by: Thomas Gleixner 
Acked-by: Peter Zijlstra (Intel) 
Cc: Linus Torvalds 
Cc: Masami Hiramatsu 
Cc: Paolo Bonzini 
Cc: Paul E. McKenney 
Cc: Peter Zijlstra 
Cc: Steven Rostedt 
Link: http://lkml.kernel.org/r/20190726212124.117528...@linutronix.de
Signed-off-by: Ingo Molnar 
---
 include/asm-generic/preempt.h |  4 ++--
 include/linux/preempt.h   |  6 +++---
 include/linux/sched.h |  6 +++---
 init/init_task.c  |  2 +-
 init/main.c   |  2 +-
 kernel/sched/core.c   | 14 +++---
 kernel/sched/fair.c   |  2 +-
 kernel/sched/sched.h  |  4 ++--
 8 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/include/asm-generic/preempt.h b/include/asm-generic/preempt.h
index c3046c920063..d683f5e6d791 100644
--- a/include/asm-generic/preempt.h
+++ b/include/asm-generic/preempt.h
@@ -78,11 +78,11 @@ static __always_inline bool should_resched(int preempt_offset)
tif_need_resched());
 }
 
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
 extern asmlinkage void preempt_schedule(void);
 #define __preempt_schedule() preempt_schedule()
 extern asmlinkage void preempt_schedule_notrace(void);
 #define __preempt_schedule_notrace() preempt_schedule_notrace()
-#endif /* CONFIG_PREEMPT */
+#endif /* CONFIG_PREEMPTION */
 
 #endif /* __ASM_PREEMPT_H */
diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index dd92b1a93919..bbb68dba37cc 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -182,7 +182,7 @@ do { \
 
 #define preemptible()  (preempt_count() == 0 && !irqs_disabled())
 
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
 #define preempt_enable() \
 do { \
barrier(); \
@@ -203,7 +203,7 @@ do { \
__preempt_schedule(); \
 } while (0)
 
-#else /* !CONFIG_PREEMPT */
+#else /* !CONFIG_PREEMPTION */
 #define preempt_enable() \
 do { \
barrier(); \
@@ -217,7 +217,7 @@ do { \
 } while (0)
 
 #define preempt_check_resched() do { } while (0)
-#endif /* CONFIG_PREEMPT */
+#endif /* CONFIG_PREEMPTION */
 
 #define preempt_disable_notrace() \
 do { \
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9f51932bd543..6947516a2d3e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1767,7 +1767,7 @@ static inline int test_tsk_need_resched(struct task_struct *tsk)
  * value indicates whether a reschedule was done in fact.
  * cond_resched_lock() will drop the spinlock before scheduling,
  */
-#ifndef CONFIG_PREEMPT
+#ifndef CONFIG_PREEMPTION
 extern int _cond_resched(void);
 #else
 static inline int _cond_resched(void) { return 0; }
@@ -1796,12 +1796,12 @@ static inline void cond_resched_rcu(void)
 
 /*
  * Does a critical section need to be broken due to another
- * task waiting?: (technically does not depend on CONFIG_PREEMPT,
+ * task waiting?: (technically does not depend on CONFIG_PREEMPTION,
  * but a general need for low latency)
  */
 static inline int spin_needbreak(spinlock_t *lock)
 {
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
return spin_is_contended(lock);
 #else
return 0;
diff --git a/init/init_task.c b/init/init_task.c
index 7ab773b9b3cd..bfe06c53b14e 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -174,7 +174,7 @@ struct task_struct init_task
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
.ret_stack  = NULL,
 #endif
-#if defined(CONFIG_TRACING) && defined(CONFIG_PREEMPT)
+#if defined(CONFIG_TRACING) && defined(CONFIG_PREEMPTION)
.trace_recursion = 0,
 #endif
 #ifdef CONFIG_LIVEPATCH
diff --git a/init/main.c b/init/main.c
index 96f8d5af52d6..653693da8da6 100644
--- a/init/main.c
+++ b/init/main.c
@@ -433,7 +433,7 @@ noinline void __ref rest_init(void)
 
/*
 * Enable might_sleep() and smp_processor_id() checks.
-* They cannot be enabled earlier because with CONFIG_PREEMPT=y
+* They cannot be enabled earlier because with CONFIG_PREEMPTION=y
 * kernel_thread() would trigger might_sleep() splats. With
 * CONFIG_PREEMPT_VOLUNTARY=y the init task might have scheduled
 * already, but it's stuck on the kthreadd_done completion.
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 2b037f195473..604a5e137efe 100644
--- a/kernel/sched/core.c
+++ 

[tip:timers/urgent] arm64: compat: vdso: Use legacy syscalls as fallback

2019-07-30 Thread tip-bot for Thomas Gleixner
Commit-ID:  33a58980ff3cc5dbf0bb1b325746ac69223eda0b
Gitweb: https://git.kernel.org/tip/33a58980ff3cc5dbf0bb1b325746ac69223eda0b
Author: Thomas Gleixner 
AuthorDate: Sun, 28 Jul 2019 15:12:56 +0200
Committer:  Thomas Gleixner 
CommitDate: Wed, 31 Jul 2019 00:09:10 +0200

arm64: compat: vdso: Use legacy syscalls as fallback

The generic VDSO implementation uses the Y2038 safe clock_gettime64() and
clock_getres_time64() syscalls as fallback for the 32bit VDSO. This breaks
seccomp setups because these syscalls might not (yet) be allowed.

Implement the 32bit variants which use the legacy syscalls and select the
variant in the core library.

The 64bit time variants are not removed because they are required for the
time64 based vdso accessors.

Fixes: 00b26474c2f1 ("lib/vdso: Provide generic VDSO implementation")
Reported-by: Sean Christopherson 
Reported-by: Paul Bolle 
Suggested-by: Andy Lutomirski 
Signed-off-by: Thomas Gleixner 
Tested-by: Vincenzo Frascino 
Reviewed-by: Vincenzo Frascino 
Link: https://lkml.kernel.org/r/20190728131648.971361...@linutronix.de

---
 arch/arm64/include/asm/vdso/compat_gettimeofday.h | 40 +++
 1 file changed, 40 insertions(+)

diff --git a/arch/arm64/include/asm/vdso/compat_gettimeofday.h b/arch/arm64/include/asm/vdso/compat_gettimeofday.h
index f4812777f5c5..c50ee1b7d5cd 100644
--- a/arch/arm64/include/asm/vdso/compat_gettimeofday.h
+++ b/arch/arm64/include/asm/vdso/compat_gettimeofday.h
@@ -16,6 +16,8 @@
 
 #define VDSO_HAS_CLOCK_GETRES  1
 
+#define VDSO_HAS_32BIT_FALLBACK	1
+
 static __always_inline
 int gettimeofday_fallback(struct __kernel_old_timeval *_tv,
  struct timezone *_tz)
@@ -51,6 +53,23 @@ long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
return ret;
 }
 
+static __always_inline
+long clock_gettime32_fallback(clockid_t _clkid, struct old_timespec32 *_ts)
+{
+   register struct old_timespec32 *ts asm("r1") = _ts;
+   register clockid_t clkid asm("r0") = _clkid;
+   register long ret asm ("r0");
+   register long nr asm("r7") = __NR_compat_clock_gettime;
+
+   asm volatile(
+   "   swi #0\n"
+   : "=r" (ret)
+   : "r" (clkid), "r" (ts), "r" (nr)
+   : "memory");
+
+   return ret;
+}
+
 static __always_inline
 int clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
 {
@@ -72,6 +91,27 @@ int clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
return ret;
 }
 
+static __always_inline
+int clock_getres32_fallback(clockid_t _clkid, struct old_timespec32 *_ts)
+{
+   register struct old_timespec32 *ts asm("r1") = _ts;
+   register clockid_t clkid asm("r0") = _clkid;
+   register long ret asm ("r0");
+   register long nr asm("r7") = __NR_compat_clock_getres;
+
+   /* The checks below are required for ABI consistency with arm */
+   if ((_clkid >= MAX_CLOCKS) && (_ts == NULL))
+   return -EINVAL;
+
+   asm volatile(
+   "   swi #0\n"
+   : "=r" (ret)
+   : "r" (clkid), "r" (ts), "r" (nr)
+   : "memory");
+
+   return ret;
+}
+
 static __always_inline u64 __arch_get_hw_counter(s32 clock_mode)
 {
u64 res;


[tip:timers/urgent] x86/vdso/32: Use 32bit syscall fallback

2019-07-30 Thread tip-bot for Thomas Gleixner
Commit-ID:  d2f5d3fa26196183adb44a413c44caa9872275b4
Gitweb: https://git.kernel.org/tip/d2f5d3fa26196183adb44a413c44caa9872275b4
Author: Thomas Gleixner 
AuthorDate: Sun, 28 Jul 2019 15:12:55 +0200
Committer:  Thomas Gleixner 
CommitDate: Wed, 31 Jul 2019 00:09:10 +0200

x86/vdso/32: Use 32bit syscall fallback

The generic VDSO implementation uses the Y2038 safe clock_gettime64() and
clock_getres_time64() syscalls as fallback for the 32bit VDSO. This breaks
seccomp setups because these syscalls might not (yet) be allowed.

Implement the 32bit variants which use the legacy syscalls and select the
variant in the core library.

The 64bit time variants are not removed because they are required for the
time64 based vdso accessors.

Fixes: 7ac870747988 ("x86/vdso: Switch to generic vDSO implementation")
Reported-by: Sean Christopherson 
Reported-by: Paul Bolle 
Suggested-by: Andy Lutomirski 
Signed-off-by: Thomas Gleixner 
Reviewed-by: Vincenzo Frascino 
Reviewed-by: Andy Lutomirski 
Link: https://lkml.kernel.org/r/20190728131648.879156...@linutronix.de

---
 arch/x86/include/asm/vdso/gettimeofday.h | 36 
 1 file changed, 36 insertions(+)

diff --git a/arch/x86/include/asm/vdso/gettimeofday.h b/arch/x86/include/asm/vdso/gettimeofday.h
index ae91429129a6..ba71a63cdac4 100644
--- a/arch/x86/include/asm/vdso/gettimeofday.h
+++ b/arch/x86/include/asm/vdso/gettimeofday.h
@@ -96,6 +96,8 @@ long clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
 
 #else
 
+#define VDSO_HAS_32BIT_FALLBACK	1
+
 static __always_inline
 long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
 {
@@ -113,6 +115,23 @@ long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
return ret;
 }
 
+static __always_inline
+long clock_gettime32_fallback(clockid_t _clkid, struct old_timespec32 *_ts)
+{
+   long ret;
+
+   asm (
+   "mov %%ebx, %%edx \n"
+   "mov %[clock], %%ebx \n"
+   "call __kernel_vsyscall \n"
+   "mov %%edx, %%ebx \n"
+   : "=a" (ret), "=m" (*_ts)
+   : "0" (__NR_clock_gettime), [clock] "g" (_clkid), "c" (_ts)
+   : "edx");
+
+   return ret;
+}
+
 static __always_inline
 long gettimeofday_fallback(struct __kernel_old_timeval *_tv,
   struct timezone *_tz)
@@ -148,6 +167,23 @@ clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
return ret;
 }
 
+static __always_inline
+long clock_getres32_fallback(clockid_t _clkid, struct old_timespec32 *_ts)
+{
+   long ret;
+
+   asm (
+   "mov %%ebx, %%edx \n"
+   "mov %[clock], %%ebx \n"
+   "call __kernel_vsyscall \n"
+   "mov %%edx, %%ebx \n"
+   : "=a" (ret), "=m" (*_ts)
+   : "0" (__NR_clock_getres), [clock] "g" (_clkid), "c" (_ts)
+   : "edx");
+
+   return ret;
+}
+
 #endif
 
 #ifdef CONFIG_PARAVIRT_CLOCK


[tip:timers/urgent] lib/vdso/32: Provide legacy syscall fallbacks

2019-07-30 Thread tip-bot for Thomas Gleixner
Commit-ID:  c60a32ea4f459f99b98d383cad3b1ac7cfb3f4be
Gitweb: https://git.kernel.org/tip/c60a32ea4f459f99b98d383cad3b1ac7cfb3f4be
Author: Thomas Gleixner 
AuthorDate: Tue, 30 Jul 2019 11:38:50 +0200
Committer:  Thomas Gleixner 
CommitDate: Wed, 31 Jul 2019 00:09:09 +0200

lib/vdso/32: Provide legacy syscall fallbacks

Address the regression which causes seccomp to deny applications access to
the clock_gettime64() and clock_getres64() syscalls because they are not
enabled in the existing filters.

That trips over the fact that 32bit VDSOs use the new clock_gettime64() and
clock_getres64() syscalls in the fallback path.

Add a conditional to invoke the 32bit legacy fallback syscalls instead of
the new 64bit variants. The conditional can go away once all architectures
are converted.

Fixes: 00b26474c2f1 ("lib/vdso: Provide generic VDSO implementation")
Signed-off-by: Thomas Gleixner 
Tested-by: Sean Christopherson 
Reviewed-by: Sean Christopherson 
Link: https://lkml.kernel.org/r/alpine.deb.2.21.1907301134470.1...@nanos.tec.linutronix.de

---
 lib/vdso/gettimeofday.c | 12 +++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c
index a9e7fd029593..e630e7ff57f1 100644
--- a/lib/vdso/gettimeofday.c
+++ b/lib/vdso/gettimeofday.c
@@ -125,14 +125,18 @@ __cvdso_clock_gettime32(clockid_t clock, struct old_timespec32 *res)
 
ret = __cvdso_clock_gettime_common(clock, &ts);
 
+#ifdef VDSO_HAS_32BIT_FALLBACK
+   if (unlikely(ret))
+   return clock_gettime32_fallback(clock, res);
+#else
if (unlikely(ret))
ret = clock_gettime_fallback(clock, &ts);
+#endif
 
if (likely(!ret)) {
res->tv_sec = ts.tv_sec;
res->tv_nsec = ts.tv_nsec;
}
-
return ret;
 }
 
@@ -232,8 +236,14 @@ __cvdso_clock_getres_time32(clockid_t clock, struct old_timespec32 *res)
int ret;
 
ret = __cvdso_clock_getres_common(clock, &ts);
+
+#ifdef VDSO_HAS_32BIT_FALLBACK
+   if (unlikely(ret))
+   return clock_getres32_fallback(clock, res);
+#else
if (unlikely(ret))
ret = clock_getres_fallback(clock, &ts);
+#endif
 
if (likely(!ret)) {
res->tv_sec = ts.tv_sec;


[tip:timers/urgent] lib/vdso/32: Remove inconsistent NULL pointer checks

2019-07-30 Thread tip-bot for Thomas Gleixner
Commit-ID:  a9446a906f52292c52ecbd5be78eaa4d8395756c
Gitweb: https://git.kernel.org/tip/a9446a906f52292c52ecbd5be78eaa4d8395756c
Author: Thomas Gleixner 
AuthorDate: Sun, 28 Jul 2019 15:12:52 +0200
Committer:  Thomas Gleixner 
CommitDate: Wed, 31 Jul 2019 00:09:09 +0200

lib/vdso/32: Remove inconsistent NULL pointer checks

The 32bit variants of vdso_clock_gettime()/getres() have a NULL pointer
check for the timespec pointer. That's inconsistent vs. 64bit.

But the vdso implementation will never be consistent versus the syscall
because the only case which it can handle is NULL. Any other invalid
pointer will cause a segfault. So special casing NULL is not really useful.

Remove it along with the superfluous syscall fallback invocation as that
will return -EFAULT anyway. That also gets rid of the dubious typecast
which only works because the pointer is NULL.

Fixes: 00b26474c2f1 ("lib/vdso: Provide generic VDSO implementation")
Signed-off-by: Thomas Gleixner 
Tested-by: Vincenzo Frascino 
Reviewed-by: Vincenzo Frascino 
Reviewed-by: Andy Lutomirski 
Link: https://lkml.kernel.org/r/20190728131648.587523...@linutronix.de

---
 lib/vdso/gettimeofday.c | 18 ++
 1 file changed, 2 insertions(+), 16 deletions(-)

diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c
index 2d1c1f241fd9..e28f5a607a5f 100644
--- a/lib/vdso/gettimeofday.c
+++ b/lib/vdso/gettimeofday.c
@@ -115,9 +115,6 @@ __cvdso_clock_gettime32(clockid_t clock, struct old_timespec32 *res)
struct __kernel_timespec ts;
int ret;
 
-   if (res == NULL)
-   goto fallback;
-
ret = __cvdso_clock_gettime(clock, &ts);
 
if (ret == 0) {
@@ -126,9 +123,6 @@ __cvdso_clock_gettime32(clockid_t clock, struct old_timespec32 *res)
}
 
return ret;
-
-fallback:
-   return clock_gettime_fallback(clock, (struct __kernel_timespec *)res);
 }
 
 static __maybe_unused int
@@ -204,10 +198,8 @@ int __cvdso_clock_getres(clockid_t clock, struct __kernel_timespec *res)
goto fallback;
}
 
-   if (res) {
-   res->tv_sec = 0;
-   res->tv_nsec = ns;
-   }
+   res->tv_sec = 0;
+   res->tv_nsec = ns;
 
return 0;
 
@@ -221,9 +213,6 @@ __cvdso_clock_getres_time32(clockid_t clock, struct old_timespec32 *res)
struct __kernel_timespec ts;
int ret;
 
-   if (res == NULL)
-   goto fallback;
-
ret = __cvdso_clock_getres(clock, &ts);
 
if (ret == 0) {
@@ -232,8 +221,5 @@ __cvdso_clock_getres_time32(clockid_t clock, struct old_timespec32 *res)
}
 
return ret;
-
-fallback:
-   return clock_getres_fallback(clock, (struct __kernel_timespec *)res);
 }
 #endif /* VDSO_HAS_CLOCK_GETRES */


[tip:timers/urgent] lib/vdso: Move fallback invocation to the callers

2019-07-30 Thread tip-bot for Thomas Gleixner
Commit-ID:  502a590a170b3b3d0ad998ee0b639ac0b3db1dfa
Gitweb: https://git.kernel.org/tip/502a590a170b3b3d0ad998ee0b639ac0b3db1dfa
Author: Thomas Gleixner 
AuthorDate: Sun, 28 Jul 2019 15:12:53 +0200
Committer:  Thomas Gleixner 
CommitDate: Wed, 31 Jul 2019 00:09:09 +0200

lib/vdso: Move fallback invocation to the callers

To allow syscall fallbacks using the legacy 32bit syscall for 32bit VDSO
builds, move the fallback invocation out into the callers.

Split the common code out of __cvdso_clock_gettime/getres() and invoke the
syscall fallback in the 64bit and 32bit variants.

Preparatory work for using legacy syscalls in 32bit VDSO. No functional
change.

Fixes: 00b26474c2f1 ("lib/vdso: Provide generic VDSO implementation")
Signed-off-by: Thomas Gleixner 
Tested-by: Vincenzo Frascino 
Reviewed-by: Andy Lutomirski 
Reviewed-by: Vincenzo Frascino 
Link: https://lkml.kernel.org/r/20190728131648.695579...@linutronix.de

---
 lib/vdso/gettimeofday.c | 53 +
 1 file changed, 36 insertions(+), 17 deletions(-)

diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c
index e28f5a607a5f..a9e7fd029593 100644
--- a/lib/vdso/gettimeofday.c
+++ b/lib/vdso/gettimeofday.c
@@ -51,7 +51,7 @@ static int do_hres(const struct vdso_data *vd, clockid_t clk,
ns = vdso_ts->nsec;
last = vd->cycle_last;
if (unlikely((s64)cycles < 0))
-   return clock_gettime_fallback(clk, ts);
+   return -1;
 
ns += vdso_calc_delta(cycles, last, vd->mask, vd->mult);
ns >>= vd->shift;
@@ -82,14 +82,14 @@ static void do_coarse(const struct vdso_data *vd, clockid_t clk,
 }
 
 static __maybe_unused int
-__cvdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts)
+__cvdso_clock_gettime_common(clockid_t clock, struct __kernel_timespec *ts)
 {
const struct vdso_data *vd = __arch_get_vdso_data();
u32 msk;
 
/* Check for negative values or invalid clocks */
if (unlikely((u32) clock >= MAX_CLOCKS))
-   goto fallback;
+   return -1;
 
/*
 * Convert the clockid to a bitmask and use it to check which
@@ -104,9 +104,17 @@ __cvdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts)
} else if (msk & VDSO_RAW) {
return do_hres(&vd[CS_RAW], clock, ts);
}
+   return -1;
+}
 
-fallback:
-   return clock_gettime_fallback(clock, ts);
+static __maybe_unused int
+__cvdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts)
+{
+   int ret = __cvdso_clock_gettime_common(clock, ts);
+
+   if (unlikely(ret))
+   return clock_gettime_fallback(clock, ts);
+   return 0;
 }
 
 static __maybe_unused int
@@ -115,9 +123,12 @@ __cvdso_clock_gettime32(clockid_t clock, struct 
old_timespec32 *res)
struct __kernel_timespec ts;
int ret;
 
-   ret = __cvdso_clock_gettime(clock, &ts);
+   ret = __cvdso_clock_gettime_common(clock, &ts);
 
-   if (ret == 0) {
+   if (unlikely(ret))
ret = clock_gettime_fallback(clock, &ts);
+
+   if (likely(!ret)) {
res->tv_sec = ts.tv_sec;
res->tv_nsec = ts.tv_nsec;
}
@@ -163,17 +174,18 @@ static __maybe_unused time_t __cvdso_time(time_t *time)
 
 #ifdef VDSO_HAS_CLOCK_GETRES
 static __maybe_unused
-int __cvdso_clock_getres(clockid_t clock, struct __kernel_timespec *res)
+int __cvdso_clock_getres_common(clockid_t clock, struct __kernel_timespec *res)
 {
const struct vdso_data *vd = __arch_get_vdso_data();
-   u64 ns;
+   u64 hrtimer_res;
u32 msk;
-   u64 hrtimer_res = READ_ONCE(vd[CS_HRES_COARSE].hrtimer_res);
+   u64 ns;
 
/* Check for negative values or invalid clocks */
if (unlikely((u32) clock >= MAX_CLOCKS))
-   goto fallback;
+   return -1;
 
+   hrtimer_res = READ_ONCE(vd[CS_HRES_COARSE].hrtimer_res);
/*
 * Convert the clockid to a bitmask and use it to check which
 * clocks are handled in the VDSO directly.
@@ -195,16 +207,22 @@ int __cvdso_clock_getres(clockid_t clock, struct 
__kernel_timespec *res)
 */
ns = hrtimer_res;
} else {
-   goto fallback;
+   return -1;
}
 
res->tv_sec = 0;
res->tv_nsec = ns;
 
return 0;
+}
+
+int __cvdso_clock_getres(clockid_t clock, struct __kernel_timespec *res)
+{
+   int ret = __cvdso_clock_getres_common(clock, res);
 
-fallback:
-   return clock_getres_fallback(clock, res);
+   if (unlikely(ret))
+   return clock_getres_fallback(clock, res);
+   return 0;
 }
 
 static __maybe_unused int
@@ -213,13 +231,14 @@ __cvdso_clock_getres_time32(clockid_t clock, struct 
old_timespec32 *res)
struct __kernel_timespec ts;
int ret;
 
-   ret = __cvdso_clock_getres(clock, &ts);

[tip:timers/core] perf/core: Mark hrtimers to expire in hard interrupt context

2019-07-30 Thread tip-bot for Thomas Gleixner
Commit-ID:  c23a8bd3ac02df2ca5e77396df1dee247db3d49f
Gitweb: https://git.kernel.org/tip/c23a8bd3ac02df2ca5e77396df1dee247db3d49f
Author: Thomas Gleixner 
AuthorDate: Fri, 26 Jul 2019 20:30:53 +0200
Committer:  Thomas Gleixner 
CommitDate: Tue, 30 Jul 2019 23:57:54 +0200

perf/core: Mark hrtimers to expire in hard interrupt context

To guarantee that the multiplexing mechanism and the hrtimer driven events
work on PREEMPT_RT enabled kernels it's required that the related hrtimers
expire in hard interrupt context. Mark them so PREEMPT_RT kernels won't
defer them to soft interrupt context.

No functional change.

[ tglx: Split out of larger combo patch. Added changelog ]

Signed-off-by: Sebastian Andrzej Siewior 
Signed-off-by: Thomas Gleixner 
Acked-by: Peter Zijlstra (Intel) 
Link: https://lkml.kernel.org/r/20190726185753.169509...@linutronix.de

---
 kernel/events/core.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 026a14541a38..9d623e257a51 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1103,7 +1103,7 @@ static void __perf_mux_hrtimer_init(struct 
perf_cpu_context *cpuctx, int cpu)
cpuctx->hrtimer_interval = ns_to_ktime(NSEC_PER_MSEC * interval);
 
raw_spin_lock_init(&cpuctx->hrtimer_lock);
-   hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
+   hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD);
timer->function = perf_mux_hrtimer_handler;
 }
 
@@ -1121,7 +1121,7 @@ static int perf_mux_hrtimer_restart(struct 
perf_cpu_context *cpuctx)
if (!cpuctx->hrtimer_active) {
cpuctx->hrtimer_active = 1;
hrtimer_forward_now(timer, cpuctx->hrtimer_interval);
-   hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED);
+   hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED_HARD);
}
raw_spin_unlock_irqrestore(&cpuctx->hrtimer_lock, flags);
 
@@ -9491,7 +9491,7 @@ static void perf_swevent_start_hrtimer(struct perf_event 
*event)
period = max_t(u64, 1, hwc->sample_period);
}
hrtimer_start(&hwc->hrtimer, ns_to_ktime(period),
- HRTIMER_MODE_REL_PINNED);
+ HRTIMER_MODE_REL_PINNED_HARD);
 }
 
 static void perf_swevent_cancel_hrtimer(struct perf_event *event)
@@ -9513,7 +9513,7 @@ static void perf_swevent_init_hrtimer(struct perf_event 
*event)
if (!is_sampling_event(event))
return;
 
-   hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+   hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
hwc->hrtimer.function = perf_swevent_hrtimer;
 
/*


[tip:timers/core] sched: Mark hrtimers to expire in hard interrupt context

2019-07-30 Thread tip-bot for Thomas Gleixner
Commit-ID:  b78b1e6b5b77b265a94e3027e6f0dcaad33faf9f
Gitweb: https://git.kernel.org/tip/b78b1e6b5b77b265a94e3027e6f0dcaad33faf9f
Author: Thomas Gleixner 
AuthorDate: Fri, 26 Jul 2019 20:30:52 +0200
Committer:  Thomas Gleixner 
CommitDate: Tue, 30 Jul 2019 23:57:54 +0200

sched: Mark hrtimers to expire in hard interrupt context

The scheduler related hrtimers need to expire in hard interrupt context
even on PREEMPT_RT enabled kernels. Mark them as such.

No functional change.

[ tglx: Split out from larger combo patch. Add changelog. ]

Signed-off-by: Sebastian Andrzej Siewior 
Signed-off-by: Thomas Gleixner 
Acked-by: Peter Zijlstra (Intel) 
Link: https://lkml.kernel.org/r/20190726185753.077004...@linutronix.de

---
 kernel/sched/core.c | 6 +++---
 kernel/sched/deadline.c | 4 ++--
 kernel/sched/rt.c   | 7 ---
 3 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 2b037f195473..389e0993fbb4 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -255,7 +255,7 @@ static void __hrtick_restart(struct rq *rq)
 {
struct hrtimer *timer = &rq->hrtick_timer;
 
-   hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED);
+   hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED_HARD);
 }
 
 /*
@@ -314,7 +314,7 @@ void hrtick_start(struct rq *rq, u64 delay)
 */
delay = max_t(u64, delay, 1LL);
hrtimer_start(&rq->hrtick_timer, ns_to_ktime(delay),
- HRTIMER_MODE_REL_PINNED);
+ HRTIMER_MODE_REL_PINNED_HARD);
 }
 #endif /* CONFIG_SMP */
 
@@ -328,7 +328,7 @@ static void hrtick_rq_init(struct rq *rq)
rq->hrtick_csd.info = rq;
 #endif
 
-   hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+   hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
rq->hrtick_timer.function = hrtick;
 }
 #else  /* CONFIG_SCHED_HRTICK */
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index ef5b9f6b1d42..0359612d5443 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -923,7 +923,7 @@ static int start_dl_timer(struct task_struct *p)
 */
if (!hrtimer_is_queued(timer)) {
get_task_struct(p);
-   hrtimer_start(timer, act, HRTIMER_MODE_ABS);
+   hrtimer_start(timer, act, HRTIMER_MODE_ABS_HARD);
}
 
return 1;
@@ -1053,7 +1053,7 @@ void init_dl_task_timer(struct sched_dl_entity *dl_se)
 {
struct hrtimer *timer = &dl_se->dl_timer;
 
-   hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+   hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
timer->function = dl_task_timer;
 }
 
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index a532558a5176..da3e85e61013 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -45,8 +45,8 @@ void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, 
u64 runtime)
 
raw_spin_lock_init(&rt_b->rt_runtime_lock);
 
-   hrtimer_init(&rt_b->rt_period_timer,
-   CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+   hrtimer_init(&rt_b->rt_period_timer, CLOCK_MONOTONIC,
+HRTIMER_MODE_REL_HARD);
rt_b->rt_period_timer.function = sched_rt_period_timer;
 }
 
@@ -67,7 +67,8 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
 * to update the period.
 */
hrtimer_forward_now(&rt_b->rt_period_timer, ns_to_ktime(0));
-   hrtimer_start_expires(&rt_b->rt_period_timer, 
HRTIMER_MODE_ABS_PINNED);
+   hrtimer_start_expires(&rt_b->rt_period_timer,
+ HRTIMER_MODE_ABS_PINNED_HARD);
}
raw_spin_unlock(&rt_b->rt_runtime_lock);
 }


[tip:timers/core] hrtimer: Remove task argument from hrtimer_init_sleeper()

2019-07-30 Thread tip-bot for Thomas Gleixner
Commit-ID:  b74494872555d1f7888dfd9225700a363f4a84fc
Gitweb: https://git.kernel.org/tip/b74494872555d1f7888dfd9225700a363f4a84fc
Author: Thomas Gleixner 
AuthorDate: Fri, 26 Jul 2019 20:30:49 +0200
Committer:  Thomas Gleixner 
CommitDate: Tue, 30 Jul 2019 23:57:51 +0200

hrtimer: Remove task argument from hrtimer_init_sleeper()

All callers hand in 'current' and that's the only task pointer which
actually makes sense. Remove the task argument and set current in the
function.

Signed-off-by: Thomas Gleixner 
Reviewed-by: Steven Rostedt (VMware) 
Acked-by: Peter Zijlstra (Intel) 
Link: https://lkml.kernel.org/r/20190726185752.791885...@linutronix.de

---
 block/blk-mq.c | 2 +-
 drivers/staging/android/vsoc.c | 2 +-
 include/linux/hrtimer.h| 3 +--
 include/linux/wait.h   | 2 +-
 kernel/futex.c | 2 +-
 kernel/time/hrtimer.c  | 8 
 net/core/pktgen.c  | 2 +-
 7 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index b038ec680e84..5f647cb8c695 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -3418,7 +3418,7 @@ static bool blk_mq_poll_hybrid_sleep(struct request_queue 
*q,
hrtimer_init_on_stack(&hs.timer, CLOCK_MONOTONIC, mode);
hrtimer_set_expires(&hs.timer, kt);
 
-   hrtimer_init_sleeper(&hs, current);
+   hrtimer_init_sleeper(&hs);
do {
if (blk_mq_rq_state(rq) == MQ_RQ_COMPLETE)
break;
diff --git a/drivers/staging/android/vsoc.c b/drivers/staging/android/vsoc.c
index 00a1ec7b9154..ce480bcf20d2 100644
--- a/drivers/staging/android/vsoc.c
+++ b/drivers/staging/android/vsoc.c
@@ -442,7 +442,7 @@ static int handle_vsoc_cond_wait(struct file *filp, struct 
vsoc_cond_wait *arg)
hrtimer_set_expires_range_ns(&to->timer, wake_time,
 current->timer_slack_ns);
 
-   hrtimer_init_sleeper(to, current);
+   hrtimer_init_sleeper(to);
}
 
while (1) {
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 4971100a8cab..3c74f89367c4 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -463,8 +463,7 @@ extern long hrtimer_nanosleep(const struct timespec64 *rqtp,
  const enum hrtimer_mode mode,
  const clockid_t clockid);
 
-extern void hrtimer_init_sleeper(struct hrtimer_sleeper *sl,
-struct task_struct *tsk);
+extern void hrtimer_init_sleeper(struct hrtimer_sleeper *sl);
 
 extern int schedule_hrtimeout_range(ktime_t *expires, u64 delta,
const enum hrtimer_mode mode);
diff --git a/include/linux/wait.h b/include/linux/wait.h
index b6f77cf60dd7..d57832774ca6 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -489,7 +489,7 @@ do {
\
struct hrtimer_sleeper __t; 
\

\
hrtimer_init_on_stack(&__t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);   
\
-   hrtimer_init_sleeper(&__t, current);
\
+   hrtimer_init_sleeper(&__t); 
\
if ((timeout) != KTIME_MAX) 
\
hrtimer_start_range_ns(&__t.timer, timeout, 
\
   current->timer_slack_ns, 
\
diff --git a/kernel/futex.c b/kernel/futex.c
index 6d50728ef2e7..5e9842ea4012 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -490,7 +490,7 @@ futex_setup_timer(ktime_t *time, struct hrtimer_sleeper 
*timeout,
hrtimer_init_on_stack(&timeout->timer, (flags & FLAGS_CLOCKRT) ?
  CLOCK_REALTIME : CLOCK_MONOTONIC,
  HRTIMER_MODE_ABS);
-   hrtimer_init_sleeper(timeout, current);
+   hrtimer_init_sleeper(timeout);
 
/*
 * If range_ns is 0, calling hrtimer_set_expires_range_ns() is
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 5ee77f1a8a92..de895d86800c 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1639,10 +1639,10 @@ static enum hrtimer_restart hrtimer_wakeup(struct 
hrtimer *timer)
return HRTIMER_NORESTART;
 }
 
-void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task)
+void hrtimer_init_sleeper(struct hrtimer_sleeper *sl)
 {
sl->timer.function = hrtimer_wakeup;
-   sl->task = task;
+   sl->task = current;
 }
 EXPORT_SYMBOL_GPL(hrtimer_init_sleeper);
 
@@ -1669,7 +1669,7 @@ static int __sched do_nanosleep(struct hrtimer_sleeper 
*t, enum hrtimer_mode mod
 {
struct restart_block *restart;
 
-   hrtimer_init_sleeper(t, current);
+   

[tip:x86/apic] x86/apic/x2apic: Implement IPI shorthands support

2019-07-25 Thread tip-bot for Thomas Gleixner
Commit-ID:  43931d350f30c6cd8c2f498d54ef7d65750abc92
Gitweb: https://git.kernel.org/tip/43931d350f30c6cd8c2f498d54ef7d65750abc92
Author: Thomas Gleixner 
AuthorDate: Mon, 22 Jul 2019 20:47:30 +0200
Committer:  Thomas Gleixner 
CommitDate: Thu, 25 Jul 2019 16:12:02 +0200

x86/apic/x2apic: Implement IPI shorthands support

All callers of apic->send_IPI_all() and apic->send_IPI_allbutself() contain
the decision logic for shorthand invocation already and invoke
send_IPI_mask() if the prerequisites are not satisfied.

Implement shorthand support for x2apic.

Signed-off-by: Thomas Gleixner 
Acked-by: Peter Zijlstra (Intel) 
Link: https://lkml.kernel.org/r/20190722105221.134696...@linutronix.de

---
 arch/x86/kernel/apic/local.h  |  1 +
 arch/x86/kernel/apic/x2apic_cluster.c |  4 ++--
 arch/x86/kernel/apic/x2apic_phys.c| 12 ++--
 3 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/apic/local.h b/arch/x86/kernel/apic/local.h
index 69ba777cef98..04797f05ce94 100644
--- a/arch/x86/kernel/apic/local.h
+++ b/arch/x86/kernel/apic/local.h
@@ -23,6 +23,7 @@ unsigned int x2apic_get_apic_id(unsigned long id);
 u32 x2apic_set_apic_id(unsigned int id);
 int x2apic_phys_pkg_id(int initial_apicid, int index_msb);
 void x2apic_send_IPI_self(int vector);
+void __x2apic_send_IPI_shorthand(int vector, u32 which);
 
 /* IPI */
 
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c 
b/arch/x86/kernel/apic/x2apic_cluster.c
index d0a13c88f777..45e92cba92f5 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -82,12 +82,12 @@ x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, 
int vector)
 
 static void x2apic_send_IPI_allbutself(int vector)
 {
-   __x2apic_send_IPI_mask(cpu_online_mask, vector, APIC_DEST_ALLBUT);
+   __x2apic_send_IPI_shorthand(vector, APIC_DEST_ALLBUT);
 }
 
 static void x2apic_send_IPI_all(int vector)
 {
-   __x2apic_send_IPI_mask(cpu_online_mask, vector, APIC_DEST_ALLINC);
+   __x2apic_send_IPI_shorthand(vector, APIC_DEST_ALLINC);
 }
 
 static u32 x2apic_calc_apicid(unsigned int cpu)
diff --git a/arch/x86/kernel/apic/x2apic_phys.c 
b/arch/x86/kernel/apic/x2apic_phys.c
index 5d50e1f9d4bf..bc9693841353 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -75,12 +75,12 @@ static void
 
 static void x2apic_send_IPI_allbutself(int vector)
 {
-   __x2apic_send_IPI_mask(cpu_online_mask, vector, APIC_DEST_ALLBUT);
+   __x2apic_send_IPI_shorthand(vector, APIC_DEST_ALLBUT);
 }
 
 static void x2apic_send_IPI_all(int vector)
 {
-   __x2apic_send_IPI_mask(cpu_online_mask, vector, APIC_DEST_ALLINC);
+   __x2apic_send_IPI_shorthand(vector, APIC_DEST_ALLINC);
 }
 
 static void init_x2apic_ldr(void)
@@ -112,6 +112,14 @@ void __x2apic_send_IPI_dest(unsigned int apicid, int 
vector, unsigned int dest)
native_x2apic_icr_write(cfg, apicid);
 }
 
+void __x2apic_send_IPI_shorthand(int vector, u32 which)
+{
+   unsigned long cfg = __prepare_ICR(which, vector, 0);
+
+   x2apic_wrmsr_fence();
+   native_x2apic_icr_write(cfg, 0);
+}
+
 unsigned int x2apic_get_apic_id(unsigned long id)
 {
return id;


[tip:x86/apic] x86/apic/flat64: Remove the IPI shorthand decision logic

2019-07-25 Thread tip-bot for Thomas Gleixner
Commit-ID:  2510d09e9dabc265341f164e0b45b2dfdcb7ef36
Gitweb: https://git.kernel.org/tip/2510d09e9dabc265341f164e0b45b2dfdcb7ef36
Author: Thomas Gleixner 
AuthorDate: Mon, 22 Jul 2019 20:47:29 +0200
Committer:  Thomas Gleixner 
CommitDate: Thu, 25 Jul 2019 16:12:02 +0200

x86/apic/flat64: Remove the IPI shorthand decision logic

All callers of apic->send_IPI_all() and apic->send_IPI_allbutself() contain
the decision logic for shorthand invocation already and invoke
send_IPI_mask() if the prerequisites are not satisfied.

Remove the now redundant decision logic in the APIC code and the duplicate
helper in probe_64.c.

Signed-off-by: Thomas Gleixner 
Acked-by: Peter Zijlstra (Intel) 
Link: https://lkml.kernel.org/r/20190722105221.042964...@linutronix.de

---
 arch/x86/include/asm/apic.h |  4 ---
 arch/x86/kernel/apic/apic_flat_64.c | 49 +
 arch/x86/kernel/apic/probe_64.c |  7 --
 3 files changed, 6 insertions(+), 54 deletions(-)

diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index de86c6c15228..2ebc17d9c72c 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -468,10 +468,6 @@ static inline unsigned default_get_apic_id(unsigned long x)
 #define TRAMPOLINE_PHYS_LOW0x467
 #define TRAMPOLINE_PHYS_HIGH   0x469
 
-#ifdef CONFIG_X86_64
-extern void apic_send_IPI_self(int vector);
-#endif
-
 extern void generic_bigsmp_probe(void);
 
 #ifdef CONFIG_X86_LOCAL_APIC
diff --git a/arch/x86/kernel/apic/apic_flat_64.c 
b/arch/x86/kernel/apic/apic_flat_64.c
index 004611a44962..7862b152a052 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -76,33 +76,6 @@ flat_send_IPI_mask_allbutself(const struct cpumask *cpumask, 
int vector)
_flat_send_IPI_mask(mask, vector);
 }
 
-static void flat_send_IPI_allbutself(int vector)
-{
-   int cpu = smp_processor_id();
-
-   if (IS_ENABLED(CONFIG_HOTPLUG_CPU) || vector == NMI_VECTOR) {
-   if (!cpumask_equal(cpu_online_mask, cpumask_of(cpu))) {
-   unsigned long mask = cpumask_bits(cpu_online_mask)[0];
-
-   if (cpu < BITS_PER_LONG)
-   __clear_bit(cpu, &mask);
-
-   _flat_send_IPI_mask(mask, vector);
-   }
-   } else if (num_online_cpus() > 1) {
-   __default_send_IPI_shortcut(APIC_DEST_ALLBUT, vector);
-   }
-}
-
-static void flat_send_IPI_all(int vector)
-{
-   if (vector == NMI_VECTOR) {
-   flat_send_IPI_mask(cpu_online_mask, vector);
-   } else {
-   __default_send_IPI_shortcut(APIC_DEST_ALLINC, vector);
-   }
-}
-
 static unsigned int flat_get_apic_id(unsigned long x)
 {
return (x >> 24) & 0xFF;
@@ -164,9 +137,9 @@ static struct apic apic_flat __ro_after_init = {
.send_IPI   = default_send_IPI_single,
.send_IPI_mask  = flat_send_IPI_mask,
.send_IPI_mask_allbutself   = flat_send_IPI_mask_allbutself,
-   .send_IPI_allbutself= flat_send_IPI_allbutself,
-   .send_IPI_all   = flat_send_IPI_all,
-   .send_IPI_self  = apic_send_IPI_self,
+   .send_IPI_allbutself= default_send_IPI_allbutself,
+   .send_IPI_all   = default_send_IPI_all,
+   .send_IPI_self  = default_send_IPI_self,
 
.inquire_remote_apic= default_inquire_remote_apic,
 
@@ -216,16 +189,6 @@ static void physflat_init_apic_ldr(void)
 */
 }
 
-static void physflat_send_IPI_allbutself(int vector)
-{
-   default_send_IPI_mask_allbutself_phys(cpu_online_mask, vector);
-}
-
-static void physflat_send_IPI_all(int vector)
-{
-   default_send_IPI_mask_sequence_phys(cpu_online_mask, vector);
-}
-
 static int physflat_probe(void)
 {
if (apic == &apic_physflat || num_possible_cpus() > 8 ||
@@ -267,9 +230,9 @@ static struct apic apic_physflat __ro_after_init = {
.send_IPI   = default_send_IPI_single_phys,
.send_IPI_mask  = default_send_IPI_mask_sequence_phys,
.send_IPI_mask_allbutself   = default_send_IPI_mask_allbutself_phys,
-   .send_IPI_allbutself= physflat_send_IPI_allbutself,
-   .send_IPI_all   = physflat_send_IPI_all,
-   .send_IPI_self  = apic_send_IPI_self,
+   .send_IPI_allbutself= default_send_IPI_allbutself,
+   .send_IPI_all   = default_send_IPI_all,
+   .send_IPI_self  = default_send_IPI_self,
 
.inquire_remote_apic= default_inquire_remote_apic,
 
diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c
index fb457b540e78..29f0e0984557 100644
--- a/arch/x86/kernel/apic/probe_64.c
+++ b/arch/x86/kernel/apic/probe_64.c
@@ -36,13 +36,6 @@ void __init 

[tip:x86/apic] x86/apic: Share common IPI helpers

2019-07-25 Thread tip-bot for Thomas Gleixner
Commit-ID:  dea978632e8400b84888bad20df0cd91c18f0aec
Gitweb: https://git.kernel.org/tip/dea978632e8400b84888bad20df0cd91c18f0aec
Author: Thomas Gleixner 
AuthorDate: Mon, 22 Jul 2019 20:47:28 +0200
Committer:  Thomas Gleixner 
CommitDate: Thu, 25 Jul 2019 16:12:02 +0200

x86/apic: Share common IPI helpers

The 64bit implementations need the same wrappers around
__default_send_IPI_shortcut() as 32bit.

Move them out of the 32bit section.

Signed-off-by: Thomas Gleixner 
Acked-by: Peter Zijlstra (Intel) 
Link: https://lkml.kernel.org/r/20190722105220.951534...@linutronix.de

---
 arch/x86/kernel/apic/ipi.c   | 30 +++---
 arch/x86/kernel/apic/local.h |  6 +++---
 2 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c
index 71363b0d4a67..6ca0f91372fd 100644
--- a/arch/x86/kernel/apic/ipi.c
+++ b/arch/x86/kernel/apic/ipi.c
@@ -226,6 +226,21 @@ void default_send_IPI_single(int cpu, int vector)
apic->send_IPI_mask(cpumask_of(cpu), vector);
 }
 
+void default_send_IPI_allbutself(int vector)
+{
+   __default_send_IPI_shortcut(APIC_DEST_ALLBUT, vector);
+}
+
+void default_send_IPI_all(int vector)
+{
+   __default_send_IPI_shortcut(APIC_DEST_ALLINC, vector);
+}
+
+void default_send_IPI_self(int vector)
+{
+   __default_send_IPI_shortcut(APIC_DEST_SELF, vector);
+}
+
 #ifdef CONFIG_X86_32
 
 void default_send_IPI_mask_sequence_logical(const struct cpumask *mask,
@@ -285,21 +300,6 @@ void default_send_IPI_mask_logical(const struct cpumask 
*cpumask, int vector)
local_irq_restore(flags);
 }
 
-void default_send_IPI_allbutself(int vector)
-{
-   __default_send_IPI_shortcut(APIC_DEST_ALLBUT, vector);
-}
-
-void default_send_IPI_all(int vector)
-{
-   __default_send_IPI_shortcut(APIC_DEST_ALLINC, vector);
-}
-
-void default_send_IPI_self(int vector)
-{
-   __default_send_IPI_shortcut(APIC_DEST_SELF, vector);
-}
-
 /* must come after the send_IPI functions above for inlining */
 static int convert_apicid_to_cpu(int apic_id)
 {
diff --git a/arch/x86/kernel/apic/local.h b/arch/x86/kernel/apic/local.h
index 391594cd5ca9..69ba777cef98 100644
--- a/arch/x86/kernel/apic/local.h
+++ b/arch/x86/kernel/apic/local.h
@@ -56,12 +56,12 @@ void default_send_IPI_single(int cpu, int vector);
 void default_send_IPI_single_phys(int cpu, int vector);
 void default_send_IPI_mask_sequence_phys(const struct cpumask *mask, int 
vector);
 void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask, int 
vector);
+void default_send_IPI_allbutself(int vector);
+void default_send_IPI_all(int vector);
+void default_send_IPI_self(int vector);
 
 #ifdef CONFIG_X86_32
 void default_send_IPI_mask_sequence_logical(const struct cpumask *mask, int 
vector);
 void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask, int 
vector);
 void default_send_IPI_mask_logical(const struct cpumask *mask, int vector);
-void default_send_IPI_allbutself(int vector);
-void default_send_IPI_all(int vector);
-void default_send_IPI_self(int vector);
 #endif


[tip:x86/apic] x86/apic: Remove the shorthand decision logic

2019-07-25 Thread tip-bot for Thomas Gleixner
Commit-ID:  1f0ad660488b8eb2450d1834af6a156104281194
Gitweb: https://git.kernel.org/tip/1f0ad660488b8eb2450d1834af6a156104281194
Author: Thomas Gleixner 
AuthorDate: Mon, 22 Jul 2019 20:47:27 +0200
Committer:  Thomas Gleixner 
CommitDate: Thu, 25 Jul 2019 16:12:02 +0200

x86/apic: Remove the shorthand decision logic

All callers of apic->send_IPI_all() and apic->send_IPI_allbutself() contain
the decision logic for shorthand invocation already and invoke
send_IPI_mask() if the prerequisites are not satisfied.

Remove the now redundant decision logic in the 32bit implementation.

Signed-off-by: Thomas Gleixner 
Acked-by: Peter Zijlstra (Intel) 
Link: https://lkml.kernel.org/r/20190722105220.860244...@linutronix.de

---
 arch/x86/kernel/apic/ipi.c | 27 +++
 1 file changed, 3 insertions(+), 24 deletions(-)

diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c
index 117ee2323f59..71363b0d4a67 100644
--- a/arch/x86/kernel/apic/ipi.c
+++ b/arch/x86/kernel/apic/ipi.c
@@ -8,13 +8,7 @@
 DEFINE_STATIC_KEY_FALSE(apic_use_ipi_shorthand);
 
 #ifdef CONFIG_SMP
-#ifdef CONFIG_HOTPLUG_CPU
-#define DEFAULT_SEND_IPI   (1)
-#else
-#define DEFAULT_SEND_IPI   (0)
-#endif
-
-static int apic_ipi_shorthand_off __ro_after_init = DEFAULT_SEND_IPI;
+static int apic_ipi_shorthand_off __ro_after_init;
 
 static __init int apic_ipi_shorthand(char *str)
 {
@@ -293,27 +287,12 @@ void default_send_IPI_mask_logical(const struct cpumask 
*cpumask, int vector)
 
 void default_send_IPI_allbutself(int vector)
 {
-   /*
-* if there are no other CPUs in the system then we get an APIC send
-* error if we try to broadcast, thus avoid sending IPIs in this case.
-*/
-   if (num_online_cpus() < 2)
-   return;
-
-   if (apic_ipi_shorthand_off || vector == NMI_VECTOR) {
-   apic->send_IPI_mask_allbutself(cpu_online_mask, vector);
-   } else {
-   __default_send_IPI_shortcut(APIC_DEST_ALLBUT, vector);
-   }
+   __default_send_IPI_shortcut(APIC_DEST_ALLBUT, vector);
 }
 
 void default_send_IPI_all(int vector)
 {
-   if (apic_ipi_shorthand_off || vector == NMI_VECTOR) {
-   apic->send_IPI_mask(cpu_online_mask, vector);
-   } else {
-   __default_send_IPI_shortcut(APIC_DEST_ALLINC, vector);
-   }
+   __default_send_IPI_shortcut(APIC_DEST_ALLINC, vector);
 }
 
 void default_send_IPI_self(int vector)


[tip:x86/apic] x86/smp: Enhance native_send_call_func_ipi()

2019-07-25 Thread tip-bot for Thomas Gleixner
Commit-ID:  832df3d47badcbc860aef617105b6bc1c9459304
Gitweb: https://git.kernel.org/tip/832df3d47badcbc860aef617105b6bc1c9459304
Author: Thomas Gleixner 
AuthorDate: Mon, 22 Jul 2019 20:47:26 +0200
Committer:  Thomas Gleixner 
CommitDate: Thu, 25 Jul 2019 16:12:01 +0200

x86/smp: Enhance native_send_call_func_ipi()

Nadav noticed that the cpumask allocations in native_send_call_func_ipi()
are noticeable in microbenchmarks.

Use the new cpumask_or_equal() function to simplify the decision whether
the supplied target CPU mask is either equal to cpu_online_mask or equal to
cpu_online_mask except for the CPU on which the function is invoked.

cpumask_or_equal() or's the target mask and the cpumask of the current CPU
together and compares it to cpu_online_mask.

If the result is false, use the mask based IPI function, otherwise check
whether the current CPU is set in the target mask and invoke either the
send_IPI_all() or the send_IPI_allbutself() APIC callback.

Make the shorthand decision also depend on the static key which enables
shorthand mode. That allows removing the extra cpumask comparison with
cpu_callout_mask.
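
The decision cpumask_or_equal() enables can be illustrated with plain
bitmasks (a standalone sketch for a system with at most 64 CPUs, not the
kernel cpumask implementation):

  #include <stdbool.h>
  #include <stdio.h>

  /* Same test cpumask_or_equal(mask, cpumask_of(cpu), cpu_online_mask) does. */
  static bool or_equal(unsigned long a, unsigned long b, unsigned long c)
  {
          return (a | b) == c;
  }

  int main(void)
  {
          unsigned long online = 0xf;             /* CPUs 0-3 online */
          unsigned long self   = 1ul << 2;        /* executing on CPU 2 */
          unsigned long all    = online;          /* target: every online CPU */
          unsigned long allbut = online & ~self;  /* target: everyone but self */
          unsigned long some   = 0x3;             /* target: only CPUs 0 and 1 */

          /* The first two qualify for a shorthand, the last one does not. */
          printf("%d %d %d\n",
                 or_equal(all, self, online),
                 or_equal(allbut, self, online),
                 or_equal(some, self, online));
          return 0;
  }

Only when the or'ed mask equals cpu_online_mask can a broadcast shorthand be
considered at all; any other target set has to go through send_IPI_mask().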

Reported-by: Nadav Amit 
Signed-off-by: Thomas Gleixner 
Acked-by: Peter Zijlstra (Intel) 
Link: https://lkml.kernel.org/r/20190722105220.768238...@linutronix.de

---
 arch/x86/kernel/apic/ipi.c | 24 +++-
 1 file changed, 11 insertions(+), 13 deletions(-)

diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c
index eaac65bf58f0..117ee2323f59 100644
--- a/arch/x86/kernel/apic/ipi.c
+++ b/arch/x86/kernel/apic/ipi.c
@@ -83,23 +83,21 @@ void native_send_call_func_single_ipi(int cpu)
 
 void native_send_call_func_ipi(const struct cpumask *mask)
 {
-   cpumask_var_t allbutself;
+   if (static_branch_likely(&apic_use_ipi_shorthand)) {
+   unsigned int cpu = smp_processor_id();
 
-   if (!alloc_cpumask_var(&allbutself, GFP_ATOMIC)) {
-   apic->send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
+   if (!cpumask_or_equal(mask, cpumask_of(cpu), cpu_online_mask))
+   goto sendmask;
+
+   if (cpumask_test_cpu(cpu, mask))
+   apic->send_IPI_all(CALL_FUNCTION_VECTOR);
+   else if (num_online_cpus() > 1)
+   apic->send_IPI_allbutself(CALL_FUNCTION_VECTOR);
return;
}
 
-   cpumask_copy(allbutself, cpu_online_mask);
-   __cpumask_clear_cpu(smp_processor_id(), allbutself);
-
-   if (cpumask_equal(mask, allbutself) &&
-   cpumask_equal(cpu_online_mask, cpu_callout_mask))
-   apic->send_IPI_allbutself(CALL_FUNCTION_VECTOR);
-   else
-   apic->send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
-
-   free_cpumask_var(allbutself);
+sendmask:
+   apic->send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
 }
 
 #endif /* CONFIG_SMP */


[tip:x86/apic] x86/smp: Move smp_function_call implementations into IPI code

2019-07-25 Thread tip-bot for Thomas Gleixner
Commit-ID:  d0a7166bc7ac4feac5c482ebe8b2417aa3302ef4
Gitweb: https://git.kernel.org/tip/d0a7166bc7ac4feac5c482ebe8b2417aa3302ef4
Author: Thomas Gleixner 
AuthorDate: Mon, 22 Jul 2019 20:47:25 +0200
Committer:  Thomas Gleixner 
CommitDate: Thu, 25 Jul 2019 16:12:01 +0200

x86/smp: Move smp_function_call implementations into IPI code

Move it where it belongs. That allows keeping all the shorthand logic in
one place.

No functional change.

Signed-off-by: Thomas Gleixner 
Acked-by: Peter Zijlstra (Intel) 
Link: https://lkml.kernel.org/r/20190722105220.677835...@linutronix.de

---
 arch/x86/include/asm/smp.h |  1 +
 arch/x86/kernel/apic/ipi.c | 40 
 arch/x86/kernel/smp.c  | 40 
 3 files changed, 41 insertions(+), 40 deletions(-)

diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index e1356a3b8223..e15f364efbcc 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -143,6 +143,7 @@ void play_dead_common(void);
 void wbinvd_on_cpu(int cpu);
 int wbinvd_on_all_cpus(void);
 
+void native_smp_send_reschedule(int cpu);
 void native_send_call_func_ipi(const struct cpumask *mask);
 void native_send_call_func_single_ipi(int cpu);
 void x86_idle_thread_init(unsigned int cpu, struct task_struct *idle);
diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c
index f53de3e0145e..eaac65bf58f0 100644
--- a/arch/x86/kernel/apic/ipi.c
+++ b/arch/x86/kernel/apic/ipi.c
@@ -62,6 +62,46 @@ void apic_send_IPI_allbutself(unsigned int vector)
apic->send_IPI_mask_allbutself(cpu_online_mask, vector);
 }
 
+/*
+ * Send a 'reschedule' IPI to another CPU. It goes straight through and
+ * wastes no time serializing anything. Worst case is that we lose a
+ * reschedule ...
+ */
+void native_smp_send_reschedule(int cpu)
+{
+   if (unlikely(cpu_is_offline(cpu))) {
+   WARN(1, "sched: Unexpected reschedule of offline CPU#%d!\n", 
cpu);
+   return;
+   }
+   apic->send_IPI(cpu, RESCHEDULE_VECTOR);
+}
+
+void native_send_call_func_single_ipi(int cpu)
+{
+   apic->send_IPI(cpu, CALL_FUNCTION_SINGLE_VECTOR);
+}
+
+void native_send_call_func_ipi(const struct cpumask *mask)
+{
+   cpumask_var_t allbutself;
+
+   if (!alloc_cpumask_var(&allbutself, GFP_ATOMIC)) {
+   apic->send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
+   return;
+   }
+
+   cpumask_copy(allbutself, cpu_online_mask);
+   __cpumask_clear_cpu(smp_processor_id(), allbutself);
+
+   if (cpumask_equal(mask, allbutself) &&
+   cpumask_equal(cpu_online_mask, cpu_callout_mask))
+   apic->send_IPI_allbutself(CALL_FUNCTION_VECTOR);
+   else
+   apic->send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
+
+   free_cpumask_var(allbutself);
+}
+
 #endif /* CONFIG_SMP */
 
 static inline int __prepare_ICR2(unsigned int mask)
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index b8ad1876a081..b8d4e9c3c070 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -115,46 +115,6 @@
 static atomic_t stopping_cpu = ATOMIC_INIT(-1);
 static bool smp_no_nmi_ipi = false;
 
-/*
- * this function sends a 'reschedule' IPI to another CPU.
- * it goes straight through and wastes no time serializing
- * anything. Worst case is that we lose a reschedule ...
- */
-static void native_smp_send_reschedule(int cpu)
-{
-   if (unlikely(cpu_is_offline(cpu))) {
-   WARN(1, "sched: Unexpected reschedule of offline CPU#%d!\n", 
cpu);
-   return;
-   }
-   apic->send_IPI(cpu, RESCHEDULE_VECTOR);
-}
-
-void native_send_call_func_single_ipi(int cpu)
-{
-   apic->send_IPI(cpu, CALL_FUNCTION_SINGLE_VECTOR);
-}
-
-void native_send_call_func_ipi(const struct cpumask *mask)
-{
-   cpumask_var_t allbutself;
-
-   if (!alloc_cpumask_var(&allbutself, GFP_ATOMIC)) {
-   apic->send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
-   return;
-   }
-
-   cpumask_copy(allbutself, cpu_online_mask);
-   __cpumask_clear_cpu(smp_processor_id(), allbutself);
-
-   if (cpumask_equal(mask, allbutself) &&
-   cpumask_equal(cpu_online_mask, cpu_callout_mask))
-   apic->send_IPI_allbutself(CALL_FUNCTION_VECTOR);
-   else
-   apic->send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
-
-   free_cpumask_var(allbutself);
-}
-
 static int smp_stop_nmi_callback(unsigned int val, struct pt_regs *regs)
 {
/* We are registered on stopping cpu too, avoid spurious NMI */


[tip:x86/apic] x86/apic: Provide and use helper for send_IPI_allbutself()

2019-07-25 Thread tip-bot for Thomas Gleixner
Commit-ID:  22ca7ee933a39f542ff6f81fc64f8036eff56519
Gitweb: https://git.kernel.org/tip/22ca7ee933a39f542ff6f81fc64f8036eff56519
Author: Thomas Gleixner 
AuthorDate: Mon, 22 Jul 2019 20:47:23 +0200
Committer:  Thomas Gleixner 
CommitDate: Thu, 25 Jul 2019 16:12:00 +0200

x86/apic: Provide and use helper for send_IPI_allbutself()

To support IPI shorthands wrap invocations of apic->send_IPI_allbutself()
in a helper function, so the static key controlling the shorthand mode is
only in one place.

Fixup all callers.

Signed-off-by: Thomas Gleixner 
Acked-by: Peter Zijlstra (Intel) 
Link: https://lkml.kernel.org/r/20190722105220.492691...@linutronix.de

---
 arch/x86/include/asm/apic.h |  2 ++
 arch/x86/kernel/apic/ipi.c  | 12 
 arch/x86/kernel/kgdb.c  |  2 +-
 arch/x86/kernel/reboot.c|  7 +--
 arch/x86/kernel/smp.c   |  4 ++--
 5 files changed, 18 insertions(+), 9 deletions(-)

diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 4a0d349ab44d..de86c6c15228 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -177,6 +177,8 @@ extern void lapic_online(void);
 extern void lapic_offline(void);
 extern bool apic_needs_pit(void);
 
+extern void apic_send_IPI_allbutself(unsigned int vector);
+
 #else /* !CONFIG_X86_LOCAL_APIC */
 static inline void lapic_shutdown(void) { }
 #define local_apic_timer_c2_ok 1
diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c
index 5bd8a001a887..f53de3e0145e 100644
--- a/arch/x86/kernel/apic/ipi.c
+++ b/arch/x86/kernel/apic/ipi.c
@@ -50,6 +50,18 @@ void apic_smt_update(void)
static_branch_enable(&apic_use_ipi_shorthand);
}
 }
+
+void apic_send_IPI_allbutself(unsigned int vector)
+{
+   if (num_online_cpus() < 2)
+   return;
+
+   if (static_branch_likely(_use_ipi_shorthand))
+   apic->send_IPI_allbutself(vector);
+   else
+   apic->send_IPI_mask_allbutself(cpu_online_mask, vector);
+}
+
 #endif /* CONFIG_SMP */
 
 static inline int __prepare_ICR2(unsigned int mask)
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index a53dfb09880f..c44fe7d8d9a4 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -416,7 +416,7 @@ static void kgdb_disable_hw_debug(struct pt_regs *regs)
  */
 void kgdb_roundup_cpus(void)
 {
-   apic->send_IPI_allbutself(NMI_VECTOR);
+   apic_send_IPI_allbutself(NMI_VECTOR);
 }
 #endif
 
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 09d6bded3c1e..0cc7c0b106bb 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -828,11 +828,6 @@ static int crash_nmi_callback(unsigned int val, struct 
pt_regs *regs)
return NMI_HANDLED;
 }
 
-static void smp_send_nmi_allbutself(void)
-{
-   apic->send_IPI_allbutself(NMI_VECTOR);
-}
-
 /*
  * Halt all other CPUs, calling the specified function on each of them
  *
@@ -861,7 +856,7 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback)
 */
wmb();
 
-   smp_send_nmi_allbutself();
+   apic_send_IPI_allbutself(NMI_VECTOR);
 
/* Kick CPUs looping in NMI context. */
WRITE_ONCE(crash_ipi_issued, 1);
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 231fa230ebc7..b8ad1876a081 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -215,7 +215,7 @@ static void native_stop_other_cpus(int wait)
/* sync above data before sending IRQ */
wmb();
 
-   apic->send_IPI_allbutself(REBOOT_VECTOR);
+   apic_send_IPI_allbutself(REBOOT_VECTOR);
 
/*
 * Don't wait longer than a second for IPI completion. The
@@ -241,7 +241,7 @@ static void native_stop_other_cpus(int wait)
 
pr_emerg("Shutting down cpus with NMI\n");
 
-   apic->send_IPI_allbutself(NMI_VECTOR);
+   apic_send_IPI_allbutself(NMI_VECTOR);
}
/*
 * Don't wait longer than 10 ms if the caller didn't


[tip:x86/apic] x86/apic: Add static key to Control IPI shorthands

2019-07-25 Thread tip-bot for Thomas Gleixner
Commit-ID:  6a1cb5f5c6413222b8532722562dd1edb5fdfd38
Gitweb: https://git.kernel.org/tip/6a1cb5f5c6413222b8532722562dd1edb5fdfd38
Author: Thomas Gleixner 
AuthorDate: Mon, 22 Jul 2019 20:47:22 +0200
Committer:  Thomas Gleixner 
CommitDate: Thu, 25 Jul 2019 16:12:00 +0200

x86/apic: Add static key to Control IPI shorthands

The IPI shorthand functionality delivers IPI/NMI broadcasts to all CPUs in
the system. This can have similar side effects as the MCE broadcasting when
CPUs are waiting in the BIOS or are offlined.

The kernel already tracks whether offlined CPUs have been brought up at
least once, so that the CR4 MCE bit is set to make sure that MCE broadcasts
can't brick the machine.

Utilize that information and compare it to the cpu_present_mask. If all
present CPUs have been brought up at least once then the broadcast side
effect is mitigated by disabling regular interrupt/IPI delivery in the APIC
itself and by the cpu offline check at the beginning of the NMI handler.

Use a static key to switch between broadcasting via shorthands or sending
the IPI/NMI one by one.
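
For readers unfamiliar with static keys, the mechanism can be sketched as
follows (a generic kernel-style illustration with made-up names; it is not
the code added by this patch):

  #include <linux/jump_label.h>
  #include <linux/types.h>

  static DEFINE_STATIC_KEY_FALSE(my_use_shorthand);

  /* Hypothetical low level senders standing in for the real APIC callbacks. */
  static void my_send_shorthand(int vector) { }
  static void my_send_mask_one_by_one(int vector) { }

  static void my_send_broadcast(int vector)
  {
          /* Compiles down to a patchable jump, not a load plus test. */
          if (static_branch_likely(&my_use_shorthand))
                  my_send_shorthand(vector);
          else
                  my_send_mask_one_by_one(vector);
  }

  static void my_update_mode(bool shorthand_safe)
  {
          /* Flipping the key rewrites the branch sites at runtime. */
          if (shorthand_safe)
                  static_branch_enable(&my_use_shorthand);
          else
                  static_branch_disable(&my_use_shorthand);
  }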

Signed-off-by: Thomas Gleixner 
Acked-by: Peter Zijlstra (Intel) 
Link: https://lkml.kernel.org/r/20190722105220.386410...@linutronix.de

---
 arch/x86/include/asm/apic.h  |  2 ++
 arch/x86/kernel/apic/ipi.c   | 24 +++-
 arch/x86/kernel/apic/local.h |  6 ++
 arch/x86/kernel/cpu/common.c |  2 ++
 4 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index cae7e0d02476..4a0d349ab44d 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -505,8 +505,10 @@ extern int default_check_phys_apicid_present(int 
phys_apicid);
 
 #ifdef CONFIG_SMP
 bool apic_id_is_primary_thread(unsigned int id);
+void apic_smt_update(void);
 #else
 static inline bool apic_id_is_primary_thread(unsigned int id) { return false; }
+static inline void apic_smt_update(void) { }
 #endif
 
 extern void irq_enter(void);
diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c
index ca3bcdb7c4a8..5bd8a001a887 100644
--- a/arch/x86/kernel/apic/ipi.c
+++ b/arch/x86/kernel/apic/ipi.c
@@ -5,6 +5,8 @@
 
 #include "local.h"
 
+DEFINE_STATIC_KEY_FALSE(apic_use_ipi_shorthand);
+
 #ifdef CONFIG_SMP
 #ifdef CONFIG_HOTPLUG_CPU
 #define DEFAULT_SEND_IPI   (1)
@@ -28,7 +30,27 @@ static int __init print_ipi_mode(void)
return 0;
 }
 late_initcall(print_ipi_mode);
-#endif
+
+void apic_smt_update(void)
+{
+   /*
+* Do not switch to broadcast mode if:
+* - Disabled on the command line
+* - Only a single CPU is online
+* - Not all present CPUs have been at least booted once
+*
+* The latter is important as the local APIC might be in some
+* random state and a broadcast might cause havoc. That's
+* especially true for NMI broadcasting.
+*/
+   if (apic_ipi_shorthand_off || num_online_cpus() == 1 ||
+   !cpumask_equal(cpu_present_mask, &cpus_booted_once_mask)) {
+   static_branch_disable(&apic_use_ipi_shorthand);
+   } else {
+   static_branch_enable(&apic_use_ipi_shorthand);
+   }
+}
+#endif /* CONFIG_SMP */
 
 static inline int __prepare_ICR2(unsigned int mask)
 {
diff --git a/arch/x86/kernel/apic/local.h b/arch/x86/kernel/apic/local.h
index bd074e5997b0..391594cd5ca9 100644
--- a/arch/x86/kernel/apic/local.h
+++ b/arch/x86/kernel/apic/local.h
@@ -7,6 +7,9 @@
  * (c) 1998-99, 2000 Ingo Molnar 
  * (c) 2002,2003 Andi Kleen, SuSE Labs.
  */
+
+#include <linux/jump_label.h>
+
 #include <asm/irq_vectors.h>
 
 /* APIC flat 64 */
@@ -22,6 +25,9 @@ int x2apic_phys_pkg_id(int initial_apicid, int index_msb);
 void x2apic_send_IPI_self(int vector);
 
 /* IPI */
+
+DECLARE_STATIC_KEY_FALSE(apic_use_ipi_shorthand);
+
 static inline unsigned int __prepare_ICR(unsigned int shortcut, int vector,
 unsigned int dest)
 {
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 1ee6598c5d83..e0489d2860d3 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1953,4 +1953,6 @@ void arch_smt_update(void)
 {
/* Handle the speculative execution misfeatures */
cpu_bugs_smt_update();
+   /* Check whether IPI broadcasting can be enabled */
+   apic_smt_update();
 }


[tip:x86/apic] x86/apic: Move no_ipi_broadcast() out of 32bit

2019-07-25 Thread tip-bot for Thomas Gleixner
Commit-ID:  bdda3b93e66085abf0b2c16bcdf471176e3c816a
Gitweb: https://git.kernel.org/tip/bdda3b93e66085abf0b2c16bcdf471176e3c816a
Author: Thomas Gleixner 
AuthorDate: Mon, 22 Jul 2019 20:47:21 +0200
Committer:  Thomas Gleixner 
CommitDate: Thu, 25 Jul 2019 16:12:00 +0200

x86/apic: Move no_ipi_broadcast() out of 32bit

For the upcoming shorthand support for all APIC incarnations the command
line option needs to be available for 64 bit as well.

While at it, rename the control variable, make it static and mark it
__ro_after_init.

Signed-off-by: Thomas Gleixner 
Acked-by: Peter Zijlstra (Intel) 
Link: https://lkml.kernel.org/r/20190722105220.278327...@linutronix.de

---
 arch/x86/kernel/apic/ipi.c  | 29 +++--
 arch/x86/kernel/apic/local.h|  2 --
 arch/x86/kernel/apic/probe_32.c | 25 -
 3 files changed, 27 insertions(+), 29 deletions(-)

diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c
index 7236fefde396..ca3bcdb7c4a8 100644
--- a/arch/x86/kernel/apic/ipi.c
+++ b/arch/x86/kernel/apic/ipi.c
@@ -5,6 +5,31 @@
 
 #include "local.h"
 
+#ifdef CONFIG_SMP
+#ifdef CONFIG_HOTPLUG_CPU
+#define DEFAULT_SEND_IPI   (1)
+#else
+#define DEFAULT_SEND_IPI   (0)
+#endif
+
+static int apic_ipi_shorthand_off __ro_after_init = DEFAULT_SEND_IPI;
+
+static __init int apic_ipi_shorthand(char *str)
+{
+   get_option(&str, &apic_ipi_shorthand_off);
+   return 1;
+}
+__setup("no_ipi_broadcast=", apic_ipi_shorthand);
+
+static int __init print_ipi_mode(void)
+{
+   pr_info("IPI shorthand broadcast: %s\n",
+   apic_ipi_shorthand_off ? "disabled" : "enabled");
+   return 0;
+}
+late_initcall(print_ipi_mode);
+#endif
+
 static inline int __prepare_ICR2(unsigned int mask)
 {
return SET_APIC_DEST_FIELD(mask);
@@ -203,7 +228,7 @@ void default_send_IPI_allbutself(int vector)
if (num_online_cpus() < 2)
return;
 
-   if (no_broadcast || vector == NMI_VECTOR) {
+   if (apic_ipi_shorthand_off || vector == NMI_VECTOR) {
apic->send_IPI_mask_allbutself(cpu_online_mask, vector);
} else {
__default_send_IPI_shortcut(APIC_DEST_ALLBUT, vector);
@@ -212,7 +237,7 @@ void default_send_IPI_allbutself(int vector)
 
 void default_send_IPI_all(int vector)
 {
-   if (no_broadcast || vector == NMI_VECTOR) {
+   if (apic_ipi_shorthand_off || vector == NMI_VECTOR) {
apic->send_IPI_mask(cpu_online_mask, vector);
} else {
__default_send_IPI_shortcut(APIC_DEST_ALLINC, vector);
diff --git a/arch/x86/kernel/apic/local.h b/arch/x86/kernel/apic/local.h
index 47c43381b444..bd074e5997b0 100644
--- a/arch/x86/kernel/apic/local.h
+++ b/arch/x86/kernel/apic/local.h
@@ -51,8 +51,6 @@ void default_send_IPI_single_phys(int cpu, int vector);
 void default_send_IPI_mask_sequence_phys(const struct cpumask *mask, int 
vector);
 void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask, int 
vector);
 
-extern int no_broadcast;
-
 #ifdef CONFIG_X86_32
 void default_send_IPI_mask_sequence_logical(const struct cpumask *mask, int 
vector);
 void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask, int 
vector);
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index 7cc961d4f51f..0ac9fd667c99 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -15,31 +15,6 @@
 
 #include "local.h"
 
-#ifdef CONFIG_HOTPLUG_CPU
-#define DEFAULT_SEND_IPI   (1)
-#else
-#define DEFAULT_SEND_IPI   (0)
-#endif
-
-int no_broadcast = DEFAULT_SEND_IPI;
-
-static __init int no_ipi_broadcast(char *str)
-{
-   get_option(&str, &no_broadcast);
-   pr_info("Using %s mode\n",
-   no_broadcast ? "No IPI Broadcast" : "IPI Broadcast");
-   return 1;
-}
-__setup("no_ipi_broadcast=", no_ipi_broadcast);
-
-static int __init print_ipi_mode(void)
-{
-   pr_info("Using IPI %s mode\n",
-   no_broadcast ? "No-Shortcut" : "Shortcut");
-   return 0;
-}
-late_initcall(print_ipi_mode);
-
 static int default_x86_32_early_logical_apicid(int cpu)
 {
return 1 << cpu;


[tip:x86/apic] x86/apic: Add NMI_VECTOR wait to IPI shorthand

2019-07-25 Thread tip-bot for Thomas Gleixner
Commit-ID:  bd82dba2fa6ae91061e5d31399d61fe65028f714
Gitweb: https://git.kernel.org/tip/bd82dba2fa6ae91061e5d31399d61fe65028f714
Author: Thomas Gleixner 
AuthorDate: Mon, 22 Jul 2019 20:47:20 +0200
Committer:  Thomas Gleixner 
CommitDate: Thu, 25 Jul 2019 16:11:59 +0200

x86/apic: Add NMI_VECTOR wait to IPI shorthand

To support NMI shorthand broadcasts add the safe wait for ICR idle for NMI
vector delivery.

Signed-off-by: Thomas Gleixner 
Acked-by: Peter Zijlstra (Intel) 
Link: https://lkml.kernel.org/r/20190722105220.185838...@linutronix.de

---
 arch/x86/kernel/apic/ipi.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c
index 50c9dcc6f60e..7236fefde396 100644
--- a/arch/x86/kernel/apic/ipi.c
+++ b/arch/x86/kernel/apic/ipi.c
@@ -30,7 +30,10 @@ void __default_send_IPI_shortcut(unsigned int shortcut, int 
vector)
/*
 * Wait for idle.
 */
-   __xapic_wait_icr_idle();
+   if (unlikely(vector == NMI_VECTOR))
+   safe_apic_wait_icr_idle();
+   else
+   __xapic_wait_icr_idle();
 
/*
 * No need to touch the target chip field. Also the destination


[tip:x86/apic] x86/apic: Remove dest argument from __default_send_IPI_shortcut()

2019-07-25 Thread tip-bot for Thomas Gleixner
Commit-ID:  3994ff90acc3b115734fe532720c37a499c502ce
Gitweb: https://git.kernel.org/tip/3994ff90acc3b115734fe532720c37a499c502ce
Author: Thomas Gleixner 
AuthorDate: Mon, 22 Jul 2019 20:47:19 +0200
Committer:  Thomas Gleixner 
CommitDate: Thu, 25 Jul 2019 16:11:59 +0200

x86/apic: Remove dest argument from __default_send_IPI_shortcut()

The SDM states:

  "The destination shorthand field of the ICR allows the delivery mode to be
   by-passed in favor of broadcasting the IPI to all the processors on the
   system bus and/or back to itself (see Section 10.6.1, Interrupt Command
   Register (ICR)). Three destination shorthands are supported: self, all
   excluding self, and all including self. The destination mode is ignored
   when a destination shorthand is used."

So there is no point to supply the destination mode to the shorthand
delivery function.

Signed-off-by: Thomas Gleixner 
Acked-by: Peter Zijlstra (Intel) 
Link: https://lkml.kernel.org/r/20190722105220.094613...@linutronix.de

---
 arch/x86/kernel/apic/apic_flat_64.c |  6 ++
 arch/x86/kernel/apic/ipi.c  | 15 +++
 arch/x86/kernel/apic/local.h|  2 +-
 arch/x86/kernel/apic/probe_64.c |  2 +-
 4 files changed, 11 insertions(+), 14 deletions(-)

diff --git a/arch/x86/kernel/apic/apic_flat_64.c 
b/arch/x86/kernel/apic/apic_flat_64.c
index f8594b844637..004611a44962 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -90,8 +90,7 @@ static void flat_send_IPI_allbutself(int vector)
_flat_send_IPI_mask(mask, vector);
}
} else if (num_online_cpus() > 1) {
-   __default_send_IPI_shortcut(APIC_DEST_ALLBUT,
-   vector, apic->dest_logical);
+   __default_send_IPI_shortcut(APIC_DEST_ALLBUT, vector);
}
 }
 
@@ -100,8 +99,7 @@ static void flat_send_IPI_all(int vector)
if (vector == NMI_VECTOR) {
flat_send_IPI_mask(cpu_online_mask, vector);
} else {
-   __default_send_IPI_shortcut(APIC_DEST_ALLINC,
-   vector, apic->dest_logical);
+   __default_send_IPI_shortcut(APIC_DEST_ALLINC, vector);
}
 }
 
diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c
index 6fa9f6ca7eef..50c9dcc6f60e 100644
--- a/arch/x86/kernel/apic/ipi.c
+++ b/arch/x86/kernel/apic/ipi.c
@@ -16,7 +16,7 @@ static inline void __xapic_wait_icr_idle(void)
cpu_relax();
 }
 
-void __default_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned 
int dest)
+void __default_send_IPI_shortcut(unsigned int shortcut, int vector)
 {
/*
 * Subtle. In the case of the 'never do double writes' workaround
@@ -33,9 +33,10 @@ void __default_send_IPI_shortcut(unsigned int shortcut, int 
vector, unsigned int
__xapic_wait_icr_idle();
 
/*
-* No need to touch the target chip field
+* No need to touch the target chip field. Also the destination
+* mode is ignored when a shorthand is used.
 */
-   cfg = __prepare_ICR(shortcut, vector, dest);
+   cfg = __prepare_ICR(shortcut, vector, 0);
 
/*
 * Send the IPI. The write to APIC_ICR fires this off.
@@ -202,8 +203,7 @@ void default_send_IPI_allbutself(int vector)
if (no_broadcast || vector == NMI_VECTOR) {
apic->send_IPI_mask_allbutself(cpu_online_mask, vector);
} else {
-   __default_send_IPI_shortcut(APIC_DEST_ALLBUT, vector,
-   apic->dest_logical);
+   __default_send_IPI_shortcut(APIC_DEST_ALLBUT, vector);
}
 }
 
@@ -212,14 +212,13 @@ void default_send_IPI_all(int vector)
if (no_broadcast || vector == NMI_VECTOR) {
apic->send_IPI_mask(cpu_online_mask, vector);
} else {
-   __default_send_IPI_shortcut(APIC_DEST_ALLINC, vector,
-   apic->dest_logical);
+   __default_send_IPI_shortcut(APIC_DEST_ALLINC, vector);
}
 }
 
 void default_send_IPI_self(int vector)
 {
-   __default_send_IPI_shortcut(APIC_DEST_SELF, vector, apic->dest_logical);
+   __default_send_IPI_shortcut(APIC_DEST_SELF, vector);
 }
 
 /* must come after the send_IPI functions above for inlining */
diff --git a/arch/x86/kernel/apic/local.h b/arch/x86/kernel/apic/local.h
index 95adac0e785b..47c43381b444 100644
--- a/arch/x86/kernel/apic/local.h
+++ b/arch/x86/kernel/apic/local.h
@@ -38,7 +38,7 @@ static inline unsigned int __prepare_ICR(unsigned int 
shortcut, int vector,
return icr;
 }
 
-void __default_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned 
int dest);
+void __default_send_IPI_shortcut(unsigned int shortcut, int vector);
 
 /*
  * This is used to send an IPI with no shorthand notation (the destination is
diff --git 

[tip:x86/apic] x86/hotplug: Silence APIC and NMI when CPU is dead

2019-07-25 Thread tip-bot for Thomas Gleixner
Commit-ID:  60dcaad5736faff5a6b1abba5a292499f57197fe
Gitweb: https://git.kernel.org/tip/60dcaad5736faff5a6b1abba5a292499f57197fe
Author: Thomas Gleixner 
AuthorDate: Wed, 24 Jul 2019 17:25:52 +0200
Committer:  Thomas Gleixner 
CommitDate: Thu, 25 Jul 2019 16:11:59 +0200

x86/hotplug: Silence APIC and NMI when CPU is dead

In order to support IPI/NMI broadcasting via the shorthand mechanism, the
side effects of shorthands need to be mitigated:

 Shorthand IPIs and NMIs hit all CPUs including unplugged CPUs

Neither of those can be handled on unplugged CPUs for obvious reasons.

It would be trivial to just fully disable the APIC via the enable bit in
MSR_APICBASE. But that's not possible because clearing that bit on systems
based on the 3 wire APIC bus would require a hardware reset to bring it
back as the APIC would lose track of bus arbitration. On systems with FSB
delivery APICBASE could be disabled, but it has to be guaranteed that no
interrupt is sent to the APIC while in that state and it's not clear from
the SDM whether it still responds to INIT/SIPI messages.

Therefore stay on the safe side and switch the APIC into soft disabled mode
so it won't deliver any regular vector to the CPU.

NMIs are still propagated to the 'dead' CPUs. To mitigate that add a check
for the CPU being offline on early nmi entry and if so bail.

Note, this cannot use the stop/restart_nmi() magic which is used in the
alternatives code. A dead CPU cannot invoke nmi_enter() or anything else
due to RCU and other reasons.

Signed-off-by: Thomas Gleixner 
Acked-by: Peter Zijlstra (Intel) 
Link: 
https://lkml.kernel.org/r/alpine.deb.2.21.1907241723290.1...@nanos.tec.linutronix.de

---
 arch/x86/include/asm/apic.h |  1 +
 arch/x86/kernel/apic/apic.c | 35 ---
 arch/x86/kernel/nmi.c   |  3 +++
 arch/x86/kernel/smpboot.c   |  7 ++-
 4 files changed, 34 insertions(+), 12 deletions(-)

diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index f53eda2c986b..cae7e0d02476 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -136,6 +136,7 @@ extern int lapic_get_maxlvt(void);
 extern void clear_local_APIC(void);
 extern void disconnect_bsp_APIC(int virt_wire_setup);
 extern void disable_local_APIC(void);
+extern void apic_soft_disable(void);
 extern void lapic_shutdown(void);
 extern void sync_Arb_IDs(void);
 extern void init_bsp_APIC(void);
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index fe30d1854a4e..831274e3c09f 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1182,25 +1182,38 @@ void clear_local_APIC(void)
 }
 
 /**
- * disable_local_APIC - clear and disable the local APIC
+ * apic_soft_disable - Clears and software disables the local APIC on hotplug
+ *
+ * Contrary to disable_local_APIC() this does not touch the enable bit in
+ * MSR_IA32_APICBASE. Clearing that bit on systems based on the 3 wire APIC
+ * bus would require a hardware reset as the APIC would lose track of bus
+ * arbitration. On systems with FSB delivery APICBASE could be disabled,
+ * but it has to be guaranteed that no interrupt is sent to the APIC while
+ * in that state and it's not clear from the SDM whether it still responds
+ * to INIT/SIPI messages. Stay on the safe side and use software disable.
  */
-void disable_local_APIC(void)
+void apic_soft_disable(void)
 {
-   unsigned int value;
-
-   /* APIC hasn't been mapped yet */
-   if (!x2apic_mode && !apic_phys)
-   return;
+   u32 value;
 
clear_local_APIC();
 
-   /*
-* Disable APIC (implies clearing of registers
-* for 82489DX!).
-*/
+   /* Soft disable APIC (implies clearing of registers for 82489DX!). */
value = apic_read(APIC_SPIV);
value &= ~APIC_SPIV_APIC_ENABLED;
apic_write(APIC_SPIV, value);
+}
+
+/**
+ * disable_local_APIC - clear and disable the local APIC
+ */
+void disable_local_APIC(void)
+{
+   /* APIC hasn't been mapped yet */
+   if (!x2apic_mode && !apic_phys)
+   return;
+
+   apic_soft_disable();
 
 #ifdef CONFIG_X86_32
/*
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 4df7705022b9..e676a9916c49 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -512,6 +512,9 @@ NOKPROBE_SYMBOL(is_debug_stack);
 dotraplinkage notrace void
 do_nmi(struct pt_regs *regs, long error_code)
 {
+   if (IS_ENABLED(CONFIG_SMP) && cpu_is_offline(smp_processor_id()))
+   return;
+
if (this_cpu_read(nmi_state) != NMI_NOT_RUNNING) {
this_cpu_write(nmi_state, NMI_LATCHED);
return;
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index fdbd47ceb84d..c19f8e21b748 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1596,7 +1596,12 @@ int native_cpu_disable(void)
if (ret)
return ret;
 
-   

[tip:x86/apic] x86/cpu: Move arch_smt_update() to a neutral place

2019-07-25 Thread tip-bot for Thomas Gleixner
Commit-ID:  9c92374b631d233abf5bd355cb4253d3d83d5578
Gitweb: https://git.kernel.org/tip/9c92374b631d233abf5bd355cb4253d3d83d5578
Author: Thomas Gleixner 
AuthorDate: Mon, 22 Jul 2019 20:47:17 +0200
Committer:  Thomas Gleixner 
CommitDate: Thu, 25 Jul 2019 16:11:59 +0200

x86/cpu: Move arch_smt_update() to a neutral place

arch_smt_update() will be used to control IPI/NMI broadcasting via the
shorthand mechanism. Keeping it in the bugs file and calling the apic
function from there is possible, but not really intuitive.

Move it to a neutral place and invoke the bugs function from there.

No functional change.

Signed-off-by: Thomas Gleixner 
Acked-by: Peter Zijlstra (Intel) 
Link: https://lkml.kernel.org/r/20190722105219.910317...@linutronix.de

---
 arch/x86/include/asm/bugs.h  | 2 ++
 arch/x86/kernel/cpu/bugs.c   | 2 +-
 arch/x86/kernel/cpu/common.c | 9 +
 3 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/bugs.h b/arch/x86/include/asm/bugs.h
index 542509b53e0f..794eb2129bc6 100644
--- a/arch/x86/include/asm/bugs.h
+++ b/arch/x86/include/asm/bugs.h
@@ -18,4 +18,6 @@ int ppro_with_ram_bug(void);
 static inline int ppro_with_ram_bug(void) { return 0; }
 #endif
 
+extern void cpu_bugs_smt_update(void);
+
 #endif /* _ASM_X86_BUGS_H */
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 66ca906aa790..6d9636c2ca51 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -700,7 +700,7 @@ static void update_mds_branch_idle(void)
 
 #define MDS_MSG_SMT "MDS CPU bug present and SMT on, data leak possible. See 
https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/mds.html for more 
details.\n"
 
-void arch_smt_update(void)
+void cpu_bugs_smt_update(void)
 {
/* Enhanced IBRS implies STIBP. No update required. */
if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 11472178e17f..1ee6598c5d83 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1945,3 +1945,12 @@ void microcode_check(void)
pr_warn("x86/CPU: CPU features have changed after loading microcode, 
but might not take effect.\n");
pr_warn("x86/CPU: Please consider either early loading through 
initrd/built-in or a potential BIOS update.\n");
 }
+
+/*
+ * Invoked from core CPU hotplug code after hotplug operations
+ */
+void arch_smt_update(void)
+{
+   /* Handle the speculative execution misfeatures */
+   cpu_bugs_smt_update();
+}


[tip:x86/apic] x86/apic/uv: Make x2apic_extra_bits static

2019-07-25 Thread tip-bot for Thomas Gleixner
Commit-ID:  82e574782345aa634e1544e80da85d71a9dbde19
Gitweb: https://git.kernel.org/tip/82e574782345aa634e1544e80da85d71a9dbde19
Author: Thomas Gleixner 
AuthorDate: Mon, 22 Jul 2019 20:47:15 +0200
Committer:  Thomas Gleixner 
CommitDate: Thu, 25 Jul 2019 16:11:58 +0200

x86/apic/uv: Make x2apic_extra_bits static

Not used outside of the UV apic source.

Signed-off-by: Thomas Gleixner 
Acked-by: Peter Zijlstra (Intel) 
Link: https://lkml.kernel.org/r/20190722105219.725264...@linutronix.de

---
 arch/x86/include/asm/apic.h| 2 --
 arch/x86/kernel/apic/x2apic_uv_x.c | 2 +-
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index e647aa095867..f53eda2c986b 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -467,8 +467,6 @@ static inline unsigned default_get_apic_id(unsigned long x)
 
 #ifdef CONFIG_X86_64
 extern void apic_send_IPI_self(int vector);
-
-DECLARE_PER_CPU(int, x2apic_extra_bits);
 #endif
 
 extern void generic_bigsmp_probe(void);
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c 
b/arch/x86/kernel/apic/x2apic_uv_x.c
index 73a652093820..e6230af19864 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -22,7 +22,7 @@
 #include 
 #include 
 
-DEFINE_PER_CPU(int, x2apic_extra_bits);
+static DEFINE_PER_CPU(int, x2apic_extra_bits);
 
 static enum uv_system_type uv_system_type;
 static booluv_hubless_system;


[tip:x86/apic] x86/apic: Consolidate the apic local headers

2019-07-25 Thread tip-bot for Thomas Gleixner
Commit-ID:  c94f0718fb1c171d6dfdd69cb6001fa0d8206710
Gitweb: https://git.kernel.org/tip/c94f0718fb1c171d6dfdd69cb6001fa0d8206710
Author: Thomas Gleixner 
AuthorDate: Mon, 22 Jul 2019 20:47:14 +0200
Committer:  Thomas Gleixner 
CommitDate: Thu, 25 Jul 2019 16:11:58 +0200

x86/apic: Consolidate the apic local headers

Now there are three small local headers. Some contain functions which are
only used in one source file.

Move all the inlines and declarations into a single local header and the
inlines which are only used in one source file into that.

Signed-off-by: Thomas Gleixner 
Acked-by: Peter Zijlstra (Intel) 
Link: https://lkml.kernel.org/r/20190722105219.618612...@linutronix.de

---
 arch/x86/kernel/apic/apic_flat_64.c   |  3 +-
 arch/x86/kernel/apic/apic_flat_64.h   |  8 
 arch/x86/kernel/apic/apic_numachip.c  |  3 +-
 arch/x86/kernel/apic/bigsmp_32.c  |  2 +-
 arch/x86/kernel/apic/ipi.c| 14 +-
 arch/x86/kernel/apic/ipi.h| 90 ---
 arch/x86/kernel/apic/local.h  | 63 
 arch/x86/kernel/apic/probe_32.c   |  3 +-
 arch/x86/kernel/apic/probe_64.c   |  2 +-
 arch/x86/kernel/apic/x2apic.h |  9 
 arch/x86/kernel/apic/x2apic_cluster.c |  2 +-
 arch/x86/kernel/apic/x2apic_phys.c|  3 +-
 12 files changed, 83 insertions(+), 119 deletions(-)

diff --git a/arch/x86/kernel/apic/apic_flat_64.c 
b/arch/x86/kernel/apic/apic_flat_64.c
index cfee2e546531..f8594b844637 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -15,8 +15,7 @@
 #include 
 #include 
 
-#include "apic_flat_64.h"
-#include "ipi.h"
+#include "local.h"
 
 static struct apic apic_physflat;
 static struct apic apic_flat;
diff --git a/arch/x86/kernel/apic/apic_flat_64.h 
b/arch/x86/kernel/apic/apic_flat_64.h
deleted file mode 100644
index d3a2b3876ce6..000000000000
--- a/arch/x86/kernel/apic/apic_flat_64.h
+++ /dev/null
@@ -1,8 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_APIC_FLAT_64_H
-#define _ASM_X86_APIC_FLAT_64_H
-
-extern void flat_init_apic_ldr(void);
-
-#endif
-
diff --git a/arch/x86/kernel/apic/apic_numachip.c 
b/arch/x86/kernel/apic/apic_numachip.c
index 09ec9ffb268e..cdf45b4700f2 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -18,8 +18,7 @@
 
 #include 
 
-#include "apic_flat_64.h"
-#include "ipi.h"
+#include "local.h"
 
 u8 numachip_system __read_mostly;
 static const struct apic apic_numachip1;
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
index 2c031b75dfce..9703b552f25a 100644
--- a/arch/x86/kernel/apic/bigsmp_32.c
+++ b/arch/x86/kernel/apic/bigsmp_32.c
@@ -10,7 +10,7 @@
 
 #include 
 
-#include "ipi.h"
+#include "local.h"
 
 static unsigned bigsmp_get_apic_id(unsigned long x)
 {
diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c
index 0f26141d479c..6fa9f6ca7eef 100644
--- a/arch/x86/kernel/apic/ipi.c
+++ b/arch/x86/kernel/apic/ipi.c
@@ -1,10 +1,20 @@
 // SPDX-License-Identifier: GPL-2.0
 
 #include 
+#include 
 
-#include 
+#include "local.h"
 
-#include "ipi.h"
+static inline int __prepare_ICR2(unsigned int mask)
+{
+   return SET_APIC_DEST_FIELD(mask);
+}
+
+static inline void __xapic_wait_icr_idle(void)
+{
+   while (native_apic_mem_read(APIC_ICR) & APIC_ICR_BUSY)
+   cpu_relax();
+}
 
 void __default_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned 
int dest)
 {
diff --git a/arch/x86/kernel/apic/ipi.h b/arch/x86/kernel/apic/ipi.h
deleted file mode 100644
index 8d4911b122f3..000000000000
--- a/arch/x86/kernel/apic/ipi.h
+++ /dev/null
@@ -1,90 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-#ifndef _ASM_X86_IPI_H
-#define _ASM_X86_IPI_H
-
-#ifdef CONFIG_X86_LOCAL_APIC
-
-/*
- * Copyright 2004 James Cleverdon, IBM.
- *
- * Generic APIC InterProcessor Interrupt code.
- *
- * Moved to include file by James Cleverdon from
- * arch/x86-64/kernel/smp.c
- *
- * Copyrights from kernel/smp.c:
- *
- * (c) 1995 Alan Cox, Building #3 
- * (c) 1998-99, 2000 Ingo Molnar 
- * (c) 2002,2003 Andi Kleen, SuSE Labs.
- */
-
-#include 
-#include 
-#include 
-
-/*
- * the following functions deal with sending IPIs between CPUs.
- *
- * We use 'broadcast', CPU->CPU IPIs and self-IPIs too.
- */
-
-static inline unsigned int __prepare_ICR(unsigned int shortcut, int vector,
-unsigned int dest)
-{
-   unsigned int icr = shortcut | dest;
-
-   switch (vector) {
-   default:
-   icr |= APIC_DM_FIXED | vector;
-   break;
-   case NMI_VECTOR:
-   icr |= APIC_DM_NMI;
-   break;
-   }
-   return icr;
-}
-
-static inline int __prepare_ICR2(unsigned int mask)
-{
-   return SET_APIC_DEST_FIELD(mask);
-}
-
-static inline void __xapic_wait_icr_idle(void)
-{
-   while (native_apic_mem_read(APIC_ICR) & APIC_ICR_BUSY)
-  

[tip:x86/apic] x86/apic: Move apic_flat_64 header into apic directory

2019-07-25 Thread tip-bot for Thomas Gleixner
Commit-ID:  ba77b2a02e0099ab0021bc3169b8f674c6be19f0
Gitweb: https://git.kernel.org/tip/ba77b2a02e0099ab0021bc3169b8f674c6be19f0
Author: Thomas Gleixner 
AuthorDate: Mon, 22 Jul 2019 20:47:13 +0200
Committer:  Thomas Gleixner 
CommitDate: Thu, 25 Jul 2019 16:11:58 +0200

x86/apic: Move apic_flat_64 header into apic directory

Only used locally.

Signed-off-by: Thomas Gleixner 
Acked-by: Peter Zijlstra (Intel) 
Link: https://lkml.kernel.org/r/20190722105219.526508...@linutronix.de

---
 arch/x86/kernel/apic/apic_flat_64.c  | 2 +-
 arch/x86/{include/asm => kernel/apic}/apic_flat_64.h | 0
 arch/x86/kernel/apic/apic_numachip.c | 2 +-
 3 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/apic/apic_flat_64.c 
b/arch/x86/kernel/apic/apic_flat_64.c
index a38b1ecc018d..cfee2e546531 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -13,9 +13,9 @@
 #include 
 
 #include 
-#include 
 #include 
 
+#include "apic_flat_64.h"
 #include "ipi.h"
 
 static struct apic apic_physflat;
diff --git a/arch/x86/include/asm/apic_flat_64.h 
b/arch/x86/kernel/apic/apic_flat_64.h
similarity index 100%
rename from arch/x86/include/asm/apic_flat_64.h
rename to arch/x86/kernel/apic/apic_flat_64.h
diff --git a/arch/x86/kernel/apic/apic_numachip.c 
b/arch/x86/kernel/apic/apic_numachip.c
index 7d4c00f4e984..09ec9ffb268e 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -16,9 +16,9 @@
 #include 
 #include 
 
-#include 
 #include 
 
+#include "apic_flat_64.h"
 #include "ipi.h"
 
 u8 numachip_system __read_mostly;


[tip:x86/apic] x86/apic: Move ipi header into apic directory

2019-07-25 Thread tip-bot for Thomas Gleixner
Commit-ID:  8b542da372875373db9688477671151df3418acb
Gitweb: https://git.kernel.org/tip/8b542da372875373db9688477671151df3418acb
Author: Thomas Gleixner 
AuthorDate: Mon, 22 Jul 2019 20:47:12 +0200
Committer:  Thomas Gleixner 
CommitDate: Thu, 25 Jul 2019 16:11:57 +0200

x86/apic: Move ipi header into apic directory

Only used locally.

Signed-off-by: Thomas Gleixner 
Acked-by: Peter Zijlstra (Intel) 
Link: https://lkml.kernel.org/r/20190722105219.434738...@linutronix.de

---
 arch/x86/kernel/apic/apic_flat_64.c | 3 ++-
 arch/x86/kernel/apic/apic_numachip.c| 3 ++-
 arch/x86/kernel/apic/bigsmp_32.c| 9 ++---
 arch/x86/kernel/apic/ipi.c  | 3 ++-
 arch/x86/{include/asm => kernel/apic}/ipi.h | 0
 arch/x86/kernel/apic/probe_32.c | 3 ++-
 arch/x86/kernel/apic/probe_64.c | 3 ++-
 arch/x86/kernel/apic/x2apic_phys.c  | 3 +--
 8 files changed, 13 insertions(+), 14 deletions(-)

diff --git a/arch/x86/kernel/apic/apic_flat_64.c 
b/arch/x86/kernel/apic/apic_flat_64.c
index 8d7242df1fd6..a38b1ecc018d 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -15,7 +15,8 @@
 #include 
 #include 
 #include 
-#include 
+
+#include "ipi.h"
 
 static struct apic apic_physflat;
 static struct apic apic_flat;
diff --git a/arch/x86/kernel/apic/apic_numachip.c 
b/arch/x86/kernel/apic/apic_numachip.c
index e071e8dcb097..7d4c00f4e984 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -18,7 +18,8 @@
 
 #include 
 #include 
-#include 
+
+#include "ipi.h"
 
 u8 numachip_system __read_mostly;
 static const struct apic apic_numachip1;
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
index afee386ff711..2c031b75dfce 100644
--- a/arch/x86/kernel/apic/bigsmp_32.c
+++ b/arch/x86/kernel/apic/bigsmp_32.c
@@ -4,18 +4,13 @@
  *
  * Drives the local APIC in "clustered mode".
  */
-#include 
 #include 
-#include 
-#include 
 #include 
 #include 
 
-#include 
-#include 
-#include 
 #include 
-#include 
+
+#include "ipi.h"
 
 static unsigned bigsmp_get_apic_id(unsigned long x)
 {
diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c
index dad523bbe701..0f26141d479c 100644
--- a/arch/x86/kernel/apic/ipi.c
+++ b/arch/x86/kernel/apic/ipi.c
@@ -3,7 +3,8 @@
 #include 
 
 #include 
-#include 
+
+#include "ipi.h"
 
 void __default_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned 
int dest)
 {
diff --git a/arch/x86/include/asm/ipi.h b/arch/x86/kernel/apic/ipi.h
similarity index 100%
rename from arch/x86/include/asm/ipi.h
rename to arch/x86/kernel/apic/ipi.h
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index 8f3c7f50b0a9..40b786e3427a 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -11,7 +11,8 @@
 
 #include 
 #include 
-#include 
+
+#include "ipi.h"
 
 #ifdef CONFIG_HOTPLUG_CPU
 #define DEFAULT_SEND_IPI   (1)
diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c
index f7bd3f48deb2..6268c487f963 100644
--- a/arch/x86/kernel/apic/probe_64.c
+++ b/arch/x86/kernel/apic/probe_64.c
@@ -9,7 +9,8 @@
  * James Cleverdon.
  */
 #include 
-#include 
+
+#include "ipi.h"
 
 /*
  * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode.
diff --git a/arch/x86/kernel/apic/x2apic_phys.c 
b/arch/x86/kernel/apic/x2apic_phys.c
index e5289a0c595b..3bde4724c1c7 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -3,9 +3,8 @@
 #include 
 #include 
 
-#include 
-
 #include "x2apic.h"
+#include "ipi.h"
 
 int x2apic_phys;
 


[tip:x86/apic] x86/apic: Cleanup the include maze

2019-07-25 Thread tip-bot for Thomas Gleixner
Commit-ID:  521b82fee98c1e334ba3a2459ba3739d459e9e4e
Gitweb: https://git.kernel.org/tip/521b82fee98c1e334ba3a2459ba3739d459e9e4e
Author: Thomas Gleixner 
AuthorDate: Mon, 22 Jul 2019 20:47:11 +0200
Committer:  Thomas Gleixner 
CommitDate: Thu, 25 Jul 2019 16:11:57 +0200

x86/apic: Cleanup the include maze

All of these APIC files include the world and some more. Remove the
unneeded cruft.

Signed-off-by: Thomas Gleixner 
Acked-by: Peter Zijlstra (Intel) 
Link: https://lkml.kernel.org/r/20190722105219.342631...@linutronix.de

---
 arch/x86/kernel/apic/apic_flat_64.c   | 15 ---
 arch/x86/kernel/apic/apic_noop.c  | 18 +-
 arch/x86/kernel/apic/apic_numachip.c  |  6 +++---
 arch/x86/kernel/apic/ipi.c| 17 ++---
 arch/x86/kernel/apic/probe_32.c   | 18 ++
 arch/x86/kernel/apic/probe_64.c   | 11 ---
 arch/x86/kernel/apic/x2apic_cluster.c | 16 +++-
 arch/x86/kernel/apic/x2apic_phys.c|  9 +++--
 arch/x86/kernel/apic/x2apic_uv_x.c| 28 
 9 files changed, 26 insertions(+), 112 deletions(-)

diff --git a/arch/x86/kernel/apic/apic_flat_64.c 
b/arch/x86/kernel/apic/apic_flat_64.c
index bbdca603f94a..8d7242df1fd6 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -8,21 +8,14 @@
  * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and
  * James Cleverdon.
  */
-#include 
-#include 
-#include 
 #include 
-#include 
-#include 
-#include 
-#include 
 #include 
+#include 
 
-#include 
-#include 
-#include 
-#include 
 #include 
+#include 
+#include 
+#include 
 
 static struct apic apic_physflat;
 static struct apic apic_flat;
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
index 5078b5ce63a7..98c9bb75d185 100644
--- a/arch/x86/kernel/apic/apic_noop.c
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -9,25 +9,9 @@
  * to not uglify the caller's code and allow to call (some) apic routines
  * like self-ipi, etc...
  */
-
-#include 
 #include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
 
-#include 
-#include 
-
-#include 
-#include 
-#include 
+#include 
 
 static void noop_init_apic_ldr(void) { }
 static void noop_send_IPI(int cpu, int vector) { }
diff --git a/arch/x86/kernel/apic/apic_numachip.c 
b/arch/x86/kernel/apic/apic_numachip.c
index a5464b8b6c46..e071e8dcb097 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -10,15 +10,15 @@
  * Send feedback to 
  *
  */
-
+#include 
 #include 
 
 #include 
 #include 
-#include 
+
 #include 
 #include 
-#include 
+#include 
 
 u8 numachip_system __read_mostly;
 static const struct apic apic_numachip1;
diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c
index de9764605d31..dad523bbe701 100644
--- a/arch/x86/kernel/apic/ipi.c
+++ b/arch/x86/kernel/apic/ipi.c
@@ -1,21 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0
+
 #include 
-#include 
-
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-
-#include 
-#include 
-#include 
-#include 
+
 #include 
-#include 
 #include 
 
 void __default_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned 
int dest)
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index 1492799b8f43..8f3c7f50b0a9 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -6,26 +6,12 @@
  *
  * Generic x86 APIC driver probe layer.
  */
-#include 
-#include 
 #include 
-#include 
-#include 
-#include 
-#include 
 #include 
-#include 
-#include 
-#include 
-#include 
-#include 
-
-#include 
-#include 
 
-#include 
+#include 
 #include 
-#include 
+#include 
 
 #ifdef CONFIG_HOTPLUG_CPU
 #define DEFAULT_SEND_IPI   (1)
diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c
index e6560a02eb46..f7bd3f48deb2 100644
--- a/arch/x86/kernel/apic/probe_64.c
+++ b/arch/x86/kernel/apic/probe_64.c
@@ -8,19 +8,8 @@
  * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and
  * James Cleverdon.
  */
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-
-#include 
 #include 
 #include 
-#include 
 
 /*
  * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode.
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c 
b/arch/x86/kernel/apic/x2apic_cluster.c
index 609e499387a1..ebde731dc4cf 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -1,14 +1,12 @@
 // SPDX-License-Identifier: GPL-2.0
-#include 
+
+#include 
 #include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-
-#include 
+#include 
+#include 
+
+#include 
+
 #include "x2apic.h"
 
 struct cluster_mask {
diff --git a/arch/x86/kernel/apic/x2apic_phys.c 
b/arch/x86/kernel/apic/x2apic_phys.c
index b5cf9e7b3830..e5289a0c595b 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ 

[tip:x86/apic] x86/apic: Move IPI inlines into ipi.c

2019-07-25 Thread tip-bot for Thomas Gleixner
Commit-ID:  cdc86c9d1f825d13cef85d9ebd3e73572602fb48
Gitweb: https://git.kernel.org/tip/cdc86c9d1f825d13cef85d9ebd3e73572602fb48
Author: Thomas Gleixner 
AuthorDate: Mon, 22 Jul 2019 20:47:10 +0200
Committer:  Thomas Gleixner 
CommitDate: Thu, 25 Jul 2019 16:11:57 +0200

x86/apic: Move IPI inlines into ipi.c

No point in having them in an header file.

Signed-off-by: Thomas Gleixner 
Acked-by: Peter Zijlstra (Intel) 
Link: https://lkml.kernel.org/r/20190722105219.252225...@linutronix.de

---
 arch/x86/include/asm/ipi.h | 19 ---
 arch/x86/kernel/apic/ipi.c | 16 +---
 2 files changed, 13 insertions(+), 22 deletions(-)

diff --git a/arch/x86/include/asm/ipi.h b/arch/x86/include/asm/ipi.h
index f73076be546a..8d4911b122f3 100644
--- a/arch/x86/include/asm/ipi.h
+++ b/arch/x86/include/asm/ipi.h
@@ -71,27 +71,8 @@ extern void default_send_IPI_mask_sequence_phys(const struct 
cpumask *mask,
 extern void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask,
 int vector);
 
-/* Avoid include hell */
-#define NMI_VECTOR 0x02
-
 extern int no_broadcast;
 
-static inline void __default_local_send_IPI_allbutself(int vector)
-{
-   if (no_broadcast || vector == NMI_VECTOR)
-   apic->send_IPI_mask_allbutself(cpu_online_mask, vector);
-   else
-   __default_send_IPI_shortcut(APIC_DEST_ALLBUT, vector, 
apic->dest_logical);
-}
-
-static inline void __default_local_send_IPI_all(int vector)
-{
-   if (no_broadcast || vector == NMI_VECTOR)
-   apic->send_IPI_mask(cpu_online_mask, vector);
-   else
-   __default_send_IPI_shortcut(APIC_DEST_ALLINC, vector, 
apic->dest_logical);
-}
-
 #ifdef CONFIG_X86_32
 extern void default_send_IPI_mask_sequence_logical(const struct cpumask *mask,
 int vector);
diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c
index 82f9244fe61f..de9764605d31 100644
--- a/arch/x86/kernel/apic/ipi.c
+++ b/arch/x86/kernel/apic/ipi.c
@@ -198,15 +198,25 @@ void default_send_IPI_allbutself(int vector)
 * if there are no other CPUs in the system then we get an APIC send
 * error if we try to broadcast, thus avoid sending IPIs in this case.
 */
-   if (!(num_online_cpus() > 1))
+   if (num_online_cpus() < 2)
return;
 
-   __default_local_send_IPI_allbutself(vector);
+   if (no_broadcast || vector == NMI_VECTOR) {
+   apic->send_IPI_mask_allbutself(cpu_online_mask, vector);
+   } else {
+   __default_send_IPI_shortcut(APIC_DEST_ALLBUT, vector,
+   apic->dest_logical);
+   }
 }
 
 void default_send_IPI_all(int vector)
 {
-   __default_local_send_IPI_all(vector);
+   if (no_broadcast || vector == NMI_VECTOR) {
+   apic->send_IPI_mask(cpu_online_mask, vector);
+   } else {
+   __default_send_IPI_shortcut(APIC_DEST_ALLINC, vector,
+   apic->dest_logical);
+   }
 }
 
 void default_send_IPI_self(int vector)


[tip:x86/apic] x86/apic: Make apic_pending_intr_clear() more robust

2019-07-25 Thread tip-bot for Thomas Gleixner
Commit-ID:  cc8bf191378c1da8ad2b99cf470ee70193ace84e
Gitweb: https://git.kernel.org/tip/cc8bf191378c1da8ad2b99cf470ee70193ace84e
Author: Thomas Gleixner 
AuthorDate: Mon, 22 Jul 2019 20:47:09 +0200
Committer:  Thomas Gleixner 
CommitDate: Thu, 25 Jul 2019 16:11:56 +0200

x86/apic: Make apic_pending_intr_clear() more robust

In the course of developing shorthand based IPI support, issues with the
function which tries to clear eventually pending ISR bits in the local APIC
were observed.

  1) 0-day testing triggered the WARN_ON() in apic_pending_intr_clear().

 This warning is emitted when the function fails to clear pending ISR
 bits or observes pending IRR bits which are not delivered to the CPU
 after the stale ISR bit(s) are ACK'ed.

 Unfortunately the function only emits a WARN_ON() and fails to dump
 the IRR/ISR content. That's useless for debugging.

 Feng added spot on debug printk's which revealed that the stale IRR
 bit belonged to the APIC timer interrupt vector, but adding ad hoc
 debug code does not help with sporadic failures in the field.

 Rework the loop so the full IRR/ISR contents are saved and on failure
 dumped.

  2) The loop termination logic is interesting at best.

 If the machine has no TSC or cpu_khz is not known yet it tries 1
 million times to ack stale IRR/ISR bits. What?

 With TSC it uses the TSC to calculate the loop termination. It takes a
 timestamp at entry and terminates the loop when:

  (rdtsc() - start_timestamp) >= (cpu_khz << 10)

 That's roughly one second.

 Both methods are problematic. The APIC has 256 vectors, which means
 that in theory max. 256 IRR/ISR bits can be set. In practice this is
 impossible and the chance that more than a few bits are set is close
 to zero.

 With the pure loop based approach the 1 million retries are complete
 overkill.

 With TSC this can terminate too early in a guest which is running on a
 heavily loaded host even with only a couple of IRR/ISR bits set. The
 reason is that after acknowledging the highest priority ISR bit,
 pending IRRs must get serviced first before the next round of
 acknowledge can take place as the APIC (real and virtualized) does not
 honour EOI without a preceding interrupt on the CPU. And every APIC
 read/write takes a VMEXIT if the APIC is virtualized. While trying to
 reproduce the issue 0-day reported it was observed that the guest was
 scheduled out long enough under heavy load that it terminated after 8
 iterations.

 Make the loop terminate after 512 iterations. That's plenty enough
 in any case and does not take endless time to complete.
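
A condensed, illustrative sketch of the bounded check-and-ack approach described
above (not the exact code added by this patch; apic_read(), ack_APIC_irq() and
pr_warn() are the regular kernel interfaces, the IRR handling and the register
dump on failure are omitted):

#define APIC_IR_REGS	8	/* 8 * 32 bits cover the 256 possible vectors */

static bool isr_pending_sketch(void)
{
	int i;

	for (i = 0; i < APIC_IR_REGS; i++) {
		if (apic_read(APIC_ISR + i * 0x10))
			return true;
	}
	return false;
}

static void pending_intr_clear_sketch(void)
{
	int tries;

	/* 512 iterations are plenty for at most 256 stale vectors */
	for (tries = 0; tries < 512; tries++) {
		if (!isr_pending_sketch())
			return;
		/* EOI the highest priority ISR bit and check again */
		ack_APIC_irq();
	}
	pr_warn("APIC: stale ISR bits could not be cleared\n");
}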

Signed-off-by: Thomas Gleixner 
Acked-by: Peter Zijlstra (Intel) 
Link: https://lkml.kernel.org/r/20190722105219.158847...@linutronix.de

---
 arch/x86/kernel/apic/apic.c | 107 ++--
 1 file changed, 63 insertions(+), 44 deletions(-)

diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 621992de49ee..fe30d1854a4e 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1453,54 +1453,72 @@ static void lapic_setup_esr(void)
oldvalue, value);
 }
 
-static void apic_pending_intr_clear(void)
+#define APIC_IR_REGS   APIC_ISR_NR
+#define APIC_IR_BITS   (APIC_IR_REGS * 32)
+#define APIC_IR_MAPSIZE(APIC_IR_BITS / BITS_PER_LONG)
+
+union apic_ir {
+   unsigned long   map[APIC_IR_MAPSIZE];
+   u32 regs[APIC_IR_REGS];
+};
+
+static bool apic_check_and_ack(union apic_ir *irr, union apic_ir *isr)
 {
-   long long max_loops = cpu_khz ? cpu_khz : 1000000;
-   unsigned long long tsc = 0, ntsc;
-   unsigned int queued;
-   unsigned long value;
-   int i, j, acked = 0;
+   int i, bit;
+
+   /* Read the IRRs */
+   for (i = 0; i < APIC_IR_REGS; i++)
+   irr->regs[i] = apic_read(APIC_IRR + i * 0x10);
+
+   /* Read the ISRs */
+   for (i = 0; i < APIC_IR_REGS; i++)
+   isr->regs[i] = apic_read(APIC_ISR + i * 0x10);
 
-   if (boot_cpu_has(X86_FEATURE_TSC))
-   tsc = rdtsc();
/*
-* After a crash, we no longer service the interrupts and a pending
-* interrupt from previous kernel might still have ISR bit set.
-*
-* Most probably by now CPU has serviced that pending interrupt and
-* it might not have done the ack_APIC_irq() because it thought,
-* interrupt came from i8259 as ExtInt. LAPIC did not get EOI so it
-* does not clear the ISR bit and cpu thinks it has already serivced
-* the interrupt. Hence a vector might get locked. It was noticed
-* for timer irq (vector 0x31). Issue an extra EOI to clear ISR.
+* If the ISR map is not empty. ACK the APIC and run another round
+* to verify whether a pending IRR has been 

[tip:x86/apic] x86/apic: Soft disable APIC before initializing it

2019-07-25 Thread tip-bot for Thomas Gleixner
Commit-ID:  2640da4cccf5cc613bf26f0998b9e340f4b5f69c
Gitweb: https://git.kernel.org/tip/2640da4cccf5cc613bf26f0998b9e340f4b5f69c
Author: Thomas Gleixner 
AuthorDate: Mon, 22 Jul 2019 20:47:08 +0200
Committer:  Thomas Gleixner 
CommitDate: Thu, 25 Jul 2019 16:11:56 +0200

x86/apic: Soft disable APIC before initializing it

If the APIC was already enabled on entry of setup_local_APIC() then
soft disabling it via the SPIV register makes a lot of sense.

That masks all LVT entries and brings it into a well defined state.

Otherwise previously enabled LVTs which are not touched in the setup
function stay unmasked and might surprise the just booting kernel.

Signed-off-by: Thomas Gleixner 
Acked-by: Peter Zijlstra (Intel) 
Link: https://lkml.kernel.org/r/20190722105219.068290...@linutronix.de

---
 arch/x86/kernel/apic/apic.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index fa0846d4e000..621992de49ee 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1522,6 +1522,14 @@ static void setup_local_APIC(void)
return;
}
 
+   /*
+* If this comes from kexec/kcrash the APIC might be enabled in
+* SPIV. Soft disable it before doing further initialization.
+*/
+   value = apic_read(APIC_SPIV);
+   value &= ~APIC_SPIV_APIC_ENABLED;
+   apic_write(APIC_SPIV, value);
+
 #ifdef CONFIG_X86_32
/* Pound the ESR really hard over the head with a big hammer - mbligh */
if (lapic_is_integrated() && apic->disable_esr) {


[tip:x86/apic] x86/apic: Invoke perf_events_lapic_init() after enabling APIC

2019-07-25 Thread tip-bot for Thomas Gleixner
Commit-ID:  39c89dff9c366ad98d2e5598db41ff9b1bdb9e88
Gitweb: https://git.kernel.org/tip/39c89dff9c366ad98d2e5598db41ff9b1bdb9e88
Author: Thomas Gleixner 
AuthorDate: Mon, 22 Jul 2019 20:47:07 +0200
Committer:  Thomas Gleixner 
CommitDate: Thu, 25 Jul 2019 16:11:56 +0200

x86/apic: Invoke perf_events_lapic_init() after enabling APIC

If the APIC is soft disabled then unmasking an LVT entry does not work and
the write is ignored. perf_events_lapic_init() tries to do so.

Move the invocation after the point where the APIC has been enabled.

Signed-off-by: Thomas Gleixner 
Acked-by: Peter Zijlstra (Intel) 
Link: https://lkml.kernel.org/r/20190722105218.962517...@linutronix.de

---
 arch/x86/kernel/apic/apic.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 84032bf81476..fa0846d4e000 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1517,7 +1517,6 @@ static void setup_local_APIC(void)
int logical_apicid, ldr_apicid;
 #endif
 
-
if (disable_apic) {
disable_ioapic_support();
return;
@@ -1532,8 +1531,6 @@ static void setup_local_APIC(void)
apic_write(APIC_ESR, 0);
}
 #endif
-   perf_events_lapic_init();
-
/*
 * Double-check whether this APIC is really registered.
 * This is meaningless in clustered apic mode, so we skip it.
@@ -1617,6 +1614,8 @@ static void setup_local_APIC(void)
value |= SPURIOUS_APIC_VECTOR;
apic_write(APIC_SPIV, value);
 
+   perf_events_lapic_init();
+
/*
 * Set up LVT0, LVT1:
 *


[tip:x86/apic] x86/kgbd: Use NMI_VECTOR not APIC_DM_NMI

2019-07-25 Thread tip-bot for Thomas Gleixner
Commit-ID:  2591bc4e8d70b4e1330d327fb7e3921f4e070a51
Gitweb: https://git.kernel.org/tip/2591bc4e8d70b4e1330d327fb7e3921f4e070a51
Author: Thomas Gleixner 
AuthorDate: Mon, 22 Jul 2019 20:47:06 +0200
Committer:  Thomas Gleixner 
CommitDate: Thu, 25 Jul 2019 16:11:56 +0200

x86/kgbd: Use NMI_VECTOR not APIC_DM_NMI

apic->send_IPI_allbutself() takes a vector number as argument.

APIC_DM_NMI is clearly not a vector number. It's defined to 0x400 which is
outside the vector space.

Use NMI_VECTOR instead as that's what it is intended to be.

Fixes: 82da3ff89dc2 ("x86: kgdb support")
Signed-off-by: Thomas Gleixner 
Acked-by: Peter Zijlstra (Intel) 
Link: https://lkml.kernel.org/r/20190722105218.855189...@linutronix.de

---
 arch/x86/kernel/kgdb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 23297ea64f5f..a53dfb09880f 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -416,7 +416,7 @@ static void kgdb_disable_hw_debug(struct pt_regs *regs)
  */
 void kgdb_roundup_cpus(void)
 {
-   apic->send_IPI_allbutself(APIC_DM_NMI);
+   apic->send_IPI_allbutself(NMI_VECTOR);
 }
 #endif
 


[tip:smp/hotplug] cpu/hotplug: Cache number of online CPUs

2019-07-25 Thread tip-bot for Thomas Gleixner
Commit-ID:  0c09ab96fc820109d63097a2adcbbd20836b655f
Gitweb: https://git.kernel.org/tip/0c09ab96fc820109d63097a2adcbbd20836b655f
Author: Thomas Gleixner 
AuthorDate: Tue, 9 Jul 2019 16:23:40 +0200
Committer:  Thomas Gleixner 
CommitDate: Thu, 25 Jul 2019 15:48:01 +0200

cpu/hotplug: Cache number of online CPUs

Re-evaluating the bitmap weight of the online cpus bitmap in every
invocation of num_online_cpus() over and over is a pretty useless
exercise. Especially when num_online_cpus() is used in code paths
like the IPI delivery of x86 or the membarrier code.

Cache the number of online CPUs in the core and just return the cached
variable. The accessor function provides only a snapshot when used without
protection against concurrent CPU hotplug.

The storage needs to use an atomic_t because the kexec and reboot code
(ab)use set_cpu_online() in their 'shutdown' handlers without any form of
serialization as pointed out by Mathieu. Regular CPU hotplug usage is
properly serialized.
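
A usage note (illustrative, not part of this patch): because the accessor only
returns a snapshot, a caller which needs a value that cannot change underneath
it still has to serialize against CPU hotplug itself, e.g.:

/* example_stable_online_count() is a hypothetical caller */
static void example_stable_online_count(void)
{
	unsigned int ncpus;

	cpus_read_lock();		/* block CPU hotplug */
	ncpus = num_online_cpus();	/* stable until cpus_read_unlock() */
	/* ... size per-CPU work based on ncpus ... */
	cpus_read_unlock();
}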

Signed-off-by: Thomas Gleixner 
Reviewed-by: Mathieu Desnoyers 
Link: 
https://lkml.kernel.org/r/alpine.deb.2.21.1907091622590.1...@nanos.tec.linutronix.de

---
 include/linux/cpumask.h | 25 -
 kernel/cpu.c| 24 
 2 files changed, 40 insertions(+), 9 deletions(-)

diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 0c7db5efe66c..b5a5a1ed9efd 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -10,6 +10,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 /* Don't assign or return these: may not be this big! */
@@ -95,8 +96,21 @@ extern struct cpumask __cpu_active_mask;
 #define cpu_present_mask  ((const struct cpumask *)&__cpu_present_mask)
 #define cpu_active_mask   ((const struct cpumask *)&__cpu_active_mask)
 
+extern atomic_t __num_online_cpus;
+
 #if NR_CPUS > 1
-#define num_online_cpus()  cpumask_weight(cpu_online_mask)
+/**
+ * num_online_cpus() - Read the number of online CPUs
+ *
+ * Despite the fact that __num_online_cpus is of type atomic_t, this
+ * interface gives only a momentary snapshot and is not protected against
+ * concurrent CPU hotplug operations unless invoked from a cpuhp_lock held
+ * region.
+ */
+static inline unsigned int num_online_cpus(void)
+{
+   return atomic_read(&__num_online_cpus);
+}
 #define num_possible_cpus()cpumask_weight(cpu_possible_mask)
 #define num_present_cpus() cpumask_weight(cpu_present_mask)
 #define num_active_cpus()  cpumask_weight(cpu_active_mask)
@@ -821,14 +835,7 @@ set_cpu_present(unsigned int cpu, bool present)
cpumask_clear_cpu(cpu, &__cpu_present_mask);
 }
 
-static inline void
-set_cpu_online(unsigned int cpu, bool online)
-{
-   if (online)
-   cpumask_set_cpu(cpu, &__cpu_online_mask);
-   else
-   cpumask_clear_cpu(cpu, &__cpu_online_mask);
-}
+void set_cpu_online(unsigned int cpu, bool online);
 
 static inline void
 set_cpu_active(unsigned int cpu, bool active)
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 05778e32674a..e1967e9eddc2 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -2298,6 +2298,9 @@ EXPORT_SYMBOL(__cpu_present_mask);
 struct cpumask __cpu_active_mask __read_mostly;
 EXPORT_SYMBOL(__cpu_active_mask);
 
+atomic_t __num_online_cpus __read_mostly;
+EXPORT_SYMBOL(__num_online_cpus);
+
 void init_cpu_present(const struct cpumask *src)
 {
cpumask_copy(&__cpu_present_mask, src);
@@ -2313,6 +2316,27 @@ void init_cpu_online(const struct cpumask *src)
cpumask_copy(&__cpu_online_mask, src);
 }
 
+void set_cpu_online(unsigned int cpu, bool online)
+{
+   /*
+* atomic_inc/dec() is required to handle the horrid abuse of this
+* function by the reboot and kexec code which invoke it from
+* IPI/NMI broadcasts when shutting down CPUs. Invocation from
+* regular CPU hotplug is properly serialized.
+*
+* Note, that the fact that __num_online_cpus is of type atomic_t
+* does not protect readers which are not serialized against
+* concurrent hotplug operations.
+*/
+   if (online) {
+   if (!cpumask_test_and_set_cpu(cpu, &__cpu_online_mask))
+   atomic_inc(&__num_online_cpus);
+   } else {
+   if (cpumask_test_and_clear_cpu(cpu, &__cpu_online_mask))
+   atomic_dec(&__num_online_cpus);
+   }
+}
+
 /*
  * Activate the first processor.
  */


[tip:smp/hotplug] cpumask: Implement cpumask_or_equal()

2019-07-25 Thread tip-bot for Thomas Gleixner
Commit-ID:  b9fa6442f7043e2cdd247905d4f3b80f2e9605cb
Gitweb: https://git.kernel.org/tip/b9fa6442f7043e2cdd247905d4f3b80f2e9605cb
Author: Thomas Gleixner 
AuthorDate: Mon, 22 Jul 2019 20:47:24 +0200
Committer:  Thomas Gleixner 
CommitDate: Thu, 25 Jul 2019 15:47:37 +0200

cpumask: Implement cpumask_or_equal()

The IPI code of x86 needs to evaluate whether the target cpumask is equal
to the cpu_online_mask or equal except for the calling CPU.

To replace the current implementation which requires the usage of a
temporary cpumask, which might involve allocations, add a new function
which compares a cpumask to the result of two other cpumasks which are
or'ed together before comparison.

This allows the required decision to be made in one go, and the calling code
can then check whether the calling CPU is set in the target mask with
cpumask_test_cpu().
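
A sketch of that intended use (hypothetical helper, not part of this patch):
decide whether an IPI target mask covers all online CPUs, possibly except the
calling CPU, without building a temporary cpumask:

/* mask_covers_all_online() is illustrative only */
static bool mask_covers_all_online(const struct cpumask *mask,
				   unsigned int this_cpu, bool *self_included)
{
	/* (*mask | {this_cpu}) == *cpu_online_mask ? */
	if (!cpumask_or_equal(mask, cpumask_of(this_cpu), cpu_online_mask))
		return false;

	/* Distinguish "all online CPUs" from "all but the calling CPU" */
	*self_included = cpumask_test_cpu(this_cpu, mask);
	return true;
}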

Signed-off-by: Thomas Gleixner 
Acked-by: Peter Zijlstra (Intel) 
Link: https://lkml.kernel.org/r/20190722105220.585449...@linutronix.de

---
 include/linux/bitmap.h  | 23 +++
 include/linux/cpumask.h | 14 ++
 lib/bitmap.c| 20 
 3 files changed, 57 insertions(+)

diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index f58e97446abc..90528f12bdfa 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -120,6 +120,10 @@ extern int __bitmap_empty(const unsigned long *bitmap, 
unsigned int nbits);
 extern int __bitmap_full(const unsigned long *bitmap, unsigned int nbits);
 extern int __bitmap_equal(const unsigned long *bitmap1,
  const unsigned long *bitmap2, unsigned int nbits);
+extern bool __pure __bitmap_or_equal(const unsigned long *src1,
+const unsigned long *src2,
+const unsigned long *src3,
+unsigned int nbits);
 extern void __bitmap_complement(unsigned long *dst, const unsigned long *src,
unsigned int nbits);
 extern void __bitmap_shift_right(unsigned long *dst, const unsigned long *src,
@@ -321,6 +325,25 @@ static inline int bitmap_equal(const unsigned long *src1,
return __bitmap_equal(src1, src2, nbits);
 }
 
+/**
+ * bitmap_or_equal - Check whether the or of two bitmaps is equal to a third
+ * @src1:  Pointer to bitmap 1
+ * @src2:  Pointer to bitmap 2 will be or'ed with bitmap 1
+ * @src3:  Pointer to bitmap 3. Compare to the result of *@src1 | *@src2
+ *
+ * Returns: True if (*@src1 | *@src2) == *@src3, false otherwise
+ */
+static inline bool bitmap_or_equal(const unsigned long *src1,
+  const unsigned long *src2,
+  const unsigned long *src3,
+  unsigned int nbits)
+{
+   if (!small_const_nbits(nbits))
+   return __bitmap_or_equal(src1, src2, src3, nbits);
+
+   return !(((*src1 | *src2) ^ *src3) & BITMAP_LAST_WORD_MASK(nbits));
+}
+
 static inline int bitmap_intersects(const unsigned long *src1,
const unsigned long *src2, unsigned int nbits)
 {
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 693124900f0a..0c7db5efe66c 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -475,6 +475,20 @@ static inline bool cpumask_equal(const struct cpumask 
*src1p,
 nr_cpumask_bits);
 }
 
+/**
+ * cpumask_or_equal - *src1p | *src2p == *src3p
+ * @src1p: the first input
+ * @src2p: the second input
+ * @src3p: the third input
+ */
+static inline bool cpumask_or_equal(const struct cpumask *src1p,
+   const struct cpumask *src2p,
+   const struct cpumask *src3p)
+{
+   return bitmap_or_equal(cpumask_bits(src1p), cpumask_bits(src2p),
+  cpumask_bits(src3p), nr_cpumask_bits);
+}
+
 /**
  * cpumask_intersects - (*src1p & *src2p) != 0
  * @src1p: the first input
diff --git a/lib/bitmap.c b/lib/bitmap.c
index bbe2589e8497..f9e834841e94 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -59,6 +59,26 @@ int __bitmap_equal(const unsigned long *bitmap1,
 }
 EXPORT_SYMBOL(__bitmap_equal);
 
+bool __bitmap_or_equal(const unsigned long *bitmap1,
+  const unsigned long *bitmap2,
+  const unsigned long *bitmap3,
+  unsigned int bits)
+{
+   unsigned int k, lim = bits / BITS_PER_LONG;
+   unsigned long tmp;
+
+   for (k = 0; k < lim; ++k) {
+   if ((bitmap1[k] | bitmap2[k]) != bitmap3[k])
+   return false;
+   }
+
+   if (!(bits % BITS_PER_LONG))
+   return true;
+
+   tmp = (bitmap1[k] | bitmap2[k]) ^ bitmap3[k];
+   return (tmp & BITMAP_LAST_WORD_MASK(bits)) == 0;
+}
+
 void __bitmap_complement(unsigned long *dst, const unsigned long *src, 
unsigned int bits)
 {
unsigned 

[tip:smp/hotplug] smp/hotplug: Track booted once CPUs in a cpumask

2019-07-25 Thread tip-bot for Thomas Gleixner
Commit-ID:  e797bda3fd29137f6c151dfa10ea6a61c17895ce
Gitweb: https://git.kernel.org/tip/e797bda3fd29137f6c151dfa10ea6a61c17895ce
Author: Thomas Gleixner 
AuthorDate: Mon, 22 Jul 2019 20:47:16 +0200
Committer:  Thomas Gleixner 
CommitDate: Thu, 25 Jul 2019 15:47:37 +0200

smp/hotplug: Track booted once CPUs in a cpumask

The booted once information which is required to deal with the MCE
broadcast issue on X86 correctly is stored in the per cpu hotplug state,
which is perfectly fine for the intended purpose.

X86 needs that information for supporting NMI broadcasting via shortcuts,
but retrieving it from per cpu data is cumbersome.

Move it to a cpumask so the information can be checked against the
cpu_present_mask quickly.
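
For illustration, the kind of check this enables (hypothetical consumer, not
part of this patch):

/* Shortcuts are only safe once every present CPU has been booted once */
static bool all_present_cpus_booted_once(void)
{
	return cpumask_equal(&cpus_booted_once_mask, cpu_present_mask);
}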

No functional change intended.

Signed-off-by: Thomas Gleixner 
Acked-by: Peter Zijlstra (Intel) 
Link: https://lkml.kernel.org/r/20190722105219.818822...@linutronix.de

---
 include/linux/cpumask.h |  2 ++
 kernel/cpu.c| 11 +++
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 21755471b1c3..693124900f0a 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -115,6 +115,8 @@ extern struct cpumask __cpu_active_mask;
 #define cpu_active(cpu)((cpu) == 0)
 #endif
 
+extern cpumask_t cpus_booted_once_mask;
+
 static inline void cpu_max_bits_warn(unsigned int cpu, unsigned int bits)
 {
 #ifdef CONFIG_DEBUG_PER_CPU_MAPS
diff --git a/kernel/cpu.c b/kernel/cpu.c
index e84c0873559e..05778e32674a 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -62,7 +62,6 @@ struct cpuhp_cpu_state {
boolrollback;
boolsingle;
boolbringup;
-   boolbooted_once;
struct hlist_node   *node;
struct hlist_node   *last;
enum cpuhp_statecb_state;
@@ -76,6 +75,10 @@ static DEFINE_PER_CPU(struct cpuhp_cpu_state, cpuhp_state) = 
{
.fail = CPUHP_INVALID,
 };
 
+#ifdef CONFIG_SMP
+cpumask_t cpus_booted_once_mask;
+#endif
+
 #if defined(CONFIG_LOCKDEP) && defined(CONFIG_SMP)
 static struct lockdep_map cpuhp_state_up_map =
	STATIC_LOCKDEP_MAP_INIT("cpuhp_state-up", &cpuhp_state_up_map);
@@ -433,7 +436,7 @@ static inline bool cpu_smt_allowed(unsigned int cpu)
 * CPU. Otherwise, a broadacasted MCE observing CR4.MCE=0b on any
 * core will shutdown the machine.
 */
-   return !per_cpu(cpuhp_state, cpu).booted_once;
+   return !cpumask_test_cpu(cpu, &cpus_booted_once_mask);
 }
 #else
 static inline bool cpu_smt_allowed(unsigned int cpu) { return true; }
@@ -1066,7 +1069,7 @@ void notify_cpu_starting(unsigned int cpu)
int ret;
 
rcu_cpu_starting(cpu);  /* Enables RCU usage on this CPU. */
-   st->booted_once = true;
+   cpumask_set_cpu(cpu, &cpus_booted_once_mask);
while (st->state < target) {
st->state++;
ret = cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
@@ -2334,7 +2337,7 @@ void __init boot_cpu_init(void)
 void __init boot_cpu_hotplug_init(void)
 {
 #ifdef CONFIG_SMP
-   this_cpu_write(cpuhp_state.booted_once, true);
+   cpumask_set_cpu(smp_processor_id(), &cpus_booted_once_mask);
 #endif
this_cpu_write(cpuhp_state.state, CPUHP_ONLINE);
 }


[tip:x86/urgent] x86/hpet: Undo the early counter is counting check

2019-07-25 Thread tip-bot for Thomas Gleixner
Commit-ID:  643d83f0a3518d6fbcf88f970de0340a5aa6b5a2
Gitweb: https://git.kernel.org/tip/643d83f0a3518d6fbcf88f970de0340a5aa6b5a2
Author: Thomas Gleixner 
AuthorDate: Thu, 25 Jul 2019 08:28:45 +0200
Committer:  Thomas Gleixner 
CommitDate: Thu, 25 Jul 2019 12:21:32 +0200

x86/hpet: Undo the early counter is counting check

Rui reported that on a Pentium D machine which has HPET forced enabled
because it is not advertised by ACPI, the early 'counter is counting' check
leads to a silent boot hang.

The reason is that the ordering of checking the counter first and then
reconfiguring the HPET fails to work on that machine. As the HPET is not
advertised and presumably not initialized by the BIOS the early enable and
the following reconfiguration seems to bring it into a broken state. Adding
clocksource=jiffies to the command line results in the following
clocksource watchdog warning:

  clocksource: timekeeping watchdog on CPU1:
  Marking clocksource 'tsc-early' as unstable because the skew is too large:
  clocksource:  'hpet' wd_now: 33 wd_last: 33 mask: 

That clearly shows that the HPET is not counting after it got reconfigured
and reenabled. If the counter is not working then the HPET timer is not
expiring either, which explains the boot hang.

Move the 'counter is counting' check after the full configuration again to
unbreak these systems.

Reported-by: Rui Salvaterra 
Fixes: 3222daf970f3 ("x86/hpet: Separate counter check out of clocksource 
register code")
Signed-off-by: Thomas Gleixner 
Tested-by: Rui Salvaterra 
Link: 
https://lkml.kernel.org/r/alpine.deb.2.21.1907250810530.1...@nanos.tec.linutronix.de

---
 arch/x86/kernel/hpet.c | 12 
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index c43e96a938d0..c6f791bc481e 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -827,10 +827,6 @@ int __init hpet_enable(void)
if (!hpet_cfg_working())
goto out_nohpet;
 
-   /* Validate that the counter is counting */
-   if (!hpet_counting())
-   goto out_nohpet;
-
/*
 * Read the period and check for a sane value:
 */
@@ -896,6 +892,14 @@ int __init hpet_enable(void)
}
hpet_print_config();
 
+   /*
+* Validate that the counter is counting. This needs to be done
+* after sanitizing the config registers to properly deal with
+* force enabled HPETs.
+*/
+   if (!hpet_counting())
+   goto out_nohpet;
+
	clocksource_register_hz(&clocksource_hpet, (u32)hpet_freq);
 
if (id & HPET_ID_LEGSUP) {


[tip:smp/hotplug] cpu/hotplug: Cache number of online CPUs

2019-07-22 Thread tip-bot for Thomas Gleixner
Commit-ID:  3f70915f5be935a145d11b5f46a26627066b6261
Gitweb: https://git.kernel.org/tip/3f70915f5be935a145d11b5f46a26627066b6261
Author: Thomas Gleixner 
AuthorDate: Tue, 9 Jul 2019 16:23:40 +0200
Committer:  Thomas Gleixner 
CommitDate: Mon, 22 Jul 2019 09:52:21 +0200

cpu/hotplug: Cache number of online CPUs

Re-evaluating the bitmap weight of the online cpus bitmap in every
invocation of num_online_cpus() over and over is a pretty useless
exercise. Especially when num_online_cpus() is used in code paths
like the IPI delivery of x86 or the membarrier code.

Cache the number of online CPUs in the core and just return the cached
variable. The accessor function provides only a snapshot when used without
protection against concurrent CPU hotplug.

The storage needs to use an atomic_t because the kexec and reboot code
(ab)use set_cpu_online() in their 'shutdown' handlers without any form of
serialization as pointed out by Mathieu. Regular CPU hotplug usage is
properly serialized.

Signed-off-by: Thomas Gleixner 
Reviewed-by: Mathieu Desnoyers 
Link: 
https://lkml.kernel.org/r/alpine.deb.2.21.1907091622590.1...@nanos.tec.linutronix.de

---
 include/linux/cpumask.h | 25 -
 kernel/cpu.c| 24 
 2 files changed, 40 insertions(+), 9 deletions(-)

diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 21755471b1c3..fdd627dbfa3a 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -10,6 +10,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 /* Don't assign or return these: may not be this big! */
@@ -95,8 +96,21 @@ extern struct cpumask __cpu_active_mask;
 #define cpu_present_mask  ((const struct cpumask *)&__cpu_present_mask)
 #define cpu_active_mask   ((const struct cpumask *)&__cpu_active_mask)
 
+extern atomic_t __num_online_cpus;
+
 #if NR_CPUS > 1
-#define num_online_cpus()  cpumask_weight(cpu_online_mask)
+/**
+ * num_online_cpus() - Read the number of online CPUs
+ *
+ * Despite the fact that __num_online_cpus is of type atomic_t, this
+ * interface gives only a momentary snapshot and is not protected against
+ * concurrent CPU hotplug operations unless invoked from a cpuhp_lock held
+ * region.
+ */
+static inline unsigned int num_online_cpus(void)
+{
+   return atomic_read(&__num_online_cpus);
+}
 #define num_possible_cpus()cpumask_weight(cpu_possible_mask)
 #define num_present_cpus() cpumask_weight(cpu_present_mask)
 #define num_active_cpus()  cpumask_weight(cpu_active_mask)
@@ -805,14 +819,7 @@ set_cpu_present(unsigned int cpu, bool present)
cpumask_clear_cpu(cpu, &__cpu_present_mask);
 }
 
-static inline void
-set_cpu_online(unsigned int cpu, bool online)
-{
-   if (online)
-   cpumask_set_cpu(cpu, &__cpu_online_mask);
-   else
-   cpumask_clear_cpu(cpu, &__cpu_online_mask);
-}
+void set_cpu_online(unsigned int cpu, bool online);
 
 static inline void
 set_cpu_active(unsigned int cpu, bool active)
diff --git a/kernel/cpu.c b/kernel/cpu.c
index e84c0873559e..3bf9881a3be5 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -2295,6 +2295,9 @@ EXPORT_SYMBOL(__cpu_present_mask);
 struct cpumask __cpu_active_mask __read_mostly;
 EXPORT_SYMBOL(__cpu_active_mask);
 
+atomic_t __num_online_cpus __read_mostly;
+EXPORT_SYMBOL(__num_online_cpus);
+
 void init_cpu_present(const struct cpumask *src)
 {
cpumask_copy(&__cpu_present_mask, src);
@@ -2310,6 +2313,27 @@ void init_cpu_online(const struct cpumask *src)
cpumask_copy(&__cpu_online_mask, src);
 }
 
+void set_cpu_online(unsigned int cpu, bool online)
+{
+   /*
+* atomic_inc/dec() is required to handle the horrid abuse of this
+* function by the reboot and kexec code which invoke it from
+* IPI/NMI broadcasts when shutting down CPUs. Invocation from
+* regular CPU hotplug is properly serialized.
+*
+* Note, that the fact that __num_online_cpus is of type atomic_t
+* does not protect readers which are not serialized against
+* concurrent hotplug operations.
+*/
+   if (online) {
+   if (!cpumask_test_and_set_cpu(cpu, &__cpu_online_mask))
+   atomic_inc(&__num_online_cpus);
+   } else {
+   if (cpumask_test_and_clear_cpu(cpu, &__cpu_online_mask))
+   atomic_dec(&__num_online_cpus);
+   }
+}
+
 /*
  * Activate the first processor.
  */


[tip:x86/urgent] x86/entry/64: Prevent clobbering of saved CR2 value

2019-07-20 Thread tip-bot for Thomas Gleixner
Commit-ID:  6879298bd0673840cadd1fb36d7225485504ceb4
Gitweb: https://git.kernel.org/tip/6879298bd0673840cadd1fb36d7225485504ceb4
Author: Thomas Gleixner 
AuthorDate: Sat, 20 Jul 2019 10:56:41 +0200
Committer:  Thomas Gleixner 
CommitDate: Sat, 20 Jul 2019 14:28:41 +0200

x86/entry/64: Prevent clobbering of saved CR2 value

The recent fix for CR2 corruption introduced a new way to reliably corrupt
the saved CR2 value.

CR2 is saved early in the entry code in RDX, which is the third argument to
the fault handling functions. But it missed that between saving and
invoking the fault handler enter_from_user_mode() can be called. RDX is a
caller saved register so the invoked function can freely clobber it with
the obvious consequences.

The TRACE_IRQS_OFF call is safe as it calls through the thunk which
preserves RDX, but TRACE_IRQS_OFF_DEBUG is not because it also calls into
C-code outside of the thunk.

Store CR2 in R12 instead which is a callee saved register and move R12 to
RDX just before calling the fault handler.

Fixes: a0d14b8909de ("x86/mm, tracing: Fix CR2 corruption")
Reported-by: Sean Christopherson 
Signed-off-by: Thomas Gleixner 
Acked-by: Peter Zijlstra (Intel) 
Link: 
https://lkml.kernel.org/r/alpine.deb.2.21.1907201020540.1...@nanos.tec.linutronix.de

---
 arch/x86/entry/entry_64.S | 11 ++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 7cb2e1f1ec09..f7c70c1bee8b 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -875,7 +875,12 @@ apicinterrupt IRQ_WORK_VECTOR  
irq_work_interrupt  smp_irq_work_interrupt
UNWIND_HINT_REGS
 
.if \read_cr2
-   GET_CR2_INTO(%rdx); /* can clobber %rax */
+   /*
+* Store CR2 early so subsequent faults cannot clobber it. Use R12 as
+* intermediate storage as RDX can be clobbered in 
enter_from_user_mode().
+* GET_CR2_INTO can clobber RAX.
+*/
+   GET_CR2_INTO(%r12);
.endif
 
.if \shift_ist != -1
@@ -904,6 +909,10 @@ apicinterrupt IRQ_WORK_VECTOR  
irq_work_interrupt  smp_irq_work_interrupt
subq$\ist_offset, CPU_TSS_IST(\shift_ist)
.endif
 
+   .if \read_cr2
+   movq%r12, %rdx  /* Move CR2 into 3rd argument */
+   .endif
+
call\do_sym
 
.if \shift_ist != -1


[tip:sched/urgent] sched/rt, Kconfig: Introduce CONFIG_PREEMPT_RT

2019-07-18 Thread tip-bot for Thomas Gleixner
Commit-ID:  a50a3f4b6a313dc76912bd4ad3b8b4f4b479c801
Gitweb: https://git.kernel.org/tip/a50a3f4b6a313dc76912bd4ad3b8b4f4b479c801
Author: Thomas Gleixner 
AuthorDate: Wed, 17 Jul 2019 22:01:49 +0200
Committer:  Ingo Molnar 
CommitDate: Thu, 18 Jul 2019 23:10:57 +0200

sched/rt, Kconfig: Introduce CONFIG_PREEMPT_RT

Add a new entry to the preemption menu which enables the real-time support
for the kernel. The choice is only enabled when an architecture supports
it.

It selects PREEMPT as the RT features depend on it. To achieve that, the
existing PREEMPT choice is renamed to PREEMPT_LL, which selects PREEMPT as
well.

No functional change.

Signed-off-by: Thomas Gleixner 
Acked-by: Paul E. McKenney 
Acked-by: Steven Rostedt (VMware) 
Acked-by: Clark Williams 
Acked-by: Daniel Bristot de Oliveira 
Acked-by: Frederic Weisbecker 
Acked-by: Peter Zijlstra (Intel) 
Acked-by: Marc Zyngier 
Acked-by: Daniel Wagner 
Acked-by: Luis Claudio R. Goncalves 
Acked-by: Julia Cartwright 
Acked-by: Tom Zanussi 
Acked-by: Gratian Crisan 
Acked-by: Sebastian Siewior 
Cc: Andrew Morton 
Cc: Christoph Hellwig 
Cc: Greg Kroah-Hartman 
Cc: Linus Torvalds 
Cc: Lukas Bulwahn 
Cc: Mike Galbraith 
Cc: Tejun Heo 
Link: 
http://lkml.kernel.org/r/alpine.deb.2.21.1907172200190.1...@nanos.tec.linutronix.de
Signed-off-by: Ingo Molnar 
---
 arch/Kconfig   |  3 +++
 kernel/Kconfig.preempt | 25 +++--
 2 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index c47b328eada0..ada51f36bd5d 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -801,6 +801,9 @@ config ARCH_NO_COHERENT_DMA_MMAP
 config ARCH_NO_PREEMPT
bool
 
+config ARCH_SUPPORTS_RT
+   bool
+
 config CPU_NO_EFFICIENT_FFS
def_bool n
 
diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt
index dc0b682ec2d9..fc020c09b7e8 100644
--- a/kernel/Kconfig.preempt
+++ b/kernel/Kconfig.preempt
@@ -35,10 +35,10 @@ config PREEMPT_VOLUNTARY
 
  Select this if you are building a kernel for a desktop system.
 
-config PREEMPT
+config PREEMPT_LL
bool "Preemptible Kernel (Low-Latency Desktop)"
depends on !ARCH_NO_PREEMPT
-   select PREEMPT_COUNT
+   select PREEMPT
select UNINLINE_SPIN_UNLOCK if !ARCH_INLINE_SPIN_UNLOCK
help
  This option reduces the latency of the kernel by making
@@ -55,7 +55,28 @@ config PREEMPT
  embedded system with latency requirements in the milliseconds
  range.
 
+config PREEMPT_RT
+   bool "Fully Preemptible Kernel (Real-Time)"
+   depends on EXPERT && ARCH_SUPPORTS_RT
+   select PREEMPT
+   help
+ This option turns the kernel into a real-time kernel by replacing
+ various locking primitives (spinlocks, rwlocks, etc.) with
+ preemptible priority-inheritance aware variants, enforcing
+ interrupt threading and introducing mechanisms to break up long
+ non-preemptible sections. This makes the kernel, except for very
+ low level and critical code pathes (entry code, scheduler, low
+ level interrupt handling) fully preemptible and brings most
+ execution contexts under scheduler control.
+
+ Select this if you are building a kernel for systems which
+ require real-time guarantees.
+
 endchoice
 
 config PREEMPT_COUNT
bool
+
+config PREEMPT
+   bool
+   select PREEMPT_COUNT


[tip:sched/urgent] sched/rt, Kconfig: Introduce CONFIG_PREEMPT_RT

2019-07-18 Thread tip-bot for Thomas Gleixner
Commit-ID:  a003296b7a97dedccb0c35c4c1e44e088050b8c4
Gitweb: https://git.kernel.org/tip/a003296b7a97dedccb0c35c4c1e44e088050b8c4
Author: Thomas Gleixner 
AuthorDate: Wed, 17 Jul 2019 22:01:49 +0200
Committer:  Ingo Molnar 
CommitDate: Thu, 18 Jul 2019 19:47:11 +0200

sched/rt, Kconfig: Introduce CONFIG_PREEMPT_RT

Add a new entry to the preemption menu which enables the real-time support
for the kernel. The choice is only enabled when an architecture supports
it.

It selects PREEMPT as the RT features depend on it. To achieve that, the
existing PREEMPT choice is renamed to PREEMPT_LL, which selects PREEMPT as
well.

No functional change.

Signed-off-by: Thomas Gleixner 
Acked-by: Paul E. McKenney 
Acked-by: Steven Rostedt (VMware) 
Acked-by: Clark Williams 
Acked-by: Daniel Bristot de Oliveira 
Acked-by: Frederic Weisbecker 
Acked-by: Peter Zijlstra (Intel) 
Acked-by: Marc Zyngier 
Acked-by: Daniel Wagner 
Acked-by: Luis Claudio R. Goncalves 
Acked-by: Julia Cartwright 
Cc: Andrew Morton 
Cc: Christoph Hellwig 
Cc: Clark Williams 
Cc: Gratian Crisan 
Cc: Greg Kroah-Hartman 
Cc: Linus Torvalds 
Cc: Lukas Bulwahn 
Cc: Mike Galbraith 
Cc: Paul McKenney 
Cc: Peter Zijlstra 
Cc: Sebastian Siewior 
Cc: Tejun Heo 
Cc: Tom Zanussi 
Link: 
http://lkml.kernel.org/r/alpine.deb.2.21.1907172200190.1...@nanos.tec.linutronix.de
Signed-off-by: Ingo Molnar 
---
 arch/Kconfig   |  3 +++
 kernel/Kconfig.preempt | 25 +++--
 2 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index c47b328eada0..ada51f36bd5d 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -801,6 +801,9 @@ config ARCH_NO_COHERENT_DMA_MMAP
 config ARCH_NO_PREEMPT
bool
 
+config ARCH_SUPPORTS_RT
+   bool
+
 config CPU_NO_EFFICIENT_FFS
def_bool n
 
diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt
index dc0b682ec2d9..fc020c09b7e8 100644
--- a/kernel/Kconfig.preempt
+++ b/kernel/Kconfig.preempt
@@ -35,10 +35,10 @@ config PREEMPT_VOLUNTARY
 
  Select this if you are building a kernel for a desktop system.
 
-config PREEMPT
+config PREEMPT_LL
bool "Preemptible Kernel (Low-Latency Desktop)"
depends on !ARCH_NO_PREEMPT
-   select PREEMPT_COUNT
+   select PREEMPT
select UNINLINE_SPIN_UNLOCK if !ARCH_INLINE_SPIN_UNLOCK
help
  This option reduces the latency of the kernel by making
@@ -55,7 +55,28 @@ config PREEMPT
  embedded system with latency requirements in the milliseconds
  range.
 
+config PREEMPT_RT
+   bool "Fully Preemptible Kernel (Real-Time)"
+   depends on EXPERT && ARCH_SUPPORTS_RT
+   select PREEMPT
+   help
+ This option turns the kernel into a real-time kernel by replacing
+ various locking primitives (spinlocks, rwlocks, etc.) with
+ preemptible priority-inheritance aware variants, enforcing
+ interrupt threading and introducing mechanisms to break up long
+ non-preemptible sections. This makes the kernel, except for very
+ low level and critical code paths (entry code, scheduler, low
+ level interrupt handling) fully preemptible and brings most
+ execution contexts under scheduler control.
+
+ Select this if you are building a kernel for systems which
+ require real-time guarantees.
+
 endchoice
 
 config PREEMPT_COUNT
bool
+
+config PREEMPT
+   bool
+   select PREEMPT_COUNT


[tip:sched/urgent] sched/rt, Kconfig: Introduce CONFIG_PREEMPT_RT

2019-07-18 Thread tip-bot for Thomas Gleixner
Commit-ID:  2c2ffb925b368a1f00d4ddcc837f830394861d6c
Gitweb: https://git.kernel.org/tip/2c2ffb925b368a1f00d4ddcc837f830394861d6c
Author: Thomas Gleixner 
AuthorDate: Wed, 17 Jul 2019 22:01:49 +0200
Committer:  Ingo Molnar 
CommitDate: Thu, 18 Jul 2019 14:53:32 +0200

sched/rt, Kconfig: Introduce CONFIG_PREEMPT_RT

Add a new entry to the preemption menu which enables the real-time support
for the kernel. The choice is only enabled when an architecture supports
it.

It selects PREEMPT as the RT features depend on it. To achieve that, the
existing PREEMPT choice is renamed to PREEMPT_LL, which selects PREEMPT as
well.

No functional change.

Signed-off-by: Thomas Gleixner 
Acked-by: Paul E. McKenney 
Acked-by: Steven Rostedt (VMware) 
Acked-by: Clark Williams 
Acked-by: Daniel Bristot de Oliveira 
Acked-by: Frederic Weisbecker 
Acked-by: Peter Zijlstra (Intel) 
Acked-by: Marc Zyngier 
Acked-by: Daniel Wagner 
Acked-by: Luis Claudio R. Goncalves 
Cc: Andrew Morton 
Cc: Christoph Hellwig 
Cc: Clark Williams 
Cc: Gratian Crisan 
Cc: Greg Kroah-Hartman 
Cc: Julia Cartwright 
Cc: Linus Torvalds 
Cc: Lukas Bulwahn 
Cc: Mike Galbraith 
Cc: Paul McKenney 
Cc: Peter Zijlstra 
Cc: Sebastian Siewior 
Cc: Tejun Heo 
Cc: Tom Zanussi 
Link: 
http://lkml.kernel.org/r/alpine.deb.2.21.1907172200190.1...@nanos.tec.linutronix.de
Signed-off-by: Ingo Molnar 
---
 arch/Kconfig   |  3 +++
 kernel/Kconfig.preempt | 25 +++--
 2 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index c47b328eada0..ada51f36bd5d 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -801,6 +801,9 @@ config ARCH_NO_COHERENT_DMA_MMAP
 config ARCH_NO_PREEMPT
bool
 
+config ARCH_SUPPORTS_RT
+   bool
+
 config CPU_NO_EFFICIENT_FFS
def_bool n
 
diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt
index dc0b682ec2d9..fc020c09b7e8 100644
--- a/kernel/Kconfig.preempt
+++ b/kernel/Kconfig.preempt
@@ -35,10 +35,10 @@ config PREEMPT_VOLUNTARY
 
  Select this if you are building a kernel for a desktop system.
 
-config PREEMPT
+config PREEMPT_LL
bool "Preemptible Kernel (Low-Latency Desktop)"
depends on !ARCH_NO_PREEMPT
-   select PREEMPT_COUNT
+   select PREEMPT
select UNINLINE_SPIN_UNLOCK if !ARCH_INLINE_SPIN_UNLOCK
help
  This option reduces the latency of the kernel by making
@@ -55,7 +55,28 @@ config PREEMPT
  embedded system with latency requirements in the milliseconds
  range.
 
+config PREEMPT_RT
+   bool "Fully Preemptible Kernel (Real-Time)"
+   depends on EXPERT && ARCH_SUPPORTS_RT
+   select PREEMPT
+   help
+ This option turns the kernel into a real-time kernel by replacing
+ various locking primitives (spinlocks, rwlocks, etc.) with
+ preemptible priority-inheritance aware variants, enforcing
+ interrupt threading and introducing mechanisms to break up long
+ non-preemptible sections. This makes the kernel, except for very
+ low level and critical code paths (entry code, scheduler, low
+ level interrupt handling) fully preemptible and brings most
+ execution contexts under scheduler control.
+
+ Select this if you are building a kernel for systems which
+ require real-time guarantees.
+
 endchoice
 
 config PREEMPT_COUNT
bool
+
+config PREEMPT
+   bool
+   select PREEMPT_COUNT


[tip:x86/urgent] x86/asm: Move native_write_cr0/4() out of line

2019-07-10 Thread tip-bot for Thomas Gleixner
Commit-ID:  7652ac92018536eb807b6c2130100c85f1ba7e3b
Gitweb: https://git.kernel.org/tip/7652ac92018536eb807b6c2130100c85f1ba7e3b
Author: Thomas Gleixner 
AuthorDate: Wed, 10 Jul 2019 21:42:46 +0200
Committer:  Thomas Gleixner 
CommitDate: Wed, 10 Jul 2019 22:15:05 +0200

x86/asm: Move native_write_cr0/4() out of line

The pinning of sensitive CR0 and CR4 bits caused a boot crash when loading
the kvm_intel module on a kernel compiled with CONFIG_PARAVIRT=n.

The reason is that the static key which controls the pinning is marked RO
after init. The kvm_intel module contains a CR4 write which requires
updating the static key entry list. That obviously does not work when the
key is in an RO section.

With CONFIG_PARAVIRT enabled this does not happen because the CR4 write
uses the paravirt indirection and the actual write function is built in.

As the key is intended to be immutable after init, move
native_write_cr0/4() out of line.

While at it, consolidate the update of the cr4 shadow variable and store the
value right away when the pinning is initialized on a booting CPU. There is
no point in reading it back 20 instructions later. This allows the static
key and the pinning variable to be confined to cpu/common and marked static.
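
For reference, a simplified sketch of the resulting out-of-line CR0 variant
in cpu/common.c (the CR4 counterpart follows the same pattern):

  void native_write_cr0(unsigned long val)
  {
          unsigned long bits_missing = 0;

  set_register:
          asm volatile("mov %0,%%cr0": "+r" (val), "+m" (__force_order));

          if (static_branch_likely(&cr_pinning)) {
                  if (unlikely((val & X86_CR0_WP) != X86_CR0_WP)) {
                          bits_missing = X86_CR0_WP;
                          val |= bits_missing;
                          goto set_register;
                  }
                  /* Warn after the missing bits have been set. */
                  WARN_ONCE(bits_missing, "CR0 WP bit went missing!?\n");
          }
  }
  EXPORT_SYMBOL(native_write_cr0);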

Fixes: 8dbec27a242c ("x86/asm: Pin sensitive CR0 bits")
Fixes: 873d50d58f67 ("x86/asm: Pin sensitive CR4 bits")
Reported-by: Linus Torvalds 
Reported-by: Xi Ruoyao 
Signed-off-by: Thomas Gleixner 
Tested-by: Xi Ruoyao 
Acked-by: Kees Cook 
Acked-by: Peter Zijlstra (Intel) 
Link: 
https://lkml.kernel.org/r/alpine.deb.2.21.1907102140340.1...@nanos.tec.linutronix.de

---
 arch/x86/include/asm/processor.h |  1 +
 arch/x86/include/asm/special_insns.h | 41 +---
 arch/x86/kernel/cpu/common.c | 72 
 arch/x86/kernel/smpboot.c| 14 +--
 arch/x86/xen/smp_pv.c|  1 +
 5 files changed, 61 insertions(+), 68 deletions(-)

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 3eab6ece52b4..6e0a3b43d027 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -741,6 +741,7 @@ extern void load_direct_gdt(int);
 extern void load_fixmap_gdt(int);
 extern void load_percpu_segment(int);
 extern void cpu_init(void);
+extern void cr4_init(void);
 
 static inline unsigned long get_debugctlmsr(void)
 {
diff --git a/arch/x86/include/asm/special_insns.h 
b/arch/x86/include/asm/special_insns.h
index b2e84d113f2a..219be88a59d2 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -18,9 +18,7 @@
  */
 extern unsigned long __force_order;
 
-/* Starts false and gets enabled once CPU feature detection is done. */
-DECLARE_STATIC_KEY_FALSE(cr_pinning);
-extern unsigned long cr4_pinned_bits;
+void native_write_cr0(unsigned long val);
 
 static inline unsigned long native_read_cr0(void)
 {
@@ -29,24 +27,6 @@ static inline unsigned long native_read_cr0(void)
return val;
 }
 
-static inline void native_write_cr0(unsigned long val)
-{
-   unsigned long bits_missing = 0;
-
-set_register:
-   asm volatile("mov %0,%%cr0": "+r" (val), "+m" (__force_order));
-
-   if (static_branch_likely(&cr_pinning)) {
-   if (unlikely((val & X86_CR0_WP) != X86_CR0_WP)) {
-   bits_missing = X86_CR0_WP;
-   val |= bits_missing;
-   goto set_register;
-   }
-   /* Warn after we've set the missing bits. */
-   WARN_ONCE(bits_missing, "CR0 WP bit went missing!?\n");
-   }
-}
-
 static inline unsigned long native_read_cr2(void)
 {
unsigned long val;
@@ -91,24 +71,7 @@ static inline unsigned long native_read_cr4(void)
return val;
 }
 
-static inline void native_write_cr4(unsigned long val)
-{
-   unsigned long bits_missing = 0;
-
-set_register:
-   asm volatile("mov %0,%%cr4": "+r" (val), "+m" (cr4_pinned_bits));
-
-   if (static_branch_likely(&cr_pinning)) {
-   if (unlikely((val & cr4_pinned_bits) != cr4_pinned_bits)) {
-   bits_missing = ~val & cr4_pinned_bits;
-   val |= bits_missing;
-   goto set_register;
-   }
-   /* Warn after we've set the missing bits. */
-   WARN_ONCE(bits_missing, "CR4 bits went missing: %lx!?\n",
- bits_missing);
-   }
-}
+void native_write_cr4(unsigned long val);
 
 #ifdef CONFIG_X86_64
 static inline unsigned long native_read_cr8(void)
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 309b6b9b49d4..11472178e17f 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -366,10 +366,62 @@ out:
cr4_clear_bits(X86_CR4_UMIP);
 }
 
-DEFINE_STATIC_KEY_FALSE_RO(cr_pinning);
-EXPORT_SYMBOL(cr_pinning);
-unsigned long cr4_pinned_bits __ro_after_init;

[tip:x86/apic] x86/irq: Seperate unused system vectors from spurious entry again

2019-07-03 Thread tip-bot for Thomas Gleixner
Commit-ID:  f8a8fe61fec8006575699559ead88b0b833d5cad
Gitweb: https://git.kernel.org/tip/f8a8fe61fec8006575699559ead88b0b833d5cad
Author: Thomas Gleixner 
AuthorDate: Fri, 28 Jun 2019 13:11:54 +0200
Committer:  Thomas Gleixner 
CommitDate: Wed, 3 Jul 2019 10:12:31 +0200

x86/irq: Seperate unused system vectors from spurious entry again

Quite some time ago the interrupt entry stubs for unused vectors in the
system vector range got removed and directly mapped to the spurious
interrupt vector entry point.

Sounds reasonable, but it's subtly broken. The spurious interrupt vector
entry point pushes vector number 0xFF on the stack which makes the whole
logic in __smp_spurious_interrupt() pointless.

As a consequence any spurious interrupt which comes from a vector != 0xFF
is treated as a real spurious interrupt (vector 0xFF) and not
acknowledged. That subsequently stalls all interrupt vectors of equal and
lower priority, which brings the system to a grinding halt.

This can happen because even on 64-bit the system vector space is not
guaranteed to be fully populated. A full compile time handling of the
unused vectors is not possible because quite some of them are conditonally
populated at runtime.

Bring the entry stubs back, which wastes 160 bytes if all stubs are unused,
but gains the proper handling back. There is no point in selectively sparing
some of the stubs which are known at compile time, as the required code in
the IDT management would be way larger and more convoluted.

Do not route the spurious entries through common_interrupt and do_IRQ() as
the original code did. Route it to smp_spurious_interrupt() which evaluates
the vector number and acts accordingly now that the real vector numbers are
handed in.

Fix up the pr_warn so the actual spurious vector (0xff) is clearly
distinguished from the other vectors and also note for the vectored case
whether it was pending in the ISR or not.

 "Spurious APIC interrupt (vector 0xFF) on CPU#0, should never happen."
 "Spurious interrupt vector 0xed on CPU#1. Acked."
 "Spurious interrupt vector 0xee on CPU#1. Not pending!."

Fixes: 2414e021ac8d ("x86: Avoid building unused IRQ entry stubs")
Reported-by: Jan Kiszka 
Signed-off-by: Thomas Gleixner 
Cc: Marc Zyngier 
Cc: Jan Beulich 
Link: https://lkml.kernel.org/r/20190628111440.550568...@linutronix.de

---
 arch/x86/entry/entry_32.S | 24 
 arch/x86/entry/entry_64.S | 30 ++
 arch/x86/include/asm/hw_irq.h |  2 ++
 arch/x86/kernel/apic/apic.c   | 33 ++---
 arch/x86/kernel/idt.c |  3 ++-
 5 files changed, 76 insertions(+), 16 deletions(-)

diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 7b23431be5cb..44c6e6f54bf7 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -1104,6 +1104,30 @@ ENTRY(irq_entries_start)
 .endr
 END(irq_entries_start)
 
+#ifdef CONFIG_X86_LOCAL_APIC
+   .align 8
+ENTRY(spurious_entries_start)
+vector=FIRST_SYSTEM_VECTOR
+.rept (NR_VECTORS - FIRST_SYSTEM_VECTOR)
+   pushl   $(~vector+0x80) /* Note: always in signed byte 
range */
+vector=vector+1
+   jmp common_spurious
+   .align  8
+.endr
+END(spurious_entries_start)
+
+common_spurious:
+   ASM_CLAC
+   addl$-0x80, (%esp)  /* Adjust vector into the 
[-256, -1] range */
+   SAVE_ALL switch_stacks=1
+   ENCODE_FRAME_POINTER
+   TRACE_IRQS_OFF
+   movl%esp, %eax
+   callsmp_spurious_interrupt
+   jmp ret_from_intr
+ENDPROC(common_interrupt)
+#endif
+
 /*
  * the CPU automatically disables interrupts when executing an IRQ vector,
  * so IRQ-flags tracing has to follow that:
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 20e45d9b4e15..6d835991bb23 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -375,6 +375,18 @@ ENTRY(irq_entries_start)
 .endr
 END(irq_entries_start)
 
+   .align 8
+ENTRY(spurious_entries_start)
+vector=FIRST_SYSTEM_VECTOR
+.rept (NR_VECTORS - FIRST_SYSTEM_VECTOR)
+   UNWIND_HINT_IRET_REGS
+   pushq   $(~vector+0x80) /* Note: always in signed byte 
range */
+   jmp common_spurious
+   .align  8
+   vector=vector+1
+.endr
+END(spurious_entries_start)
+
 .macro DEBUG_ENTRY_ASSERT_IRQS_OFF
 #ifdef CONFIG_DEBUG_ENTRY
pushq %rax
@@ -571,10 +583,20 @@ _ASM_NOKPROBE(interrupt_entry)
 
 /* Interrupt entry/exit. */
 
-   /*
-* The interrupt stubs push (~vector+0x80) onto the stack and
-* then jump to common_interrupt.
-*/
+/*
+ * The interrupt stubs push (~vector+0x80) onto the stack and
+ * then jump to common_spurious/interrupt.
+ */
+common_spurious:
+   addq$-0x80, (%rsp)  /* Adjust vector to [-256, -1] 
range */
+   callinterrupt_entry
+   UNWIND_HINT_REGS indirect=1
+   call

[tip:x86/apic] x86/irq: Handle spurious interrupt after shutdown gracefully

2019-07-03 Thread tip-bot for Thomas Gleixner
Commit-ID:  b7107a67f0d125459fe41f86e8079afd1a5e0b15
Gitweb: https://git.kernel.org/tip/b7107a67f0d125459fe41f86e8079afd1a5e0b15
Author: Thomas Gleixner 
AuthorDate: Fri, 28 Jun 2019 13:11:53 +0200
Committer:  Thomas Gleixner 
CommitDate: Wed, 3 Jul 2019 10:12:30 +0200

x86/irq: Handle spurious interrupt after shutdown gracefully

Since the rework of the vector management, warnings about spurious
interrupts have been reported. Robert provided some more information and
did an initial analysis. The following situation leads to these warnings:

   CPU 0  CPU 1   IO_APIC

  interrupt is raised
  sent to CPU1
  Unable to handle
  immediately
  (interrupts off,
   deep idle delay)
   mask()
   ...
   free()
 shutdown()
 synchronize_irq()
 clear_vector()
  do_IRQ()
-> vector is clear

Before the rework the vector entries of legacy interrupts were statically
assigned and occupied precious vector space while most of them were
unused. Due to that the above situation was handled silently because the
vector was handled and the core handler of the assigned interrupt
descriptor noticed that it is shut down and returned.

While this has usually been observed with legacy interrupts, this situation
is not limited to them. Any other interrupt source, e.g. MSI, can cause the
same issue.

After adding proper synchronization for level triggered interrupts, this
can only happen for edge triggered interrupts where the IO-APIC obviously
cannot provide information about interrupts in flight.

While the spurious warning is actually harmless in this case, it worries
users and driver developers.

Handle it gracefully by marking the vector entry as VECTOR_SHUTDOWN instead
of VECTOR_UNUSED when the vector is freed up.

If the above late handling happens, the spurious detector will not complain
and switch the entry to VECTOR_UNUSED. Any subsequent spurious interrupt on
that line will trigger the spurious warning as before.
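
For illustration, the per-CPU vector_irq slot then holds either a real
descriptor or one of three sentinels; a hypothetical helper (not part of the
patch) makes the distinction explicit:

  /* Hypothetical helper, for illustration only */
  static inline bool vector_slot_is_sentinel(struct irq_desc *desc)
  {
          return desc == VECTOR_UNUSED || desc == VECTOR_SHUTDOWN ||
                 desc == VECTOR_RETRIGGERED;
  }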

Fixes: 464d12309e1b ("x86/vector: Switch IOAPIC to global reservation mode")
Reported-by: Robert Hodaszi 
Signed-off-by: Thomas Gleixner 
Tested-by: Robert Hodaszi 
Cc: Marc Zyngier 
Link: https://lkml.kernel.org/r/20190628111440.459647...@linutronix.de

---
 arch/x86/include/asm/hw_irq.h | 3 ++-
 arch/x86/kernel/apic/vector.c | 4 ++--
 arch/x86/kernel/irq.c | 2 +-
 3 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 32e666e1231e..626e1ac6516e 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -151,7 +151,8 @@ extern char irq_entries_start[];
 #endif
 
 #define VECTOR_UNUSED  NULL
-#define VECTOR_RETRIGGERED ((void *)~0UL)
+#define VECTOR_SHUTDOWN((void *)~0UL)
+#define VECTOR_RETRIGGERED ((void *)~1UL)
 
 typedef struct irq_desc* vector_irq_t[NR_VECTORS];
 DECLARE_PER_CPU(vector_irq_t, vector_irq);
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 3173e07d3791..1c6d1d5f28d3 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -343,7 +343,7 @@ static void clear_irq_vector(struct irq_data *irqd)
trace_vector_clear(irqd->irq, vector, apicd->cpu, apicd->prev_vector,
   apicd->prev_cpu);
 
-   per_cpu(vector_irq, apicd->cpu)[vector] = VECTOR_UNUSED;
+   per_cpu(vector_irq, apicd->cpu)[vector] = VECTOR_SHUTDOWN;
irq_matrix_free(vector_matrix, apicd->cpu, vector, managed);
apicd->vector = 0;
 
@@ -352,7 +352,7 @@ static void clear_irq_vector(struct irq_data *irqd)
if (!vector)
return;
 
-   per_cpu(vector_irq, apicd->prev_cpu)[vector] = VECTOR_UNUSED;
+   per_cpu(vector_irq, apicd->prev_cpu)[vector] = VECTOR_SHUTDOWN;
irq_matrix_free(vector_matrix, apicd->prev_cpu, vector, managed);
apicd->prev_vector = 0;
apicd->move_in_progress = 0;
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 59b5f2ea7c2f..a975246074b5 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -246,7 +246,7 @@ __visible unsigned int __irq_entry do_IRQ(struct pt_regs 
*regs)
if (!handle_irq(desc, regs)) {
ack_APIC_irq();
 
-   if (desc != VECTOR_RETRIGGERED) {
+   if (desc != VECTOR_RETRIGGERED && desc != VECTOR_SHUTDOWN) {
pr_emerg_ratelimited("%s: %d.%d No irq handler for 
vector\n",
 __func__, smp_processor_id(),
 vector);


[tip:x86/apic] x86/ioapic: Implement irq_get_irqchip_state() callback

2019-07-03 Thread tip-bot for Thomas Gleixner
Commit-ID:  dfe0cf8b51b07e56ded571e3de0a4a9382517231
Gitweb: https://git.kernel.org/tip/dfe0cf8b51b07e56ded571e3de0a4a9382517231
Author: Thomas Gleixner 
AuthorDate: Fri, 28 Jun 2019 13:11:52 +0200
Committer:  Thomas Gleixner 
CommitDate: Wed, 3 Jul 2019 10:12:30 +0200

x86/ioapic: Implement irq_get_irqchip_state() callback

When an interrupt is shut down in free_irq() there might be an inflight
interrupt pending in the IO-APIC remote IRR which is not yet serviced. That
means the interrupt has been sent to the target CPUs local APIC, but the
target CPU is in a state which delays the servicing.

So free_irq() would proceed to free resources and to clear the vector
because synchronize_hardirq() does not see an interrupt handler in
progress.

That can trigger a spurious interrupt warning, which is harmless and just
confuses users, but it can also leave the remote IRR in a stale state
because, once the handler is invoked, the interrupt resources might already
be freed and therefore acknowledgement is no longer possible.

Implement the irq_get_irqchip_state() callback for the IO-APIC irq chip. The
callback is invoked from free_irq() via __synchronize_hardirq(). Check the
remote IRR bit of the interrupt and return 'in flight' if it is set and the
interrupt is configured in level mode. For edge mode the remote IRR has no
meaning.
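
For illustration, a sketch of how that state can be consumed through the
generic API (simplified; the actual consumer is the synchronization loop in
the irq core):

  bool inflight = false;

  /* Poll the chip until a possibly in-flight interrupt has been serviced */
  while (!irq_get_irqchip_state(irq, IRQCHIP_STATE_ACTIVE, &inflight) &&
         inflight)
          cpu_relax();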

As this is only meaningful for level triggered interrupts this won't cure
the potential spurious interrupt warning for edge triggered interrupts, but
the edge trigger case does not result in stale hardware state. This has to
be addressed at the vector/interrupt entry level separately.

Fixes: 464d12309e1b ("x86/vector: Switch IOAPIC to global reservation mode")
Reported-by: Robert Hodaszi 
Signed-off-by: Thomas Gleixner 
Cc: Marc Zyngier 
Link: https://lkml.kernel.org/r/20190628111440.370295...@linutronix.de

---
 arch/x86/kernel/apic/io_apic.c | 46 ++
 1 file changed, 46 insertions(+)

diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 1bb864798800..c7bb6c69f21c 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1894,6 +1894,50 @@ static int ioapic_set_affinity(struct irq_data *irq_data,
return ret;
 }
 
+/*
+ * Interrupt shutdown masks the ioapic pin, but the interrupt might already
+ * be in flight, but not yet serviced by the target CPU. That means
+ * __synchronize_hardirq() would return and claim that everything is calmed
+ * down. So free_irq() would proceed and deactivate the interrupt and free
+ * resources.
+ *
+ * Once the target CPU comes around to service it it will find a cleared
+ * vector and complain. While the spurious interrupt is harmless, the full
+ * release of resources might prevent the interrupt from being acknowledged
+ * which keeps the hardware in a weird state.
+ *
+ * Verify that the corresponding Remote-IRR bits are clear.
+ */
+static int ioapic_irq_get_chip_state(struct irq_data *irqd,
+  enum irqchip_irq_state which,
+  bool *state)
+{
+   struct mp_chip_data *mcd = irqd->chip_data;
+   struct IO_APIC_route_entry rentry;
+   struct irq_pin_list *p;
+
+   if (which != IRQCHIP_STATE_ACTIVE)
+   return -EINVAL;
+
+   *state = false;
+   raw_spin_lock(&ioapic_lock);
+   for_each_irq_pin(p, mcd->irq_2_pin) {
+   rentry = __ioapic_read_entry(p->apic, p->pin);
+   /*
+* The remote IRR is only valid in level trigger mode. Its
+* meaning is undefined for edge triggered interrupts and
+* irrelevant because the IO-APIC treats them as fire and
+* forget.
+*/
+   if (rentry.irr && rentry.trigger) {
+   *state = true;
+   break;
+   }
+   }
+   raw_spin_unlock(&ioapic_lock);
+   return 0;
+}
+
 static struct irq_chip ioapic_chip __read_mostly = {
.name   = "IO-APIC",
.irq_startup= startup_ioapic_irq,
@@ -1903,6 +1947,7 @@ static struct irq_chip ioapic_chip __read_mostly = {
.irq_eoi= ioapic_ack_level,
.irq_set_affinity   = ioapic_set_affinity,
.irq_retrigger  = irq_chip_retrigger_hierarchy,
+   .irq_get_irqchip_state  = ioapic_irq_get_chip_state,
.flags  = IRQCHIP_SKIP_SET_WAKE,
 };
 
@@ -1915,6 +1960,7 @@ static struct irq_chip ioapic_ir_chip __read_mostly = {
.irq_eoi= ioapic_ir_ack_level,
.irq_set_affinity   = ioapic_set_affinity,
.irq_retrigger  = irq_chip_retrigger_hierarchy,
+   .irq_get_irqchip_state  = ioapic_irq_get_chip_state,
.flags  = IRQCHIP_SKIP_SET_WAKE,
 };
 


[tip:x86/apic] genirq: Add optional hardware synchronization for shutdown

2019-07-03 Thread tip-bot for Thomas Gleixner
Commit-ID:  62e0468650c30f0298822c580f382b16328119f6
Gitweb: https://git.kernel.org/tip/62e0468650c30f0298822c580f382b16328119f6
Author: Thomas Gleixner 
AuthorDate: Fri, 28 Jun 2019 13:11:51 +0200
Committer:  Thomas Gleixner 
CommitDate: Wed, 3 Jul 2019 10:12:29 +0200

genirq: Add optional hardware synchronization for shutdown

free_irq() ensures that no hardware interrupt handler is executing on a
different CPU before actually releasing resources and deactivating the
interrupt completely in a domain hierarchy.

But that does not catch the case where the interrupt is in flight at the
hardware level but not yet serviced by the target CPU. That creates an
interesting race condition:

   CPU 0  CPU 1   IRQ CHIP

  interrupt is raised
  sent to CPU1
  Unable to handle
  immediately
  (interrupts off,
   deep idle delay)
   mask()
   ...
   free()
 shutdown()
 synchronize_irq()
 release_resources()
  do_IRQ()
-> resources are not available

That might be harmless and just trigger a spurious interrupt warning, but
some interrupt chips might get into a wedged state.

Utilize the existing irq_get_irqchip_state() callback for the
synchronization in free_irq().

synchronize_hardirq() is not using this mechanism as it might actually
deadlock under certain conditions, e.g. when called with interrupts
disabled and the target CPU is the one on which the synchronization is
invoked. synchronize_irq() uses it because that function cannot be called
from non-preemptible contexts as it might sleep.
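
For illustration, a simplified sketch of the resulting call sites, assuming
the new sync_chip argument:

  bool synchronize_hardirq(unsigned int irq)
  {
          struct irq_desc *desc = irq_to_desc(irq);

          if (desc) {
                  /* No chip check: may be called with interrupts disabled */
                  __synchronize_hardirq(desc, false);
                  return !atomic_read(&desc->threads_active);
          }
          return true;
  }

  void synchronize_irq(unsigned int irq)
  {
          struct irq_desc *desc = irq_to_desc(irq);

          if (desc) {
                  /* Preemptible context: also wait for the chip to drain */
                  __synchronize_hardirq(desc, true);
                  wait_event(desc->wait_for_threads,
                             !atomic_read(&desc->threads_active));
          }
  }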

No functional change intended and according to Marc the existing GIC
implementations where the driver supports the callback should be able
to cope with that core change. Famous last words.

Fixes: 464d12309e1b ("x86/vector: Switch IOAPIC to global reservation mode")
Reported-by: Robert Hodaszi 
Signed-off-by: Thomas Gleixner 
Reviewed-by: Marc Zyngier 
Tested-by: Marc Zyngier 
Link: https://lkml.kernel.org/r/20190628111440.279463...@linutronix.de

---
 kernel/irq/internals.h |  4 +++
 kernel/irq/manage.c| 75 +-
 2 files changed, 60 insertions(+), 19 deletions(-)

diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index 9c957f8b1198..3a948f41ab00 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -97,6 +97,10 @@ static inline void irq_mark_irq(unsigned int irq) { }
 extern void irq_mark_irq(unsigned int irq);
 #endif
 
+extern int __irq_get_irqchip_state(struct irq_data *data,
+  enum irqchip_irq_state which,
+  bool *state);
+
 extern void init_kstat_irqs(struct irq_desc *desc, int node, int nr);
 
 irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc, unsigned int 
*flags);
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 44fc505815d6..fad61986f35c 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -35,8 +35,9 @@ static int __init setup_forced_irqthreads(char *arg)
 early_param("threadirqs", setup_forced_irqthreads);
 #endif
 
-static void __synchronize_hardirq(struct irq_desc *desc)
+static void __synchronize_hardirq(struct irq_desc *desc, bool sync_chip)
 {
+   struct irq_data *irqd = irq_desc_get_irq_data(desc);
bool inprogress;
 
do {
@@ -52,6 +53,20 @@ static void __synchronize_hardirq(struct irq_desc *desc)
/* Ok, that indicated we're done: double-check carefully. */
raw_spin_lock_irqsave(&desc->lock, flags);
inprogress = irqd_irq_inprogress(&desc->irq_data);
+
+   /*
+* If requested and supported, check at the chip whether it
+* is in flight at the hardware level, i.e. already pending
+* in a CPU and waiting for service and acknowledge.
+*/
+   if (!inprogress && sync_chip) {
+   /*
+* Ignore the return code. inprogress is only updated
+* when the chip supports it.
+*/
+   __irq_get_irqchip_state(irqd, IRQCHIP_STATE_ACTIVE,
+   &inprogress);
+   }
raw_spin_unlock_irqrestore(&desc->lock, flags);
 
/* Oops, that failed? */
@@ -74,13 +89,18 @@ static void __synchronize_hardirq(struct irq_desc *desc)
  * Returns: false if a threaded handler is active.
  *
  * This function may be called - with care - from IRQ context.
+ *
+ * It does not check whether there is an interrupt in flight at the
+ * hardware level, but not serviced yet, as this might deadlock when
+ * called with interrupts disabled and the target CPU of the interrupt
+ * 

[tip:x86/apic] genirq: Fix misleading synchronize_irq() documentation

2019-07-03 Thread tip-bot for Thomas Gleixner
Commit-ID:  1d21f2af8571c6a6a44e7c1911780614847b0253
Gitweb: https://git.kernel.org/tip/1d21f2af8571c6a6a44e7c1911780614847b0253
Author: Thomas Gleixner 
AuthorDate: Fri, 28 Jun 2019 13:11:50 +0200
Committer:  Thomas Gleixner 
CommitDate: Wed, 3 Jul 2019 10:12:29 +0200

genirq: Fix misleading synchronize_irq() documentation

The function might sleep, so it cannot be called from interrupt
context. Not even with care.

Signed-off-by: Thomas Gleixner 
Cc: Marc Zyngier 
Link: https://lkml.kernel.org/r/20190628111440.189241...@linutronix.de

---
 kernel/irq/manage.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index dc8b35f2d545..44fc505815d6 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -96,7 +96,8 @@ EXPORT_SYMBOL(synchronize_hardirq);
  * to complete before returning. If you use this function while
  * holding a resource the IRQ handler may need you will deadlock.
  *
- * This function may be called - with care - from IRQ context.
+ * Can only be called from preemptible code as it might sleep when
+ * an interrupt thread is associated to @irq.
  */
 void synchronize_irq(unsigned int irq)
 {


[tip:x86/apic] genirq: Delay deactivation in free_irq()

2019-07-03 Thread tip-bot for Thomas Gleixner
Commit-ID:  4001d8e8762f57d418b66e4e668601791900a1dd
Gitweb: https://git.kernel.org/tip/4001d8e8762f57d418b66e4e668601791900a1dd
Author: Thomas Gleixner 
AuthorDate: Fri, 28 Jun 2019 13:11:49 +0200
Committer:  Thomas Gleixner 
CommitDate: Wed, 3 Jul 2019 10:12:28 +0200

genirq: Delay deactivation in free_irq()

When interrupts are shut down, they are immediately deactivated in the
irqdomain hierarchy. While this looks obviously correct there is a subtle
issue:

There might be an interrupt in flight when free_irq() is invoking the
shutdown. This is properly handled at the irq descriptor / primary handler
level, but the deactivation might completely disable resources which are
required to acknowledge the interrupt.

Split the shutdown code and deactivate the interrupt after synchronization
in free_irq(). Fix up all other usage sites where this is not an issue to
invoke the combined shutdown_and_deactivate() function instead.
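
For illustration, the resulting teardown order in the free_irq() path,
heavily simplified:

  irq_shutdown(desc);                             /* mask and disable at the chip */
  synchronize_irq(irq);                           /* wait out handlers and threads */
  irq_domain_deactivate_irq(&desc->irq_data);     /* only now tear down the hierarchy */
  irq_release_resources(desc);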

This might still be an issue if the servicing of an in-flight interrupt is
delayed on a remote CPU beyond the invocation of synchronize_irq(), but
that cannot be handled at that level and needs to be handled in the
synchronize_irq() context.

Fixes: f8264e34965a ("irqdomain: Introduce new interfaces to support hierarchy 
irqdomains")
Reported-by: Robert Hodaszi 
Signed-off-by: Thomas Gleixner 
Reviewed-by: Marc Zyngier 
Link: https://lkml.kernel.org/r/20190628111440.098196...@linutronix.de

---
 kernel/irq/autoprobe.c  |  6 +++---
 kernel/irq/chip.c   |  6 ++
 kernel/irq/cpuhotplug.c |  2 +-
 kernel/irq/internals.h  |  1 +
 kernel/irq/manage.c | 12 +++-
 5 files changed, 22 insertions(+), 5 deletions(-)

diff --git a/kernel/irq/autoprobe.c b/kernel/irq/autoprobe.c
index 16cbf6beb276..ae60cae24e9a 100644
--- a/kernel/irq/autoprobe.c
+++ b/kernel/irq/autoprobe.c
@@ -90,7 +90,7 @@ unsigned long probe_irq_on(void)
/* It triggered already - consider it spurious. */
if (!(desc->istate & IRQS_WAITING)) {
desc->istate &= ~IRQS_AUTODETECT;
-   irq_shutdown(desc);
+   irq_shutdown_and_deactivate(desc);
} else
if (i < 32)
mask |= 1 << i;
@@ -127,7 +127,7 @@ unsigned int probe_irq_mask(unsigned long val)
mask |= 1 << i;
 
desc->istate &= ~IRQS_AUTODETECT;
-   irq_shutdown(desc);
+   irq_shutdown_and_deactivate(desc);
}
raw_spin_unlock_irq(&desc->lock);
}
@@ -169,7 +169,7 @@ int probe_irq_off(unsigned long val)
nr_of_irqs++;
}
desc->istate &= ~IRQS_AUTODETECT;
-   irq_shutdown(desc);
+   irq_shutdown_and_deactivate(desc);
}
raw_spin_unlock_irq(&desc->lock);
}
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 51128bea3846..04fe4f989bd8 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -314,6 +314,12 @@ void irq_shutdown(struct irq_desc *desc)
}
irq_state_clr_started(desc);
}
+}
+
+
+void irq_shutdown_and_deactivate(struct irq_desc *desc)
+{
+   irq_shutdown(desc);
/*
 * This must be called even if the interrupt was never started up,
 * because the activation can happen before the interrupt is
diff --git a/kernel/irq/cpuhotplug.c b/kernel/irq/cpuhotplug.c
index 5b1072e394b2..6c7ca2e983a5 100644
--- a/kernel/irq/cpuhotplug.c
+++ b/kernel/irq/cpuhotplug.c
@@ -116,7 +116,7 @@ static bool migrate_one_irq(struct irq_desc *desc)
 */
if (irqd_affinity_is_managed(d)) {
irqd_set_managed_shutdown(d);
-   irq_shutdown(desc);
+   irq_shutdown_and_deactivate(desc);
return false;
}
affinity = cpu_online_mask;
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index 70c3053bc1f6..9c957f8b1198 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -82,6 +82,7 @@ extern int irq_activate_and_startup(struct irq_desc *desc, 
bool resend);
 extern int irq_startup(struct irq_desc *desc, bool resend, bool force);
 
 extern void irq_shutdown(struct irq_desc *desc);
+extern void irq_shutdown_and_deactivate(struct irq_desc *desc);
 extern void irq_enable(struct irq_desc *desc);
 extern void irq_disable(struct irq_desc *desc);
 extern void irq_percpu_enable(struct irq_desc *desc, unsigned int cpu);
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 53a081392115..dc8b35f2d545 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -13,6 +13,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -1699,6 

[tip:core/stacktrace] stacktrace: Use PF_KTHREAD to check for kernel threads

2019-07-03 Thread tip-bot for Thomas Gleixner
Commit-ID:  7e8e6816c6495a1168f9a7a50125d82c23e59300
Gitweb: https://git.kernel.org/tip/7e8e6816c6495a1168f9a7a50125d82c23e59300
Author: Thomas Gleixner 
AuthorDate: Tue, 2 Jul 2019 17:53:35 +0200
Committer:  Thomas Gleixner 
CommitDate: Wed, 3 Jul 2019 09:04:06 +0200

stacktrace: Use PF_KTHREAD to check for kernel threads

!current->mm is not a reliable indicator for kernel threads as they might
temporarily use a user mm. Check for PF_KTHREAD instead.
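
For illustration, a sketch of why the mm test is unreliable, assuming the
use_mm()/unuse_mm() interface available to kernel threads:

  /* mm: a user address space handed to this kernel thread (e.g. by vhost) */
  use_mm(mm);
  /* current->mm is now non-NULL, yet the task is still a kernel thread */
  WARN_ON(!(current->flags & PF_KTHREAD));
  unuse_mm(mm);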

Signed-off-by: Thomas Gleixner 
Acked-by: Mark Rutland 
Cc: Josh Poimboeuf 
Cc: Peter Zijlstra 
Cc: Steven Rostedt 
Link: 
https://lkml.kernel.org/r/alpine.deb.2.21.1907021750100.1...@nanos.tec.linutronix.de

---
 kernel/stacktrace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/stacktrace.c b/kernel/stacktrace.c
index 36139de0a3c4..c8d0f05721a1 100644
--- a/kernel/stacktrace.c
+++ b/kernel/stacktrace.c
@@ -228,7 +228,7 @@ unsigned int stack_trace_save_user(unsigned long *store, 
unsigned int size)
};
 
/* Trace user stack if not a kernel thread */
-   if (!current->mm)
+   if (current->flags & PF_KTHREAD)
return 0;
 
arch_stack_walk_user(consume_entry, &c, task_pt_regs(current));


[tip:x86/apic] x86/timer: Skip PIT initialization on modern chipsets

2019-06-29 Thread tip-bot for Thomas Gleixner
Commit-ID:  c8c4076723daca08bf35ccd68f22ea1c6219e207
Gitweb: https://git.kernel.org/tip/c8c4076723daca08bf35ccd68f22ea1c6219e207
Author: Thomas Gleixner 
AuthorDate: Fri, 28 Jun 2019 15:23:07 +0800
Committer:  Thomas Gleixner 
CommitDate: Sat, 29 Jun 2019 11:35:35 +0200

x86/timer: Skip PIT initialization on modern chipsets

Recent Intel chipsets including Skylake and ApolloLake have a special
ITSSPRC register which allows the 8254 PIT to be gated.  When gated, the
8254 registers can still be programmed as normal, but there are no IRQ0
timer interrupts.

Some products such as the Connex L1430 and exone go Rugged E11 use this
register to ship with the PIT gated by default. This causes Linux to fail
to boot:

  Kernel panic - not syncing: IO-APIC + timer doesn't work! Boot with
  apic=debug and send a report.

The panic happens before the framebuffer is initialized, so to the user, it
appears as an early boot hang on a black screen.

Affected products typically have a BIOS option that can be used to enable
the 8254 and make Linux work (Chipset -> South Cluster Configuration ->
Miscellaneous Configuration -> 8254 Clock Gating); however, it would be best
to make Linux support the no-8254 case.

Modern systems allow discovering the TSC and local APIC timer frequencies,
so the calibration against the PIT is not required. These systems have
always-running timers and the local APIC timer also works in deep power
states.

So the setup of the PIT, including the IO-APIC timer interrupt delivery
checks, is a pointless exercise.

Skip the PIT setup and the IO-APIC timer interrupt checks on these systems,
which avoids the panic caused by non-ticking PITs and also speeds up the
boot process.
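
For illustration, a simplified sketch of how the PIT setup can consult the
new helper:

  bool __init pit_timer_init(void)
  {
          /*
           * Skip the PIT when the TSC is usable and neither TSC nor local
           * APIC timer calibration needs it.
           */
          if (IS_ENABLED(CONFIG_X86_TSC) && boot_cpu_has(X86_FEATURE_TSC) &&
              !apic_needs_pit())
                  return false;

          clockevent_i8253_init(true);
          global_clock_event = &i8253_clockevent;
          return true;
  }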

Thanks to Daniel for providing the changelog, initial analysis of the
problem and testing against a variety of machines.

Reported-by: Daniel Drake 
Signed-off-by: Thomas Gleixner 
Tested-by: Daniel Drake 
Cc: b...@alien8.de
Cc: h...@zytor.com
Cc: li...@endlessm.com
Cc: rafael.j.wyso...@intel.com
Cc: hdego...@redhat.com
Link: https://lkml.kernel.org/r/20190628072307.24678-1-dr...@endlessm.com

---
 arch/x86/include/asm/apic.h|  2 ++
 arch/x86/include/asm/time.h|  1 +
 arch/x86/kernel/apic/apic.c| 27 +++
 arch/x86/kernel/apic/io_apic.c |  4 
 arch/x86/kernel/i8253.c| 25 -
 arch/x86/kernel/time.c |  7 +--
 6 files changed, 63 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index c986e32b5a48..693a0ad56019 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -173,6 +173,7 @@ extern void lapic_assign_system_vectors(void);
 extern void lapic_assign_legacy_vector(unsigned int isairq, bool replace);
 extern void lapic_online(void);
 extern void lapic_offline(void);
+extern bool apic_needs_pit(void);
 
 #else /* !CONFIG_X86_LOCAL_APIC */
 static inline void lapic_shutdown(void) { }
@@ -186,6 +187,7 @@ static inline void init_bsp_APIC(void) { }
 static inline void apic_intr_mode_init(void) { }
 static inline void lapic_assign_system_vectors(void) { }
 static inline void lapic_assign_legacy_vector(unsigned int i, bool r) { }
+static inline bool apic_needs_pit(void) { return true; }
 #endif /* !CONFIG_X86_LOCAL_APIC */
 
 #ifdef CONFIG_X86_X2APIC
diff --git a/arch/x86/include/asm/time.h b/arch/x86/include/asm/time.h
index cef818b16045..8ac563abb567 100644
--- a/arch/x86/include/asm/time.h
+++ b/arch/x86/include/asm/time.h
@@ -7,6 +7,7 @@
 
 extern void hpet_time_init(void);
 extern void time_init(void);
+extern bool pit_timer_init(void);
 
 extern struct clock_event_device *global_clock_event;
 
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index dc4ed655dbbb..29fd50840b55 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -820,6 +820,33 @@ static int __init lapic_init_clockevent(void)
return 0;
 }
 
+bool __init apic_needs_pit(void)
+{
+   /*
+* If the frequencies are not known, PIT is required for both TSC
+* and apic timer calibration.
+*/
+   if (!tsc_khz || !cpu_khz)
+   return true;
+
+   /* Is there an APIC at all? */
+   if (!boot_cpu_has(X86_FEATURE_APIC))
+   return true;
+
+   /* Deadline timer is based on TSC so no further PIT action required */
+   if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
+   return false;
+
+   /* APIC timer disabled? */
+   if (disable_apic_timer)
+   return true;
+   /*
+* The APIC timer frequency is known already, no PIT calibration
+* required. If unknown, let the PIT be initialized.
+*/
+   return lapic_timer_period == 0;
+}
+
 static int __init calibrate_APIC_clock(void)
 {
struct clock_event_device *levt = this_cpu_ptr(&lapic_events);
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 

[tip:x86/timers] x86/hpet: Carve out shareable parts of init_one_hpet_msi_clockevent()

2019-06-27 Thread tip-bot for Thomas Gleixner
Commit-ID:  ea99110dd024d2f31bde19dda049f3fbf3816a70
Gitweb: https://git.kernel.org/tip/ea99110dd024d2f31bde19dda049f3fbf3816a70
Author: Thomas Gleixner 
AuthorDate: Sun, 23 Jun 2019 15:24:07 +0200
Committer:  Thomas Gleixner 
CommitDate: Fri, 28 Jun 2019 00:57:26 +0200

x86/hpet: Carve out shareable parts of init_one_hpet_msi_clockevent()

To finally remove the static channel0/clockevent storage and to utilize the
channel 0 storage in hpet_base, the clockevent needs to be initialized at
run time. The MSI clockevents already have a run time init function.

Carve out the parts which can be shared between the legacy and the MSI
implementation.

Signed-off-by: Thomas Gleixner 
Reviewed-by: Ingo Molnar 
Cc: Peter Zijlstra 
Cc: Ricardo Neri 
Cc: Ashok Raj 
Cc: Andi Kleen 
Cc: Suravee Suthikulpanit 
Cc: Stephane Eranian 
Cc: Ravi Shankar 
Link: https://lkml.kernel.org/r/20190623132436.552451...@linutronix.de

---
 arch/x86/kernel/hpet.c | 33 -
 1 file changed, 20 insertions(+), 13 deletions(-)

diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 47eb4d36864e..80497fe5354c 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -411,6 +411,25 @@ hpet_clkevt_set_next_event(unsigned long delta, struct 
clock_event_device *evt)
return res < HPET_MIN_CYCLES ? -ETIME : 0;
 }
 
+static void hpet_init_clockevent(struct hpet_channel *hc, unsigned int rating)
+{
+   struct clock_event_device *evt = &hc->evt;
+
+   evt->rating = rating;
+   evt->irq= hc->irq;
+   evt->name   = hc->name;
+   evt->cpumask= cpumask_of(hc->cpu);
+   evt->set_state_oneshot  = hpet_clkevt_set_state_oneshot;
+   evt->set_next_event = hpet_clkevt_set_next_event;
+   evt->set_state_shutdown = hpet_clkevt_set_state_shutdown;
+
+   evt->features = CLOCK_EVT_FEAT_ONESHOT;
+   if (hc->boot_cfg & HPET_TN_PERIODIC) {
+   evt->features   |= CLOCK_EVT_FEAT_PERIODIC;
+   evt->set_state_periodic = hpet_clkevt_set_state_periodic;
+   }
+}
+
 /*
  * The HPET clock event device wrapped in a channel for conversion
  */
@@ -510,22 +529,10 @@ static void init_one_hpet_msi_clockevent(struct 
hpet_channel *hc, int cpu)
 
hc->cpu = cpu;
per_cpu(cpu_hpet_channel, cpu) = hc;
-   evt->name = hc->name;
hpet_setup_msi_irq(hc);
-   evt->irq = hc->irq;
 
-   evt->rating = 110;
-   evt->features = CLOCK_EVT_FEAT_ONESHOT;
-   if (hc->boot_cfg & HPET_TN_PERIODIC) {
-   evt->features |= CLOCK_EVT_FEAT_PERIODIC;
-   evt->set_state_periodic = hpet_clkevt_set_state_periodic;
-   }
-
-   evt->set_state_shutdown = hpet_clkevt_set_state_shutdown;
-   evt->set_state_oneshot = hpet_clkevt_set_state_oneshot;
-   evt->set_next_event = hpet_clkevt_set_next_event;
+   hpet_init_clockevent(hc, 110);
evt->tick_resume = hpet_clkevt_msi_resume;
-   evt->cpumask = cpumask_of(hc->cpu);
 
clockevents_config_and_register(evt, hpet_freq, HPET_MIN_PROG_DELTA,
0x7FFFFFFF);


[tip:x86/timers] x86/hpet: Wrap legacy clockevent in hpet_channel

2019-06-27 Thread tip-bot for Thomas Gleixner
Commit-ID:  18e84a2dff00c3c817161a105332cd3fc7592648
Gitweb: https://git.kernel.org/tip/18e84a2dff00c3c817161a105332cd3fc7592648
Author: Thomas Gleixner 
AuthorDate: Sun, 23 Jun 2019 15:24:05 +0200
Committer:  Thomas Gleixner 
CommitDate: Fri, 28 Jun 2019 00:57:25 +0200

x86/hpet: Wrap legacy clockevent in hpet_channel

For HPET channel 0 there exist two clockevent structures right now:
  - the static hpet_clockevent
  - the clockevent in channel 0 storage

The goal is to use the clockevent in the channel storage, remove the static
variable and share code with the MSI implementation.

As a first step wrap the legacy clockevent into a hpet_channel struct and
convert the users.

Signed-off-by: Thomas Gleixner 
Reviewed-by: Ingo Molnar 
Cc: Peter Zijlstra 
Cc: Ricardo Neri 
Cc: Ashok Raj 
Cc: Andi Kleen 
Cc: Suravee Suthikulpanit 
Cc: Stephane Eranian 
Cc: Ravi Shankar 
Link: https://lkml.kernel.org/r/20190623132436.368141...@linutronix.de

---
 arch/x86/kernel/hpet.c | 49 +++--
 1 file changed, 27 insertions(+), 22 deletions(-)

diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 985a2246d20c..19e3ac81c3b9 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -66,7 +66,7 @@ bool  boot_hpet_disable;
 bool   hpet_force_user;
 static boolhpet_verbose;
 
-static struct clock_event_device   hpet_clockevent;
+static struct hpet_channel hpet_channel0;
 
 static inline
 struct hpet_channel *clockevent_to_channel(struct clock_event_device *evt)
@@ -294,7 +294,7 @@ static void hpet_enable_legacy_int(void)
hpet_legacy_int_enabled = true;
 }
 
-static void hpet_legacy_clockevent_register(void)
+static void hpet_legacy_clockevent_register(struct hpet_channel *hc)
 {
/* Start HPET legacy interrupts */
hpet_enable_legacy_int();
@@ -303,10 +303,10 @@ static void hpet_legacy_clockevent_register(void)
 * Start HPET with the boot CPU's cpumask and make it global after
 * the IO_APIC has been initialized.
 */
-   hpet_clockevent.cpumask = cpumask_of(boot_cpu_data.cpu_index);
-   clockevents_config_and_register(&hpet_clockevent, hpet_freq,
+   hc->evt.cpumask = cpumask_of(boot_cpu_data.cpu_index);
+   clockevents_config_and_register(&hc->evt, hpet_freq,
HPET_MIN_PROG_DELTA, 0x7FFFFFFF);
-   global_clock_event = &hpet_clockevent;
+   global_clock_event = &hc->evt;
pr_debug("Clockevent registered\n");
 }
 
@@ -433,19 +433,21 @@ static int hpet_legacy_next_event(unsigned long delta,
 }
 
 /*
- * The HPET clock event device
+ * The HPET clock event device wrapped in a channel for conversion
  */
-static struct clock_event_device hpet_clockevent = {
-   .name   = "hpet",
-   .features   = CLOCK_EVT_FEAT_PERIODIC |
- CLOCK_EVT_FEAT_ONESHOT,
-   .set_state_periodic = hpet_legacy_set_periodic,
-   .set_state_oneshot  = hpet_legacy_set_oneshot,
-   .set_state_shutdown = hpet_legacy_shutdown,
-   .tick_resume= hpet_legacy_resume,
-   .set_next_event = hpet_legacy_next_event,
-   .irq= 0,
-   .rating = 50,
+static struct hpet_channel hpet_channel0 = {
+   .evt = {
+   .name   = "hpet",
+   .features   = CLOCK_EVT_FEAT_PERIODIC |
+ CLOCK_EVT_FEAT_ONESHOT,
+   .set_state_periodic = hpet_legacy_set_periodic,
+   .set_state_oneshot  = hpet_legacy_set_oneshot,
+   .set_state_shutdown = hpet_legacy_shutdown,
+   .tick_resume= hpet_legacy_resume,
+   .set_next_event = hpet_legacy_next_event,
+   .irq= 0,
+   .rating = 50,
+   }
 };
 
 /*
@@ -916,7 +918,7 @@ int __init hpet_enable(void)
clocksource_register_hz(&clocksource_hpet, (u32)hpet_freq);
 
if (id & HPET_ID_LEGSUP) {
-   hpet_legacy_clockevent_register();
+   hpet_legacy_clockevent_register(&hpet_channel0);
hpet_base.channels[0].mode = HPET_MODE_LEGACY;
if (IS_ENABLED(CONFIG_HPET_EMULATE_RTC))
hpet_base.channels[1].mode = HPET_MODE_LEGACY;
@@ -1101,10 +1103,11 @@ int hpet_rtc_timer_init(void)
return 0;
 
if (!hpet_default_delta) {
+   struct clock_event_device *evt = &hpet_channel0.evt;
uint64_t clc;
 
-   clc = (uint64_t) hpet_clockevent.mult * NSEC_PER_SEC;
-   clc >>= hpet_clockevent.shift + DEFAULT_RTC_SHIFT;
+   clc = (uint64_t) evt->mult * NSEC_PER_SEC;
+   clc >>= evt->shift + DEFAULT_RTC_SHIFT;

[tip:x86/timers] x86/hpet: Use channel for legacy clockevent storage

2019-06-27 Thread tip-bot for Thomas Gleixner
Commit-ID:  e44252f4fe79dd9ca93bcf4e8f74389a5b8452f5
Gitweb: https://git.kernel.org/tip/e44252f4fe79dd9ca93bcf4e8f74389a5b8452f5
Author: Thomas Gleixner 
AuthorDate: Sun, 23 Jun 2019 15:24:09 +0200
Committer:  Thomas Gleixner 
CommitDate: Fri, 28 Jun 2019 00:57:27 +0200

x86/hpet: Use channel for legacy clockevent storage

All preparations are done. Use the channel storage for the legacy
clockevent and remove the static variable.

Signed-off-by: Thomas Gleixner 
Reviewed-by: Ingo Molnar 
Cc: Peter Zijlstra 
Cc: Ricardo Neri 
Cc: Ashok Raj 
Cc: Andi Kleen 
Cc: Suravee Suthikulpanit 
Cc: Stephane Eranian 
Cc: Ravi Shankar 
Link: https://lkml.kernel.org/r/20190623132436.737689...@linutronix.de

---
 arch/x86/kernel/hpet.c | 11 +++
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 35633e577d21..c43e96a938d0 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -66,11 +66,6 @@ bool boot_hpet_disable;
 bool   hpet_force_user;
 static boolhpet_verbose;
 
-/*
- * The HPET clock event device wrapped in a channel for conversion
- */
-static struct hpet_channel hpet_channel0;
-
 static inline
 struct hpet_channel *clockevent_to_channel(struct clock_event_device *evt)
 {
@@ -904,7 +899,7 @@ int __init hpet_enable(void)
clocksource_register_hz(&clocksource_hpet, (u32)hpet_freq);
 
if (id & HPET_ID_LEGSUP) {
-   hpet_legacy_clockevent_register(&hpet_channel0);
+   hpet_legacy_clockevent_register(&hpet_base.channels[0]);
hpet_base.channels[0].mode = HPET_MODE_LEGACY;
if (IS_ENABLED(CONFIG_HPET_EMULATE_RTC))
hpet_base.channels[1].mode = HPET_MODE_LEGACY;
@@ -1089,7 +1084,7 @@ int hpet_rtc_timer_init(void)
return 0;
 
if (!hpet_default_delta) {
-   struct clock_event_device *evt = &hpet_channel0.evt;
+   struct clock_event_device *evt = &hpet_base.channels[0].evt;
uint64_t clc;
 
clc = (uint64_t) evt->mult * NSEC_PER_SEC;
@@ -1187,7 +1182,7 @@ int hpet_set_periodic_freq(unsigned long freq)
if (freq <= DEFAULT_RTC_INT_FREQ) {
hpet_pie_limit = DEFAULT_RTC_INT_FREQ / freq;
} else {
-   struct clock_event_device *evt = &hpet_channel0.evt;
+   struct clock_event_device *evt = &hpet_base.channels[0].evt;
 
clc = (uint64_t) evt->mult * NSEC_PER_SEC;
do_div(clc, freq);


[tip:x86/timers] x86/hpet: Use common init for legacy clockevent

2019-06-27 Thread tip-bot for Thomas Gleixner
Commit-ID:  49adaa60fa75a04457d30f38321378cdc3547212
Gitweb: https://git.kernel.org/tip/49adaa60fa75a04457d30f38321378cdc3547212
Author: Thomas Gleixner 
AuthorDate: Sun, 23 Jun 2019 15:24:08 +0200
Committer:  Thomas Gleixner 
CommitDate: Fri, 28 Jun 2019 00:57:27 +0200

x86/hpet: Use common init for legacy clockevent

Replace the static initialization of the legacy clockevent with runtime
initialization utilizing the common init function as the last preparatory
step to switch the legacy clockevent over to the channel 0 storage in
hpet_base.

This comes with a twist. The static clockevent initializer has selected
support for periodic and oneshot mode unconditionally, whether the HPET
config advertised periodic mode or not. Even the pre-clockevents code did
this, but:

Using the conditional in hpet_init_clockevent() makes at least Qemu and one
hardware machine fail to boot.  There are two issues which cause the boot
failure:

 #1 After the timer delivery test in IOAPIC and the IOAPIC setup the next
interrupt is not delivered despite the HPET channel being programmed
correctly. Reprogramming the HPET after switching to IOAPIC makes it
work again. After fixing this, the next issue surfaces:

 #2 Due to the unconditional periodic mode 'availability' the Local APIC
timer calibration can hijack the global clockevents event handler
without causing damage. Using oneshot at this stage makes it hang
because the HPET does not get reprogrammed due to the handler
hijacking. Duh, stupid me!

Both issues require major surgery, and especially kicking the HPET again
after enabling the IOAPIC results in really nasty hackery.  This 'assume periodic
works' magic has survived since HPET support got added, so it's
questionable whether this should be fixed. Both Qemu and the failing
hardware machine support periodic mode despite the fact that both don't
advertise it in the configuration register and both need that extra kick
after switching to IOAPIC. Seems to be a feature...

Keep the 'assume periodic works' magic around and add a big fat comment.
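
For illustration, a simplified sketch of the run-time legacy registration
with that magic kept:

  static void __init hpet_legacy_clockevent_register(struct hpet_channel *hc)
  {
          hc->cpu = boot_cpu_data.cpu_index;
          hpet_init_clockevent(hc, 50);
          hc->evt.tick_resume = hpet_clkevt_legacy_resume;

          /*
           * Big fat comment: periodic mode is assumed to work even when the
           * config register does not advertise it. See above.
           */
          hc->evt.features |= CLOCK_EVT_FEAT_PERIODIC;
          hc->evt.set_state_periodic = hpet_clkevt_set_state_periodic;

          /* Start HPET legacy interrupts */
          hpet_enable_legacy_int();

          hc->evt.cpumask = cpumask_of(hc->cpu);
          clockevents_config_and_register(&hc->evt, hpet_freq,
                                          HPET_MIN_PROG_DELTA, 0x7FFFFFFF);
          global_clock_event = &hc->evt;
  }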

Signed-off-by: Thomas Gleixner 
Reviewed-by: Ingo Molnar 
Cc: Peter Zijlstra 
Cc: Ricardo Neri 
Cc: Ashok Raj 
Cc: Andi Kleen 
Cc: Suravee Suthikulpanit 
Cc: Stephane Eranian 
Cc: Ravi Shankar 
Link: https://lkml.kernel.org/r/20190623132436.646565...@linutronix.de

---
 arch/x86/kernel/hpet.c | 87 +++---
 1 file changed, 54 insertions(+), 33 deletions(-)

diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 80497fe5354c..35633e577d21 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -66,6 +66,9 @@ bool  boot_hpet_disable;
 bool   hpet_force_user;
 static boolhpet_verbose;
 
+/*
+ * The HPET clock event device wrapped in a channel for conversion
+ */
 static struct hpet_channel hpet_channel0;
 
 static inline
@@ -294,22 +297,6 @@ static void hpet_enable_legacy_int(void)
hpet_legacy_int_enabled = true;
 }
 
-static void hpet_legacy_clockevent_register(struct hpet_channel *hc)
-{
-   /* Start HPET legacy interrupts */
-   hpet_enable_legacy_int();
-
-   /*
-* Start HPET with the boot CPU's cpumask and make it global after
-* the IO_APIC has been initialized.
-*/
-   hc->evt.cpumask = cpumask_of(boot_cpu_data.cpu_index);
-   clockevents_config_and_register(&hc->evt, hpet_freq,
-   HPET_MIN_PROG_DELTA, 0x7FFFFFFF);
-   global_clock_event = &hc->evt;
-   pr_debug("Clockevent registered\n");
-}
-
 static int hpet_clkevt_set_state_periodic(struct clock_event_device *evt)
 {
unsigned int channel = clockevent_to_channel(evt)->num;
@@ -430,23 +417,57 @@ static void hpet_init_clockevent(struct hpet_channel *hc, 
unsigned int rating)
}
 }
 
-/*
- * The HPET clock event device wrapped in a channel for conversion
- */
-static struct hpet_channel hpet_channel0 = {
-   .evt = {
-   .name   = "hpet",
-   .features   = CLOCK_EVT_FEAT_PERIODIC |
- CLOCK_EVT_FEAT_ONESHOT,
-   .set_state_periodic = hpet_clkevt_set_state_periodic,
-   .set_state_oneshot  = hpet_clkevt_set_state_oneshot,
-   .set_state_shutdown = hpet_clkevt_set_state_shutdown,
-   .tick_resume= hpet_clkevt_legacy_resume,
-   .set_next_event = hpet_clkevt_set_next_event,
-   .irq= 0,
-   .rating = 50,
-   }
-};
+static void __init hpet_legacy_clockevent_register(struct hpet_channel *hc)
+{
+   /*
+* Start HPET with the boot CPU's cpumask and make it global after
+* the IO_APIC has been initialized.
+*/
+   hc->cpu = boot_cpu_data.cpu_index;
+   

[tip:x86/timers] x86/hpet: Move clockevents into channels

2019-06-27 Thread tip-bot for Thomas Gleixner
Commit-ID:  4d5e68330df4e79633bcde2bebcbfed1ba0421d5
Gitweb: https://git.kernel.org/tip/4d5e68330df4e79633bcde2bebcbfed1ba0421d5
Author: Thomas Gleixner 
AuthorDate: Sun, 23 Jun 2019 15:24:03 +0200
Committer:  Thomas Gleixner 
CommitDate: Fri, 28 Jun 2019 00:57:24 +0200

x86/hpet: Move clockevents into channels

Instead of allocating yet another data structure, move the clock event data
into the channel structure. This allows further consolidation of the
reservation code and the reuse of the cached boot config to replace the
extra flags in the clockevent data.

Signed-off-by: Thomas Gleixner 
Reviewed-by: Ingo Molnar 
Cc: Peter Zijlstra 
Cc: Ricardo Neri 
Cc: Ashok Raj 
Cc: Andi Kleen 
Cc: Suravee Suthikulpanit 
Cc: Stephane Eranian 
Cc: Ravi Shankar 
Link: https://lkml.kernel.org/r/20190623132436.185851...@linutronix.de

---
 arch/x86/include/asm/hpet.h |   6 +-
 arch/x86/kernel/apic/msi.c  |   4 +-
 arch/x86/kernel/hpet.c  | 139 +++-
 3 files changed, 64 insertions(+), 85 deletions(-)

diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h
index e3209f5de65d..6352dee37cda 100644
--- a/arch/x86/include/asm/hpet.h
+++ b/arch/x86/include/asm/hpet.h
@@ -75,15 +75,15 @@ extern unsigned int hpet_readl(unsigned int a);
 extern void force_hpet_resume(void);
 
 struct irq_data;
-struct hpet_dev;
+struct hpet_channel;
 struct irq_domain;
 
 extern void hpet_msi_unmask(struct irq_data *data);
 extern void hpet_msi_mask(struct irq_data *data);
-extern void hpet_msi_write(struct hpet_dev *hdev, struct msi_msg *msg);
+extern void hpet_msi_write(struct hpet_channel *hc, struct msi_msg *msg);
 extern struct irq_domain *hpet_create_irq_domain(int hpet_id);
 extern int hpet_assign_irq(struct irq_domain *domain,
-  struct hpet_dev *dev, int dev_num);
+  struct hpet_channel *hc, int dev_num);
 
 #ifdef CONFIG_HPET_EMULATE_RTC
 
diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c
index dad0dd759de2..7f7533462474 100644
--- a/arch/x86/kernel/apic/msi.c
+++ b/arch/x86/kernel/apic/msi.c
@@ -370,14 +370,14 @@ struct irq_domain *hpet_create_irq_domain(int hpet_id)
return d;
 }
 
-int hpet_assign_irq(struct irq_domain *domain, struct hpet_dev *dev,
+int hpet_assign_irq(struct irq_domain *domain, struct hpet_channel *hc,
int dev_num)
 {
struct irq_alloc_info info;
 
init_irq_alloc_info(, NULL);
info.type = X86_IRQ_ALLOC_TYPE_HPET;
-   info.hpet_data = dev;
+   info.hpet_data = hc;
info.hpet_id = hpet_dev_id(domain);
info.hpet_index = dev_num;
 
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 32f21b429881..7f76f07138a6 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -13,15 +13,6 @@
 #undef  pr_fmt
 #define pr_fmt(fmt) "hpet: " fmt
 
-struct hpet_dev {
-   struct clock_event_device   evt;
-   unsigned intnum;
-   int cpu;
-   unsigned intirq;
-   unsigned intflags;
-   charname[10];
-};
-
 enum hpet_mode {
HPET_MODE_UNUSED,
HPET_MODE_LEGACY,
@@ -30,14 +21,19 @@ enum hpet_mode {
 };
 
 struct hpet_channel {
+   struct clock_event_device   evt;
unsigned intnum;
+   unsigned intcpu;
unsigned intirq;
enum hpet_mode  mode;
+   unsigned intflags;
unsigned intboot_cfg;
+   charname[10];
 };
 
 struct hpet_base {
unsigned intnr_channels;
+   unsigned intnr_clockevents;
unsigned intboot_cfg;
struct hpet_channel *channels;
 };
@@ -61,8 +57,7 @@ u8 hpet_blockid; /* OS timer block num */
 bool   hpet_msi_disable;
 
 #ifdef CONFIG_PCI_MSI
-static struct hpet_dev *hpet_devs;
-static DEFINE_PER_CPU(struct hpet_dev *, cpu_hpet_dev);
+static DEFINE_PER_CPU(struct hpet_channel *, cpu_hpet_channel);
 static struct irq_domain   *hpet_domain;
 #endif
 
@@ -79,9 +74,9 @@ static bool   hpet_verbose;
 static struct clock_event_device   hpet_clockevent;
 
 static inline
-struct hpet_dev *clockevent_to_channel(struct clock_event_device *evt)
+struct hpet_channel *clockevent_to_channel(struct clock_event_device *evt)
 {
-   return container_of(evt, struct hpet_dev, evt);
+   return container_of(evt, struct hpet_channel, evt);
 }
 
 inline unsigned int hpet_readl(unsigned int a)
@@ -460,10 +455,9 @@ static struct clock_event_device hpet_clockevent = {
 
 void hpet_msi_unmask(struct irq_data *data)
 {
-   struct hpet_dev *hc = 

[tip:x86/timers] x86/hpet: Consolidate clockevent functions

2019-06-27 Thread tip-bot for Thomas Gleixner
Commit-ID:  310b5b3eb6ba5d3a92d783b9fa1c5a3ffb5932e9
Gitweb: https://git.kernel.org/tip/310b5b3eb6ba5d3a92d783b9fa1c5a3ffb5932e9
Author: Thomas Gleixner 
AuthorDate: Sun, 23 Jun 2019 15:24:06 +0200
Committer:  Thomas Gleixner 
CommitDate: Fri, 28 Jun 2019 00:57:26 +0200

x86/hpet: Consolidate clockevent functions

Now that the legacy clockevent is wrapped in a hpet_channel struct most
clockevent functions can be shared between the legacy and the MSI based
clockevents.

Signed-off-by: Thomas Gleixner 
Reviewed-by: Ingo Molnar 
Cc: Peter Zijlstra 
Cc: Ricardo Neri 
Cc: Ashok Raj 
Cc: Andi Kleen 
Cc: Suravee Suthikulpanit 
Cc: Stephane Eranian 
Cc: Ravi Shankar 
Link: https://lkml.kernel.org/r/20190623132436.461437...@linutronix.de
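
The sharing works because each channel now embeds its clock_event_device, so a
callback that only receives the clockevent pointer can recover its channel via
container_of(). A minimal standalone sketch of that pattern (hypothetical
struct names, not the kernel code itself):

#include <stddef.h>
#include <stdio.h>

/* Same idea as the kernel's container_of(): recover the outer object
 * from a pointer to one of its embedded members. */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct event_device {			/* stands in for clock_event_device */
	const char *name;
};

struct channel {			/* stands in for hpet_channel */
	struct event_device evt;	/* embedded clockevent */
	unsigned int num;
};

static struct channel *to_channel(struct event_device *evt)
{
	return container_of(evt, struct channel, evt);
}

int main(void)
{
	struct channel ch = { .evt = { .name = "demo" }, .num = 3 };
	struct event_device *evt = &ch.evt;

	/* A shared callback that only gets evt can still find its channel. */
	printf("%s is channel %u\n", evt->name, to_channel(evt)->num);
	return 0;
}

With such an accessor one set of set_state/set_next_event callbacks can serve
both the legacy channel and the per-CPU MSI channels, which is what the diff
below does with clockevent_to_channel().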

---
 arch/x86/kernel/hpet.c | 92 ++
 1 file changed, 25 insertions(+), 67 deletions(-)

diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 19e3ac81c3b9..47eb4d36864e 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -310,8 +310,9 @@ static void hpet_legacy_clockevent_register(struct hpet_channel *hc)
pr_debug("Clockevent registered\n");
 }
 
-static int hpet_set_periodic(struct clock_event_device *evt, int channel)
+static int hpet_clkevt_set_state_periodic(struct clock_event_device *evt)
 {
+   unsigned int channel = clockevent_to_channel(evt)->num;
unsigned int cfg, cmp, now;
uint64_t delta;
 
@@ -340,8 +341,9 @@ static int hpet_set_periodic(struct clock_event_device *evt, int channel)
return 0;
 }
 
-static int hpet_set_oneshot(struct clock_event_device *evt, int channel)
+static int hpet_clkevt_set_state_oneshot(struct clock_event_device *evt)
 {
+   unsigned int channel = clockevent_to_channel(evt)->num;
unsigned int cfg;
 
cfg = hpet_readl(HPET_Tn_CFG(channel));
@@ -352,8 +354,9 @@ static int hpet_set_oneshot(struct clock_event_device *evt, int channel)
return 0;
 }
 
-static int hpet_shutdown(struct clock_event_device *evt, int channel)
+static int hpet_clkevt_set_state_shutdown(struct clock_event_device *evt)
 {
+   unsigned int channel = clockevent_to_channel(evt)->num;
unsigned int cfg;
 
cfg = hpet_readl(HPET_Tn_CFG(channel));
@@ -363,15 +366,17 @@ static int hpet_shutdown(struct clock_event_device *evt, int channel)
return 0;
 }
 
-static int hpet_resume(struct clock_event_device *evt)
+static int hpet_clkevt_legacy_resume(struct clock_event_device *evt)
 {
hpet_enable_legacy_int();
hpet_print_config();
return 0;
 }
 
-static int hpet_next_event(unsigned long delta, int channel)
+static int
+hpet_clkevt_set_next_event(unsigned long delta, struct clock_event_device *evt)
 {
+   unsigned int channel = clockevent_to_channel(evt)->num;
u32 cnt;
s32 res;
 
@@ -406,32 +411,6 @@ static int hpet_next_event(unsigned long delta, int channel)
return res < HPET_MIN_CYCLES ? -ETIME : 0;
 }
 
-static int hpet_legacy_shutdown(struct clock_event_device *evt)
-{
-   return hpet_shutdown(evt, 0);
-}
-
-static int hpet_legacy_set_oneshot(struct clock_event_device *evt)
-{
-   return hpet_set_oneshot(evt, 0);
-}
-
-static int hpet_legacy_set_periodic(struct clock_event_device *evt)
-{
-   return hpet_set_periodic(evt, 0);
-}
-
-static int hpet_legacy_resume(struct clock_event_device *evt)
-{
-   return hpet_resume(evt);
-}
-
-static int hpet_legacy_next_event(unsigned long delta,
- struct clock_event_device *evt)
-{
-   return hpet_next_event(delta, 0);
-}
-
 /*
  * The HPET clock event device wrapped in a channel for conversion
  */
@@ -440,11 +419,11 @@ static struct hpet_channel hpet_channel0 = {
.name   = "hpet",
.features   = CLOCK_EVT_FEAT_PERIODIC |
  CLOCK_EVT_FEAT_ONESHOT,
-   .set_state_periodic = hpet_legacy_set_periodic,
-   .set_state_oneshot  = hpet_legacy_set_oneshot,
-   .set_state_shutdown = hpet_legacy_shutdown,
-   .tick_resume= hpet_legacy_resume,
-   .set_next_event = hpet_legacy_next_event,
+   .set_state_periodic = hpet_clkevt_set_state_periodic,
+   .set_state_oneshot  = hpet_clkevt_set_state_oneshot,
+   .set_state_shutdown = hpet_clkevt_set_state_shutdown,
+   .tick_resume= hpet_clkevt_legacy_resume,
+   .set_next_event = hpet_clkevt_set_next_event,
.irq= 0,
.rating = 50,
}
@@ -481,22 +460,7 @@ void hpet_msi_write(struct hpet_channel *hc, struct msi_msg *msg)
hpet_writel(msg->address_lo, HPET_Tn_ROUTE(hc->num) + 4);
 }
 
-static int hpet_msi_shutdown(struct clock_event_device *evt)
-{
-   return 

[tip:x86/timers] x86/hpet: Use cached info instead of extra flags

2019-06-27 Thread tip-bot for Thomas Gleixner
Commit-ID:  45e0a415634600e608188480bc355b20344f9e3f
Gitweb: https://git.kernel.org/tip/45e0a415634600e608188480bc355b20344f9e3f
Author: Thomas Gleixner 
AuthorDate: Sun, 23 Jun 2019 15:24:04 +0200
Committer:  Thomas Gleixner 
CommitDate: Fri, 28 Jun 2019 00:57:25 +0200

x86/hpet: Use cached info instead of extra flags

Now that HPET clockevent support is integrated into the channel data, reuse
the cached boot configuration instead of copying the same information into
a flags field.

This also makes it possible to consolidate the reservation code into one place, which
can now solely depend on the mode information.

Signed-off-by: Thomas Gleixner 
Reviewed-by: Ingo Molnar 
Cc: Peter Zijlstra 
Cc: Ricardo Neri 
Cc: Ashok Raj 
Cc: Andi Kleen 
Cc: Suravee Suthikulpanit 
Cc: Stephane Eranian 
Cc: Ravi Shankar 
Link: https://lkml.kernel.org/r/20190623132436.277510...@linutronix.de
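
The idea in a small hedged sketch (made-up names and bit value, not the HPET
register layout): keep the register snapshot taken once at boot and test the
hardware capability bit directly, instead of copying it into a private flags
word (HPET_DEV_PERI_CAP style) that then has to be kept in sync.

#include <stdbool.h>
#include <stdio.h>

#define TN_PERIODIC_CAP	(1u << 4)	/* capability bit; value made up for the demo */

struct channel {
	unsigned int boot_cfg;		/* register snapshot cached at init */
};

/* The cached register is the single source of truth; no separate
 * "valid"/"periodic capable" flag word needs to be maintained.   */
static bool channel_can_do_periodic(const struct channel *hc)
{
	return hc->boot_cfg & TN_PERIODIC_CAP;
}

int main(void)
{
	struct channel hc = { .boot_cfg = TN_PERIODIC_CAP };

	printf("periodic capable: %s\n",
	       channel_can_do_periodic(&hc) ? "yes" : "no");
	return 0;
}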

---
 arch/x86/kernel/hpet.c | 76 +++---
 1 file changed, 23 insertions(+), 53 deletions(-)

diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 7f76f07138a6..985a2246d20c 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -25,8 +25,8 @@ struct hpet_channel {
unsigned intnum;
unsigned intcpu;
unsigned intirq;
+   unsigned intin_use;
enum hpet_mode  mode;
-   unsigned intflags;
unsigned intboot_cfg;
charname[10];
 };
@@ -40,12 +40,6 @@ struct hpet_base {
 
 #define HPET_MASK  CLOCKSOURCE_MASK(32)
 
-#define HPET_DEV_USED_BIT  2
-#define HPET_DEV_USED  (1 << HPET_DEV_USED_BIT)
-#define HPET_DEV_VALID 0x8
-#define HPET_DEV_FSB_CAP   0x1000
-#define HPET_DEV_PERI_CAP  0x2000
-
 #define HPET_MIN_CYCLES		128
 #define HPET_MIN_PROG_DELTA	(HPET_MIN_CYCLES + (HPET_MIN_CYCLES >> 1))
 
@@ -62,6 +56,7 @@ static struct irq_domain  *hpet_domain;
 #endif
 
 static void __iomem*hpet_virt_address;
+
 static struct hpet_basehpet_base;
 
 static boolhpet_legacy_int_enabled;
@@ -190,8 +185,6 @@ do {								\
  */
 #ifdef CONFIG_HPET
 
-static void hpet_reserve_msi_timers(struct hpet_data *hd);
-
 static void __init hpet_reserve_platform_timers(void)
 {
struct hpet_data hd;
@@ -201,11 +194,6 @@ static void __init hpet_reserve_platform_timers(void)
hd.hd_phys_address  = hpet_address;
hd.hd_address   = hpet_virt_address;
hd.hd_nirqs = hpet_base.nr_channels;
-   hpet_reserve_timer(&hd, 0);
-
-#ifdef CONFIG_HPET_EMULATE_RTC
-   hpet_reserve_timer(&hd, 1);
-#endif
 
/*
 * NOTE that hd_irq[] reflects IOAPIC input pins (LEGACY_8254
@@ -215,13 +203,25 @@ static void __init hpet_reserve_platform_timers(void)
hd.hd_irq[0] = HPET_LEGACY_8254;
hd.hd_irq[1] = HPET_LEGACY_RTC;
 
-   for (i = 2; i < hpet_base.nr_channels; i++)
-   hd.hd_irq[i] = hpet_base.channels[i].irq;
+   for (i = 0; i < hpet_base.nr_channels; i++) {
+   struct hpet_channel *hc = hpet_base.channels + i;
 
-   hpet_reserve_msi_timers(&hd);
+   if (i >= 2)
+   hd.hd_irq[i] = hc->irq;
 
-   hpet_alloc(&hd);
+   switch (hc->mode) {
+   case HPET_MODE_UNUSED:
+   case HPET_MODE_DEVICE:
+   hc->mode = HPET_MODE_DEVICE;
+   break;
+   case HPET_MODE_CLOCKEVT:
+   case HPET_MODE_LEGACY:
+   hpet_reserve_timer(&hd, hc->num);
+   break;
+   }
+   }
 
+   hpet_alloc(&hd);
 }
 
 static void __init hpet_select_device_channel(void)
@@ -543,13 +543,11 @@ static int hpet_setup_irq(struct hpet_channel *hc)
return 0;
 }
 
+/* Invoked from the hotplug callback on @cpu */
 static void init_one_hpet_msi_clockevent(struct hpet_channel *hc, int cpu)
 {
	struct clock_event_device *evt = &hc->evt;
 
-   if (!(hc->flags & HPET_DEV_VALID))
-   return;
-
hc->cpu = cpu;
per_cpu(cpu_hpet_channel, cpu) = hc;
evt->name = hc->name;
@@ -558,7 +556,7 @@ static void init_one_hpet_msi_clockevent(struct hpet_channel *hc, int cpu)
 
evt->rating = 110;
evt->features = CLOCK_EVT_FEAT_ONESHOT;
-   if (hc->flags & HPET_DEV_PERI_CAP) {
+   if (hc->boot_cfg & HPET_TN_PERIODIC) {
evt->features |= CLOCK_EVT_FEAT_PERIODIC;
evt->set_state_periodic = hpet_msi_set_periodic;
}
@@ -580,11 +578,9 @@ static struct hpet_channel *hpet_get_unused_clockevent(void)
for (i = 0; i < 

[tip:x86/timers] x86/hpet: Use cached channel data

2019-06-27 Thread tip-bot for Thomas Gleixner
Commit-ID:  2460d5878ad69c178f9ff1cc3eee9f09b017e15f
Gitweb: https://git.kernel.org/tip/2460d5878ad69c178f9ff1cc3eee9f09b017e15f
Author: Thomas Gleixner 
AuthorDate: Sun, 23 Jun 2019 15:23:59 +0200
Committer:  Thomas Gleixner 
CommitDate: Fri, 28 Jun 2019 00:57:22 +0200

x86/hpet: Use cached channel data

Instead of rereading the HPET registers over and over use the information
which was cached in hpet_enable().

Signed-off-by: Thomas Gleixner 
Reviewed-by: Ingo Molnar 
Cc: Peter Zijlstra 
Cc: Ricardo Neri 
Cc: Ashok Raj 
Cc: Andi Kleen 
Cc: Suravee Suthikulpanit 
Cc: Stephane Eranian 
Cc: Ravi Shankar 
Link: https://lkml.kernel.org/r/20190623132435.821728...@linutronix.de
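
The pattern, as a hedged standalone sketch (made-up register layout, not the
HPET ID register): decode the hardware register once at init time, store the
result in the cached structure, and let every later user consult the cache
instead of issuing another MMIO read.

#include <stdio.h>

struct base {
	unsigned int nr_channels;	/* decoded once from the ID register */
};

static struct base cached;

/* Pretend MMIO read; value and field layout are invented for the demo. */
static unsigned int read_id_register(void)
{
	return 0x00000201;
}

static void probe(void)
{
	unsigned int id = read_id_register();

	/* Example: "number of channels minus one" field in bits 8..12. */
	cached.nr_channels = ((id >> 8) & 0x1f) + 1;
}

int main(void)
{
	probe();
	/* Later code uses the cache instead of touching the hardware again. */
	printf("channels: %u\n", cached.nr_channels);
	return 0;
}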

---
 arch/x86/kernel/hpet.c | 41 -
 1 file changed, 16 insertions(+), 25 deletions(-)

diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 59a81d7fd05b..8711f1fdef8f 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -24,6 +24,7 @@ struct hpet_dev {
 
 struct hpet_channel {
unsigned intnum;
+   unsigned intirq;
unsigned intboot_cfg;
 };
 
@@ -52,7 +53,6 @@ u8			hpet_blockid; /* OS timer block num */
 bool   hpet_msi_disable;
 
 #ifdef CONFIG_PCI_MSI
-static unsigned inthpet_num_timers;
 static struct hpet_dev *hpet_devs;
 static DEFINE_PER_CPU(struct hpet_dev *, cpu_hpet_dev);
 static struct irq_domain   *hpet_domain;
@@ -189,19 +189,15 @@ do {								\
 
 static void hpet_reserve_msi_timers(struct hpet_data *hd);
 
-static void __init hpet_reserve_platform_timers(unsigned int id)
+static void __init hpet_reserve_platform_timers(void)
 {
-   struct hpet __iomem *hpet = hpet_virt_address;
-   struct hpet_timer __iomem *timer = &hpet->hpet_timers[2];
-   unsigned int nrtimers, i;
struct hpet_data hd;
-
-   nrtimers = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT) + 1;
+   unsigned int i;
 
memset(, 0, sizeof(hd));
hd.hd_phys_address  = hpet_address;
-   hd.hd_address   = hpet;
-   hd.hd_nirqs = nrtimers;
+   hd.hd_address   = hpet_virt_address;
+   hd.hd_nirqs = hpet_base.nr_channels;
	hpet_reserve_timer(&hd, 0);
 
 #ifdef CONFIG_HPET_EMULATE_RTC
@@ -216,10 +212,8 @@ static void __init hpet_reserve_platform_timers(unsigned int id)
hd.hd_irq[0] = HPET_LEGACY_8254;
hd.hd_irq[1] = HPET_LEGACY_RTC;
 
-   for (i = 2; i < nrtimers; timer++, i++) {
-   hd.hd_irq[i] = (readl(&timer->hpet_config) &
-   Tn_INT_ROUTE_CNF_MASK) >> Tn_INT_ROUTE_CNF_SHIFT;
-   }
+   for (i = 2; i < hpet_base.nr_channels; i++)
+   hd.hd_irq[i] = hpet_base.channels[i].irq;
 
	hpet_reserve_msi_timers(&hd);
 
@@ -227,7 +221,7 @@ static void __init hpet_reserve_platform_timers(unsigned int id)
 
 }
 #else
-static void hpet_reserve_platform_timers(unsigned int id) { }
+static inline void hpet_reserve_platform_timers(void) { }
 #endif
 
 /* Common HPET functions */
@@ -569,7 +563,7 @@ static struct hpet_dev *hpet_get_unused_timer(void)
if (!hpet_devs)
return NULL;
 
-   for (i = 0; i < hpet_num_timers; i++) {
+   for (i = 0; i < hpet_base.nr_channels; i++) {
	struct hpet_dev *hdev = &hpet_devs[i];
 
if (!(hdev->flags & HPET_DEV_VALID))
@@ -612,7 +606,6 @@ static int hpet_cpuhp_dead(unsigned int cpu)
 
 static void __init hpet_msi_capability_lookup(unsigned int start_timer)
 {
-   unsigned int id;
unsigned int num_timers;
unsigned int num_timers_used = 0;
int i, irq;
@@ -622,10 +615,8 @@ static void __init hpet_msi_capability_lookup(unsigned int start_timer)
 
if (boot_cpu_has(X86_FEATURE_ARAT))
return;
-   id = hpet_readl(HPET_ID);
 
-   num_timers = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT);
-   num_timers++; /* Value read out starts from 0 */
+   num_timers = hpet_base.nr_channels;
hpet_print_config();
 
hpet_domain = hpet_create_irq_domain(hpet_blockid);
@@ -636,11 +627,9 @@ static void __init hpet_msi_capability_lookup(unsigned int start_timer)
if (!hpet_devs)
return;
 
-   hpet_num_timers = num_timers;
-
for (i = start_timer; i < num_timers - RESERVE_TIMERS; i++) {
	struct hpet_dev *hdev = &hpet_devs[num_timers_used];
-   unsigned int cfg = hpet_readl(HPET_Tn_CFG(i));
+   unsigned int cfg = hpet_base.channels[i].boot_cfg;
 
/* Only consider HPET timer with MSI support */
if (!(cfg & HPET_TN_FSB_CAP))
@@ -676,7 +665,7 @@ static void __init hpet_reserve_msi_timers(struct hpet_data *hd)
if (!hpet_devs)

[tip:x86/timers] x86/hpet: Introduce struct hpet_base and struct hpet_channel

2019-06-27 Thread tip-bot for Thomas Gleixner
Commit-ID:  e37f0881e9d9ec8b12f242cc2b78d93259aa7f0f
Gitweb: https://git.kernel.org/tip/e37f0881e9d9ec8b12f242cc2b78d93259aa7f0f
Author: Thomas Gleixner 
AuthorDate: Sun, 23 Jun 2019 15:23:58 +0200
Committer:  Thomas Gleixner 
CommitDate: Fri, 28 Jun 2019 00:57:21 +0200

x86/hpet: Introduce struct hpet_base and struct hpet_channel

Introduce new data structures to replace the ad hoc collection of separate
variables and pointers.

Replace the boot configuration store and restore as a first step.

Signed-off-by: Thomas Gleixner 
Reviewed-by: Ingo Molnar 
Cc: Peter Zijlstra 
Cc: Ricardo Neri 
Cc: Ashok Raj 
Cc: Andi Kleen 
Cc: Suravee Suthikulpanit 
Cc: Stephane Eranian 
Cc: Ravi Shankar 
Link: https://lkml.kernel.org/r/20190623132435.728456...@linutronix.de

---
 arch/x86/kernel/hpet.c | 82 +-
 1 file changed, 48 insertions(+), 34 deletions(-)

diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index ed2d556f2c96..59a81d7fd05b 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -22,6 +22,17 @@ struct hpet_dev {
charname[10];
 };
 
+struct hpet_channel {
+   unsigned intnum;
+   unsigned intboot_cfg;
+};
+
+struct hpet_base {
+   unsigned intnr_channels;
+   unsigned intboot_cfg;
+   struct hpet_channel *channels;
+};
+
 #define HPET_MASK  CLOCKSOURCE_MASK(32)
 
 #define HPET_DEV_USED_BIT  2
@@ -48,7 +59,7 @@ static struct irq_domain  *hpet_domain;
 #endif
 
 static void __iomem*hpet_virt_address;
-static u32 *hpet_boot_cfg;
+static struct hpet_basehpet_base;
 
 static boolhpet_legacy_int_enabled;
 static unsigned long   hpet_freq;
@@ -860,6 +871,7 @@ int __init hpet_enable(void)
 {
u32 hpet_period, cfg, id;
unsigned int i, channels;
+   struct hpet_channel *hc;
u64 freq;
 
if (!is_hpet_capable())
@@ -899,34 +911,39 @@ int __init hpet_enable(void)
/* This is the HPET channel number which is zero based */
channels = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT) + 1;
 
-#ifdef CONFIG_HPET_EMULATE_RTC
/*
 * The legacy routing mode needs at least two channels, tick timer
 * and the rtc emulation channel.
 */
-   if (channels < 2)
+   if (IS_ENABLED(CONFIG_HPET_EMULATE_RTC) && channels < 2)
goto out_nohpet;
-#endif
 
+   hc = kcalloc(channels, sizeof(*hc), GFP_KERNEL);
+   if (!hc) {
+   pr_warn("Disabling HPET.\n");
+   goto out_nohpet;
+   }
+   hpet_base.channels = hc;
+   hpet_base.nr_channels = channels;
+
+   /* Read, store and sanitize the global configuration */
cfg = hpet_readl(HPET_CFG);
-   /* Allocate entries for the global and the channel configurations */
-   hpet_boot_cfg = kmalloc_array(channels + 1, sizeof(*hpet_boot_cfg),
- GFP_KERNEL);
-   if (hpet_boot_cfg)
-   *hpet_boot_cfg = cfg;
-   else
-   pr_warn("HPET initial state will not be saved\n");
+   hpet_base.boot_cfg = cfg;
cfg &= ~(HPET_CFG_ENABLE | HPET_CFG_LEGACY);
hpet_writel(cfg, HPET_CFG);
if (cfg)
pr_warn("Global config: Unknown bits %#x\n", cfg);
 
-   for (i = 0; i < channels; ++i) {
+   /* Read, store and sanitize the per channel configuration */
+   for (i = 0; i < channels; i++, hc++) {
+   hc->num = i;
+
cfg = hpet_readl(HPET_Tn_CFG(i));
-   if (hpet_boot_cfg)
-   hpet_boot_cfg[i + 1] = cfg;
+   hc->boot_cfg = cfg;
+
cfg &= ~(HPET_TN_ENABLE | HPET_TN_LEVEL | HPET_TN_FSB);
hpet_writel(cfg, HPET_Tn_CFG(i));
+
cfg &= ~(HPET_TN_PERIODIC | HPET_TN_PERIODIC_CAP
 | HPET_TN_64BIT_CAP | HPET_TN_32BIT | HPET_TN_ROUTE
 | HPET_TN_FSB | HPET_TN_FSB_CAP);
@@ -944,6 +961,9 @@ int __init hpet_enable(void)
return 0;
 
 out_nohpet:
+   kfree(hpet_base.channels);
+   hpet_base.channels = NULL;
+   hpet_base.nr_channels = 0;
hpet_clear_mapping();
hpet_address = 0;
return 0;
@@ -1000,30 +1020,24 @@ fs_initcall(hpet_late_init);
 
 void hpet_disable(void)
 {
-   if (is_hpet_capable() && hpet_virt_address) {
-   unsigned int cfg = hpet_readl(HPET_CFG), id, last;
-
-   if (hpet_boot_cfg)
-   cfg = *hpet_boot_cfg;
-   else if (hpet_legacy_int_enabled) {
-   cfg &= ~HPET_CFG_LEGACY;
-   hpet_legacy_int_enabled = false;
-   }
-   cfg &= 

[tip:x86/timers] x86/hpet: Add function to select a /dev/hpet channel

2019-06-27 Thread tip-bot for Thomas Gleixner
Commit-ID:  af5a1dadf3fcf673906af1a1129b2b7528494ee5
Gitweb: https://git.kernel.org/tip/af5a1dadf3fcf673906af1a1129b2b7528494ee5
Author: Thomas Gleixner 
AuthorDate: Sun, 23 Jun 2019 15:24:01 +0200
Committer:  Thomas Gleixner 
CommitDate: Fri, 28 Jun 2019 00:57:23 +0200

x86/hpet: Add function to select a /dev/hpet channel

If CONFIG_HPET=y is enabled the x86 specific HPET code should reserve at
least one channel for the /dev/hpet character device, so that not all
channels are absorbed for per CPU clockevent devices.

Create a function to assign HPET_MODE_DEVICE so the rework of the
clockevents allocation code can utilize the mode information instead of
reducing the number of evaluated channels by #ifdef hackery.

The function is not yet used, but provided as a separate patch for ease of
review. It will be used when the rework of the clockevent selection takes
place.

Signed-off-by: Thomas Gleixner 
Reviewed-by: Ingo Molnar 
Cc: Peter Zijlstra 
Cc: Ricardo Neri 
Cc: Ashok Raj 
Cc: Andi Kleen 
Cc: Suravee Suthikulpanit 
Cc: Stephane Eranian 
Cc: Ravi Shankar 
Link: https://lkml.kernel.org/r/20190623132436.002758...@linutronix.de

---
 arch/x86/kernel/hpet.c | 17 +
 1 file changed, 17 insertions(+)

diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 3a8ec363d569..640ff75cc523 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -228,8 +228,25 @@ static void __init hpet_reserve_platform_timers(void)
hpet_alloc();
 
 }
+
+static void __init hpet_select_device_channel(void)
+{
+   int i;
+
+   for (i = 0; i < hpet_base.nr_channels; i++) {
+   struct hpet_channel *hc = hpet_base.channels + i;
+
+   /* Associate the first unused channel to /dev/hpet */
+   if (hc->mode == HPET_MODE_UNUSED) {
+   hc->mode = HPET_MODE_DEVICE;
+   return;
+   }
+   }
+}
+
 #else
 static inline void hpet_reserve_platform_timers(void) { }
+static inline void hpet_select_device_channel(void) {}
 #endif
 
 /* Common HPET functions */


[tip:x86/timers] x86/hpet: Add mode information to struct hpet_channel

2019-06-27 Thread tip-bot for Thomas Gleixner
Commit-ID:  9e16e4933e48819a259b8967e72e5765349953b1
Gitweb: https://git.kernel.org/tip/9e16e4933e48819a259b8967e72e5765349953b1
Author: Thomas Gleixner 
AuthorDate: Sun, 23 Jun 2019 15:24:00 +0200
Committer:  Thomas Gleixner 
CommitDate: Fri, 28 Jun 2019 00:57:23 +0200

x86/hpet: Add mode information to struct hpet_channel

The usage of the individual HPET channels is not tracked in a central
place. The information is scattered in different data structures. Also the
HPET reservation in the HPET character device is split out into several
places which makes the code hard to follow.

Assigning a mode to the channel makes it possible to consolidate the reservation code
and paves the way for further simplifications.

As a first step set the mode of the legacy channels when the HPET is in
legacy mode.

Signed-off-by: Thomas Gleixner 
Reviewed-by: Ingo Molnar 
Cc: Peter Zijlstra 
Cc: Ricardo Neri 
Cc: Ashok Raj 
Cc: Andi Kleen 
Cc: Suravee Suthikulpanit 
Cc: Stephane Eranian 
Cc: Ravi Shankar 
Link: https://lkml.kernel.org/r/20190623132435.911652...@linutronix.de

---
 arch/x86/kernel/hpet.c | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 8711f1fdef8f..3a8ec363d569 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -22,9 +22,17 @@ struct hpet_dev {
charname[10];
 };
 
+enum hpet_mode {
+   HPET_MODE_UNUSED,
+   HPET_MODE_LEGACY,
+   HPET_MODE_CLOCKEVT,
+   HPET_MODE_DEVICE,
+};
+
 struct hpet_channel {
unsigned intnum;
unsigned intirq;
+   enum hpet_mode  mode;
unsigned intboot_cfg;
 };
 
@@ -947,6 +955,9 @@ int __init hpet_enable(void)
 
if (id & HPET_ID_LEGSUP) {
hpet_legacy_clockevent_register();
+   hpet_base.channels[0].mode = HPET_MODE_LEGACY;
+   if (IS_ENABLED(CONFIG_HPET_EMULATE_RTC))
+   hpet_base.channels[1].mode = HPET_MODE_LEGACY;
return 1;
}
return 0;


[tip:x86/timers] x86/hpet: Shuffle code around for readability sake

2019-06-27 Thread tip-bot for Thomas Gleixner
Commit-ID:  6bdec41a0cbcbda35c9044915fc8f45503a595a0
Gitweb: https://git.kernel.org/tip/6bdec41a0cbcbda35c9044915fc8f45503a595a0
Author: Thomas Gleixner 
AuthorDate: Sun, 23 Jun 2019 15:23:50 +0200
Committer:  Thomas Gleixner 
CommitDate: Fri, 28 Jun 2019 00:57:18 +0200

x86/hpet: Shuffle code around for readability sake

It doesn't make sense to have init functions in the middle of other
code. Aside from that, further changes in that area create horrible diffs if
the code stays where it is.

No functional change

Signed-off-by: Thomas Gleixner 
Reviewed-by: Ingo Molnar 
Cc: Peter Zijlstra 
Cc: Ricardo Neri 
Cc: Ashok Raj 
Cc: Andi Kleen 
Cc: Suravee Suthikulpanit 
Cc: Stephane Eranian 
Cc: Ravi Shankar 
Link: https://lkml.kernel.org/r/20190623132434.951733...@linutronix.de

---
 arch/x86/kernel/hpet.c | 81 +-
 1 file changed, 41 insertions(+), 40 deletions(-)

diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index d6bd0ed6885b..71533f53fa1d 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -559,6 +559,47 @@ static void init_one_hpet_msi_clockevent(struct hpet_dev *hdev, int cpu)
					0x7FFFFFFF);
 }
 
+static struct hpet_dev *hpet_get_unused_timer(void)
+{
+   int i;
+
+   if (!hpet_devs)
+   return NULL;
+
+   for (i = 0; i < hpet_num_timers; i++) {
+   struct hpet_dev *hdev = &hpet_devs[i];
+
+   if (!(hdev->flags & HPET_DEV_VALID))
+   continue;
+   if (test_and_set_bit(HPET_DEV_USED_BIT,
+   (unsigned long *)&hdev->flags))
+   continue;
+   return hdev;
+   }
+   return NULL;
+}
+
+static int hpet_cpuhp_online(unsigned int cpu)
+{
+   struct hpet_dev *hdev = hpet_get_unused_timer();
+
+   if (hdev)
+   init_one_hpet_msi_clockevent(hdev, cpu);
+   return 0;
+}
+
+static int hpet_cpuhp_dead(unsigned int cpu)
+{
+   struct hpet_dev *hdev = per_cpu(cpu_hpet_dev, cpu);
+
+   if (!hdev)
+   return 0;
+   free_irq(hdev->irq, hdev);
+   hdev->flags &= ~HPET_DEV_USED;
+   per_cpu(cpu_hpet_dev, cpu) = NULL;
+   return 0;
+}
+
 #ifdef CONFIG_HPET
 /* Reserve at least one timer for userspace (/dev/hpet) */
 #define RESERVE_TIMERS 1
@@ -644,46 +685,6 @@ static void __init hpet_reserve_msi_timers(struct hpet_data *hd)
 }
 #endif
 
-static struct hpet_dev *hpet_get_unused_timer(void)
-{
-   int i;
-
-   if (!hpet_devs)
-   return NULL;
-
-   for (i = 0; i < hpet_num_timers; i++) {
-   struct hpet_dev *hdev = &hpet_devs[i];
-
-   if (!(hdev->flags & HPET_DEV_VALID))
-   continue;
-   if (test_and_set_bit(HPET_DEV_USED_BIT,
-   (unsigned long *)&hdev->flags))
-   continue;
-   return hdev;
-   }
-   return NULL;
-}
-
-static int hpet_cpuhp_online(unsigned int cpu)
-{
-   struct hpet_dev *hdev = hpet_get_unused_timer();
-
-   if (hdev)
-   init_one_hpet_msi_clockevent(hdev, cpu);
-   return 0;
-}
-
-static int hpet_cpuhp_dead(unsigned int cpu)
-{
-   struct hpet_dev *hdev = per_cpu(cpu_hpet_dev, cpu);
-
-   if (!hdev)
-   return 0;
-   free_irq(hdev->irq, hdev);
-   hdev->flags &= ~HPET_DEV_USED;
-   per_cpu(cpu_hpet_dev, cpu) = NULL;
-   return 0;
-}
 #else
 
 static inline void hpet_msi_capability_lookup(unsigned int start_timer) { }


[tip:x86/timers] x86/hpet: Separate counter check out of clocksource register code

2019-06-27 Thread tip-bot for Thomas Gleixner
Commit-ID:  3222daf970f30133cc4c639cbecdc29c4ae91b2b
Gitweb: https://git.kernel.org/tip/3222daf970f30133cc4c639cbecdc29c4ae91b2b
Author: Thomas Gleixner 
AuthorDate: Sun, 23 Jun 2019 15:23:51 +0200
Committer:  Thomas Gleixner 
CommitDate: Fri, 28 Jun 2019 00:57:18 +0200

x86/hpet: Separate counter check out of clocksource register code

The init code checks whether the HPET counter works late in the init
function when the clocksource is registered. That should happen right with
the other sanity checks.

Split it into a separate validation function and move it to the other
sanity checks.

Signed-off-by: Thomas Gleixner 
Reviewed-by: Ingo Molnar 
Cc: Peter Zijlstra 
Cc: Ricardo Neri 
Cc: Ashok Raj 
Cc: Andi Kleen 
Cc: Suravee Suthikulpanit 
Cc: Stephane Eranian 
Cc: Ravi Shankar 
Link: https://lkml.kernel.org/r/20190623132435.058540...@linutronix.de

---
 arch/x86/kernel/hpet.c | 65 --
 1 file changed, 31 insertions(+), 34 deletions(-)

diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 71533f53fa1d..8c57dbf15e3b 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -809,38 +809,6 @@ static struct clocksource clocksource_hpet = {
.resume = hpet_resume_counter,
 };
 
-static int __init hpet_clocksource_register(void)
-{
-   u64 start, now;
-   u64 t1;
-
-   /* Start the counter */
-   hpet_restart_counter();
-
-   /* Verify whether hpet counter works */
-   t1 = hpet_readl(HPET_COUNTER);
-   start = rdtsc();
-
-   /*
-* We don't know the TSC frequency yet, but waiting for
-* 200000 TSC cycles is safe:
-* 4 GHz == 50us
-* 1 GHz == 200us
-*/
-   do {
-   rep_nop();
-   now = rdtsc();
-   } while ((now - start) < 200000UL);
-
-   if (t1 == hpet_readl(HPET_COUNTER)) {
-   pr_warn("Counter not counting. HPET disabled\n");
-   return -ENODEV;
-   }
-
-   clocksource_register_hz(&clocksource_hpet, (u32)hpet_freq);
-   return 0;
-}
-
 /*
  * AMD SB700 based systems with spread spectrum enabled use a SMM based
  * HPET emulation to provide proper frequency setting.
@@ -869,6 +837,32 @@ static bool __init hpet_cfg_working(void)
return false;
 }
 
+static bool __init hpet_counting(void)
+{
+   u64 start, now, t1;
+
+   hpet_restart_counter();
+
+   t1 = hpet_readl(HPET_COUNTER);
+   start = rdtsc();
+
+   /*
+* We don't know the TSC frequency yet, but waiting for
+* 200000 TSC cycles is safe:
+* 4 GHz == 50us
+* 1 GHz == 200us
+*/
+   do {
+   rep_nop();
+   now = rdtsc();
+   } while ((now - start) < 200000UL);
+
+   if (t1 == hpet_readl(HPET_COUNTER)) {
+   pr_warn("Counter not counting. HPET disabled\n");
+   return false;
+   }
+   return true;
+}
 
 /**
  * hpet_enable - Try to setup the HPET timer. Returns 1 on success.
@@ -890,6 +884,10 @@ int __init hpet_enable(void)
if (!hpet_cfg_working())
goto out_nohpet;
 
+   /* Validate that the counter is counting */
+   if (!hpet_counting())
+   goto out_nohpet;
+
/*
 * Read the period and check for a sane value:
 */
@@ -948,8 +946,7 @@ int __init hpet_enable(void)
}
hpet_print_config();
 
-   if (hpet_clocksource_register())
-   goto out_nohpet;
+   clocksource_register_hz(&clocksource_hpet, (u32)hpet_freq);
 
if (id & HPET_ID_LEGSUP) {
hpet_legacy_clockevent_register();


[tip:x86/timers] x86/hpet: Decapitalize and rename EVT_TO_HPET_DEV

2019-06-27 Thread tip-bot for Thomas Gleixner
Commit-ID:  3535aa12f7f26fc755514b13aee8fac15741267e
Gitweb: https://git.kernel.org/tip/3535aa12f7f26fc755514b13aee8fac15741267e
Author: Thomas Gleixner 
AuthorDate: Sun, 23 Jun 2019 15:23:53 +0200
Committer:  Thomas Gleixner 
CommitDate: Fri, 28 Jun 2019 00:57:19 +0200

x86/hpet: Decapitalize and rename EVT_TO_HPET_DEV

It's a function, not a macro, and the upcoming changes use 'channel' for the
individual HPET timer units to allow a step-by-step refactoring approach.

Signed-off-by: Thomas Gleixner 
Reviewed-by: Ingo Molnar 
Cc: Peter Zijlstra 
Cc: Ricardo Neri 
Cc: Ashok Raj 
Cc: Andi Kleen 
Cc: Suravee Suthikulpanit 
Cc: Stephane Eranian 
Cc: Ravi Shankar 
Link: https://lkml.kernel.org/r/20190623132435.241032...@linutronix.de

---
 arch/x86/kernel/hpet.c | 27 ++-
 1 file changed, 10 insertions(+), 17 deletions(-)

diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 74756c0a3a10..4cf93294bacc 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -69,9 +69,10 @@ static bool  hpet_verbose;
 
 static struct clock_event_device   hpet_clockevent;
 
-static inline struct hpet_dev *EVT_TO_HPET_DEV(struct clock_event_device *evtdev)
+static inline
+struct hpet_dev *clockevent_to_channel(struct clock_event_device *evt)
 {
-   return container_of(evtdev, struct hpet_dev, evt);
+   return container_of(evt, struct hpet_dev, evt);
 }
 
 inline unsigned int hpet_readl(unsigned int a)
@@ -458,28 +459,22 @@ void hpet_msi_write(struct hpet_dev *hdev, struct msi_msg *msg)
 
 static int hpet_msi_shutdown(struct clock_event_device *evt)
 {
-   struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
-
-   return hpet_shutdown(evt, hdev->num);
+   return hpet_shutdown(evt, clockevent_to_channel(evt)->num);
 }
 
 static int hpet_msi_set_oneshot(struct clock_event_device *evt)
 {
-   struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
-
-   return hpet_set_oneshot(evt, hdev->num);
+   return hpet_set_oneshot(evt, clockevent_to_channel(evt)->num);
 }
 
 static int hpet_msi_set_periodic(struct clock_event_device *evt)
 {
-   struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
-
-   return hpet_set_periodic(evt, hdev->num);
+   return hpet_set_periodic(evt, clockevent_to_channel(evt)->num);
 }
 
 static int hpet_msi_resume(struct clock_event_device *evt)
 {
-   struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
+   struct hpet_dev *hdev = clockevent_to_channel(evt);
struct irq_data *data = irq_get_irq_data(hdev->irq);
struct msi_msg msg;
 
@@ -491,16 +486,14 @@ static int hpet_msi_resume(struct clock_event_device *evt)
 }
 
 static int hpet_msi_next_event(unsigned long delta,
-   struct clock_event_device *evt)
+  struct clock_event_device *evt)
 {
-   struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
-
-   return hpet_next_event(delta, hdev->num);
+   return hpet_next_event(delta, clockevent_to_channel(evt)->num);
 }
 
 static irqreturn_t hpet_interrupt_handler(int irq, void *data)
 {
-   struct hpet_dev *dev = (struct hpet_dev *)data;
+   struct hpet_dev *dev = data;
	struct clock_event_device *hevt = &dev->evt;
 
if (!hevt->event_handler) {


[tip:x86/timers] x86/hpet: Simplify counter validation

2019-06-27 Thread tip-bot for Thomas Gleixner
Commit-ID:  44b5be5733e119300115b98409cbcf9a45b8d3f1
Gitweb: https://git.kernel.org/tip/44b5be5733e119300115b98409cbcf9a45b8d3f1
Author: Thomas Gleixner 
AuthorDate: Sun, 23 Jun 2019 15:23:52 +0200
Committer:  Thomas Gleixner 
CommitDate: Fri, 28 Jun 2019 00:57:19 +0200

x86/hpet: Simplify counter validation

There is no point in looping for 200k TSC cycles and only checking afterwards
whether the HPET counter is working. Read the counter inside the loop and
break out as soon as the counter value changes.

Signed-off-by: Thomas Gleixner 
Reviewed-by: Ingo Molnar 
Cc: Peter Zijlstra 
Cc: Ricardo Neri 
Cc: Ashok Raj 
Cc: Andi Kleen 
Cc: Suravee Suthikulpanit 
Cc: Stephane Eranian 
Cc: Ravi Shankar 
Link: https://lkml.kernel.org/r/20190623132435.149535...@linutronix.de
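
The same restructuring in a tiny generic form (a sketch with a simulated
counter, not the HPET code): test inside the loop and return as soon as
movement is seen, instead of burning the whole budget and checking only once
at the end.

#include <stdbool.h>
#include <stdio.h>

static unsigned int fake_counter;

/* Stand-in for reading a hardware counter; here it simply advances. */
static unsigned int read_counter(void)
{
	return fake_counter++;
}

static bool counter_is_counting(unsigned int budget)
{
	unsigned int t1 = read_counter();
	unsigned int spent;

	/* Early exit: bail out the moment the value changes. */
	for (spent = 0; spent < budget; spent++) {
		if (read_counter() != t1)
			return true;
	}
	return false;
}

int main(void)
{
	printf("counting: %s\n", counter_is_counting(1000) ? "yes" : "no");
	return 0;
}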

---
 arch/x86/kernel/hpet.c | 10 --
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 8c57dbf15e3b..74756c0a3a10 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -853,15 +853,13 @@ static bool __init hpet_counting(void)
 * 1 GHz == 200us
 */
do {
-   rep_nop();
+   if (t1 != hpet_readl(HPET_COUNTER))
+   return true;
now = rdtsc();
	} while ((now - start) < 200000UL);
 
-   if (t1 == hpet_readl(HPET_COUNTER)) {
-   pr_warn("Counter not counting. HPET disabled\n");
-   return false;
-   }
-   return true;
+   pr_warn("Counter not counting. HPET disabled\n");
+   return false;
 }
 
 /**


[tip:x86/timers] x86/hpet: Mark init functions __init

2019-06-27 Thread tip-bot for Thomas Gleixner
Commit-ID:  433526cc0502ff13d9b2fd63ba546a202dac0463
Gitweb: https://git.kernel.org/tip/433526cc0502ff13d9b2fd63ba546a202dac0463
Author: Thomas Gleixner 
AuthorDate: Sun, 23 Jun 2019 15:23:47 +0200
Committer:  Thomas Gleixner 
CommitDate: Fri, 28 Jun 2019 00:57:17 +0200

x86/hpet: Mark init functions __init

They are only called from init code.

Signed-off-by: Thomas Gleixner 
Reviewed-by: Ingo Molnar 
Cc: Peter Zijlstra 
Cc: Ricardo Neri 
Cc: Ashok Raj 
Cc: Andi Kleen 
Cc: Suravee Suthikulpanit 
Cc: Stephane Eranian 
Cc: Ravi Shankar 
Link: https://lkml.kernel.org/r/20190623132434.645357...@linutronix.de

---
 arch/x86/kernel/hpet.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 69cd0829f432..638aaff39819 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -176,7 +176,7 @@ do {								\
 
 static void hpet_reserve_msi_timers(struct hpet_data *hd);
 
-static void hpet_reserve_platform_timers(unsigned int id)
+static void __init hpet_reserve_platform_timers(unsigned int id)
 {
struct hpet __iomem *hpet = hpet_virt_address;
	struct hpet_timer __iomem *timer = &hpet->hpet_timers[2];
@@ -572,7 +572,7 @@ static void init_one_hpet_msi_clockevent(struct hpet_dev *hdev, int cpu)
 #define RESERVE_TIMERS 0
 #endif
 
-static void hpet_msi_capability_lookup(unsigned int start_timer)
+static void __init hpet_msi_capability_lookup(unsigned int start_timer)
 {
unsigned int id;
unsigned int num_timers;
@@ -631,7 +631,7 @@ static void hpet_msi_capability_lookup(unsigned int start_timer)
 }
 
 #ifdef CONFIG_HPET
-static void hpet_reserve_msi_timers(struct hpet_data *hd)
+static void __init hpet_reserve_msi_timers(struct hpet_data *hd)
 {
int i;
 


[tip:x86/timers] x86/hpet: Move static and global variables to one place

2019-06-27 Thread tip-bot for Thomas Gleixner
Commit-ID:  8c273f2c81f0756f65b24771196c0eff7ac90e7b
Gitweb: https://git.kernel.org/tip/8c273f2c81f0756f65b24771196c0eff7ac90e7b
Author: Thomas Gleixner 
AuthorDate: Sun, 23 Jun 2019 15:23:49 +0200
Committer:  Thomas Gleixner 
CommitDate: Fri, 28 Jun 2019 00:57:17 +0200

x86/hpet: Move static and global variables to one place

Having static and global variables sprinkled all over the code is just
annoying to read. Move them all to the top of the file.

Signed-off-by: Thomas Gleixner 
Reviewed-by: Ingo Molnar 
Cc: Peter Zijlstra 
Cc: Ricardo Neri 
Cc: Ashok Raj 
Cc: Andi Kleen 
Cc: Suravee Suthikulpanit 
Cc: Stephane Eranian 
Cc: Ravi Shankar 
Link: https://lkml.kernel.org/r/20190623132434.860549...@linutronix.de

---
 arch/x86/kernel/hpet.c | 50 ++
 1 file changed, 22 insertions(+), 28 deletions(-)

diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index cb120e412dc6..d6bd0ed6885b 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -23,6 +23,15 @@
 #undef  pr_fmt
 #define pr_fmt(fmt) "hpet: " fmt
 
+struct hpet_dev {
+   struct clock_event_device   evt;
+   unsigned intnum;
+   int cpu;
+   unsigned intirq;
+   unsigned intflags;
+   charname[10];
+};
+
 #define HPET_MASK  CLOCKSOURCE_MASK(32)
 
 #define HPET_DEV_USED_BIT  2
@@ -43,18 +52,22 @@ bool			hpet_msi_disable;
 
 #ifdef CONFIG_PCI_MSI
 static unsigned inthpet_num_timers;
+static struct hpet_dev *hpet_devs;
+static DEFINE_PER_CPU(struct hpet_dev *, cpu_hpet_dev);
+static struct irq_domain   *hpet_domain;
 #endif
+
 static void __iomem*hpet_virt_address;
 static u32 *hpet_boot_cfg;
 
-struct hpet_dev {
-   struct clock_event_device   evt;
-   unsigned intnum;
-   int cpu;
-   unsigned intirq;
-   unsigned intflags;
-   charname[10];
-};
+static boolhpet_legacy_int_enabled;
+static unsigned long   hpet_freq;
+
+bool   boot_hpet_disable;
+bool   hpet_force_user;
+static boolhpet_verbose;
+
+static struct clock_event_device   hpet_clockevent;
 
 static inline struct hpet_dev *EVT_TO_HPET_DEV(struct clock_event_device *evtdev)
 {
@@ -85,10 +98,6 @@ static inline void hpet_clear_mapping(void)
 /*
  * HPET command line enable / disable
  */
-bool boot_hpet_disable;
-bool hpet_force_user;
-static bool hpet_verbose;
-
 static int __init hpet_setup(char *str)
 {
while (str) {
@@ -120,11 +129,6 @@ static inline int is_hpet_capable(void)
return !boot_hpet_disable && hpet_address;
 }
 
-/*
- * HPET timer interrupt enable / disable
- */
-static bool hpet_legacy_int_enabled;
-
 /**
  * is_hpet_enabled - check whether the hpet timer interrupt is enabled
  */
@@ -217,13 +221,7 @@ static void __init hpet_reserve_platform_timers(unsigned int id)
 static void hpet_reserve_platform_timers(unsigned int id) { }
 #endif
 
-/*
- * Common hpet info
- */
-static unsigned long hpet_freq;
-
-static struct clock_event_device hpet_clockevent;
-
+/* Common hpet functions */
 static void hpet_stop_counter(void)
 {
u32 cfg = hpet_readl(HPET_CFG);
@@ -430,10 +428,6 @@ static struct clock_event_device hpet_clockevent = {
  */
 #ifdef CONFIG_PCI_MSI
 
-static DEFINE_PER_CPU(struct hpet_dev *, cpu_hpet_dev);
-static struct hpet_dev *hpet_devs;
-static struct irq_domain *hpet_domain;
-
 void hpet_msi_unmask(struct irq_data *data)
 {
struct hpet_dev *hdev = irq_data_get_irq_handler_data(data);


[tip:x86/timers] x86/hpet: Remove unused parameter from hpet_next_event()

2019-06-27 Thread tip-bot for Thomas Gleixner
Commit-ID:  853acaf064acf3aad6189b36de814bd381d35133
Gitweb: https://git.kernel.org/tip/853acaf064acf3aad6189b36de814bd381d35133
Author: Thomas Gleixner 
AuthorDate: Sun, 23 Jun 2019 15:23:45 +0200
Committer:  Thomas Gleixner 
CommitDate: Fri, 28 Jun 2019 00:57:16 +0200

x86/hpet: Remove unused parameter from hpet_next_event()

The clockevent device pointer is not used in this function.

While at it, rename the misnamed 'timer' parameter to 'channel', which makes it
clear what this parameter means.

No functional change.

Signed-off-by: Thomas Gleixner 
Reviewed-by: Ingo Molnar 
Cc: Peter Zijlstra 
Cc: Ricardo Neri 
Cc: Ashok Raj 
Cc: Andi Kleen 
Cc: Suravee Suthikulpanit 
Cc: Stephane Eranian 
Cc: Ravi Shankar 
Link: https://lkml.kernel.org/r/20190623132434.447880...@linutronix.de

---
 arch/x86/kernel/hpet.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 76d63ed62ce8..b2ec52a7773d 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -347,15 +347,14 @@ static int hpet_resume(struct clock_event_device *evt)
return 0;
 }
 
-static int hpet_next_event(unsigned long delta,
-  struct clock_event_device *evt, int timer)
+static int hpet_next_event(unsigned long delta, int channel)
 {
u32 cnt;
s32 res;
 
cnt = hpet_readl(HPET_COUNTER);
cnt += (u32) delta;
-   hpet_writel(cnt, HPET_Tn_CMP(timer));
+   hpet_writel(cnt, HPET_Tn_CMP(channel));
 
/*
 * HPETs are a complete disaster. The compare register is
@@ -407,7 +406,7 @@ static int hpet_legacy_resume(struct clock_event_device *evt)
 static int hpet_legacy_next_event(unsigned long delta,
struct clock_event_device *evt)
 {
-   return hpet_next_event(delta, evt, 0);
+   return hpet_next_event(delta, 0);
 }
 
 /*
@@ -508,7 +507,8 @@ static int hpet_msi_next_event(unsigned long delta,
struct clock_event_device *evt)
 {
struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
-   return hpet_next_event(delta, evt, hdev->num);
+
+   return hpet_next_event(delta, hdev->num);
 }
 
 static irqreturn_t hpet_interrupt_handler(int irq, void *data)


[tip:x86/timers] x86/hpet: Sanitize stub functions

2019-06-27 Thread tip-bot for Thomas Gleixner
Commit-ID:  4ce78e2094fc2736f8ecd04ec85e5566acaed516
Gitweb: https://git.kernel.org/tip/4ce78e2094fc2736f8ecd04ec85e5566acaed516
Author: Thomas Gleixner 
AuthorDate: Sun, 23 Jun 2019 15:23:48 +0200
Committer:  Thomas Gleixner 
CommitDate: Fri, 28 Jun 2019 00:57:17 +0200

x86/hpet: Sanitize stub functions

Mark them inline and remove the pointless 'return;' statement.

Signed-off-by: Thomas Gleixner 
Reviewed-by: Ingo Molnar 
Cc: Peter Zijlstra 
Cc: Ricardo Neri 
Cc: Ashok Raj 
Cc: Andi Kleen 
Cc: Suravee Suthikulpanit 
Cc: Stephane Eranian 
Cc: Ravi Shankar 
Link: https://lkml.kernel.org/r/20190623132434.754768...@linutronix.de

---
 arch/x86/kernel/hpet.c | 12 +++-
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 638aaff39819..cb120e412dc6 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -692,16 +692,10 @@ static int hpet_cpuhp_dead(unsigned int cpu)
 }
 #else
 
-static void hpet_msi_capability_lookup(unsigned int start_timer)
-{
-   return;
-}
+static inline void hpet_msi_capability_lookup(unsigned int start_timer) { }
 
 #ifdef CONFIG_HPET
-static void hpet_reserve_msi_timers(struct hpet_data *hd)
-{
-   return;
-}
+static inline void hpet_reserve_msi_timers(struct hpet_data *hd) { }
 #endif
 
 #define hpet_cpuhp_online  NULL
@@ -820,7 +814,7 @@ static struct clocksource clocksource_hpet = {
.resume = hpet_resume_counter,
 };
 
-static int hpet_clocksource_register(void)
+static int __init hpet_clocksource_register(void)
 {
u64 start, now;
u64 t1;


[tip:x86/timers] x86/hpet: Replace printk(KERN...) with pr_...()

2019-06-27 Thread tip-bot for Thomas Gleixner
Commit-ID:  46e5b64fdeb49e6f95b875fa4702cedf6c37188d
Gitweb: https://git.kernel.org/tip/46e5b64fdeb49e6f95b875fa4702cedf6c37188d
Author: Thomas Gleixner 
AuthorDate: Sun, 23 Jun 2019 15:23:42 +0200
Committer:  Thomas Gleixner 
CommitDate: Fri, 28 Jun 2019 00:57:15 +0200

x86/hpet: Replace printk(KERN...) with pr_...()

And sanitize the format strings while at it.

Signed-off-by: Thomas Gleixner 
Reviewed-by: Ingo Molnar 
Cc: Peter Zijlstra 
Cc: Ricardo Neri 
Cc: Ashok Raj 
Cc: Andi Kleen 
Cc: Suravee Suthikulpanit 
Cc: Stephane Eranian 
Cc: Ravi Shankar 
Link: https://lkml.kernel.org/r/20190623132434.140411...@linutronix.de
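
For context, the pr_*() helpers pick up a per-file prefix through the pr_fmt()
macro, so the single #define added at the top of hpet.c prefixes every message
from that file. A simplified model of the mechanism, using printf() instead of
printk() (GNU-style ##__VA_ARGS__, as the kernel uses):

#include <stdio.h>

/* Every pr_info() call site has its format string wrapped by pr_fmt(),
 * which is how the "hpet: " prefix gets applied in one place.         */
#define pr_fmt(fmt) "hpet: " fmt
#define pr_info(fmt, ...) printf(pr_fmt(fmt), ##__VA_ARGS__)

int main(void)
{
	pr_info("Clockevent registered\n");	/* "hpet: Clockevent registered" */
	pr_info("T%d: CFG_l: 0x%x\n", 0, 0x30);	/* "hpet: T0: CFG_l: 0x30"       */
	return 0;
}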

---
 arch/x86/kernel/hpet.c | 45 +++--
 1 file changed, 19 insertions(+), 26 deletions(-)

diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index a6aa22677768..cf3dbf43e548 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -20,6 +20,9 @@
 #include 
 #include 
 
+#undef  pr_fmt
+#define pr_fmt(fmt) "hpet: " fmt
+
 #define HPET_MASK  CLOCKSOURCE_MASK(32)
 
 #define HPET_DEV_USED_BIT  2
@@ -137,31 +140,28 @@ EXPORT_SYMBOL_GPL(is_hpet_enabled);
 static void _hpet_print_config(const char *function, int line)
 {
u32 i, timers, l, h;
-   printk(KERN_INFO "hpet: %s(%d):\n", function, line);
+   pr_info("%s(%d):\n", function, line);
l = hpet_readl(HPET_ID);
h = hpet_readl(HPET_PERIOD);
timers = ((l & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT) + 1;
-   printk(KERN_INFO "hpet: ID: 0x%x, PERIOD: 0x%x\n", l, h);
+   pr_info("ID: 0x%x, PERIOD: 0x%x\n", l, h);
l = hpet_readl(HPET_CFG);
h = hpet_readl(HPET_STATUS);
-   printk(KERN_INFO "hpet: CFG: 0x%x, STATUS: 0x%x\n", l, h);
+   pr_info("CFG: 0x%x, STATUS: 0x%x\n", l, h);
l = hpet_readl(HPET_COUNTER);
h = hpet_readl(HPET_COUNTER+4);
-   printk(KERN_INFO "hpet: COUNTER_l: 0x%x, COUNTER_h: 0x%x\n", l, h);
+   pr_info("COUNTER_l: 0x%x, COUNTER_h: 0x%x\n", l, h);
 
for (i = 0; i < timers; i++) {
l = hpet_readl(HPET_Tn_CFG(i));
h = hpet_readl(HPET_Tn_CFG(i)+4);
-   printk(KERN_INFO "hpet: T%d: CFG_l: 0x%x, CFG_h: 0x%x\n",
-  i, l, h);
+   pr_info("T%d: CFG_l: 0x%x, CFG_h: 0x%x\n", i, l, h);
l = hpet_readl(HPET_Tn_CMP(i));
h = hpet_readl(HPET_Tn_CMP(i)+4);
-   printk(KERN_INFO "hpet: T%d: CMP_l: 0x%x, CMP_h: 0x%x\n",
-  i, l, h);
+   pr_info("T%d: CMP_l: 0x%x, CMP_h: 0x%x\n", i, l, h);
l = hpet_readl(HPET_Tn_ROUTE(i));
h = hpet_readl(HPET_Tn_ROUTE(i)+4);
-   printk(KERN_INFO "hpet: T%d ROUTE_l: 0x%x, ROUTE_h: 0x%x\n",
-  i, l, h);
+   pr_info("T%d ROUTE_l: 0x%x, ROUTE_h: 0x%x\n", i, l, h);
}
 }
 
@@ -287,7 +287,7 @@ static void hpet_legacy_clockevent_register(void)
	clockevents_config_and_register(&hpet_clockevent, hpet_freq,
					HPET_MIN_PROG_DELTA, 0x7FFFFFFF);
	global_clock_event = &hpet_clockevent;
-   printk(KERN_DEBUG "hpet clockevent registered\n");
+   pr_debug("Clockevent registered\n");
 }
 
 static int hpet_set_periodic(struct clock_event_device *evt, int timer)
@@ -520,8 +520,7 @@ static irqreturn_t hpet_interrupt_handler(int irq, void *data)
	struct clock_event_device *hevt = &dev->evt;
 
if (!hevt->event_handler) {
-   printk(KERN_INFO "Spurious HPET timer interrupt on HPET timer %d\n",
-   dev->num);
+   pr_info("Spurious interrupt HPET timer %d\n", dev->num);
return IRQ_HANDLED;
}
 
@@ -541,8 +540,7 @@ static int hpet_setup_irq(struct hpet_dev *dev)
irq_set_affinity(dev->irq, cpumask_of(dev->cpu));
enable_irq(dev->irq);
 
-   printk(KERN_DEBUG "hpet: %s irq %d for MSI\n",
-dev->name, dev->irq);
+   pr_debug("%s irq %d for MSI\n", dev->name, dev->irq);
 
return 0;
 }
@@ -638,7 +636,7 @@ static void hpet_msi_capability_lookup(unsigned int start_timer)
break;
}
 
-   printk(KERN_INFO "HPET: %d timers in total, %d timers will be used for per-cpu timer\n",
+   pr_info("%d channels of %d reserved for per-cpu timers\n",
num_timers, num_timers_used);
 }
 
@@ -856,8 +854,7 @@ static int hpet_clocksource_register(void)
	} while ((now - start) < 200000UL);
 
if (t1 == hpet_readl(HPET_COUNTER)) {
-   printk(KERN_WARNING
-  "HPET counter not counting. HPET disabled\n");
+   pr_warn("Counter not counting. HPET disabled\n");
return -ENODEV;
}
 
@@ -903,9 +900,7 @@ int __init hpet_enable(void)
 */
	for (i = 0; hpet_readl(HPET_CFG) == 0xffffffff; i++) {
if (i 

[tip:x86/timers] x86/hpet: Remove the unused hpet_msi_read() function

2019-06-27 Thread tip-bot for Thomas Gleixner
Commit-ID:  eb8ec32c45a87efbc6683b771597084c4d904a17
Gitweb: https://git.kernel.org/tip/eb8ec32c45a87efbc6683b771597084c4d904a17
Author: Thomas Gleixner 
AuthorDate: Sun, 23 Jun 2019 15:23:46 +0200
Committer:  Thomas Gleixner 
CommitDate: Fri, 28 Jun 2019 00:57:16 +0200

x86/hpet: Remove the unused hpet_msi_read() function

No users.

Signed-off-by: Thomas Gleixner 
Reviewed-by: Ingo Molnar 
Cc: Peter Zijlstra 
Cc: Ricardo Neri 
Cc: Ashok Raj 
Cc: Andi Kleen 
Cc: Suravee Suthikulpanit 
Cc: Stephane Eranian 
Cc: Ravi Shankar 
Link: https://lkml.kernel.org/r/20190623132434.553729...@linutronix.de

---
 arch/x86/include/asm/hpet.h | 1 -
 arch/x86/kernel/hpet.c  | 7 ---
 2 files changed, 8 deletions(-)

diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h
index 67385d56d4f4..e3209f5de65d 100644
--- a/arch/x86/include/asm/hpet.h
+++ b/arch/x86/include/asm/hpet.h
@@ -81,7 +81,6 @@ struct irq_domain;
 extern void hpet_msi_unmask(struct irq_data *data);
 extern void hpet_msi_mask(struct irq_data *data);
 extern void hpet_msi_write(struct hpet_dev *hdev, struct msi_msg *msg);
-extern void hpet_msi_read(struct hpet_dev *hdev, struct msi_msg *msg);
 extern struct irq_domain *hpet_create_irq_domain(int hpet_id);
 extern int hpet_assign_irq(struct irq_domain *domain,
   struct hpet_dev *dev, int dev_num);
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index b2ec52a7773d..69cd0829f432 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -462,13 +462,6 @@ void hpet_msi_write(struct hpet_dev *hdev, struct msi_msg 
*msg)
hpet_writel(msg->address_lo, HPET_Tn_ROUTE(hdev->num) + 4);
 }
 
-void hpet_msi_read(struct hpet_dev *hdev, struct msi_msg *msg)
-{
-   msg->data = hpet_readl(HPET_Tn_ROUTE(hdev->num));
-   msg->address_lo = hpet_readl(HPET_Tn_ROUTE(hdev->num) + 4);
-   msg->address_hi = 0;
-}
-
 static int hpet_msi_shutdown(struct clock_event_device *evt)
 {
struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);


[tip:x86/timers] x86/hpet: Remove pointless x86-64 specific #include

2019-06-27 Thread tip-bot for Thomas Gleixner
Commit-ID:  7c4b0e0898ebff4d4821d5dd7a564903a1e88821
Gitweb: https://git.kernel.org/tip/7c4b0e0898ebff4d4821d5dd7a564903a1e88821
Author: Thomas Gleixner 
AuthorDate: Sun, 23 Jun 2019 15:23:44 +0200
Committer:  Thomas Gleixner 
CommitDate: Fri, 28 Jun 2019 00:57:16 +0200

x86/hpet: Remove pointless x86-64 specific #include

Nothing requires asm/pgtable.h here anymore.

Signed-off-by: Thomas Gleixner 
Reviewed-by: Ingo Molnar 
Cc: Peter Zijlstra 
Cc: Ricardo Neri 
Cc: Ashok Raj 
Cc: Andi Kleen 
Cc: Suravee Suthikulpanit 
Cc: Stephane Eranian 
Cc: Ravi Shankar 
Link: https://lkml.kernel.org/r/20190623132434.339011...@linutronix.de

---
 arch/x86/kernel/hpet.c | 4 
 1 file changed, 4 deletions(-)

diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index daa97e14296b..76d63ed62ce8 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -71,10 +71,6 @@ static inline void hpet_writel(unsigned int d, unsigned int a)
writel(d, hpet_virt_address + a);
 }
 
-#ifdef CONFIG_X86_64
-#include <asm/pgtable.h>
-#endif
-
 static inline void hpet_set_mapping(void)
 {
hpet_virt_address = ioremap_nocache(hpet_address, HPET_MMAP_SIZE);


[tip:x86/timers] x86/hpet: Restructure init code

2019-06-27 Thread tip-bot for Thomas Gleixner
Commit-ID:  9b0b28de837a3a59b409613d15e90d5569938945
Gitweb: https://git.kernel.org/tip/9b0b28de837a3a59b409613d15e90d5569938945
Author: Thomas Gleixner 
AuthorDate: Sun, 23 Jun 2019 15:23:43 +0200
Committer:  Thomas Gleixner 
CommitDate: Fri, 28 Jun 2019 00:57:15 +0200

x86/hpet: Restructure init code

As a preparatory change for further consolidation, restructure the HPET
init code so it becomes more readable. Fix up misleading and stale comments
and rename variables so they actually make sense.

No intended functional change.

Signed-off-by: Thomas Gleixner 
Reviewed-by: Ingo Molnar 
Cc: Peter Zijlstra 
Cc: Ricardo Neri 
Cc: Ashok Raj 
Cc: Andi Kleen 
Cc: Suravee Suthikulpanit 
Cc: Stephane Eranian 
Cc: Ravi Shankar 
Link: https://lkml.kernel.org/r/20190623132434.247842...@linutronix.de

---
 arch/x86/kernel/hpet.c | 81 +++---
 1 file changed, 43 insertions(+), 38 deletions(-)

diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index cf3dbf43e548..daa97e14296b 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -45,6 +45,7 @@ bool  hpet_msi_disable;
 static unsigned inthpet_num_timers;
 #endif
 static void __iomem*hpet_virt_address;
+static u32 *hpet_boot_cfg;
 
 struct hpet_dev {
struct clock_event_device   evt;
@@ -862,7 +863,34 @@ static int hpet_clocksource_register(void)
return 0;
 }
 
-static u32 *hpet_boot_cfg;
+/*
+ * AMD SB700 based systems with spread spectrum enabled use a SMM based
+ * HPET emulation to provide proper frequency setting.
+ *
+ * On such systems the SMM code is initialized with the first HPET register
+ * access and takes some time to complete. During this time the config
+ * register reads 0x. We check for max 1000 loops whether the
+ * config register reads a non-0x value to make sure that the
+ * HPET is up and running before we proceed any further.
+ *
+ * A counting loop is safe, as the HPET access takes thousands of CPU cycles.
+ *
+ * On non-SB700 based machines this check is only done once and has no
+ * side effects.
+ */
+static bool __init hpet_cfg_working(void)
+{
+   int i;
+
+   for (i = 0; i < 1000; i++) {
+   if (hpet_readl(HPET_CFG) != 0xffffffff)
+   return true;
+   }
+
+   pr_warn("Config register invalid. Disabling HPET\n");
+   return false;
+}
+
 
 /**
  * hpet_enable - Try to setup the HPET timer. Returns 1 on success.
@@ -870,8 +898,8 @@ static u32 *hpet_boot_cfg;
 int __init hpet_enable(void)
 {
u32 hpet_period, cfg, id;
+   unsigned int i, channels;
u64 freq;
-   unsigned int i, last;
 
if (!is_hpet_capable())
return 0;
@@ -880,38 +908,18 @@ int __init hpet_enable(void)
if (!hpet_virt_address)
return 0;
 
+   /* Validate that the config register is working */
+   if (!hpet_cfg_working())
+   goto out_nohpet;
+
/*
 * Read the period and check for a sane value:
 */
hpet_period = hpet_readl(HPET_PERIOD);
-
-   /*
-* AMD SB700 based systems with spread spectrum enabled use a
-* SMM based HPET emulation to provide proper frequency
-* setting. The SMM code is initialized with the first HPET
-* register access and takes some time to complete. During
-* this time the config register reads 0xffffffff. We check
-* for max. 1000 loops whether the config register reads a non
-* 0xffffffff value to make sure that HPET is up and running
-* before we go further. A counting loop is safe, as the HPET
-* access takes thousands of CPU cycles. On non SB700 based
-* machines this check is only done once and has no side
-* effects.
-*/
-   for (i = 0; hpet_readl(HPET_CFG) == 0xffffffff; i++) {
-   if (i == 1000) {
-   pr_warn("Config register invalid. Disabling HPET\n");
-   goto out_nohpet;
-   }
-   }
-
if (hpet_period < HPET_MIN_PERIOD || hpet_period > HPET_MAX_PERIOD)
goto out_nohpet;
 
-   /*
-* The period is a femto seconds value. Convert it to a
-* frequency.
-*/
+   /* The period is a femtoseconds value. Convert it to a frequency. */
freq = FSEC_PER_SEC;
do_div(freq, hpet_period);
hpet_freq = freq;
@@ -923,19 +931,21 @@ int __init hpet_enable(void)
id = hpet_readl(HPET_ID);
hpet_print_config();
 
-   last = (id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT;
+   /* This is the HPET channel number which is zero based */
+   channels = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT) + 1;
 
 #ifdef CONFIG_HPET_EMULATE_RTC
/*
 * The legacy routing mode needs at least two channels, tick timer

[tip:x86/timers] x86/hpet: Simplify CPU online code

2019-06-27 Thread tip-bot for Thomas Gleixner
Commit-ID:  36b9017f0250a5299bb715b3b8c41b5e2b05b320
Gitweb: https://git.kernel.org/tip/36b9017f0250a5299bb715b3b8c41b5e2b05b320
Author: Thomas Gleixner 
AuthorDate: Sun, 23 Jun 2019 15:23:41 +0200
Committer:  Thomas Gleixner 
CommitDate: Fri, 28 Jun 2019 00:57:15 +0200

x86/hpet: Simplify CPU online code

The indirection via work scheduled on the upcoming CPU was necessary with the
old hotplug code because the online callback was invoked on the control CPU
not on the upcoming CPU. The rework of the CPU hotplug core guarantees that
the online callbacks are invoked on the upcoming CPU.

Remove the now pointless work redirection.

Signed-off-by: Thomas Gleixner 
Reviewed-by: Ingo Molnar 
Cc: Peter Zijlstra 
Cc: Ricardo Neri 
Cc: Ashok Raj 
Cc: Andi Kleen 
Cc: Suravee Suthikulpanit 
Cc: Stephane Eranian 
Cc: Ravi Shankar 
Link: https://lkml.kernel.org/r/20190623132434.047254...@linutronix.de
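
A hedged sketch of what the reworked hotplug core guarantees (kernel-context
code around the real cpuhp_setup_state() API, not buildable as a standalone
program; the "demo" names are invented): for CPUHP_AP_* states the online
callback already runs on the CPU that is coming up, so per-CPU setup can be
done directly in the callback.

#include <linux/cpuhotplug.h>
#include <linux/smp.h>
#include <linux/printk.h>

/* For CPUHP_AP_* states the callback executes on the onlining CPU,
 * so smp_processor_id() == cpu here and no work item has to be
 * bounced to the target CPU anymore.                              */
static int demo_cpu_online(unsigned int cpu)
{
	pr_info("online callback for CPU %u runs on CPU %u\n",
		cpu, smp_processor_id());
	return 0;
}

static int __init demo_setup(void)
{
	int ret;

	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "demo:online",
				demo_cpu_online, NULL);
	return ret < 0 ? ret : 0;
}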

---
 arch/x86/kernel/hpet.c | 31 ++-
 1 file changed, 2 insertions(+), 29 deletions(-)

diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index a0573f2e7763..a6aa22677768 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -547,12 +547,10 @@ static int hpet_setup_irq(struct hpet_dev *dev)
return 0;
 }
 
-/* This should be called in specific @cpu */
 static void init_one_hpet_msi_clockevent(struct hpet_dev *hdev, int cpu)
 {
	struct clock_event_device *evt = &hdev->evt;
 
-   WARN_ON(cpu != smp_processor_id());
if (!(hdev->flags & HPET_DEV_VALID))
return;
 
@@ -684,36 +682,12 @@ static struct hpet_dev *hpet_get_unused_timer(void)
return NULL;
 }
 
-struct hpet_work_struct {
-   struct delayed_work work;
-   struct completion complete;
-};
-
-static void hpet_work(struct work_struct *w)
+static int hpet_cpuhp_online(unsigned int cpu)
 {
-   struct hpet_dev *hdev;
-   int cpu = smp_processor_id();
-   struct hpet_work_struct *hpet_work;
+   struct hpet_dev *hdev = hpet_get_unused_timer();
 
-   hpet_work = container_of(w, struct hpet_work_struct, work.work);
-
-   hdev = hpet_get_unused_timer();
if (hdev)
init_one_hpet_msi_clockevent(hdev, cpu);
-
-   complete(&hpet_work->complete);
-}
-
-static int hpet_cpuhp_online(unsigned int cpu)
-{
-   struct hpet_work_struct work;
-
-   INIT_DELAYED_WORK_ONSTACK(&work.work, hpet_work);
-   init_completion(&work.complete);
-   /* FIXME: add schedule_work_on() */
-   schedule_delayed_work_on(cpu, &work.work, 0);
-   wait_for_completion(&work.complete);
-   destroy_delayed_work_on_stack(&work.work);
return 0;
 }
 
@@ -1045,7 +1019,6 @@ static __init int hpet_late_init(void)
if (boot_cpu_has(X86_FEATURE_ARAT))
return 0;
 
-   /* This notifier should be called after workqueue is ready */
ret = cpuhp_setup_state(CPUHP_AP_X86_HPET_ONLINE, "x86/hpet:online",
hpet_cpuhp_online, NULL);
if (ret)


[tip:timers/vdso] MAINTAINERS: Add entry for the generic VDSO library

2019-06-26 Thread tip-bot for Thomas Gleixner
Commit-ID:  e70980312a946a56173843cbc0104b3b0e57a0c7
Gitweb: https://git.kernel.org/tip/e70980312a946a56173843cbc0104b3b0e57a0c7
Author: Thomas Gleixner 
AuthorDate: Mon, 24 Jun 2019 02:34:24 +0200
Committer:  Thomas Gleixner 
CommitDate: Wed, 26 Jun 2019 07:28:11 +0200

MAINTAINERS: Add entry for the generic VDSO library

Assign the following folks in alphabetic order:

 - Andy for being the VDSO wizard of x86 and in general. He's also the
   performance monitor of choice and the code in the generic library is
   heavily influenced by his previous x86 VDSO work.

 - Thomas for being the dude who has to deal with any form of time(r)
   nonsense anyway

 - Vincenzo for being the poor sod who went through all the different
   architecture implementations in order to unify them. A lot of knowledge
   gained from VDSO implementation details to the intricacies of taming the
   build system.

Signed-off-by: Thomas Gleixner 
Cc: Vincenzo Frascino 
Cc: linux-a...@vger.kernel.org
Cc: LAK 
Cc: linux-m...@vger.kernel.org
Cc: linux-kselft...@vger.kernel.org
Cc: Catalin Marinas 
Cc: Will Deacon 
Cc: Arnd Bergmann 
Cc: Russell King 
Cc: Ralf Baechle 
Cc: Paul Burton 
Cc: Daniel Lezcano 
Cc: Mark Salyzyn 
Cc: Peter Collingbourne 
Cc: Shuah Khan 
Cc: Dmitry Safonov <0x7f454...@gmail.com>
Cc: Rasmus Villemoes 
Cc: Huw Davies 
Cc: Shijith Thotton 
Cc: Andre Przywara 
Cc: Dmitry Safonov 
Cc: Andrei Vagin 
Cc: Linus Torvalds 
Cc: Andy Lutomirski 
Cc: Michael Kelley 
Cc: Sasha Levin 
Link: 
https://lkml.kernel.org/r/alpine.deb.2.21.1906240142000.32...@nanos.tec.linutronix.de

---
 MAINTAINERS | 12 
 1 file changed, 12 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index d0ed735994a5..13ece5479167 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6664,6 +6664,18 @@ L:   k...@vger.kernel.org
 S: Supported
 F: drivers/uio/uio_pci_generic.c
 
+GENERIC VDSO LIBRARY:
+M: Andy Lutomirski 
+M: Thomas Gleixner 
+M: Vincenzo Frascino 
+L: linux-kernel@vger.kernel.org
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git 
timers/vdso
+S: Maintained
+F: lib/vdso
+F: kernel/time/vsyscall.c
+F: include/vdso
+F: include/asm-generic/vdso/vsyscall.h
+
 GENWQE (IBM Generic Workqueue Card)
 M: Frank Haverkamp 
 S: Supported


[tip:timers/vdso] lib/vdso: Make delta calculation work correctly

2019-06-26 Thread tip-bot for Thomas Gleixner
Commit-ID:  9d90b93bf325e015bbae31b83f16da5e4e17effa
Gitweb: https://git.kernel.org/tip/9d90b93bf325e015bbae31b83f16da5e4e17effa
Author: Thomas Gleixner 
AuthorDate: Wed, 26 Jun 2019 12:02:00 +0200
Committer:  Thomas Gleixner 
CommitDate: Wed, 26 Jun 2019 14:26:53 +0200

lib/vdso: Make delta calculation work correctly

The x86 vdso implementation, on which the generic vdso library is based,
has subtle (unfortunately undocumented) twists:

 1) The code assumes that the clocksource mask is U64_MAX, which means that
    no bits are masked. That is true for any valid x86 VDSO clocksource.
    Stupidly it still did the mask operation, for no reason and at the wrong
    place, right after reading the clocksource.

 2) It contains a sanity check to catch the case where slightly
unsynchronized TSC values can be observed which would cause the delta
calculation to make a huge jump. It therefore checks whether the
current TSC value is larger than the value on which the current
conversion is based. If it is not larger, the base value is used to
prevent time jumps.

#1 is not only stupid for the x86 case, where it does the masking for no
reason; it is also completely wrong for clocksources with a smaller mask
which can legitimately wrap around during a conversion period. The core
timekeeping code does it correctly by applying the mask after the delta
calculation:

(now - base) & mask

#2 is equally broken for clocksources which have smaller masks and can wrap
around during a conversion period, because there the now > base check is
just wrong and causes stale timestamps and time going backwards issues.

Unbreak it by:

  1) Removing the mask operation from the clocksource read which makes the
 fallback detection work for all clocksources

  2) Replacing the conditional delta calculation with an overridable inline
     function.

#2 could reuse clocksource_delta() from the timekeeping code but that
results in a significant performance hit for the x86 VDSO. The timekeeping
core code must have the non optimized version as it has to operate
correctly with clocksources which have smaller masks as well to handle the
case where TSC is discarded as timekeeper clocksource and replaced by HPET
or pmtimer. For the VDSO there is no replacement clocksource. If TSC is
unusable the syscall is enforced which does the right thing.

To accommodate the needs of various architectures, provide an
overridable inline function which defaults to the regular delta
calculation with masking:

(now - base) & mask

Override it for x86 with the non-masking and checking version.

This unbreaks the ARM64 syscall fallback operation, allows the use of
clocksources with arbitrary width and preserves the performance
optimization for x86.
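
As a reference only, a sketch of the generic masking variant which
architectures get when they do not override vdso_calc_delta(); this is a
restatement of the formula above, not a verbatim copy of
lib/vdso/gettimeofday.c:

    static __always_inline
    u64 generic_vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult)
    {
            /* Mask after the subtraction so narrow clocksources may wrap. */
            return ((cycles - last) & mask) * mult;
    }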

Signed-off-by: Thomas Gleixner 
Reviewed-by: Vincenzo Frascino 
Cc: linux-a...@vger.kernel.org
Cc: LAK 
Cc: linux-m...@vger.kernel.org
Cc: linux-kselft...@vger.kernel.org
Cc: catalin.mari...@arm.com
Cc: Will Deacon 
Cc: Arnd Bergmann 
Cc: li...@armlinux.org.uk
Cc: Ralf Baechle 
Cc: paul.bur...@mips.com
Cc: Daniel Lezcano 
Cc: saly...@android.com
Cc: p...@google.com
Cc: sh...@kernel.org
Cc: 0x7f454...@gmail.com
Cc: li...@rasmusvillemoes.dk
Cc: h...@codeweavers.com
Cc: sthot...@marvell.com
Cc: andre.przyw...@arm.com
Cc: Andy Lutomirski 
Link: 
https://lkml.kernel.org/r/alpine.deb.2.21.1906261159230.32...@nanos.tec.linutronix.de

---
 arch/x86/include/asm/vdso/gettimeofday.h | 27 +++
 lib/vdso/gettimeofday.c  | 19 +++
 2 files changed, 42 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/vdso/gettimeofday.h 
b/arch/x86/include/asm/vdso/gettimeofday.h
index 5b63f1f78a1f..a14039a59abd 100644
--- a/arch/x86/include/asm/vdso/gettimeofday.h
+++ b/arch/x86/include/asm/vdso/gettimeofday.h
@@ -229,6 +229,33 @@ static __always_inline const struct vdso_data 
*__arch_get_vdso_data(void)
return __vdso_data;
 }
 
+/*
+ * x86 specific delta calculation.
+ *
+ * The regular implementation assumes that clocksource reads are globally
+ * monotonic. The TSC can be slightly off across sockets which can cause
+ * the regular delta calculation (@cycles - @last) to return a huge time
+ * jump.
+ *
+ * Therefore it needs to be verified that @cycles are greater than
+ * @last. If not then use @last, which is the base time of the current
+ * conversion period.
+ *
+ * This variant also removes the masking of the subtraction because the
+ * clocksource mask of all VDSO capable clocksources on x86 is U64_MAX
+ * which would result in a pointless operation. The compiler cannot
+ * optimize it away as the mask comes from the vdso data and is not compile
+ * time constant.
+ */
+static __always_inline
+u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult)
+{
+   if (cycles > last)
+   return (cycles - last) * mult;
+   return 0;
+}
+#define vdso_calc_delta vdso_calc_delta
+
 #endif /* !__ASSEMBLY__ */
 

[tip:timers/vdso] MAINTAINERS: Add entry for the generic VDSO library

2019-06-25 Thread tip-bot for Thomas Gleixner
Commit-ID:  c82d735b3d3f0bbfd49a6a4da96bd27c4ba57eb0
Gitweb: https://git.kernel.org/tip/c82d735b3d3f0bbfd49a6a4da96bd27c4ba57eb0
Author: Thomas Gleixner 
AuthorDate: Mon, 24 Jun 2019 02:34:24 +0200
Committer:  Thomas Gleixner 
CommitDate: Tue, 25 Jun 2019 09:44:08 +0200

MAINTAINERS: Add entry for the generic VDSO library

Assign the following folks in alphabetic order:

 - Andy for being the VDSO wizard of x86 and in general. He's also the
   performance monitor of choice and the code in the generic library is
   heavily influenced by his previous x86 VDSO work.

 - Thomas for being the dude who has to deal with any form of time(r)
   nonsense anyway

 - Vincenzo for being the poor sod who went through all the different
   architecture implementations in order to unify them. A lot of knowledge
   gained from VDSO implementation details to the intricacies of taming the
   build system.

Signed-off-by: Thomas Gleixner 
Cc: Vincenzo Frascino 
Cc: linux-a...@vger.kernel.org
Cc: LAK 
Cc: linux-m...@vger.kernel.org
Cc: linux-kselft...@vger.kernel.org
Cc: Catalin Marinas 
Cc: Will Deacon 
Cc: Arnd Bergmann 
Cc: Russell King 
Cc: Ralf Baechle 
Cc: Paul Burton 
Cc: Daniel Lezcano 
Cc: Mark Salyzyn 
Cc: Peter Collingbourne 
Cc: Shuah Khan 
Cc: Dmitry Safonov <0x7f454...@gmail.com>
Cc: Rasmus Villemoes 
Cc: Huw Davies 
Cc: Shijith Thotton 
Cc: Andre Przywara 
Cc: Dmitry Safonov 
Cc: Andrei Vagin 
Cc: Linus Torvalds 
Cc: Andy Lutomirski 
Cc: Michael Kelley 
Cc: Sasha Levin 
Link: 
https://lkml.kernel.org/r/alpine.deb.2.21.1906240142000.32...@nanos.tec.linutronix.de
---
 MAINTAINERS | 12 
 1 file changed, 12 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index d0ed735994a5..13ece5479167 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6664,6 +6664,18 @@ L:   k...@vger.kernel.org
 S: Supported
 F: drivers/uio/uio_pci_generic.c
 
+GENERIC VDSO LIBRARY:
+M: Andy Lutomirski 
+M: Thomas Gleixner 
+M: Vincenzo Frascino 
+L: linux-kernel@vger.kernel.org
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git 
timers/vdso
+S: Maintained
+F: lib/vdso
+F: kernel/time/vsyscall.c
+F: include/vdso
+F: include/asm-generic/vdso/vsyscall.h
+
 GENWQE (IBM Generic Workqueue Card)
 M: Frank Haverkamp 
 S: Supported


[tip:x86/cpu] Documentation/x86/64: Add documentation for GS/FS addressing mode

2019-06-22 Thread tip-bot for Thomas Gleixner
Commit-ID:  2c7b5ac5d5a93c4b0557293d06c6677f765081a6
Gitweb: https://git.kernel.org/tip/2c7b5ac5d5a93c4b0557293d06c6677f765081a6
Author: Thomas Gleixner 
AuthorDate: Thu, 13 Jun 2019 22:04:24 +0300
Committer:  Thomas Gleixner 
CommitDate: Sat, 22 Jun 2019 11:38:57 +0200

Documentation/x86/64: Add documentation for GS/FS addressing mode

Explain how the GS/FS based addressing can be utilized in user space
applications along with the differences between the generic prctl() based
GS/FS base control and the FSGSBASE version available on newer CPUs.

Originally-by: Andi Kleen 
Signed-off-by: Thomas Gleixner 
Cc: "Bae, Chang Seok" 
Cc: Andy Lutomirski ,
Cc: H . Peter Anvin 
Cc: "Shankar, Ravi V" 
Cc: Randy Dunlap 
Cc: Jonathan Corbet 
Link: 
https://lkml.kernel.org/r/alpine.deb.2.21.1906132246310.1...@nanos.tec.linutronix.de
---
 Documentation/x86/x86_64/fsgs.rst  | 199 +
 Documentation/x86/x86_64/index.rst |   1 +
 2 files changed, 200 insertions(+)

diff --git a/Documentation/x86/x86_64/fsgs.rst 
b/Documentation/x86/x86_64/fsgs.rst
new file mode 100644
index ..380c0b5ccca2
--- /dev/null
+++ b/Documentation/x86/x86_64/fsgs.rst
@@ -0,0 +1,199 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Using FS and GS segments in user space applications
+===================================================
+
+The x86 architecture supports segmentation. Instructions which access
+memory can use segment register based addressing mode. The following
+notation is used to address a byte within a segment:
+
+  Segment-register:Byte-address
+
+The segment base address is added to the Byte-address to compute the
+resulting virtual address which is accessed. This allows to access multiple
+instances of data with the identical Byte-address, i.e. the same code. The
+selection of a particular instance is purely based on the base-address in
+the segment register.
+
+In 32-bit mode the CPU provides 6 segments, which also support segment
+limits. The limits can be used to enforce address space protections.
+
+In 64-bit mode the CS/SS/DS/ES segments are ignored and the base address is
+always 0 to provide a full 64bit address space. The FS and GS segments are
+still functional in 64-bit mode.
+
+Common FS and GS usage
+----------------------
+
+The FS segment is commonly used to address Thread Local Storage (TLS). FS
+is usually managed by runtime code or a threading library. Variables
+declared with the '__thread' storage class specifier are instantiated per
+thread and the compiler emits the FS: address prefix for accesses to these
+variables. Each thread has its own FS base address so common code can be
+used without complex address offset calculations to access the per thread
+instances. Applications should not use FS for other purposes when they use
+runtimes or threading libraries which manage the per thread FS.
+
+The GS segment has no common use and can be used freely by
+applications. GCC and Clang support GS based addressing via address space
+identifiers.
+
+Reading and writing the FS/GS base address
+------------------------------------------
+
+There exist two mechanisms to read and write the FS/GS base address:
+
+ - the arch_prctl() system call
+
+ - the FSGSBASE instruction family
+
+Accessing FS/GS base with arch_prctl()
+--------------------------------------
+
+ The arch_prctl(2) based mechanism is available on all 64bit CPUs and all
+ kernel versions.
+
+ Reading the base:
+
+   arch_prctl(ARCH_GET_FS, &fsbase);
+   arch_prctl(ARCH_GET_GS, &gsbase);
+
+ Writing the base:
+
+   arch_prctl(ARCH_SET_FS, fsbase);
+   arch_prctl(ARCH_SET_GS, gsbase);
+
+ The ARCH_SET_GS prctl may be disabled depending on kernel configuration
+ and security settings.
+
+Accessing FS/GS base with the FSGSBASE instructions
+---------------------------------------------------
+
+ With the Ivy Bridge CPU generation Intel introduced a new set of
+ instructions to access the FS and GS base registers directly from user
+ space. These instructions are also supported on AMD Family 17H CPUs. The
+ following instructions are available:
+
+  =============== ==========================
+  RDFSBASE %reg   Read the FS base register
+  RDGSBASE %reg   Read the GS base register
+  WRFSBASE %reg   Write the FS base register
+  WRGSBASE %reg   Write the GS base register
+  =============== ==========================
+
+ The instructions avoid the overhead of the arch_prctl() syscall and allow
+ more flexible usage of the FS/GS addressing modes in user space
+ applications. This does not prevent conflicts between threading libraries
+ and runtimes which utilize FS and applications which want to use it for
+ their own purpose.
+
+FSGSBASE instructions enablement
+--------------------------------
+ The instructions are enumerated in CPUID leaf 7, bit 0 of EBX. If
+ available /proc/cpuinfo shows 'fsgsbase' in the flag entry of the CPUs.
+
+ The availability of the instructions does not enable them
+ automatically. The 

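For illustration, a hedged user-space sketch of the arch_prctl() based
access documented above; it assumes an x86-64 Linux system with the usual
uapi headers and omits error handling:

    #include <stdio.h>
    #include <unistd.h>
    #include <sys/syscall.h>
    #include <asm/prctl.h>

    int main(void)
    {
            unsigned long fsbase = 0, gsbase = 0;

            /* Raw syscall; not all libc versions wrap arch_prctl(). */
            syscall(SYS_arch_prctl, ARCH_GET_FS, &fsbase);
            syscall(SYS_arch_prctl, ARCH_GET_GS, &gsbase);
            printf("FS base: %#lx  GS base: %#lx\n", fsbase, gsbase);
            return 0;
    }
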
[tip:x86/urgent] x86/microcode: Fix the microcode load on CPU hotplug for real

2019-06-19 Thread tip-bot for Thomas Gleixner
Commit-ID:  5423f5ce5ca410b3646f355279e4e937d452e622
Gitweb: https://git.kernel.org/tip/5423f5ce5ca410b3646f355279e4e937d452e622
Author: Thomas Gleixner 
AuthorDate: Tue, 18 Jun 2019 22:31:40 +0200
Committer:  Borislav Petkov 
CommitDate: Wed, 19 Jun 2019 09:16:35 +0200

x86/microcode: Fix the microcode load on CPU hotplug for real

A recent change moved the microcode loader hotplug callback into the early
startup phase which is running with interrupts disabled. It missed that
the callbacks invoke sysfs functions which might sleep causing nice 'might
sleep' splats with proper debugging enabled.

Split the callbacks and only load the microcode in the early startup phase
and move the sysfs handling back into the later threaded and preemptible
bringup phase where it was before.

Fixes: 78f4e932f776 ("x86/microcode, cpuhotplug: Add a microcode loader CPU 
hotplug callback")
Signed-off-by: Thomas Gleixner 
Signed-off-by: Borislav Petkov 
Cc: "H. Peter Anvin" 
Cc: Ingo Molnar 
Cc: sta...@vger.kernel.org
Cc: x86-ml 
Link: 
https://lkml.kernel.org/r/alpine.deb.2.21.1906182228350.1...@nanos.tec.linutronix.de
---
 arch/x86/kernel/cpu/microcode/core.c | 15 ++-
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/cpu/microcode/core.c 
b/arch/x86/kernel/cpu/microcode/core.c
index a813987b5552..cb0fdcaf1415 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -789,13 +789,16 @@ static struct syscore_ops mc_syscore_ops = {
.resume = mc_bp_resume,
 };
 
-static int mc_cpu_online(unsigned int cpu)
+static int mc_cpu_starting(unsigned int cpu)
 {
-   struct device *dev;
-
-   dev = get_cpu_device(cpu);
microcode_update_cpu(cpu);
pr_debug("CPU%d added\n", cpu);
+   return 0;
+}
+
+static int mc_cpu_online(unsigned int cpu)
+{
+   struct device *dev = get_cpu_device(cpu);
 
	if (sysfs_create_group(&dev->kobj, &mc_attr_group))
pr_err("Failed to create group for CPU%d\n", cpu);
@@ -872,7 +875,9 @@ int __init microcode_init(void)
goto out_ucode_group;
 
	register_syscore_ops(&mc_syscore_ops);
-   cpuhp_setup_state_nocalls(CPUHP_AP_MICROCODE_LOADER, "x86/microcode:online",
+   cpuhp_setup_state_nocalls(CPUHP_AP_MICROCODE_LOADER, "x86/microcode:starting",
+                             mc_cpu_starting, NULL);
+   cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "x86/microcode:online",
  mc_cpu_online, mc_cpu_down_prep);
 
pr_info("Microcode Update Driver: v%s.", DRIVER_VERSION);


[tip:timers/urgent] timekeeping: Repair ktime_get_coarse*() granularity

2019-06-14 Thread tip-bot for Thomas Gleixner
Commit-ID:  e3ff9c3678b4d80e22d2557b68726174578eaf52
Gitweb: https://git.kernel.org/tip/e3ff9c3678b4d80e22d2557b68726174578eaf52
Author: Thomas Gleixner 
AuthorDate: Thu, 13 Jun 2019 21:40:45 +0200
Committer:  Thomas Gleixner 
CommitDate: Fri, 14 Jun 2019 11:51:44 +0200

timekeeping: Repair ktime_get_coarse*() granularity

Jason reported that the coarse ktime based time getters advance only once
per second and not once per tick as advertised.

The code reads only the monotonic base time, which advances once per
second. The nanoseconds are accumulated on every tick in xtime_nsec up to
a second and the regular time getters take this nanoseconds offset into
account, but the ktime_get_coarse*() implementation fails to do so.

Add the accumulated xtime_nsec value to the monotonic base time to get the
proper per tick advancing coarse time.

Fixes: b9ff604cff11 ("timekeeping: Add ktime_get_coarse_with_offset")
Reported-by: Jason A. Donenfeld 
Signed-off-by: Thomas Gleixner 
Tested-by: Jason A. Donenfeld 
Cc: Arnd Bergmann 
Cc: Peter Zijlstra 
Cc: Clemens Ladisch 
Cc: Sultan Alsawaf 
Cc: Waiman Long 
Cc: sta...@vger.kernel.org
Link: 
https://lkml.kernel.org/r/alpine.deb.2.21.1906132136280.1...@nanos.tec.linutronix.de

---
 kernel/time/timekeeping.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 85f5912d8f70..44b726bab4bd 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -808,17 +808,18 @@ ktime_t ktime_get_coarse_with_offset(enum tk_offsets offs)
	struct timekeeper *tk = &tk_core.timekeeper;
unsigned int seq;
ktime_t base, *offset = offsets[offs];
+   u64 nsecs;
 
WARN_ON(timekeeping_suspended);
 
do {
		seq = read_seqcount_begin(&tk_core.seq);
base = ktime_add(tk->tkr_mono.base, *offset);
+   nsecs = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift;
 
	} while (read_seqcount_retry(&tk_core.seq, seq));
 
-   return base;
-
+   return base + nsecs;
 }
 EXPORT_SYMBOL_GPL(ktime_get_coarse_with_offset);
 


[tip:x86/paravirt] x86/paravirt: Unify the 32/64 bit paravirt patching code

2019-05-24 Thread tip-bot for Thomas Gleixner
Commit-ID:  fb2af0712fe8831dc152b0b5dd8bc516970da336
Gitweb: https://git.kernel.org/tip/fb2af0712fe8831dc152b0b5dd8bc516970da336
Author: Thomas Gleixner 
AuthorDate: Wed, 24 Apr 2019 15:41:17 +0200
Committer:  Ingo Molnar 
CommitDate: Thu, 25 Apr 2019 12:00:44 +0200

x86/paravirt: Unify the 32/64 bit paravirt patching code

Large parts of these two files are identical. Merge them together.

Signed-off-by: Thomas Gleixner 
Cc: Andy Lutomirski 
Cc: Borislav Petkov 
Cc: Brian Gerst 
Cc: Dave Hansen 
Cc: Denys Vlasenko 
Cc: H. Peter Anvin 
Cc: Juergen Gross 
Cc: Linus Torvalds 
Cc: Peter Zijlstra 
Cc: Rik van Riel 
Link: http://lkml.kernel.org/r/20190424134223.603491...@linutronix.de
Signed-off-by: Ingo Molnar 
---
 arch/x86/kernel/Makefile   |  4 +-
 .../{paravirt_patch_64.c => paravirt_patch.c}  | 62 -
 arch/x86/kernel/paravirt_patch_32.c| 64 --
 3 files changed, 50 insertions(+), 80 deletions(-)

diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 00b7e27bc2b7..62e78a3fd31e 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -30,7 +30,7 @@ KASAN_SANITIZE_paravirt.o := n
 
 OBJECT_FILES_NON_STANDARD_relocate_kernel_$(BITS).o:= y
 OBJECT_FILES_NON_STANDARD_test_nx.o:= y
-OBJECT_FILES_NON_STANDARD_paravirt_patch_$(BITS).o := y
+OBJECT_FILES_NON_STANDARD_paravirt_patch.o := y
 
 ifdef CONFIG_FRAME_POINTER
 OBJECT_FILES_NON_STANDARD_ftrace_$(BITS).o := y
@@ -112,7 +112,7 @@ obj-$(CONFIG_AMD_NB)+= amd_nb.o
 obj-$(CONFIG_DEBUG_NMI_SELFTEST) += nmi_selftest.o
 
 obj-$(CONFIG_KVM_GUEST)+= kvm.o kvmclock.o
-obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o
+obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch.o
 obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= paravirt-spinlocks.o
 obj-$(CONFIG_PARAVIRT_CLOCK)   += pvclock.o
 obj-$(CONFIG_X86_PMEM_LEGACY_DEVICE) += pmem.o
diff --git a/arch/x86/kernel/paravirt_patch_64.c 
b/arch/x86/kernel/paravirt_patch.c
similarity index 59%
rename from arch/x86/kernel/paravirt_patch_64.c
rename to arch/x86/kernel/paravirt_patch.c
index bd1558f90cfb..a47899db9932 100644
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ b/arch/x86/kernel/paravirt_patch.c
@@ -1,9 +1,11 @@
 // SPDX-License-Identifier: GPL-2.0
+#include 
+
 #include 
 #include 
-#include 
 
-#ifdef CONFIG_PARAVIRT_XXL
+#ifdef CONFIG_X86_64
+# ifdef CONFIG_PARAVIRT_XXL
 DEF_NATIVE(irq, irq_disable, "cli");
 DEF_NATIVE(irq, irq_enable, "sti");
 DEF_NATIVE(irq, restore_fl, "pushq %rdi; popfq");
@@ -12,24 +14,49 @@ DEF_NATIVE(mmu, read_cr2, "movq %cr2, %rax");
 DEF_NATIVE(mmu, read_cr3, "movq %cr3, %rax");
 DEF_NATIVE(mmu, write_cr3, "movq %rdi, %cr3");
 DEF_NATIVE(cpu, wbinvd, "wbinvd");
-
 DEF_NATIVE(cpu, usergs_sysret64, "swapgs; sysretq");
 DEF_NATIVE(cpu, swapgs, "swapgs");
 DEF_NATIVE(, mov64, "mov %rdi, %rax");
 
-unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len)
+unsigned int paravirt_patch_ident_64(void *insnbuf, unsigned int len)
 {
-   return paravirt_patch_insns(insnbuf, len,
-   start__mov64, end__mov64);
+   return paravirt_patch_insns(insnbuf, len, start__mov64, end__mov64);
 }
-#endif
+# endif /* CONFIG_PARAVIRT_XXL */
 
-#if defined(CONFIG_PARAVIRT_SPINLOCKS)
+# ifdef CONFIG_PARAVIRT_SPINLOCKS
 DEF_NATIVE(lock, queued_spin_unlock, "movb $0, (%rdi)");
 DEF_NATIVE(lock, vcpu_is_preempted, "xor %eax, %eax");
-#endif
+# endif
+
+#else /* CONFIG_X86_64 */
+
+# ifdef CONFIG_PARAVIRT_XXL
+DEF_NATIVE(irq, irq_disable, "cli");
+DEF_NATIVE(irq, irq_enable, "sti");
+DEF_NATIVE(irq, restore_fl, "push %eax; popf");
+DEF_NATIVE(irq, save_fl, "pushf; pop %eax");
+DEF_NATIVE(cpu, iret, "iret");
+DEF_NATIVE(mmu, read_cr2, "mov %cr2, %eax");
+DEF_NATIVE(mmu, write_cr3, "mov %eax, %cr3");
+DEF_NATIVE(mmu, read_cr3, "mov %cr3, %eax");
+
+unsigned int paravirt_patch_ident_64(void *insnbuf, unsigned int len)
+{
+   /* arg in %edx:%eax, return in %edx:%eax */
+   return 0;
+}
+# endif /* CONFIG_PARAVIRT_XXL */
+
+# ifdef CONFIG_PARAVIRT_SPINLOCKS
+DEF_NATIVE(lock, queued_spin_unlock, "movb $0, (%eax)");
+DEF_NATIVE(lock, vcpu_is_preempted, "xor %eax, %eax");
+# endif
+
+#endif /* !CONFIG_X86_64 */
 
-unsigned native_patch(u8 type, void *ibuf, unsigned long addr, unsigned len)
+unsigned int native_patch(u8 type, void *ibuf, unsigned long addr,
+ unsigned int len)
 {
 #define PATCH_SITE(ops, x) \
case PARAVIRT_PATCH(ops.x): \
@@ -41,14 +68,21 @@ unsigned native_patch(u8 type, void *ibuf, unsigned long 
addr, unsigned len)
PATCH_SITE(irq, save_fl);
PATCH_SITE(irq, irq_enable);
PATCH_SITE(irq, irq_disable);
-   PATCH_SITE(cpu, usergs_sysret64);
-   

[tip:x86/paravirt] x86/paravirt: Replace the paravirt patch asm magic

2019-05-24 Thread tip-bot for Thomas Gleixner
Commit-ID:  0b9d2fc1d0d628c94c6866a2ed3005c6730db512
Gitweb: https://git.kernel.org/tip/0b9d2fc1d0d628c94c6866a2ed3005c6730db512
Author: Thomas Gleixner 
AuthorDate: Wed, 24 Apr 2019 15:41:18 +0200
Committer:  Ingo Molnar 
CommitDate: Thu, 25 Apr 2019 12:00:44 +0200

x86/paravirt: Replace the paravirt patch asm magic

The magic macro DEF_NATIVE() in the paravirt patching code uses inline
assembly to generate a data table for patching in the native instructions.

While clever this is falling apart with LTO and even aside of LTO the
construct is just working by chance according to GCC folks.

Aside of that the tables are constant data and not some form of magic
text.

As these constructs are not subject to frequent changes it is not a
maintenance issue to convert them to regular data tables which are
initialized with hex bytes.

Create a new set of macros and data structures to store the instruction
sequences and convert the code over.
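
A rough, standalone sketch of the idea (names and layout are illustrative,
not the kernel's): the native instruction bytes become plain const data and
patching is a bounded memcpy() into the call site.

    #include <string.h>

    /* Illustrative byte tables for two native instruction sequences. */
    struct demo_patch_data {
            unsigned char irq_disable[1];
            unsigned char irq_enable[1];
    };

    static const struct demo_patch_data demo_patch_data = {
            .irq_disable = { 0xfa },        /* cli */
            .irq_enable  = { 0xfb },        /* sti */
    };

    /* Copy a native sequence into the patch site if it fits. */
    static unsigned int demo_patch_insns(void *insn_buf, unsigned int len,
                                         const unsigned char *start,
                                         unsigned int size)
    {
            if (size > len)
                    return 0;       /* keep the indirect call instead */
            memcpy(insn_buf, start, size);
            return size;
    }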

Reported-by: Andi Kleen 
Signed-off-by: Thomas Gleixner 
Cc: Andy Lutomirski 
Cc: Borislav Petkov 
Cc: Brian Gerst 
Cc: Dave Hansen 
Cc: Denys Vlasenko 
Cc: H. Peter Anvin 
Cc: Juergen Gross 
Cc: Linus Torvalds 
Cc: Peter Zijlstra 
Cc: Rik van Riel 
Link: http://lkml.kernel.org/r/20190424134223.690835...@linutronix.de
Signed-off-by: Ingo Molnar 
---
 arch/x86/include/asm/paravirt_types.h |   4 -
 arch/x86/kernel/paravirt_patch.c  | 142 +++---
 2 files changed, 81 insertions(+), 65 deletions(-)

diff --git a/arch/x86/include/asm/paravirt_types.h 
b/arch/x86/include/asm/paravirt_types.h
index 2474e434a6f7..ae8d6ddfe39a 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -370,10 +370,6 @@ extern struct paravirt_patch_template pv_ops;
 /* Simple instruction patching code. */
 #define NATIVE_LABEL(a,x,b) "\n\t.globl " a #x "_" #b "\n" a #x "_" #b ":\n\t"
 
-#define DEF_NATIVE(ops, name, code)					\
-	__visible extern const char start_##ops##_##name[], end_##ops##_##name[];	\
-	asm(NATIVE_LABEL("start_", ops, name) code NATIVE_LABEL("end_", ops, name))
-
 unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len);
 unsigned paravirt_patch_default(u8 type, void *insnbuf,
unsigned long addr, unsigned len);
diff --git a/arch/x86/kernel/paravirt_patch.c b/arch/x86/kernel/paravirt_patch.c
index a47899db9932..60e7a5e236c0 100644
--- a/arch/x86/kernel/paravirt_patch.c
+++ b/arch/x86/kernel/paravirt_patch.c
@@ -4,103 +4,123 @@
 #include 
 #include 
 
-#ifdef CONFIG_X86_64
-# ifdef CONFIG_PARAVIRT_XXL
-DEF_NATIVE(irq, irq_disable, "cli");
-DEF_NATIVE(irq, irq_enable, "sti");
-DEF_NATIVE(irq, restore_fl, "pushq %rdi; popfq");
-DEF_NATIVE(irq, save_fl, "pushfq; popq %rax");
-DEF_NATIVE(mmu, read_cr2, "movq %cr2, %rax");
-DEF_NATIVE(mmu, read_cr3, "movq %cr3, %rax");
-DEF_NATIVE(mmu, write_cr3, "movq %rdi, %cr3");
-DEF_NATIVE(cpu, wbinvd, "wbinvd");
-DEF_NATIVE(cpu, usergs_sysret64, "swapgs; sysretq");
-DEF_NATIVE(cpu, swapgs, "swapgs");
-DEF_NATIVE(, mov64, "mov %rdi, %rax");
+#define PSTART(d, m)   \
+   patch_data_##d.m
 
-unsigned int paravirt_patch_ident_64(void *insnbuf, unsigned int len)
-{
-   return paravirt_patch_insns(insnbuf, len, start__mov64, end__mov64);
-}
-# endif /* CONFIG_PARAVIRT_XXL */
+#define PEND(d, m) \
+   (PSTART(d, m) + sizeof(patch_data_##d.m))
 
-# ifdef CONFIG_PARAVIRT_SPINLOCKS
-DEF_NATIVE(lock, queued_spin_unlock, "movb $0, (%rdi)");
-DEF_NATIVE(lock, vcpu_is_preempted, "xor %eax, %eax");
-# endif
+#define PATCH(d, m, ibuf, len) \
+   paravirt_patch_insns(ibuf, len, PSTART(d, m), PEND(d, m))
 
-#else /* CONFIG_X86_64 */
+#define PATCH_CASE(ops, m, data, ibuf, len)\
+   case PARAVIRT_PATCH(ops.m): \
+   return PATCH(data, ops##_##m, ibuf, len)
 
-# ifdef CONFIG_PARAVIRT_XXL
-DEF_NATIVE(irq, irq_disable, "cli");
-DEF_NATIVE(irq, irq_enable, "sti");
-DEF_NATIVE(irq, restore_fl, "push %eax; popf");
-DEF_NATIVE(irq, save_fl, "pushf; pop %eax");
-DEF_NATIVE(cpu, iret, "iret");
-DEF_NATIVE(mmu, read_cr2, "mov %cr2, %eax");
-DEF_NATIVE(mmu, write_cr3, "mov %eax, %cr3");
-DEF_NATIVE(mmu, read_cr3, "mov %cr3, %eax");
+#ifdef CONFIG_PARAVIRT_XXL
+struct patch_xxl {
+   const unsigned char irq_irq_disable[1];
+   const unsigned char irq_irq_enable[1];
+   const unsigned char irq_restore_fl[2];
+   const unsigned char irq_save_fl[2];
+   const unsigned char mmu_read_cr2[3];
+   const unsigned char mmu_read_cr3[3];
+   const unsigned char mmu_write_cr3[3];
+# ifdef CONFIG_X86_64
+   const unsigned char cpu_wbinvd[2];
+   const unsigned char cpu_usergs_sysret64[6];
+   const unsigned char 

[tip:x86/paravirt] x86/paravirt: Remove bogus extern declarations

2019-05-24 Thread tip-bot for Thomas Gleixner
Commit-ID:  e05196401657cff3178dc392b739e520b26d4aef
Gitweb: https://git.kernel.org/tip/e05196401657cff3178dc392b739e520b26d4aef
Author: Thomas Gleixner 
AuthorDate: Wed, 24 Apr 2019 15:41:16 +0200
Committer:  Ingo Molnar 
CommitDate: Thu, 25 Apr 2019 11:35:55 +0200

x86/paravirt: Remove bogus extern declarations

These functions are already declared in asm/paravirt.h

Signed-off-by: Thomas Gleixner 
Cc: Andy Lutomirski 
Cc: Borislav Petkov 
Cc: Brian Gerst 
Cc: Dave Hansen 
Cc: Denys Vlasenko 
Cc: H. Peter Anvin 
Cc: Juergen Gross 
Cc: Linus Torvalds 
Cc: Peter Zijlstra 
Cc: Rik van Riel 
Link: http://lkml.kernel.org/r/20190424134223.501598...@linutronix.de
Signed-off-by: Ingo Molnar 
---
 arch/x86/kernel/paravirt_patch_32.c | 3 ---
 arch/x86/kernel/paravirt_patch_64.c | 3 ---
 2 files changed, 6 deletions(-)

diff --git a/arch/x86/kernel/paravirt_patch_32.c 
b/arch/x86/kernel/paravirt_patch_32.c
index de138d3912e4..05d771f81e74 100644
--- a/arch/x86/kernel/paravirt_patch_32.c
+++ b/arch/x86/kernel/paravirt_patch_32.c
@@ -23,9 +23,6 @@ DEF_NATIVE(lock, queued_spin_unlock, "movb $0, (%eax)");
 DEF_NATIVE(lock, vcpu_is_preempted, "xor %eax, %eax");
 #endif
 
-extern bool pv_is_native_spin_unlock(void);
-extern bool pv_is_native_vcpu_is_preempted(void);
-
 unsigned native_patch(u8 type, void *ibuf, unsigned long addr, unsigned len)
 {
 #define PATCH_SITE(ops, x) \
diff --git a/arch/x86/kernel/paravirt_patch_64.c 
b/arch/x86/kernel/paravirt_patch_64.c
index 9d9e04b31077..bd1558f90cfb 100644
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ b/arch/x86/kernel/paravirt_patch_64.c
@@ -29,9 +29,6 @@ DEF_NATIVE(lock, queued_spin_unlock, "movb $0, (%rdi)");
 DEF_NATIVE(lock, vcpu_is_preempted, "xor %eax, %eax");
 #endif
 
-extern bool pv_is_native_spin_unlock(void);
-extern bool pv_is_native_vcpu_is_preempted(void);
-
 unsigned native_patch(u8 type, void *ibuf, unsigned long addr, unsigned len)
 {
 #define PATCH_SITE(ops, x) \


[tip:core/stacktrace] x86/stacktrace: Use common infrastructure

2019-04-29 Thread tip-bot for Thomas Gleixner
Commit-ID:  3599fe12a125fa7118da2bcc5033d7741fb5f3a1
Gitweb: https://git.kernel.org/tip/3599fe12a125fa7118da2bcc5033d7741fb5f3a1
Author: Thomas Gleixner 
AuthorDate: Thu, 25 Apr 2019 11:45:22 +0200
Committer:  Thomas Gleixner 
CommitDate: Mon, 29 Apr 2019 12:37:57 +0200

x86/stacktrace: Use common infrastructure

Replace the stack_trace_save*() functions with the new arch_stack_walk()
interfaces.
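
For context, a hedged caller-side sketch of the plain-array interface
family this series moves x86 onto; stack_trace_save() comes from the same
series, while the demo wrapper and the buffer size are illustrative:

    #include <linux/kernel.h>
    #include <linux/printk.h>
    #include <linux/stacktrace.h>

    static void demo_dump_current_stack(void)
    {
            unsigned long entries[16];
            unsigned int i, nr;

            /* Save up to 16 return addresses of the current task, skip none. */
            nr = stack_trace_save(entries, ARRAY_SIZE(entries), 0);
            for (i = 0; i < nr; i++)
                    pr_info("  %pS\n", (void *)entries[i]);
    }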

Signed-off-by: Thomas Gleixner 
Reviewed-by: Josh Poimboeuf 
Cc: Andy Lutomirski 
Cc: linux-a...@vger.kernel.org
Cc: Steven Rostedt 
Cc: Alexander Potapenko 
Cc: Alexey Dobriyan 
Cc: Andrew Morton 
Cc: Christoph Lameter 
Cc: Pekka Enberg 
Cc: linux...@kvack.org
Cc: David Rientjes 
Cc: Catalin Marinas 
Cc: Dmitry Vyukov 
Cc: Andrey Ryabinin 
Cc: kasan-...@googlegroups.com
Cc: Mike Rapoport 
Cc: Akinobu Mita 
Cc: Christoph Hellwig 
Cc: io...@lists.linux-foundation.org
Cc: Robin Murphy 
Cc: Marek Szyprowski 
Cc: Johannes Thumshirn 
Cc: David Sterba 
Cc: Chris Mason 
Cc: Josef Bacik 
Cc: linux-bt...@vger.kernel.org
Cc: dm-de...@redhat.com
Cc: Mike Snitzer 
Cc: Alasdair Kergon 
Cc: Daniel Vetter 
Cc: intel-...@lists.freedesktop.org
Cc: Joonas Lahtinen 
Cc: Maarten Lankhorst 
Cc: dri-de...@lists.freedesktop.org
Cc: David Airlie 
Cc: Jani Nikula 
Cc: Rodrigo Vivi 
Cc: Tom Zanussi 
Cc: Miroslav Benes 
Link: https://lkml.kernel.org/r/20190425094803.816485...@linutronix.de

---
 arch/x86/Kconfig |   1 +
 arch/x86/kernel/stacktrace.c | 116 +++
 2 files changed, 20 insertions(+), 97 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 5ad92419be19..b5978e35a8a8 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -74,6 +74,7 @@ config X86
select ARCH_MIGHT_HAVE_ACPI_PDC if ACPI
select ARCH_MIGHT_HAVE_PC_PARPORT
select ARCH_MIGHT_HAVE_PC_SERIO
+   select ARCH_STACKWALK
select ARCH_SUPPORTS_ACPI
select ARCH_SUPPORTS_ATOMIC_RMW
select ARCH_SUPPORTS_NUMA_BALANCING if X86_64
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index b2f706f1e0b7..2abf27d7df6b 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -12,75 +12,31 @@
 #include 
 #include 
 
-static int save_stack_address(struct stack_trace *trace, unsigned long addr,
- bool nosched)
-{
-   if (nosched && in_sched_functions(addr))
-   return 0;
-
-   if (trace->skip > 0) {
-   trace->skip--;
-   return 0;
-   }
-
-   if (trace->nr_entries >= trace->max_entries)
-   return -1;
-
-   trace->entries[trace->nr_entries++] = addr;
-   return 0;
-}
-
-static void noinline __save_stack_trace(struct stack_trace *trace,
-  struct task_struct *task, struct pt_regs *regs,
-  bool nosched)
+void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
+struct task_struct *task, struct pt_regs *regs)
 {
struct unwind_state state;
unsigned long addr;
 
-   if (regs)
-   save_stack_address(trace, regs->ip, nosched);
+   if (regs && !consume_entry(cookie, regs->ip, false))
+   return;
 
	for (unwind_start(&state, task, regs, NULL); !unwind_done(&state);
	     unwind_next_frame(&state)) {
		addr = unwind_get_return_address(&state);
-   if (!addr || save_stack_address(trace, addr, nosched))
+   if (!addr || !consume_entry(cookie, addr, false))
break;
}
 }
 
 /*
- * Save stack-backtrace addresses into a stack_trace buffer.
+ * This function returns an error if it detects any unreliable features of the
+ * stack.  Otherwise it guarantees that the stack trace is reliable.
+ *
+ * If the task is not 'current', the caller *must* ensure the task is inactive.
  */
-void save_stack_trace(struct stack_trace *trace)
-{
-   trace->skip++;
-   __save_stack_trace(trace, current, NULL, false);
-}
-EXPORT_SYMBOL_GPL(save_stack_trace);
-
-void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace)
-{
-   __save_stack_trace(trace, current, regs, false);
-}
-
-void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
-{
-   if (!try_get_task_stack(tsk))
-   return;
-
-   if (tsk == current)
-   trace->skip++;
-   __save_stack_trace(trace, tsk, NULL, true);
-
-   put_task_stack(tsk);
-}
-EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
-
-#ifdef CONFIG_HAVE_RELIABLE_STACKTRACE
-
-static int __always_inline
-__save_stack_trace_reliable(struct stack_trace *trace,
-   struct task_struct *task)
+int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry,
+void *cookie, struct task_struct *task)
 {
struct unwind_state state;
struct pt_regs *regs;
@@ -117,7 +73,7 @@ 

[tip:core/stacktrace] lib/stackdepot: Remove obsolete functions

2019-04-29 Thread tip-bot for Thomas Gleixner
Commit-ID:  56d8f079c51afc8b564b9fb0252d48e7b437c1e5
Gitweb: https://git.kernel.org/tip/56d8f079c51afc8b564b9fb0252d48e7b437c1e5
Author: Thomas Gleixner 
AuthorDate: Thu, 25 Apr 2019 11:45:20 +0200
Committer:  Thomas Gleixner 
CommitDate: Mon, 29 Apr 2019 12:37:57 +0200

lib/stackdepot: Remove obsolete functions

No more users of the struct stack_trace based interfaces.
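
For illustration, a hedged sketch of the array based stackdepot usage which
remains after this cleanup; the demo_* wrappers and the buffer size are
placeholders:

    #include <linux/gfp.h>
    #include <linux/kernel.h>
    #include <linux/stackdepot.h>
    #include <linux/stacktrace.h>

    static depot_stack_handle_t demo_record_stack(void)
    {
            unsigned long entries[16];
            unsigned int nr;

            nr = stack_trace_save(entries, ARRAY_SIZE(entries), 0);
            return stack_depot_save(entries, nr, GFP_KERNEL);
    }

    static void demo_print_stack(depot_stack_handle_t handle)
    {
            unsigned long *entries;
            unsigned int nr;

            nr = stack_depot_fetch(handle, &entries);
            stack_trace_print(entries, nr, 0);
    }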

Signed-off-by: Thomas Gleixner 
Reviewed-by: Josh Poimboeuf 
Acked-by: Alexander Potapenko 
Cc: Andy Lutomirski 
Cc: Steven Rostedt 
Cc: Alexey Dobriyan 
Cc: Andrew Morton 
Cc: Christoph Lameter 
Cc: Pekka Enberg 
Cc: linux...@kvack.org
Cc: David Rientjes 
Cc: Catalin Marinas 
Cc: Dmitry Vyukov 
Cc: Andrey Ryabinin 
Cc: kasan-...@googlegroups.com
Cc: Mike Rapoport 
Cc: Akinobu Mita 
Cc: Christoph Hellwig 
Cc: io...@lists.linux-foundation.org
Cc: Robin Murphy 
Cc: Marek Szyprowski 
Cc: Johannes Thumshirn 
Cc: David Sterba 
Cc: Chris Mason 
Cc: Josef Bacik 
Cc: linux-bt...@vger.kernel.org
Cc: dm-de...@redhat.com
Cc: Mike Snitzer 
Cc: Alasdair Kergon 
Cc: Daniel Vetter 
Cc: intel-...@lists.freedesktop.org
Cc: Joonas Lahtinen 
Cc: Maarten Lankhorst 
Cc: dri-de...@lists.freedesktop.org
Cc: David Airlie 
Cc: Jani Nikula 
Cc: Rodrigo Vivi 
Cc: Tom Zanussi 
Cc: Miroslav Benes 
Cc: linux-a...@vger.kernel.org
Link: https://lkml.kernel.org/r/20190425094803.617937...@linutronix.de

---
 include/linux/stackdepot.h |  4 
 lib/stackdepot.c   | 20 
 2 files changed, 24 deletions(-)

diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h
index 4297c6d2991d..0805dee1b6b8 100644
--- a/include/linux/stackdepot.h
+++ b/include/linux/stackdepot.h
@@ -23,13 +23,9 @@
 
 typedef u32 depot_stack_handle_t;
 
-struct stack_trace;
-
-depot_stack_handle_t depot_save_stack(struct stack_trace *trace, gfp_t flags);
 depot_stack_handle_t stack_depot_save(unsigned long *entries,
  unsigned int nr_entries, gfp_t gfp_flags);
 
-void depot_fetch_stack(depot_stack_handle_t handle, struct stack_trace *trace);
 unsigned int stack_depot_fetch(depot_stack_handle_t handle,
   unsigned long **entries);
 
diff --git a/lib/stackdepot.c b/lib/stackdepot.c
index e84f8e58495c..605c61f65d94 100644
--- a/lib/stackdepot.c
+++ b/lib/stackdepot.c
@@ -216,14 +216,6 @@ unsigned int stack_depot_fetch(depot_stack_handle_t handle,
 }
 EXPORT_SYMBOL_GPL(stack_depot_fetch);
 
-void depot_fetch_stack(depot_stack_handle_t handle, struct stack_trace *trace)
-{
-   unsigned int nent = stack_depot_fetch(handle, &trace->entries);
-
-   trace->max_entries = trace->nr_entries = nent;
-}
-EXPORT_SYMBOL_GPL(depot_fetch_stack);
-
 /**
  * stack_depot_save - Save a stack trace from an array
  *
@@ -318,15 +310,3 @@ fast_exit:
return retval;
 }
 EXPORT_SYMBOL_GPL(stack_depot_save);
-
-/**
- * depot_save_stack - save stack in a stack depot.
- * @trace - the stacktrace to save.
- * @alloc_flags - flags for allocating additional memory if required.
- */
-depot_stack_handle_t depot_save_stack(struct stack_trace *trace,
- gfp_t alloc_flags)
-{
-   return stack_depot_save(trace->entries, trace->nr_entries, alloc_flags);
-}
-EXPORT_SYMBOL_GPL(depot_save_stack);


[tip:core/stacktrace] stacktrace: Provide common infrastructure

2019-04-29 Thread tip-bot for Thomas Gleixner
Commit-ID:  214d8ca6ee854f696f75e75511fe66b409e656db
Gitweb: https://git.kernel.org/tip/214d8ca6ee854f696f75e75511fe66b409e656db
Author: Thomas Gleixner 
AuthorDate: Thu, 25 Apr 2019 11:45:21 +0200
Committer:  Thomas Gleixner 
CommitDate: Mon, 29 Apr 2019 12:37:57 +0200

stacktrace: Provide common infrastructure

All architectures which support stacktrace carry duplicated code and
do the stack storage and filtering at the architecture side.

Provide a consolidated interface with a callback function for consuming the
stack entries provided by the architecture specific stack walker. This
removes lots of duplicated code and allows to implement better filtering
than 'skip number of entries' in the future without touching any
architecture specific code.
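
For illustration, a hedged sketch of a consumer callback built on the
interface described above; the demo_* names and the buffer size are
placeholders:

    #include <linux/kernel.h>
    #include <linux/types.h>

    struct demo_trace {
            unsigned long entries[16];
            unsigned int nr;
    };

    /* Matches stack_trace_consume_fn; returns false when the buffer is full. */
    static bool demo_consume_entry(void *cookie, unsigned long addr, bool reliable)
    {
            struct demo_trace *t = cookie;

            if (t->nr >= ARRAY_SIZE(t->entries))
                    return false;
            t->entries[t->nr++] = addr;
            return true;
    }

    /* Typical use: arch_stack_walk(demo_consume_entry, &t, current, NULL); */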

Signed-off-by: Thomas Gleixner 
Reviewed-by: Josh Poimboeuf 
Cc: Andy Lutomirski 
Cc: linux-a...@vger.kernel.org
Cc: Steven Rostedt 
Cc: Alexander Potapenko 
Cc: Alexey Dobriyan 
Cc: Andrew Morton 
Cc: Christoph Lameter 
Cc: Pekka Enberg 
Cc: linux...@kvack.org
Cc: David Rientjes 
Cc: Catalin Marinas 
Cc: Dmitry Vyukov 
Cc: Andrey Ryabinin 
Cc: kasan-...@googlegroups.com
Cc: Mike Rapoport 
Cc: Akinobu Mita 
Cc: Christoph Hellwig 
Cc: io...@lists.linux-foundation.org
Cc: Robin Murphy 
Cc: Marek Szyprowski 
Cc: Johannes Thumshirn 
Cc: David Sterba 
Cc: Chris Mason 
Cc: Josef Bacik 
Cc: linux-bt...@vger.kernel.org
Cc: dm-de...@redhat.com
Cc: Mike Snitzer 
Cc: Alasdair Kergon 
Cc: Daniel Vetter 
Cc: intel-...@lists.freedesktop.org
Cc: Joonas Lahtinen 
Cc: Maarten Lankhorst 
Cc: dri-de...@lists.freedesktop.org
Cc: David Airlie 
Cc: Jani Nikula 
Cc: Rodrigo Vivi 
Cc: Tom Zanussi 
Cc: Miroslav Benes 
Link: https://lkml.kernel.org/r/20190425094803.713568...@linutronix.de

---
 include/linux/stacktrace.h |  39 ++
 kernel/stacktrace.c| 173 +
 lib/Kconfig|   4 ++
 3 files changed, 216 insertions(+)

diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h
index 40decfbb9a24..f0cfd12cb45e 100644
--- a/include/linux/stacktrace.h
+++ b/include/linux/stacktrace.h
@@ -23,6 +23,44 @@ unsigned int stack_trace_save_regs(struct pt_regs *regs, 
unsigned long *store,
 unsigned int stack_trace_save_user(unsigned long *store, unsigned int size);
 
 /* Internal interfaces. Do not use in generic code */
+#ifdef CONFIG_ARCH_STACKWALK
+
+/**
+ * stack_trace_consume_fn - Callback for arch_stack_walk()
+ * @cookie:Caller supplied pointer handed back by arch_stack_walk()
+ * @addr:  The stack entry address to consume
+ * @reliable:  True when the stack entry is reliable. Required by
+ * some printk based consumers.
+ *
+ * Return: True, if the entry was consumed or skipped
+ * False, if there is no space left to store
+ */
+typedef bool (*stack_trace_consume_fn)(void *cookie, unsigned long addr,
+  bool reliable);
+/**
+ * arch_stack_walk - Architecture specific function to walk the stack
+ * @consume_entry: Callback which is invoked by the architecture code for
+ * each entry.
+ * @cookie:Caller supplied pointer which is handed back to
+ * @consume_entry
+ * @task:  Pointer to a task struct, can be NULL
+ * @regs:  Pointer to registers, can be NULL
+ *
+ *  ============ ======= ============================================
+ *  task         regs
+ *  ============ ======= ============================================
+ *  task         NULL    Stack trace from task (can be current)
+ *  current      regs    Stack trace starting on regs->stackpointer
+ *  ============ ======= ============================================
+ */
+void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
+struct task_struct *task, struct pt_regs *regs);
+int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry, void 
*cookie,
+struct task_struct *task);
+void arch_stack_walk_user(stack_trace_consume_fn consume_entry, void *cookie,
+ const struct pt_regs *regs);
+
+#else /* CONFIG_ARCH_STACKWALK */
 struct stack_trace {
unsigned int nr_entries, max_entries;
unsigned long *entries;
@@ -37,6 +75,7 @@ extern void save_stack_trace_tsk(struct task_struct *tsk,
 extern int save_stack_trace_tsk_reliable(struct task_struct *tsk,
 struct stack_trace *trace);
 extern void save_stack_trace_user(struct stack_trace *trace);
+#endif /* !CONFIG_ARCH_STACKWALK */
 #endif /* CONFIG_STACKTRACE */
 
 #if defined(CONFIG_STACKTRACE) && defined(CONFIG_HAVE_RELIABLE_STACKTRACE)
diff --git a/kernel/stacktrace.c b/kernel/stacktrace.c
index dd55312f3fe9..27bafc1e271e 100644
--- a/kernel/stacktrace.c
+++ b/kernel/stacktrace.c
@@ -5,6 +5,8 @@
  *
  *  Copyright (C) 2006 Red Hat, Inc., Ingo Molnar 
  */
+#include 
+#include 
 #include 
 #include 
