On some hardware with multiple clocksources, we have coarse-grained
clocksources that support the CLOCK_SOURCE_SUSPEND_NONSTOP flag, but
which are less ideal for timekeeping than other clocksources which
halt in suspend.

Currently, the timekeeping core only supports timing suspend using
CLOCK_SOURCE_SUSPEND_NONSTOP clocksources if that clocksource is the
current clocksource for timekeeping.

As a result, some architectures try to implement read_persistent_clock64()
using those non-stop clocksources, but that isn't really ideal. Thus this
patch provides logic to allow a registered SUSPEND_NONSTOP clocksource,
which isn't the current clocksource, to be used to calculate the suspend
time.

Suggested-by: Thomas Gleixner <t...@linutronix.de>
Signed-off-by: Baolin Wang <baolin.w...@linaro.org>
---
 include/linux/clocksource.h |    3 +
 kernel/time/clocksource.c   |  152 +++++++++++++++++++++++++++++++++++++++++++
 kernel/time/timekeeping.c   |   22 ++++---
 3 files changed, 169 insertions(+), 8 deletions(-)

diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 7dff196..3089189 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -194,6 +194,9 @@ static inline s64 clocksource_cyc2ns(u64 cycles, u32 mult, u32 shift)
 extern void clocksource_resume(void);
 extern struct clocksource * __init clocksource_default_clock(void);
 extern void clocksource_mark_unstable(struct clocksource *cs);
+extern void
+clocksource_start_suspend_timing(struct clocksource *cs, u64 start_cycles);
+extern u64 clocksource_stop_suspend_timing(struct clocksource *cs, u64 now);
 
 extern u64
 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask, u64 *max_cycles);
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index f89a78e..7778eaa 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -94,6 +94,8 @@
 /*[Clocksource internal variables]---------
  * curr_clocksource:
  *     currently selected clocksource.
+ * suspend_clocksource:
+ *     used to calculate the suspend time.
  * clocksource_list:
  *     linked list with the registered clocksources
  * clocksource_mutex:
@@ -102,10 +104,12 @@
  *     Name of the user-specified clocksource.
  */
 static struct clocksource *curr_clocksource;
+static struct clocksource *suspend_clocksource;
 static LIST_HEAD(clocksource_list);
 static DEFINE_MUTEX(clocksource_mutex);
 static char override_name[CS_NAME_LEN];
 static int finished_booting;
+static u64 suspend_start;
 
 #ifdef CONFIG_CLOCKSOURCE_WATCHDOG
 static void clocksource_watchdog_work(struct work_struct *work);
@@ -447,6 +451,133 @@ static inline void clocksource_watchdog_unlock(unsigned long *flags) { }
 
 #endif /* CONFIG_CLOCKSOURCE_WATCHDOG */
 
+static bool clocksource_is_suspend(struct clocksource *cs)
+{
+       return cs == suspend_clocksource;
+}
+
+static void __clocksource_suspend_select(struct clocksource *cs)
+{
+       /*
+        * Skip the clocksource which will be stopped in suspend state.
+        */
+       if (!(cs->flags & CLOCK_SOURCE_SUSPEND_NONSTOP))
+               return;
+
+       /* Pick the best rating. */
+       if (!suspend_clocksource || cs->rating > suspend_clocksource->rating)
+               suspend_clocksource = cs;
+}
+
+/**
+ * clocksource_suspend_select - Select the best clocksource for suspend timing
+ * @fallback:  if select a fallback clocksource
+ */
+static void clocksource_suspend_select(bool fallback)
+{
+       struct clocksource *cs, *old_suspend;
+
+       old_suspend = suspend_clocksource;
+       if (fallback)
+               suspend_clocksource = NULL;
+
+       list_for_each_entry(cs, &clocksource_list, list) {
+               /* Skip current if we were requested for a fallback. */
+               if (fallback && cs == old_suspend)
+                       continue;
+
+               __clocksource_suspend_select(cs);
+       }
+
+       /* If we failed to find a fallback restore the old one. */
+       if (!suspend_clocksource)
+               suspend_clocksource = old_suspend;
+}
+
+/**
+ * clocksource_start_suspend_timing - Start measuring the suspend timing
+ * @cs:                        current clocksource from timekeeping
+ * @start_cycles:      current cycles from timekeeping
+ *
+ * This function will save the start cycle values of suspend timer to calculate
+ * the suspend time when resuming system.
+ *
+ * This function is called late in the suspend process from timekeeping_suspend(),
+ * that means processes are frozen, non-boot cpus and interrupts are disabled
+ * now. It is therefore possible to start the suspend timer without taking the
+ * clocksource mutex.
+ */
+void clocksource_start_suspend_timing(struct clocksource *cs, u64 start_cycles)
+{
+       if (!suspend_clocksource)
+               return;
+
+       /*
+        * If current clocksource is the suspend timer, we should use the
+        * tkr_mono.cycle_last value as suspend_start to avoid same reading
+        * from suspend timer.
+        */
+       if (clocksource_is_suspend(cs)) {
+               suspend_start = start_cycles;
+               return;
+       }
+
+       if (suspend_clocksource->enable &&
+           WARN_ON_ONCE(suspend_clocksource->enable(suspend_clocksource))) {
+               pr_warn_once("Failed to enable the non-suspend-able clocksource.\n");
+               return;
+       }
+
+       suspend_start = suspend_clocksource->read(suspend_clocksource);
+}
+
+/**
+ * clocksource_stop_suspend_timing - Stop measuring the suspend timing
+ * @cs:                current clocksource from timekeeping
+ * @cycle_now: current cycles from timekeeping
+ *
+ * This function will calculate the suspend time from suspend timer, and return
+ * nanoseconds since suspend started or 0 if no usable clocksource.
+ *
+ * This function is called early in the resume process from timekeeping_resume(),
+ * that means there is only one cpu, no processes are running and the interrupts
+ * are disabled. It is therefore possible to stop the suspend timer without
+ * taking the clocksource mutex.
+ */
+u64 clocksource_stop_suspend_timing(struct clocksource *cs, u64 cycle_now)
+{
+       u64 now, delta, nsec = 0;
+
+       if (!suspend_clocksource)
+               return 0;
+
+       /*
+        * If current clocksource is the suspend timer, we should use the
+        * tkr_mono.cycle_last value from timekeeping as current cycle to
+        * avoid same reading from suspend timer.
+        */
+       if (clocksource_is_suspend(cs))
+               now = cycle_now;
+       else
+               now = suspend_clocksource->read(suspend_clocksource);
+
+       if (now > suspend_start) {
+               delta = clocksource_delta(now, suspend_start,
+                                         suspend_clocksource->mask);
+               nsec = mul_u64_u32_shr(delta, suspend_clocksource->mult,
+                                      suspend_clocksource->shift);
+       }
+
+       /*
+        * Disable the suspend timer to save power if current clocksource is
+        * not the suspend timer.
+        */
+       if (!clocksource_is_suspend(cs) && suspend_clocksource->disable)
+               suspend_clocksource->disable(suspend_clocksource);
+
+       return nsec;
+}
+
 /**
  * clocksource_suspend - suspend the clocksource(s)
  */
@@ -779,6 +910,16 @@ int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq)
 {
        unsigned long flags;
 
+       /*
+        * The nonstop clocksource can be selected as the suspend clocksource to
+        * calculate the suspend time, so it should not supply suspend/resume
+        * interfaces to suspend the nonstop clocksource when system suspends.
+        */
+       if ((cs->flags & CLOCK_SOURCE_SUSPEND_NONSTOP) &&
+           (cs->suspend || cs->resume))
+               pr_warn("Nonstop clocksource %s should not supply suspend/resume interfaces\n",
+                       cs->name);
+
        /* Initialize mult/shift and max_idle_ns */
        __clocksource_update_freq_scale(cs, scale, freq);
 
@@ -792,6 +933,7 @@ int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq)
 
        clocksource_select();
        clocksource_select_watchdog(false);
+       __clocksource_suspend_select(cs);
        mutex_unlock(&clocksource_mutex);
        return 0;
 }
@@ -820,6 +962,7 @@ void clocksource_change_rating(struct clocksource *cs, int rating)
 
        clocksource_select();
        clocksource_select_watchdog(false);
+       clocksource_suspend_select(false);
        mutex_unlock(&clocksource_mutex);
 }
 EXPORT_SYMBOL(clocksource_change_rating);
@@ -838,6 +981,15 @@ static int clocksource_unbind(struct clocksource *cs)
                        return -EBUSY;
        }
 
+       if (clocksource_is_suspend(cs)) {
+               /*
+                * Select and try to install a replacement suspend clocksource.
+                */
+               clocksource_suspend_select(true);
+               if (clocksource_is_suspend(cs))
+                       return -EBUSY;
+       }
+
        if (cs == curr_clocksource) {
                /* Select and try to install a replacement clock source */
                clocksource_select_fallback();
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 4786df9..d80dba3 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -1669,7 +1669,7 @@ void timekeeping_resume(void)
        struct clocksource *clock = tk->tkr_mono.clock;
        unsigned long flags;
        struct timespec64 ts_new, ts_delta;
-       u64 cycle_now;
+       u64 cycle_now, nsec;
 
        sleeptime_injected = false;
        read_persistent_clock64(&ts_new);
@@ -1693,13 +1693,8 @@ void timekeeping_resume(void)
         * usable source. The rtc part is handled separately in rtc core code.
         */
        cycle_now = tk_clock_read(&tk->tkr_mono);
-       if ((clock->flags & CLOCK_SOURCE_SUSPEND_NONSTOP) &&
-               cycle_now > tk->tkr_mono.cycle_last) {
-               u64 nsec, cyc_delta;
-
-               cyc_delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last,
-                                             tk->tkr_mono.mask);
-               nsec = mul_u64_u32_shr(cyc_delta, clock->mult, clock->shift);
+       nsec = clocksource_stop_suspend_timing(clock, cycle_now);
+       if (nsec > 0) {
                ts_delta = ns_to_timespec64(nsec);
                sleeptime_injected = true;
        } else if (timespec64_compare(&ts_new, &timekeeping_suspend_time) > 0) {
@@ -1732,6 +1727,8 @@ int timekeeping_suspend(void)
        unsigned long flags;
        struct timespec64               delta, delta_delta;
        static struct timespec64        old_delta;
+       struct clocksource *curr_clock;
+       u64 cycle_now;
 
        read_persistent_clock64(&timekeeping_suspend_time);
 
@@ -1748,6 +1745,15 @@ int timekeeping_suspend(void)
        timekeeping_forward_now(tk);
        timekeeping_suspended = 1;
 
+       /*
+        * Since we've called forward_now, cycle_last stores the value
+        * just read from the current clocksource. Save this to potentially
+        * use in suspend timing.
+        */
+       curr_clock = tk->tkr_mono.clock;
+       cycle_now = tk->tkr_mono.cycle_last;
+       clocksource_start_suspend_timing(curr_clock, cycle_now);
+
        if (persistent_clock_exists) {
                /*
                 * To avoid drift caused by repeated suspend/resumes,
-- 
1.7.9.5

Reply via email to