Hi Thomas,

On 22/08/2019 13:52, Chris Clayton wrote:
> Thanks Thomas.
> 
> On 22/08/2019 12:00, Thomas Gleixner wrote:
>> The VDSO update for CLOCK_BOOTTIME has an overflow issue as it shifts the
>> nanoseconds based boot time offset left by the clocksource shift. That
>> overflows once the boot time offset becomes large enough. As a consequence
>> CLOCK_BOOTTIME in the VDSO becomes a random number causing applications to
>> misbehave.
>>
>> Fix it by storing a timespec64 representation of the offset when boot time
>> is adjusted and add that to the MONOTONIC base time value in the vdso data
>> page. Using the timespec64 representation avoids a 64bit division in the
>> update code.
>>
> 
> I've tested resume from both suspend and hibernate and this patch fixes the 
> problem I reported.
> 
> Tested-by: Chris Clayton <chris2...@googlemail.com>
> 

I can confirm what Chris reported. Please see the test output below the scissors.
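
For reference, the numbers fit the overflow described above: before this
patch the update shifted the full boot offset (in nanoseconds) left by
tk->tkr_mono.shift in a u64, which wraps once the accumulated sleep time
exceeds 2^(64 - shift) ns. With a shift of 25, for instance (an assumed
value; the real shift depends on the clocksource), that is only about 550
seconds of suspend, after which the VDSO CLOCK_BOOTTIME turns into the
garbage values Chris observed.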

With this:

Tested-by: Vincenzo Frascino <vincenzo.frasc...@arm.com>

--->8---

Clock test start
clk_id: CLOCK_BOOTTIME
clock_getres: 0 1
clock_gettime:2697 489679147
2019-08-22 16:21:57.911
Clock test end

<...Suspend/Resume...>

Clock test start
clk_id: CLOCK_BOOTTIME
clock_getres: 0 1
clock_gettime:4489 684341925
2019-08-22 16:51:50.106
Clock test end
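
The log above comes from a small local test. The source is not attached,
so the following is only a minimal sketch that produces the same style of
output (the program and its formatting are mine, not part of the kernel
selftests):

/*
 * Minimal sketch only: prints CLOCK_BOOTTIME resolution and value plus a
 * wall-clock timestamp, roughly matching the log above. Not the actual
 * test program used for this report.
 */
#define _GNU_SOURCE		/* for CLOCK_BOOTTIME on older libcs */
#include <stdio.h>
#include <time.h>

int main(void)
{
	struct timespec res, ts, rt;
	char buf[32];

	printf("Clock test start\n");
	printf("clk_id: CLOCK_BOOTTIME\n");

	if (clock_getres(CLOCK_BOOTTIME, &res))
		return 1;
	printf("clock_getres: %lld %ld\n", (long long)res.tv_sec, res.tv_nsec);

	if (clock_gettime(CLOCK_BOOTTIME, &ts))
		return 1;
	printf("clock_gettime:%lld %ld\n", (long long)ts.tv_sec, ts.tv_nsec);

	/* Wall-clock timestamp for reference */
	clock_gettime(CLOCK_REALTIME, &rt);
	strftime(buf, sizeof(buf), "%Y-%m-%d %H:%M:%S", localtime(&rt.tv_sec));
	printf("%s.%03ld\n", buf, rt.tv_nsec / 1000000);

	printf("Clock test end\n");
	return 0;
}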


>> Fixes: 44f57d788e7d ("timekeeping: Provide a generic update_vsyscall() implementation")
>> Reported-by: Chris Clayton <chris2...@googlemail.com>
>> Signed-off-by: Thomas Gleixner <t...@linutronix.de>
>> ---
>>  include/linux/timekeeper_internal.h |    5 +++++
>>  kernel/time/timekeeping.c           |    5 +++++
>>  kernel/time/vsyscall.c              |   22 +++++++++++++---------
>>  3 files changed, 23 insertions(+), 9 deletions(-)
>>
>> --- a/include/linux/timekeeper_internal.h
>> +++ b/include/linux/timekeeper_internal.h
>> @@ -57,6 +57,7 @@ struct tk_read_base {
>>   * @cs_was_changed_seq:     The sequence number of clocksource change events
>>   * @next_leap_ktime:        CLOCK_MONOTONIC time value of a pending leap-second
>>   * @raw_sec:                CLOCK_MONOTONIC_RAW  time in seconds
>> + * @monotonic_to_boot:      CLOCK_MONOTONIC to CLOCK_BOOTTIME offset
>>   * @cycle_interval: Number of clock cycles in one NTP interval
>>   * @xtime_interval: Number of clock shifted nano seconds in one NTP
>>   *                  interval.
>> @@ -84,6 +85,9 @@ struct tk_read_base {
>>   *
>>   * wall_to_monotonic is no longer the boot time, getboottime must be
>>   * used instead.
>> + *
>> + * @monotonic_to_boottime is a timespec64 representation of @offs_boot to
>> + * accelerate the VDSO update for CLOCK_BOOTTIME.
>>   */
>>  struct timekeeper {
>>      struct tk_read_base     tkr_mono;
>> @@ -99,6 +103,7 @@ struct timekeeper {
>>      u8                      cs_was_changed_seq;
>>      ktime_t                 next_leap_ktime;
>>      u64                     raw_sec;
>> +    struct timespec64       monotonic_to_boot;
>>  
>>      /* The following members are for timekeeping internal use */
>>      u64                     cycle_interval;
>> --- a/kernel/time/timekeeping.c
>> +++ b/kernel/time/timekeeping.c
>> @@ -146,6 +146,11 @@ static void tk_set_wall_to_mono(struct t
>>  static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta)
>>  {
>>      tk->offs_boot = ktime_add(tk->offs_boot, delta);
>> +    /*
>> +     * Timespec representation for VDSO update to avoid 64bit division
>> +     * on every update.
>> +     */
>> +    tk->monotonic_to_boot = ktime_to_timespec64(tk->offs_boot);
>>  }
>>  
>>  /*
>> --- a/kernel/time/vsyscall.c
>> +++ b/kernel/time/vsyscall.c
>> @@ -17,7 +17,7 @@ static inline void update_vdso_data(stru
>>                                  struct timekeeper *tk)
>>  {
>>      struct vdso_timestamp *vdso_ts;
>> -    u64 nsec;
>> +    u64 nsec, sec;
>>  
>>      vdata[CS_HRES_COARSE].cycle_last        = tk->tkr_mono.cycle_last;
>>      vdata[CS_HRES_COARSE].mask              = tk->tkr_mono.mask;
>> @@ -45,23 +45,27 @@ static inline void update_vdso_data(stru
>>      }
>>      vdso_ts->nsec   = nsec;
>>  
>> -    /* CLOCK_MONOTONIC_RAW */
>> -    vdso_ts         = &vdata[CS_RAW].basetime[CLOCK_MONOTONIC_RAW];
>> -    vdso_ts->sec    = tk->raw_sec;
>> -    vdso_ts->nsec   = tk->tkr_raw.xtime_nsec;
>> +    /* Copy MONOTONIC time for BOOTTIME */
>> +    sec     = vdso_ts->sec;
>> +    /* Add the boot offset */
>> +    sec     += tk->monotonic_to_boot.tv_sec;
>> +    nsec    += (u64)tk->monotonic_to_boot.tv_nsec << tk->tkr_mono.shift;
>>  
>>      /* CLOCK_BOOTTIME */
>>      vdso_ts         = &vdata[CS_HRES_COARSE].basetime[CLOCK_BOOTTIME];
>> -    vdso_ts->sec    = tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
>> -    nsec = tk->tkr_mono.xtime_nsec;
>> -    nsec += ((u64)(tk->wall_to_monotonic.tv_nsec +
>> -                   ktime_to_ns(tk->offs_boot)) << tk->tkr_mono.shift);
>> +    vdso_ts->sec    = sec;
>> +
>>      while (nsec >= (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) {
>>              nsec -= (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift);
>>              vdso_ts->sec++;
>>      }
>>      vdso_ts->nsec   = nsec;
>>  
>> +    /* CLOCK_MONOTONIC_RAW */
>> +    vdso_ts         = &vdata[CS_RAW].basetime[CLOCK_MONOTONIC_RAW];
>> +    vdso_ts->sec    = tk->raw_sec;
>> +    vdso_ts->nsec   = tk->tkr_raw.xtime_nsec;
>> +
>>      /* CLOCK_TAI */
>>      vdso_ts         = &vdata[CS_HRES_COARSE].basetime[CLOCK_TAI];
>>      vdso_ts->sec    = tk->xtime_sec + (s64)tk->tai_offset;
>>
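
One small note on the approach, for the archives: caching the timespec64
in tk_update_sleep_time() means the ns-to-timespec64 conversion (and its
64bit division) only runs when sleep time is injected, i.e. on resume,
while update_vdso_data() stays division free on every timekeeping update,
which is what the changelog refers to.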

-- 
Regards,
Vincenzo
