Is there a reason for "step = leap"?
On Wed, Apr 15, 2015 at 11:41 AM, Miroslav Lichvar <mlich...@redhat.com> wrote:
> On systems with 32-bit time_t, it seems there are quite a few problems
> that applications may have with time overflowing in year 2038. Beside
> getting in an unexpected state by not checking integer operations with
> time_t variables, some system calls have unexpected behavior, e.g. the
> system time can't be set back to the current time (negative value),
> timers with the ABSTIME flag can't be set (negative value) or they
> expire immediately (current time is always larger).
>
> It would be unrealistic to expect all applications to be able to handle
> all these problems. Year 2038 is still many years away, but this can be
> a problem even now. The clock can be set close to the overflow
> accidentally or maliciously, e.g. when the clock is synchronized over
> network by NTP or PTP.
>
> This patch sets a maximum value of the system time to prevent the system
> time from getting too close to the overflow. The time can't be set to a
> larger value. When the maximum is reached in normal time accumulation,
> the clock will be stepped back by one week.
>
> A new kernel sysctl time_max is added to select the maximum time. It can
> be set to 0 for no limit, 1 for one week before 32-bit time_t overflow,
> and 2 for one week before ktime_t overflow. The default value is 1 with
> 32-bit time_t and 2 with 64-bit time_t. This can be changed later to be
> always 2 when 64-bit versions of system calls using 32-bit time_t are
> available.
>
> Signed-off-by: Miroslav Lichvar <mlich...@redhat.com>
> ---
>  Documentation/sysctl/kernel.txt | 19 +++++++++++++
>  include/linux/timekeeping.h     |  5 ++++
>  include/uapi/linux/sysctl.h     |  1 +
>  kernel/sysctl.c                 |  9 ++++++
>  kernel/sysctl_binary.c          |  1 +
>  kernel/time/timekeeping.c       | 62 
> +++++++++++++++++++++++++++++++++++++----
>  6 files changed, 91 insertions(+), 6 deletions(-)
>
> diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
> index 83ab256..08e4f28 100644
> --- a/Documentation/sysctl/kernel.txt
> +++ b/Documentation/sysctl/kernel.txt
> @@ -82,6 +82,7 @@ show up in /proc/sys/kernel:
>  - sysctl_writes_strict
>  - tainted
>  - threads-max
> +- time_max
>  - unknown_nmi_panic
>  - watchdog_thresh
>  - version
> @@ -847,6 +848,24 @@ can be ORed together:
>
>  ==============================================================
>
> +time_max:
> +
> +Select the maximum allowed value of the system time. The system clock cannot 
> be
> +set to a larger value and when it reaches the maximum on its own, it will be
> +stepped back by one week.
> +
> +0: No limit.
> +
> +1: One week before 32-bit time_t overflows, i.e. Jan 12 03:14:07 UTC 2038.
> +   This is currently the default value with 32-bit time_t, but it will likely
> +   change when 64-bit versions of system calls using time_t are available.
> +
> +2: One week before time in the internal kernel representation (ktime_t)
> +   overflows, i.e. Apr 4 23:47:16 UTC 2262. This is the default value with
> +   64-bit time_t.
> +
> +==============================================================
> +
>  unknown_nmi_panic:
>
>  The value in this file affects behavior of handling NMI. When the
> diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h
> index 99176af..51fc970 100644
> --- a/include/linux/timekeeping.h
> +++ b/include/linux/timekeeping.h
> @@ -5,6 +5,11 @@
>
>  void timekeeping_init(void);
>  extern int timekeeping_suspended;
> +extern int sysctl_time_max;
> +
> +struct ctl_table;
> +extern int sysctl_time_max_handler(struct ctl_table *table, int write,
> +               void __user *buffer, size_t *length, loff_t *ppos);
>
>  /*
>   * Get and set timeofday
> diff --git a/include/uapi/linux/sysctl.h b/include/uapi/linux/sysctl.h
> index 0956373..8fd2aab 100644
> --- a/include/uapi/linux/sysctl.h
> +++ b/include/uapi/linux/sysctl.h
> @@ -154,6 +154,7 @@ enum
>         KERN_NMI_WATCHDOG=75, /* int: enable/disable nmi watchdog */
>         KERN_PANIC_ON_NMI=76, /* int: whether we will panic on an unrecovered 
> */
>         KERN_PANIC_ON_WARN=77, /* int: call panic() in WARN() functions */
> +       KERN_TIMEMAX=78,        /* int: select maximum allowed system time */
>  };
>
>
> diff --git a/kernel/sysctl.c b/kernel/sysctl.c
> index ce410bb..6713a5b 100644
> --- a/kernel/sysctl.c
> +++ b/kernel/sysctl.c
> @@ -1120,6 +1120,15 @@ static struct ctl_table kern_table[] = {
>                 .extra1         = &zero,
>                 .extra2         = &one,
>         },
> +       {
> +               .procname       = "time_max",
> +               .data           = &sysctl_time_max,
> +               .maxlen         = sizeof(int),
> +               .mode           = 0644,
> +               .proc_handler   = sysctl_time_max_handler,
> +               .extra1         = &zero,
> +               .extra2         = &two,
> +       },
>         { }
>  };
>
> diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c
> index 7e7746a..66c0946 100644
> --- a/kernel/sysctl_binary.c
> +++ b/kernel/sysctl_binary.c
> @@ -138,6 +138,7 @@ static const struct bin_table bin_kern_table[] = {
>         { CTL_INT,      KERN_MAX_LOCK_DEPTH,            "max_lock_depth" },
>         { CTL_INT,      KERN_PANIC_ON_NMI,              
> "panic_on_unrecovered_nmi" },
>         { CTL_INT,      KERN_PANIC_ON_WARN,             "panic_on_warn" },
> +       { CTL_INT,      KERN_TIMEMAX,                   "time_max" },
>         {}
>  };
>
> diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
> index 946acb7..112932f 100644
> --- a/kernel/time/timekeeping.c
> +++ b/kernel/time/timekeeping.c
> @@ -32,6 +32,9 @@
>  #define TK_MIRROR              (1 << 1)
>  #define TK_CLOCK_WAS_SET       (1 << 2)
>
> +#define SEC_PER_DAY            (24 * 3600)
> +#define SEC_PER_WEEK           (7 * SEC_PER_DAY)
> +
>  /*
>   * The most important data for readout fits into a single 64 byte
>   * cache line.
> @@ -884,6 +887,37 @@ EXPORT_SYMBOL(getnstime_raw_and_real);
>
>  #endif /* CONFIG_NTP_PPS */
>
> +/* Maximum allowed system time as a sysctl setting and in seconds */
> +int sysctl_time_max __read_mostly;
> +static u64 time_max_sec __read_mostly;
> +
> +static void update_time_max_sec(int tm)
> +{
> +       if (tm > 1) {
> +               /* One week before ktime_t overflow */
> +               time_max_sec = KTIME_SEC_MAX - SEC_PER_WEEK;
> +       } else if (tm == 1) {
> +               /* One week before 32-bit time_t overflow */
> +               time_max_sec = 0x7fffffff - SEC_PER_WEEK;
> +       } else {
> +               /* No limit */
> +               time_max_sec = -1;
> +       }
> +}
> +
> +int sysctl_time_max_handler(struct ctl_table *table, int write,
> +               void __user *buffer, size_t *length, loff_t *ppos)
> +{
> +       int rc;
> +
> +       rc = proc_dointvec_minmax(table, write, buffer, length, ppos);
> +       if (rc)
> +               return rc;
> +
> +       update_time_max_sec(sysctl_time_max);
> +       return 0;
> +}
> +
>  /**
>   * do_gettimeofday - Returns the time of day in a timeval
>   * @tv:                pointer to the timeval to be set
> @@ -912,7 +946,7 @@ int do_settimeofday64(const struct timespec64 *ts)
>         struct timespec64 ts_delta, xt;
>         unsigned long flags;
>
> -       if (!timespec64_valid_strict(ts))
> +       if (!timespec64_valid_strict(ts) || ts->tv_sec >= time_max_sec)
>                 return -EINVAL;
>
>         raw_spin_lock_irqsave(&timekeeper_lock, flags);
> @@ -965,7 +999,7 @@ int timekeeping_inject_offset(struct timespec *ts)
>
>         /* Make sure the proposed value is valid */
>         tmp = timespec64_add(tk_xtime(tk),  ts64);
> -       if (!timespec64_valid_strict(&tmp)) {
> +       if (!timespec64_valid_strict(&tmp) || tmp.tv_sec >= time_max_sec) {
>                 ret = -EINVAL;
>                 goto error;
>         }
> @@ -1238,6 +1272,10 @@ void __init timekeeping_init(void)
>         write_seqcount_begin(&tk_core.seq);
>         ntp_init();
>
> +       /* For now, prevent 32-bit time_t overflow by default */
> +       sysctl_time_max = sizeof(time_t) > 4 ? 2 : 1;
> +       update_time_max_sec(sysctl_time_max);
> +
>         clock = clocksource_default_clock();
>         if (clock->enable)
>                 clock->enable(clock);
> @@ -1687,23 +1725,35 @@ static inline unsigned int 
> accumulate_nsecs_to_secs(struct timekeeper *tk)
>
>         while (tk->tkr_mono.xtime_nsec >= nsecps) {
>                 int leap;
> +               s64 step;
>
>                 tk->tkr_mono.xtime_nsec -= nsecps;
>                 tk->xtime_sec++;
>
>                 /* Figure out if its a leap sec and apply if needed */
>                 leap = second_overflow(tk->xtime_sec);
> -               if (unlikely(leap)) {
> +               step = leap;
> +
> +               /* If the system time reached the maximum, step it back */
> +               if (unlikely(tk->xtime_sec >= time_max_sec)) {
> +                       step = time_max_sec - tk->xtime_sec - SEC_PER_WEEK;
> +                       printk(KERN_NOTICE
> +                               "Clock: maximum time reached, stepping 
> back\n");
> +               }
> +
> +               if (unlikely(step)) {
>                         struct timespec64 ts;
>
> -                       tk->xtime_sec += leap;
> +                       tk->xtime_sec += step;
>
> -                       ts.tv_sec = leap;
> +                       ts.tv_sec = step;
>                         ts.tv_nsec = 0;
>                         tk_set_wall_to_mono(tk,
>                                 timespec64_sub(tk->wall_to_monotonic, ts));
>
> -                       __timekeeping_set_tai_offset(tk, tk->tai_offset - 
> leap);
> +                       if (leap)
> +                               __timekeeping_set_tai_offset(tk,
> +                                               tk->tai_offset - leap);
>
>                         clock_set = TK_CLOCK_WAS_SET;
>                 }
> --
> 2.1.0
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to