[patch 03/46] [RFC] HZ free ntp
>From [EMAIL PROTECTED] Wed Dec 20 02:36:15 2006 Distangle the NTP update from HZ. This is necessary for dynamic tick enabled kernels. --- include/linux/timex.h |7 +++ kernel/hrtimer.c | 11 --- kernel/time/ntp.c | 30 +++--- kernel/timer.c|4 ++-- 4 files changed, 36 insertions(+), 16 deletions(-) Index: linux-2.6.20-rc4-mm1-bo/include/linux/timex.h === --- linux-2.6.20-rc4-mm1-bo.orig/include/linux/timex.h +++ linux-2.6.20-rc4-mm1-bo/include/linux/timex.h @@ -286,6 +286,13 @@ static inline void time_interpolator_upd #define TICK_LENGTH_SHIFT 32 +#ifdef CONFIG_NO_HZ +#define NTP_INTERVAL_FREQ (2) +#else +#define NTP_INTERVAL_FREQ (HZ) +#endif +#define NTP_INTERVAL_LENGTH (NSEC_PER_SEC/NTP_INTERVAL_FREQ) + /* Returns how long ticks are at present, in ns / 2^(SHIFT_SCALE-10). */ extern u64 current_tick_length(void); Index: linux-2.6.20-rc4-mm1-bo/kernel/time/ntp.c === --- linux-2.6.20-rc4-mm1-bo.orig/kernel/time/ntp.c +++ linux-2.6.20-rc4-mm1-bo/kernel/time/ntp.c @@ -24,7 +24,7 @@ static u64 tick_length, tick_length_base #define MAX_TICKADJ500 /* microsecs */ #define MAX_TICKADJ_SCALED (((u64)(MAX_TICKADJ * NSEC_PER_USEC) << \ - TICK_LENGTH_SHIFT) / HZ) + TICK_LENGTH_SHIFT) / NTP_INTERVAL_FREQ) /* * phase-lock loop variables @@ -46,13 +46,17 @@ long time_adjust; static void ntp_update_frequency(void) { - tick_length_base = (u64)(tick_usec * NSEC_PER_USEC * USER_HZ) << TICK_LENGTH_SHIFT; - tick_length_base += (s64)CLOCK_TICK_ADJUST << TICK_LENGTH_SHIFT; - tick_length_base += (s64)time_freq << (TICK_LENGTH_SHIFT - SHIFT_NSEC); + u64 second_length = (u64)(tick_usec * NSEC_PER_USEC * USER_HZ) + << TICK_LENGTH_SHIFT; + second_length += (s64)CLOCK_TICK_ADJUST << TICK_LENGTH_SHIFT; + second_length += (s64)time_freq << (TICK_LENGTH_SHIFT - SHIFT_NSEC); - do_div(tick_length_base, HZ); + tick_length_base = second_length; - tick_nsec = tick_length_base >> TICK_LENGTH_SHIFT; + do_div(second_length, HZ); + tick_nsec = second_length >> TICK_LENGTH_SHIFT; + + do_div(tick_length_base, NTP_INTERVAL_FREQ); } /** @@ -162,7 +166,7 @@ void second_overflow(void) tick_length -= MAX_TICKADJ_SCALED; } else { tick_length += (s64)(time_adjust * NSEC_PER_USEC / -HZ) << TICK_LENGTH_SHIFT; + NTP_INTERVAL_FREQ) << TICK_LENGTH_SHIFT; time_adjust = 0; } } @@ -239,7 +243,8 @@ int do_adjtimex(struct timex *txc) result = -EINVAL; goto leave; } - time_freq = ((s64)txc->freq * NSEC_PER_USEC) >> (SHIFT_USEC - SHIFT_NSEC); + time_freq = ((s64)txc->freq * NSEC_PER_USEC) + >> (SHIFT_USEC - SHIFT_NSEC); } if (txc->modes & ADJ_MAXERROR) { @@ -309,7 +314,8 @@ int do_adjtimex(struct timex *txc) freq_adj += time_freq; freq_adj = min(freq_adj, (s64)MAXFREQ_NSEC); time_freq = max(freq_adj, (s64)-MAXFREQ_NSEC); - time_offset = (time_offset / HZ) << SHIFT_UPDATE; + time_offset = (time_offset / NTP_INTERVAL_FREQ) + << SHIFT_UPDATE; } /* STA_PLL */ } /* txc->modes & ADJ_OFFSET */ if (txc->modes & ADJ_TICK) @@ -324,8 +330,10 @@ leave: if ((time_status & (STA_UNSYNC|ST if ((txc->modes & ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT) txc->offset= save_adjust; else - txc->offset= shift_right(time_offset, SHIFT_UPDATE) * HZ / 1000; - txc->freq = (time_freq / NSEC_PER_USEC) << (SHIFT_USEC - SHIFT_NSEC); + txc->offset= shift_right(time_offset, SHIFT_UPDATE) + * NTP_INTERVAL_FREQ / 1000; + txc->freq = (time_freq / NSEC_PER_USEC) + << (SHIFT_USEC - SHIFT_NSEC); txc->maxerror = time_maxerror; txc->esterror = time_esterror; txc->status= time_status; Index: linux-2.6.20-rc4-mm1-bo/kernel/timer.c === --- linux-2.6.20-rc4-mm1-bo.orig/kernel/timer.c +++ linux-2.6.20-rc4-mm1-bo/kernel/timer.c @@ -890,7 +890,7 @@ void __init timekeeping_init(void) ntp_clear(); clock = clocksource_get_next(); - clocksource_calculate_interval(clock, tick_nsec); +
[patch 03/46] [RFC] HZ free ntp
From [EMAIL PROTECTED] Wed Dec 20 02:36:15 2006 Distangle the NTP update from HZ. This is necessary for dynamic tick enabled kernels. --- include/linux/timex.h |7 +++ kernel/hrtimer.c | 11 --- kernel/time/ntp.c | 30 +++--- kernel/timer.c|4 ++-- 4 files changed, 36 insertions(+), 16 deletions(-) Index: linux-2.6.20-rc4-mm1-bo/include/linux/timex.h === --- linux-2.6.20-rc4-mm1-bo.orig/include/linux/timex.h +++ linux-2.6.20-rc4-mm1-bo/include/linux/timex.h @@ -286,6 +286,13 @@ static inline void time_interpolator_upd #define TICK_LENGTH_SHIFT 32 +#ifdef CONFIG_NO_HZ +#define NTP_INTERVAL_FREQ (2) +#else +#define NTP_INTERVAL_FREQ (HZ) +#endif +#define NTP_INTERVAL_LENGTH (NSEC_PER_SEC/NTP_INTERVAL_FREQ) + /* Returns how long ticks are at present, in ns / 2^(SHIFT_SCALE-10). */ extern u64 current_tick_length(void); Index: linux-2.6.20-rc4-mm1-bo/kernel/time/ntp.c === --- linux-2.6.20-rc4-mm1-bo.orig/kernel/time/ntp.c +++ linux-2.6.20-rc4-mm1-bo/kernel/time/ntp.c @@ -24,7 +24,7 @@ static u64 tick_length, tick_length_base #define MAX_TICKADJ500 /* microsecs */ #define MAX_TICKADJ_SCALED (((u64)(MAX_TICKADJ * NSEC_PER_USEC) \ - TICK_LENGTH_SHIFT) / HZ) + TICK_LENGTH_SHIFT) / NTP_INTERVAL_FREQ) /* * phase-lock loop variables @@ -46,13 +46,17 @@ long time_adjust; static void ntp_update_frequency(void) { - tick_length_base = (u64)(tick_usec * NSEC_PER_USEC * USER_HZ) TICK_LENGTH_SHIFT; - tick_length_base += (s64)CLOCK_TICK_ADJUST TICK_LENGTH_SHIFT; - tick_length_base += (s64)time_freq (TICK_LENGTH_SHIFT - SHIFT_NSEC); + u64 second_length = (u64)(tick_usec * NSEC_PER_USEC * USER_HZ) +TICK_LENGTH_SHIFT; + second_length += (s64)CLOCK_TICK_ADJUST TICK_LENGTH_SHIFT; + second_length += (s64)time_freq (TICK_LENGTH_SHIFT - SHIFT_NSEC); - do_div(tick_length_base, HZ); + tick_length_base = second_length; - tick_nsec = tick_length_base TICK_LENGTH_SHIFT; + do_div(second_length, HZ); + tick_nsec = second_length TICK_LENGTH_SHIFT; + + do_div(tick_length_base, NTP_INTERVAL_FREQ); } /** @@ -162,7 +166,7 @@ void second_overflow(void) tick_length -= MAX_TICKADJ_SCALED; } else { tick_length += (s64)(time_adjust * NSEC_PER_USEC / -HZ) TICK_LENGTH_SHIFT; + NTP_INTERVAL_FREQ) TICK_LENGTH_SHIFT; time_adjust = 0; } } @@ -239,7 +243,8 @@ int do_adjtimex(struct timex *txc) result = -EINVAL; goto leave; } - time_freq = ((s64)txc-freq * NSEC_PER_USEC) (SHIFT_USEC - SHIFT_NSEC); + time_freq = ((s64)txc-freq * NSEC_PER_USEC) +(SHIFT_USEC - SHIFT_NSEC); } if (txc-modes ADJ_MAXERROR) { @@ -309,7 +314,8 @@ int do_adjtimex(struct timex *txc) freq_adj += time_freq; freq_adj = min(freq_adj, (s64)MAXFREQ_NSEC); time_freq = max(freq_adj, (s64)-MAXFREQ_NSEC); - time_offset = (time_offset / HZ) SHIFT_UPDATE; + time_offset = (time_offset / NTP_INTERVAL_FREQ) +SHIFT_UPDATE; } /* STA_PLL */ } /* txc-modes ADJ_OFFSET */ if (txc-modes ADJ_TICK) @@ -324,8 +330,10 @@ leave: if ((time_status (STA_UNSYNC|ST if ((txc-modes ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT) txc-offset= save_adjust; else - txc-offset= shift_right(time_offset, SHIFT_UPDATE) * HZ / 1000; - txc-freq = (time_freq / NSEC_PER_USEC) (SHIFT_USEC - SHIFT_NSEC); + txc-offset= shift_right(time_offset, SHIFT_UPDATE) + * NTP_INTERVAL_FREQ / 1000; + txc-freq = (time_freq / NSEC_PER_USEC) +(SHIFT_USEC - SHIFT_NSEC); txc-maxerror = time_maxerror; txc-esterror = time_esterror; txc-status= time_status; Index: linux-2.6.20-rc4-mm1-bo/kernel/timer.c === --- linux-2.6.20-rc4-mm1-bo.orig/kernel/timer.c +++ linux-2.6.20-rc4-mm1-bo/kernel/timer.c @@ -890,7 +890,7 @@ void __init timekeeping_init(void) ntp_clear(); clock = clocksource_get_next(); - clocksource_calculate_interval(clock, tick_nsec); + clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH);
Re: [RFC] HZ free ntp
Hi, On Tuesday 02 January 2007 20:42, john stultz wrote: > > tick_nsec doesn't require special treatment, in the middle term it's > > obsolete anyway, it could be replaced with (current_tick_length() >> > > TICK_LENGTH_SHIFT) and current_tick_length() being inlined. > > If NTP_INTERVAL_FREQ is different then HZ, then tick_nsec still has a > different meaning then (current_tick_length() >> TICK_LENGTH_SHIFT). > So since tick_nsec is still used in quite a few places, so I'm hesitant > to pull it. The current usage under arch is pretty much bogus and they likely can't use dyntick anyway, so it would be easier to disable tick_nsec completely if dyntick is enabled. > > NTP_INTERVAL_FREQ could be a real variable (so it can be initialized at > > runtime), it's already gone from all important paths. > > Wait, so you're suggesting NTP_INTERVAL_FREQ be a dynamic variable > instead of a constant? Curious, could you give a bit more detail on why? We already have more than enough config options, where the user has barely any idea what to do, so we should try to configure and initialize as much as possible at runtime depending on what the hardware is capable of. That reminds me that the main problem left for a dynamic variable is time_offset. It should become a 64 bit value, so SHIFT_UPDATE isn't needed anymore. Right now it depends on HZ to maximize the value range. > > In the short term I'd prefered a clock would store its frequency instead > > of using NTP_INTERVAL_LENGTH in clocksource_calculate_interval(), so it > > doesn't has to be derived there. > > I don't follow this at all. clocksources do store their own frequency > (via mult/shift). Could you explain? mult is not a constant and calculating the frequency like this is not very precise. bye, Roman - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC] HZ free ntp
Hi, On Tuesday 02 January 2007 21:50, john stultz wrote: > > > It should be called every NTP_INTERVAL_FREQ times, but occasionally > > > it's off > > Wait, so second_overflow should be called every NTP_INTERVAL_FREQ times > (instead of every second)? Surely that's not right. But it is, that's the reason the various adjustment values are divided by it, so they are applied to the next NTP_INTERVAL_FREQ times. BTW I think NTP_INTERVAL_FREQ isn't the right name, CLOCK_UPDATE_FREQ would be a better name, currently ntp is the main user, but a clock can also be updated via other means (e.g. adjtimex or another clock). > > > So in this case the loop in update_wall_time() should rather look like > > > this: > > > > > > while (offset >= clock->cycle_interval) { > > > ... > > > second_overflow(); > > > while (clock->xtime_nsec >= (u64)NSEC_PER_SEC << clock->shift) { > > > clock->xtime_nsec -= (u64)NSEC_PER_SEC << clock->shift; > > > xtime.tv_sec++; > > > } > > > ... > > > } > > > > > > (Also note the change from "if" to "while".) > > This would assume that clock->cycle_interval would *always* be the > length of a full second and that isn't what the patch trying to do. > > Maybe could you explain this some more? As I said this was the case for a value of one. Anyway, to avoid these problems, I'd prefer to keep it at least at 2 or better at 4. This would still drastically reduce the time spent in the loop and we can revisit the issue later. bye, Roman - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC] HZ free ntp
Hi, On Tuesday 02 January 2007 20:42, john stultz wrote: tick_nsec doesn't require special treatment, in the middle term it's obsolete anyway, it could be replaced with (current_tick_length() TICK_LENGTH_SHIFT) and current_tick_length() being inlined. If NTP_INTERVAL_FREQ is different then HZ, then tick_nsec still has a different meaning then (current_tick_length() TICK_LENGTH_SHIFT). So since tick_nsec is still used in quite a few places, so I'm hesitant to pull it. The current usage under arch is pretty much bogus and they likely can't use dyntick anyway, so it would be easier to disable tick_nsec completely if dyntick is enabled. NTP_INTERVAL_FREQ could be a real variable (so it can be initialized at runtime), it's already gone from all important paths. Wait, so you're suggesting NTP_INTERVAL_FREQ be a dynamic variable instead of a constant? Curious, could you give a bit more detail on why? We already have more than enough config options, where the user has barely any idea what to do, so we should try to configure and initialize as much as possible at runtime depending on what the hardware is capable of. That reminds me that the main problem left for a dynamic variable is time_offset. It should become a 64 bit value, so SHIFT_UPDATE isn't needed anymore. Right now it depends on HZ to maximize the value range. In the short term I'd prefered a clock would store its frequency instead of using NTP_INTERVAL_LENGTH in clocksource_calculate_interval(), so it doesn't has to be derived there. I don't follow this at all. clocksources do store their own frequency (via mult/shift). Could you explain? mult is not a constant and calculating the frequency like this is not very precise. bye, Roman - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC] HZ free ntp
Hi, On Tuesday 02 January 2007 21:50, john stultz wrote: It should be called every NTP_INTERVAL_FREQ times, but occasionally it's off Wait, so second_overflow should be called every NTP_INTERVAL_FREQ times (instead of every second)? Surely that's not right. But it is, that's the reason the various adjustment values are divided by it, so they are applied to the next NTP_INTERVAL_FREQ times. BTW I think NTP_INTERVAL_FREQ isn't the right name, CLOCK_UPDATE_FREQ would be a better name, currently ntp is the main user, but a clock can also be updated via other means (e.g. adjtimex or another clock). So in this case the loop in update_wall_time() should rather look like this: while (offset = clock-cycle_interval) { ... second_overflow(); while (clock-xtime_nsec = (u64)NSEC_PER_SEC clock-shift) { clock-xtime_nsec -= (u64)NSEC_PER_SEC clock-shift; xtime.tv_sec++; } ... } (Also note the change from if to while.) This would assume that clock-cycle_interval would *always* be the length of a full second and that isn't what the patch trying to do. Maybe could you explain this some more? As I said this was the case for a value of one. Anyway, to avoid these problems, I'd prefer to keep it at least at 2 or better at 4. This would still drastically reduce the time spent in the loop and we can revisit the issue later. bye, Roman - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC] HZ free ntp
On Tue, 2007-01-02 at 11:46 -0800, john stultz wrote: > On Mon, 2007-01-01 at 19:29 +0100, Roman Zippel wrote: > > On Wednesday 20 December 2006 02:54, john stultz wrote: > > > > > And here would be the follow on patch (again *untested*) for > > > CONFIG_NO_HZ slowing the time accumulation down to once per second. > > > > Changing it to one creates a potential problem with calling > > second_overflow(). Wait, at first I thought I understood this, but looking closer, I'm not so sure I do. > > It should be called every NTP_INTERVAL_FREQ times, but occasionally it's off Wait, so second_overflow should be called every NTP_INTERVAL_FREQ times (instead of every second)? Surely that's not right. > > by one (when xtime is close to a full second and the tick length is > > different > > from 1sec). At a higher frequency that's not much of a problem, but at one > > it > > means second_overflow() is occasionally called twice a second or skipped for > > a second. Usually the error should be quite small, but sometimes it can be > > significant. > > So in this case the loop in update_wall_time() should rather look like this: > > > > while (offset >= clock->cycle_interval) { > > ... > > second_overflow(); > > while (clock->xtime_nsec >= (u64)NSEC_PER_SEC << clock->shift) { > > clock->xtime_nsec -= (u64)NSEC_PER_SEC << clock->shift; > > xtime.tv_sec++; > > } > > ... > > } > > > > (Also note the change from "if" to "while".) This would assume that clock->cycle_interval would *always* be the length of a full second and that isn't what the patch trying to do. Maybe could you explain this some more? thanks -john - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC] HZ free ntp
On Mon, 2007-01-01 at 19:29 +0100, Roman Zippel wrote: > On Wednesday 20 December 2006 02:54, john stultz wrote: > > > And here would be the follow on patch (again *untested*) for > > CONFIG_NO_HZ slowing the time accumulation down to once per second. > > Changing it to one creates a potential problem with calling second_overflow(). > It should be called every NTP_INTERVAL_FREQ times, but occasionally it's off > by one (when xtime is close to a full second and the tick length is different > from 1sec). At a higher frequency that's not much of a problem, but at one it > means second_overflow() is occasionally called twice a second or skipped for > a second. Usually the error should be quite small, but sometimes it can be > significant. > So in this case the loop in update_wall_time() should rather look like this: > > while (offset >= clock->cycle_interval) { > ... > second_overflow(); > while (clock->xtime_nsec >= (u64)NSEC_PER_SEC << clock->shift) { > clock->xtime_nsec -= (u64)NSEC_PER_SEC << clock->shift; > xtime.tv_sec++; > } > ... > } > > (Also note the change from "if" to "while".) Ah, good catch! Thank you, I'll add that, retest and send it to akpm. thanks -john - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC] HZ free ntp
On Mon, 2007-01-01 at 17:27 +0100, Roman Zippel wrote: > On Wednesday 20 December 2006 02:32, john stultz wrote: > > > > I know and all you have to change in the ntp and some related code is to > > > replace HZ there with a variable, thus make it changable, so you can > > > increase the update interval (i.e. it becomes 1s/hz instead of 1s/HZ). > > > > Untested patch below. Does this vibe better with you are suggesting? > > Yes, thanks. > tick_nsec doesn't require special treatment, in the middle term it's obsolete > anyway, it could be replaced with (current_tick_length() >> > TICK_LENGTH_SHIFT) and current_tick_length() being inlined. If NTP_INTERVAL_FREQ is different then HZ, then tick_nsec still has a different meaning then (current_tick_length() >> TICK_LENGTH_SHIFT). So since tick_nsec is still used in quite a few places, so I'm hesitant to pull it. > NTP_INTERVAL_FREQ could be a real variable (so it can be initialized at > runtime), it's already gone from all important paths. Wait, so you're suggesting NTP_INTERVAL_FREQ be a dynamic variable instead of a constant? Curious, could you give a bit more detail on why? > In the short term I'd prefered a clock would store its frequency instead of > using NTP_INTERVAL_LENGTH in clocksource_calculate_interval(), so it doesn't > has to be derived there. I don't follow this at all. clocksources do store their own frequency (via mult/shift). Could you explain? thanks -john - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC] HZ free ntp
On Mon, 2007-01-01 at 17:27 +0100, Roman Zippel wrote: On Wednesday 20 December 2006 02:32, john stultz wrote: I know and all you have to change in the ntp and some related code is to replace HZ there with a variable, thus make it changable, so you can increase the update interval (i.e. it becomes 1s/hz instead of 1s/HZ). Untested patch below. Does this vibe better with you are suggesting? Yes, thanks. tick_nsec doesn't require special treatment, in the middle term it's obsolete anyway, it could be replaced with (current_tick_length() TICK_LENGTH_SHIFT) and current_tick_length() being inlined. If NTP_INTERVAL_FREQ is different then HZ, then tick_nsec still has a different meaning then (current_tick_length() TICK_LENGTH_SHIFT). So since tick_nsec is still used in quite a few places, so I'm hesitant to pull it. NTP_INTERVAL_FREQ could be a real variable (so it can be initialized at runtime), it's already gone from all important paths. Wait, so you're suggesting NTP_INTERVAL_FREQ be a dynamic variable instead of a constant? Curious, could you give a bit more detail on why? In the short term I'd prefered a clock would store its frequency instead of using NTP_INTERVAL_LENGTH in clocksource_calculate_interval(), so it doesn't has to be derived there. I don't follow this at all. clocksources do store their own frequency (via mult/shift). Could you explain? thanks -john - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC] HZ free ntp
On Mon, 2007-01-01 at 19:29 +0100, Roman Zippel wrote: On Wednesday 20 December 2006 02:54, john stultz wrote: And here would be the follow on patch (again *untested*) for CONFIG_NO_HZ slowing the time accumulation down to once per second. Changing it to one creates a potential problem with calling second_overflow(). It should be called every NTP_INTERVAL_FREQ times, but occasionally it's off by one (when xtime is close to a full second and the tick length is different from 1sec). At a higher frequency that's not much of a problem, but at one it means second_overflow() is occasionally called twice a second or skipped for a second. Usually the error should be quite small, but sometimes it can be significant. So in this case the loop in update_wall_time() should rather look like this: while (offset = clock-cycle_interval) { ... second_overflow(); while (clock-xtime_nsec = (u64)NSEC_PER_SEC clock-shift) { clock-xtime_nsec -= (u64)NSEC_PER_SEC clock-shift; xtime.tv_sec++; } ... } (Also note the change from if to while.) Ah, good catch! Thank you, I'll add that, retest and send it to akpm. thanks -john - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC] HZ free ntp
On Tue, 2007-01-02 at 11:46 -0800, john stultz wrote: On Mon, 2007-01-01 at 19:29 +0100, Roman Zippel wrote: On Wednesday 20 December 2006 02:54, john stultz wrote: And here would be the follow on patch (again *untested*) for CONFIG_NO_HZ slowing the time accumulation down to once per second. Changing it to one creates a potential problem with calling second_overflow(). Wait, at first I thought I understood this, but looking closer, I'm not so sure I do. It should be called every NTP_INTERVAL_FREQ times, but occasionally it's off Wait, so second_overflow should be called every NTP_INTERVAL_FREQ times (instead of every second)? Surely that's not right. by one (when xtime is close to a full second and the tick length is different from 1sec). At a higher frequency that's not much of a problem, but at one it means second_overflow() is occasionally called twice a second or skipped for a second. Usually the error should be quite small, but sometimes it can be significant. So in this case the loop in update_wall_time() should rather look like this: while (offset = clock-cycle_interval) { ... second_overflow(); while (clock-xtime_nsec = (u64)NSEC_PER_SEC clock-shift) { clock-xtime_nsec -= (u64)NSEC_PER_SEC clock-shift; xtime.tv_sec++; } ... } (Also note the change from if to while.) This would assume that clock-cycle_interval would *always* be the length of a full second and that isn't what the patch trying to do. Maybe could you explain this some more? thanks -john - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC] HZ free ntp
On Wednesday 20 December 2006 02:54, john stultz wrote: > And here would be the follow on patch (again *untested*) for > CONFIG_NO_HZ slowing the time accumulation down to once per second. Changing it to one creates a potential problem with calling second_overflow(). It should be called every NTP_INTERVAL_FREQ times, but occasionally it's off by one (when xtime is close to a full second and the tick length is different from 1sec). At a higher frequency that's not much of a problem, but at one it means second_overflow() is occasionally called twice a second or skipped for a second. Usually the error should be quite small, but sometimes it can be significant. So in this case the loop in update_wall_time() should rather look like this: while (offset >= clock->cycle_interval) { ... second_overflow(); while (clock->xtime_nsec >= (u64)NSEC_PER_SEC << clock->shift) { clock->xtime_nsec -= (u64)NSEC_PER_SEC << clock->shift; xtime.tv_sec++; } ... } (Also note the change from "if" to "while".) Another problem area is that the clock shift value might need adjustments, since when you reduce the update frequency, it also increases the error adjustment steps and thus influences the jitter. This is especially important if we want to synchronize the TSC on multiple cpu's. bye, Roman - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC] HZ free ntp
Hi, On Wednesday 20 December 2006 02:32, john stultz wrote: > > I know and all you have to change in the ntp and some related code is to > > replace HZ there with a variable, thus make it changable, so you can > > increase the update interval (i.e. it becomes 1s/hz instead of 1s/HZ). > > Untested patch below. Does this vibe better with you are suggesting? Yes, thanks. tick_nsec doesn't require special treatment, in the middle term it's obsolete anyway, it could be replaced with (current_tick_length() >> TICK_LENGTH_SHIFT) and current_tick_length() being inlined. NTP_INTERVAL_FREQ could be a real variable (so it can be initialized at runtime), it's already gone from all important paths. In the short term I'd prefered a clock would store its frequency instead of using NTP_INTERVAL_LENGTH in clocksource_calculate_interval(), so it doesn't has to be derived there. bye, Roman - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC] HZ free ntp
Hi, On Wednesday 20 December 2006 02:32, john stultz wrote: I know and all you have to change in the ntp and some related code is to replace HZ there with a variable, thus make it changable, so you can increase the update interval (i.e. it becomes 1s/hz instead of 1s/HZ). Untested patch below. Does this vibe better with you are suggesting? Yes, thanks. tick_nsec doesn't require special treatment, in the middle term it's obsolete anyway, it could be replaced with (current_tick_length() TICK_LENGTH_SHIFT) and current_tick_length() being inlined. NTP_INTERVAL_FREQ could be a real variable (so it can be initialized at runtime), it's already gone from all important paths. In the short term I'd prefered a clock would store its frequency instead of using NTP_INTERVAL_LENGTH in clocksource_calculate_interval(), so it doesn't has to be derived there. bye, Roman - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC] HZ free ntp
On Wednesday 20 December 2006 02:54, john stultz wrote: And here would be the follow on patch (again *untested*) for CONFIG_NO_HZ slowing the time accumulation down to once per second. Changing it to one creates a potential problem with calling second_overflow(). It should be called every NTP_INTERVAL_FREQ times, but occasionally it's off by one (when xtime is close to a full second and the tick length is different from 1sec). At a higher frequency that's not much of a problem, but at one it means second_overflow() is occasionally called twice a second or skipped for a second. Usually the error should be quite small, but sometimes it can be significant. So in this case the loop in update_wall_time() should rather look like this: while (offset = clock-cycle_interval) { ... second_overflow(); while (clock-xtime_nsec = (u64)NSEC_PER_SEC clock-shift) { clock-xtime_nsec -= (u64)NSEC_PER_SEC clock-shift; xtime.tv_sec++; } ... } (Also note the change from if to while.) Another problem area is that the clock shift value might need adjustments, since when you reduce the update frequency, it also increases the error adjustment steps and thus influences the jitter. This is especially important if we want to synchronize the TSC on multiple cpu's. bye, Roman - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC] HZ free ntp
On Tue, 19 Dec 2006 17:54:18 -0800 john stultz <[EMAIL PROTECTED]> wrote: > On Tue, 2006-12-19 at 17:32 -0800, john stultz wrote: > > On Wed, 2006-12-13 at 21:40 +0100, Roman Zippel wrote: > > > On Wed, 13 Dec 2006, john stultz wrote: > > > > > You don't have to introduce anything new, it's tick_length that > > > > > changes > > > > > and HZ that becomes a variable in this function. > > > > > > > > So, forgive me for rehashing this, but it seems we're cross talking > > > > again. The context here is the dynticks code. Where HZ doesn't change, > > > > but we get interrupts at much reduced rates. > > > > > > I know and all you have to change in the ntp and some related code is to > > > replace HZ there with a variable, thus make it changable, so you can > > > increase the update interval (i.e. it becomes 1s/hz instead of 1s/HZ). > > > > Untested patch below. Does this vibe better with you are suggesting? > > And here would be the follow on patch (again *untested*) for > CONFIG_NO_HZ slowing the time accumulation down to once per second. I'm still awaiting a final-looking version of this patch, fyi. I don't understand whether this is a theoretical might-happen thing, or if NTP problems have actually been observed in the field? Either way, I'm sure the final changelog will clear that up ;) - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC] HZ free ntp
On Tue, 19 Dec 2006 17:54:18 -0800 john stultz [EMAIL PROTECTED] wrote: On Tue, 2006-12-19 at 17:32 -0800, john stultz wrote: On Wed, 2006-12-13 at 21:40 +0100, Roman Zippel wrote: On Wed, 13 Dec 2006, john stultz wrote: You don't have to introduce anything new, it's tick_length that changes and HZ that becomes a variable in this function. So, forgive me for rehashing this, but it seems we're cross talking again. The context here is the dynticks code. Where HZ doesn't change, but we get interrupts at much reduced rates. I know and all you have to change in the ntp and some related code is to replace HZ there with a variable, thus make it changable, so you can increase the update interval (i.e. it becomes 1s/hz instead of 1s/HZ). Untested patch below. Does this vibe better with you are suggesting? And here would be the follow on patch (again *untested*) for CONFIG_NO_HZ slowing the time accumulation down to once per second. I'm still awaiting a final-looking version of this patch, fyi. I don't understand whether this is a theoretical might-happen thing, or if NTP problems have actually been observed in the field? Either way, I'm sure the final changelog will clear that up ;) - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC] HZ free ntp
On Tue, 2006-12-19 at 17:32 -0800, john stultz wrote: > On Wed, 2006-12-13 at 21:40 +0100, Roman Zippel wrote: > > On Wed, 13 Dec 2006, john stultz wrote: > > > > You don't have to introduce anything new, it's tick_length that changes > > > > and HZ that becomes a variable in this function. > > > > > > So, forgive me for rehashing this, but it seems we're cross talking > > > again. The context here is the dynticks code. Where HZ doesn't change, > > > but we get interrupts at much reduced rates. > > > > I know and all you have to change in the ntp and some related code is to > > replace HZ there with a variable, thus make it changable, so you can > > increase the update interval (i.e. it becomes 1s/hz instead of 1s/HZ). > > Untested patch below. Does this vibe better with you are suggesting? And here would be the follow on patch (again *untested*) for CONFIG_NO_HZ slowing the time accumulation down to once per second. thanks -john diff --git a/include/linux/timex.h b/include/linux/timex.h index 8241e6e..3beb539 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h @@ -286,7 +286,11 @@ #endif /* !CONFIG_TIME_INTERPOLATION */ #define TICK_LENGTH_SHIFT 32 +#ifdef CONFIG_NO_HZ +#define NTP_INTERVAL_FREQ (1) +#else #define NTP_INTERVAL_FREQ (HZ) +#endif #define NTP_INTERVAL_LENGTH (NSEC_PER_SEC/NTP_INTERVAL_FREQ) /* Returns how long ticks are at present, in ns / 2^(SHIFT_SCALE-10). */ diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index d0ba190..53979a9 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -127,12 +127,14 @@ EXPORT_SYMBOL_GPL(ktime_get_ts); */ static void hrtimer_get_softirq_time(struct hrtimer_base *base) { + struct timespec ts; ktime_t xtim, tomono; unsigned long seq; do { seq = read_seqbegin(_lock); - xtim = timespec_to_ktime(xtime); + getnstimeofday(); + xtim = timespec_to_ktime(ts); tomono = timespec_to_ktime(wall_to_monotonic); } while (read_seqretry(_lock, seq)); - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC] HZ free ntp
On Wed, 2006-12-13 at 21:40 +0100, Roman Zippel wrote: > On Wed, 13 Dec 2006, john stultz wrote: > > > You cannot choose arbitrary intervals otherwise you get other problems, > > > e.g. with your patch time_offset handling is broken. > > > > I'm not seeing this yet. Any more details? > > time_offset is scaled to HZ in do_adjtimex, which needs to be changed as > well. Ah, thanks! Fixed. > > > You don't have to introduce anything new, it's tick_length that changes > > > and HZ that becomes a variable in this function. > > > > So, forgive me for rehashing this, but it seems we're cross talking > > again. The context here is the dynticks code. Where HZ doesn't change, > > but we get interrupts at much reduced rates. > > I know and all you have to change in the ntp and some related code is to > replace HZ there with a variable, thus make it changable, so you can > increase the update interval (i.e. it becomes 1s/hz instead of 1s/HZ). Untested patch below. Does this vibe better with you are suggesting? Any other suggestions or feedback? thanks -john diff --git a/include/linux/timex.h b/include/linux/timex.h index db501dc..8241e6e 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h @@ -286,6 +286,9 @@ #endif /* !CONFIG_TIME_INTERPOLATION */ #define TICK_LENGTH_SHIFT 32 +#define NTP_INTERVAL_FREQ (HZ) +#define NTP_INTERVAL_LENGTH (NSEC_PER_SEC/NTP_INTERVAL_FREQ) + /* Returns how long ticks are at present, in ns / 2^(SHIFT_SCALE-10). */ extern u64 current_tick_length(void); diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 3afeaa3..eb12509 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -24,7 +24,7 @@ static u64 tick_length, tick_length_base #define MAX_TICKADJ500 /* microsecs */ #define MAX_TICKADJ_SCALED (((u64)(MAX_TICKADJ * NSEC_PER_USEC) << \ - TICK_LENGTH_SHIFT) / HZ) + TICK_LENGTH_SHIFT) / NTP_INTERVAL_FREQ) /* * phase-lock loop variables @@ -46,13 +46,17 @@ #define CLOCK_TICK_ADJUST (((s64)CLOCK_T static void ntp_update_frequency(void) { - tick_length_base = (u64)(tick_usec * NSEC_PER_USEC * USER_HZ) << TICK_LENGTH_SHIFT; - tick_length_base += (s64)CLOCK_TICK_ADJUST << TICK_LENGTH_SHIFT; - tick_length_base += (s64)time_freq << (TICK_LENGTH_SHIFT - SHIFT_NSEC); + u64 second_length = (u64)(tick_usec * NSEC_PER_USEC * USER_HZ) + << TICK_LENGTH_SHIFT; + second_length += (s64)CLOCK_TICK_ADJUST << TICK_LENGTH_SHIFT; + second_length += (s64)time_freq << (TICK_LENGTH_SHIFT - SHIFT_NSEC); - do_div(tick_length_base, HZ); + tick_length_base = second_length; - tick_nsec = tick_length_base >> TICK_LENGTH_SHIFT; + do_div(second_length, HZ); + tick_nsec = second_length >> TICK_LENGTH_SHIFT; + + do_div(tick_length_base, NTP_INTERVAL_FREQ); } /** @@ -162,7 +166,7 @@ void second_overflow(void) tick_length -= MAX_TICKADJ_SCALED; } else { tick_length += (s64)(time_adjust * NSEC_PER_USEC / -HZ) << TICK_LENGTH_SHIFT; + NTP_INTERVAL_FREQ) << TICK_LENGTH_SHIFT; time_adjust = 0; } } @@ -239,7 +243,8 @@ #endif result = -EINVAL; goto leave; } - time_freq = ((s64)txc->freq * NSEC_PER_USEC) >> (SHIFT_USEC - SHIFT_NSEC); + time_freq = ((s64)txc->freq * NSEC_PER_USEC) + >> (SHIFT_USEC - SHIFT_NSEC); } if (txc->modes & ADJ_MAXERROR) { @@ -309,7 +314,8 @@ #endif freq_adj += time_freq; freq_adj = min(freq_adj, (s64)MAXFREQ_NSEC); time_freq = max(freq_adj, (s64)-MAXFREQ_NSEC); - time_offset = (time_offset / HZ) << SHIFT_UPDATE; + time_offset = (time_offset / NTP_INTERVAL_FREQ) + << SHIFT_UPDATE; } /* STA_PLL */ } /* txc->modes & ADJ_OFFSET */ if (txc->modes & ADJ_TICK) @@ -324,8 +330,10 @@ leave: if ((time_status & (STA_UNSYNC|ST if ((txc->modes & ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT) txc->offset= save_adjust; else - txc->offset= shift_right(time_offset, SHIFT_UPDATE) * HZ / 1000; - txc->freq = (time_freq / NSEC_PER_USEC) << (SHIFT_USEC - SHIFT_NSEC); + txc->offset= shift_right(time_offset, SHIFT_UPDATE) + * NTP_INTERVAL_FREQ / 1000; + txc->freq = (time_freq / NSEC_PER_USEC) + << (SHIFT_USEC - SHIFT_NSEC); txc->maxerror = time_maxerror; txc->esterror = time_esterror; txc->status
Re: [RFC] HZ free ntp
On Wed, 2006-12-13 at 21:40 +0100, Roman Zippel wrote: On Wed, 13 Dec 2006, john stultz wrote: You cannot choose arbitrary intervals otherwise you get other problems, e.g. with your patch time_offset handling is broken. I'm not seeing this yet. Any more details? time_offset is scaled to HZ in do_adjtimex, which needs to be changed as well. Ah, thanks! Fixed. You don't have to introduce anything new, it's tick_length that changes and HZ that becomes a variable in this function. So, forgive me for rehashing this, but it seems we're cross talking again. The context here is the dynticks code. Where HZ doesn't change, but we get interrupts at much reduced rates. I know and all you have to change in the ntp and some related code is to replace HZ there with a variable, thus make it changable, so you can increase the update interval (i.e. it becomes 1s/hz instead of 1s/HZ). Untested patch below. Does this vibe better with you are suggesting? Any other suggestions or feedback? thanks -john diff --git a/include/linux/timex.h b/include/linux/timex.h index db501dc..8241e6e 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h @@ -286,6 +286,9 @@ #endif /* !CONFIG_TIME_INTERPOLATION */ #define TICK_LENGTH_SHIFT 32 +#define NTP_INTERVAL_FREQ (HZ) +#define NTP_INTERVAL_LENGTH (NSEC_PER_SEC/NTP_INTERVAL_FREQ) + /* Returns how long ticks are at present, in ns / 2^(SHIFT_SCALE-10). */ extern u64 current_tick_length(void); diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 3afeaa3..eb12509 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -24,7 +24,7 @@ static u64 tick_length, tick_length_base #define MAX_TICKADJ500 /* microsecs */ #define MAX_TICKADJ_SCALED (((u64)(MAX_TICKADJ * NSEC_PER_USEC) \ - TICK_LENGTH_SHIFT) / HZ) + TICK_LENGTH_SHIFT) / NTP_INTERVAL_FREQ) /* * phase-lock loop variables @@ -46,13 +46,17 @@ #define CLOCK_TICK_ADJUST (((s64)CLOCK_T static void ntp_update_frequency(void) { - tick_length_base = (u64)(tick_usec * NSEC_PER_USEC * USER_HZ) TICK_LENGTH_SHIFT; - tick_length_base += (s64)CLOCK_TICK_ADJUST TICK_LENGTH_SHIFT; - tick_length_base += (s64)time_freq (TICK_LENGTH_SHIFT - SHIFT_NSEC); + u64 second_length = (u64)(tick_usec * NSEC_PER_USEC * USER_HZ) +TICK_LENGTH_SHIFT; + second_length += (s64)CLOCK_TICK_ADJUST TICK_LENGTH_SHIFT; + second_length += (s64)time_freq (TICK_LENGTH_SHIFT - SHIFT_NSEC); - do_div(tick_length_base, HZ); + tick_length_base = second_length; - tick_nsec = tick_length_base TICK_LENGTH_SHIFT; + do_div(second_length, HZ); + tick_nsec = second_length TICK_LENGTH_SHIFT; + + do_div(tick_length_base, NTP_INTERVAL_FREQ); } /** @@ -162,7 +166,7 @@ void second_overflow(void) tick_length -= MAX_TICKADJ_SCALED; } else { tick_length += (s64)(time_adjust * NSEC_PER_USEC / -HZ) TICK_LENGTH_SHIFT; + NTP_INTERVAL_FREQ) TICK_LENGTH_SHIFT; time_adjust = 0; } } @@ -239,7 +243,8 @@ #endif result = -EINVAL; goto leave; } - time_freq = ((s64)txc-freq * NSEC_PER_USEC) (SHIFT_USEC - SHIFT_NSEC); + time_freq = ((s64)txc-freq * NSEC_PER_USEC) +(SHIFT_USEC - SHIFT_NSEC); } if (txc-modes ADJ_MAXERROR) { @@ -309,7 +314,8 @@ #endif freq_adj += time_freq; freq_adj = min(freq_adj, (s64)MAXFREQ_NSEC); time_freq = max(freq_adj, (s64)-MAXFREQ_NSEC); - time_offset = (time_offset / HZ) SHIFT_UPDATE; + time_offset = (time_offset / NTP_INTERVAL_FREQ) +SHIFT_UPDATE; } /* STA_PLL */ } /* txc-modes ADJ_OFFSET */ if (txc-modes ADJ_TICK) @@ -324,8 +330,10 @@ leave: if ((time_status (STA_UNSYNC|ST if ((txc-modes ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT) txc-offset= save_adjust; else - txc-offset= shift_right(time_offset, SHIFT_UPDATE) * HZ / 1000; - txc-freq = (time_freq / NSEC_PER_USEC) (SHIFT_USEC - SHIFT_NSEC); + txc-offset= shift_right(time_offset, SHIFT_UPDATE) + * NTP_INTERVAL_FREQ / 1000; + txc-freq = (time_freq / NSEC_PER_USEC) +(SHIFT_USEC - SHIFT_NSEC); txc-maxerror = time_maxerror; txc-esterror = time_esterror; txc-status= time_status; diff --git a/kernel/timer.c b/kernel/timer.c index
Re: [RFC] HZ free ntp
On Tue, 2006-12-19 at 17:32 -0800, john stultz wrote: On Wed, 2006-12-13 at 21:40 +0100, Roman Zippel wrote: On Wed, 13 Dec 2006, john stultz wrote: You don't have to introduce anything new, it's tick_length that changes and HZ that becomes a variable in this function. So, forgive me for rehashing this, but it seems we're cross talking again. The context here is the dynticks code. Where HZ doesn't change, but we get interrupts at much reduced rates. I know and all you have to change in the ntp and some related code is to replace HZ there with a variable, thus make it changable, so you can increase the update interval (i.e. it becomes 1s/hz instead of 1s/HZ). Untested patch below. Does this vibe better with you are suggesting? And here would be the follow on patch (again *untested*) for CONFIG_NO_HZ slowing the time accumulation down to once per second. thanks -john diff --git a/include/linux/timex.h b/include/linux/timex.h index 8241e6e..3beb539 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h @@ -286,7 +286,11 @@ #endif /* !CONFIG_TIME_INTERPOLATION */ #define TICK_LENGTH_SHIFT 32 +#ifdef CONFIG_NO_HZ +#define NTP_INTERVAL_FREQ (1) +#else #define NTP_INTERVAL_FREQ (HZ) +#endif #define NTP_INTERVAL_LENGTH (NSEC_PER_SEC/NTP_INTERVAL_FREQ) /* Returns how long ticks are at present, in ns / 2^(SHIFT_SCALE-10). */ diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index d0ba190..53979a9 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -127,12 +127,14 @@ EXPORT_SYMBOL_GPL(ktime_get_ts); */ static void hrtimer_get_softirq_time(struct hrtimer_base *base) { + struct timespec ts; ktime_t xtim, tomono; unsigned long seq; do { seq = read_seqbegin(xtime_lock); - xtim = timespec_to_ktime(xtime); + getnstimeofday(ts); + xtim = timespec_to_ktime(ts); tomono = timespec_to_ktime(wall_to_monotonic); } while (read_seqretry(xtime_lock, seq)); - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC] HZ free ntp
Hi, On Wed, 13 Dec 2006, john stultz wrote: > > The largest possible interval is freq cycles (or 1 second without > > adjustments). That is the base interval and without redesigning NTP we > > can't change that. This base interval can be subdivided into smaller > > intervals for incremental updates. > > Indeed, larger then 1 second intervals would require the second_overflow > code to be reworked too. There isn't much to rework without a complete redesign. > > You cannot choose arbitrary intervals otherwise you get other problems, > > e.g. with your patch time_offset handling is broken. > > I'm not seeing this yet. Any more details? time_offset is scaled to HZ in do_adjtimex, which needs to be changed as well. > > You don't have to introduce anything new, it's tick_length that changes > > and HZ that becomes a variable in this function. > > So, forgive me for rehashing this, but it seems we're cross talking > again. The context here is the dynticks code. Where HZ doesn't change, > but we get interrupts at much reduced rates. I know and all you have to change in the ntp and some related code is to replace HZ there with a variable, thus make it changable, so you can increase the update interval (i.e. it becomes 1s/hz instead of 1s/HZ). > However, in doing so we have to > work w/ the ntp.c code which (as Ingo earlier mentioned) has a number of > HZ based assumptions. Repeating Ingo's nonsense doesn't make it any more true. :-( bye, Roman - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC] HZ free ntp
On Wed, 2006-12-13 at 14:47 +0100, Roman Zippel wrote: > Hi, > > On Tue, 12 Dec 2006, john stultz wrote: > > > Basically INTERVAL_LENGTH_NSEC defines the NTP interval length that the > > time code will use to accumulate with. In this patch I've pushed it out > > to a full second, but it could be set via config (NSEC_PER_SEC/HZ for > > regular systems, something larger for systems using dynticks). > > Why do you want to use such an interval? This makes everything only more > complicated. > The largest possible interval is freq cycles (or 1 second without > adjustments). That is the base interval and without redesigning NTP we > can't change that. This base interval can be subdivided into smaller > intervals for incremental updates. Indeed, larger then 1 second intervals would require the second_overflow code to be reworked too. However, I'm not proposing larger then 1 second intervals at this point. I'm just allowing for non-HZ intervals. > You cannot choose arbitrary intervals otherwise you get other problems, > e.g. with your patch time_offset handling is broken. I'm not seeing this yet. Any more details? > > + /* calculate the length of one NTP adjusted second */ > > + second_length = (u64)(tick_usec * NSEC_PER_USEC * USER_HZ); > > + second_length += (s64)CLOCK_TICK_ADJUST; > > + adj_length = (s64)time_freq; > > + > > + /* calculate tick length @ HZ*/ > > + tick_length = (second_length << TICK_LENGTH_SHIFT) > > + + (adj_length << (TICK_LENGTH_SHIFT - SHIFT_NSEC)); > > + do_div(tick_length, HZ); > > + tick_nsec = tick_length >> TICK_LENGTH_SHIFT; > > + > > + > > + /* calculate interval_length_base */ > > + /* XXX - this is broken up to avoid 64bit overlfows */ > > + interval_length_base = second_length * INTERVAL_LENGTH_NSEC; > > + interval_length_base <<= 2; > > + do_div(interval_length_base, NSEC_PER_SEC); > > + interval_length_base <<= TICK_LENGTH_SHIFT-2; > > You don't have to introduce anything new, it's tick_length that changes > and HZ that becomes a variable in this function. So, forgive me for rehashing this, but it seems we're cross talking again. The context here is the dynticks code. Where HZ doesn't change, but we get interrupts at much reduced rates. The problem is, if the interrupts slow to ~1 per second frequencies, we end up spending a ton of time in the main update_wall_time loop, processing that 1 second in 1/HZ chunks. The current code is correct, but just wastes a lot of time. So I'm trying to come up w/ an approach (the earlier, and broken, exponential accumulation patch had the same intention), that allows us to accumulate time in larger chunks. However, in doing so we have to work w/ the ntp.c code which (as Ingo earlier mentioned) has a number of HZ based assumptions. This last patch tries to give NTP and the timekeeping an agreed upon chunk (I chose "interval" as the term, since "tick" is somewhat connected to HZ) of time upon which accumulation is done, so we can have courser second updates, or finer grained updates, depending on config settings. In fairness, the patch probably going about this in a less then perfect way, but that's why I'm asking for your feedback and suggestions. What are your thoughts for reducing the time spent in the update_wall_time loop in the context of dynticks? thanks -john - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC] HZ free ntp
On Wed, 2006-12-13 at 10:51 +0100, Ingo Molnar wrote: > * john stultz <[EMAIL PROTECTED]> wrote: > > > On Wed, 2006-12-06 at 15:33 +0100, Roman Zippel wrote: > > > On Wed, 6 Dec 2006, Ingo Molnar wrote: > > > > i disagree with you and it's pretty low-impact anyway. There's still > > > > quite many HZ/tick assumptions all around the time code (NTP being one > > > > example), we'll deal with those via other patches. > > > > > > Why do you pick on the NTP code? That's actually one of the places where > > > assumptions about HZ are largely gone. NTP state is updated incrementally > > > and this won't change, but the update frequency can now be easily > > > disconnected from HZ. > > > > Hey Roman, > > Here's my rough first attempt at doing so. I'd not call it easy, but > > maybe you have some suggestions for a simpler way? > > > > Basically INTERVAL_LENGTH_NSEC defines the NTP interval length that > > the time code will use to accumulate with. In this patch I've pushed > > it out to a full second, but it could be set via config > > (NSEC_PER_SEC/HZ for regular systems, something larger for systems > > using dynticks). > > cool! I'll give this one a go in -rt, combined with the exponential > second-overflow patch. (that one is now algorithmically safe, right?) No, this one will replace the exponential accumulation patch. So we'll accumulate on second intervals, which should be far enough apart that we won't be spinning in that loop for long. thanks -john - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC] HZ free ntp
Hi, On Tue, 12 Dec 2006, john stultz wrote: > Basically INTERVAL_LENGTH_NSEC defines the NTP interval length that the > time code will use to accumulate with. In this patch I've pushed it out > to a full second, but it could be set via config (NSEC_PER_SEC/HZ for > regular systems, something larger for systems using dynticks). Why do you want to use such an interval? This makes everything only more complicated. The largest possible interval is freq cycles (or 1 second without adjustments). That is the base interval and without redesigning NTP we can't change that. This base interval can be subdivided into smaller intervals for incremental updates. You cannot choose arbitrary intervals otherwise you get other problems, e.g. with your patch time_offset handling is broken. > + /* calculate the length of one NTP adjusted second */ > + second_length = (u64)(tick_usec * NSEC_PER_USEC * USER_HZ); > + second_length += (s64)CLOCK_TICK_ADJUST; > + adj_length = (s64)time_freq; > + > + /* calculate tick length @ HZ*/ > + tick_length = (second_length << TICK_LENGTH_SHIFT) > + + (adj_length << (TICK_LENGTH_SHIFT - SHIFT_NSEC)); > + do_div(tick_length, HZ); > + tick_nsec = tick_length >> TICK_LENGTH_SHIFT; > + > + > + /* calculate interval_length_base */ > + /* XXX - this is broken up to avoid 64bit overlfows */ > + interval_length_base = second_length * INTERVAL_LENGTH_NSEC; > + interval_length_base <<= 2; > + do_div(interval_length_base, NSEC_PER_SEC); > + interval_length_base <<= TICK_LENGTH_SHIFT-2; You don't have to introduce anything new, it's tick_length that changes and HZ that becomes a variable in this function. bye, Roman - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC] HZ free ntp
* john stultz <[EMAIL PROTECTED]> wrote: > On Wed, 2006-12-06 at 15:33 +0100, Roman Zippel wrote: > > On Wed, 6 Dec 2006, Ingo Molnar wrote: > > > i disagree with you and it's pretty low-impact anyway. There's still > > > quite many HZ/tick assumptions all around the time code (NTP being one > > > example), we'll deal with those via other patches. > > > > Why do you pick on the NTP code? That's actually one of the places where > > assumptions about HZ are largely gone. NTP state is updated incrementally > > and this won't change, but the update frequency can now be easily > > disconnected from HZ. > > Hey Roman, > Here's my rough first attempt at doing so. I'd not call it easy, but > maybe you have some suggestions for a simpler way? > > Basically INTERVAL_LENGTH_NSEC defines the NTP interval length that > the time code will use to accumulate with. In this patch I've pushed > it out to a full second, but it could be set via config > (NSEC_PER_SEC/HZ for regular systems, something larger for systems > using dynticks). cool! I'll give this one a go in -rt, combined with the exponential second-overflow patch. (that one is now algorithmically safe, right?) Ingo - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC] HZ free ntp
* john stultz [EMAIL PROTECTED] wrote: On Wed, 2006-12-06 at 15:33 +0100, Roman Zippel wrote: On Wed, 6 Dec 2006, Ingo Molnar wrote: i disagree with you and it's pretty low-impact anyway. There's still quite many HZ/tick assumptions all around the time code (NTP being one example), we'll deal with those via other patches. Why do you pick on the NTP code? That's actually one of the places where assumptions about HZ are largely gone. NTP state is updated incrementally and this won't change, but the update frequency can now be easily disconnected from HZ. Hey Roman, Here's my rough first attempt at doing so. I'd not call it easy, but maybe you have some suggestions for a simpler way? Basically INTERVAL_LENGTH_NSEC defines the NTP interval length that the time code will use to accumulate with. In this patch I've pushed it out to a full second, but it could be set via config (NSEC_PER_SEC/HZ for regular systems, something larger for systems using dynticks). cool! I'll give this one a go in -rt, combined with the exponential second-overflow patch. (that one is now algorithmically safe, right?) Ingo - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC] HZ free ntp
Hi, On Tue, 12 Dec 2006, john stultz wrote: Basically INTERVAL_LENGTH_NSEC defines the NTP interval length that the time code will use to accumulate with. In this patch I've pushed it out to a full second, but it could be set via config (NSEC_PER_SEC/HZ for regular systems, something larger for systems using dynticks). Why do you want to use such an interval? This makes everything only more complicated. The largest possible interval is freq cycles (or 1 second without adjustments). That is the base interval and without redesigning NTP we can't change that. This base interval can be subdivided into smaller intervals for incremental updates. You cannot choose arbitrary intervals otherwise you get other problems, e.g. with your patch time_offset handling is broken. + /* calculate the length of one NTP adjusted second */ + second_length = (u64)(tick_usec * NSEC_PER_USEC * USER_HZ); + second_length += (s64)CLOCK_TICK_ADJUST; + adj_length = (s64)time_freq; + + /* calculate tick length @ HZ*/ + tick_length = (second_length TICK_LENGTH_SHIFT) + + (adj_length (TICK_LENGTH_SHIFT - SHIFT_NSEC)); + do_div(tick_length, HZ); + tick_nsec = tick_length TICK_LENGTH_SHIFT; + + + /* calculate interval_length_base */ + /* XXX - this is broken up to avoid 64bit overlfows */ + interval_length_base = second_length * INTERVAL_LENGTH_NSEC; + interval_length_base = 2; + do_div(interval_length_base, NSEC_PER_SEC); + interval_length_base = TICK_LENGTH_SHIFT-2; You don't have to introduce anything new, it's tick_length that changes and HZ that becomes a variable in this function. bye, Roman - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC] HZ free ntp
On Wed, 2006-12-13 at 10:51 +0100, Ingo Molnar wrote: * john stultz [EMAIL PROTECTED] wrote: On Wed, 2006-12-06 at 15:33 +0100, Roman Zippel wrote: On Wed, 6 Dec 2006, Ingo Molnar wrote: i disagree with you and it's pretty low-impact anyway. There's still quite many HZ/tick assumptions all around the time code (NTP being one example), we'll deal with those via other patches. Why do you pick on the NTP code? That's actually one of the places where assumptions about HZ are largely gone. NTP state is updated incrementally and this won't change, but the update frequency can now be easily disconnected from HZ. Hey Roman, Here's my rough first attempt at doing so. I'd not call it easy, but maybe you have some suggestions for a simpler way? Basically INTERVAL_LENGTH_NSEC defines the NTP interval length that the time code will use to accumulate with. In this patch I've pushed it out to a full second, but it could be set via config (NSEC_PER_SEC/HZ for regular systems, something larger for systems using dynticks). cool! I'll give this one a go in -rt, combined with the exponential second-overflow patch. (that one is now algorithmically safe, right?) No, this one will replace the exponential accumulation patch. So we'll accumulate on second intervals, which should be far enough apart that we won't be spinning in that loop for long. thanks -john - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC] HZ free ntp
On Wed, 2006-12-13 at 14:47 +0100, Roman Zippel wrote: Hi, On Tue, 12 Dec 2006, john stultz wrote: Basically INTERVAL_LENGTH_NSEC defines the NTP interval length that the time code will use to accumulate with. In this patch I've pushed it out to a full second, but it could be set via config (NSEC_PER_SEC/HZ for regular systems, something larger for systems using dynticks). Why do you want to use such an interval? This makes everything only more complicated. The largest possible interval is freq cycles (or 1 second without adjustments). That is the base interval and without redesigning NTP we can't change that. This base interval can be subdivided into smaller intervals for incremental updates. Indeed, larger then 1 second intervals would require the second_overflow code to be reworked too. However, I'm not proposing larger then 1 second intervals at this point. I'm just allowing for non-HZ intervals. You cannot choose arbitrary intervals otherwise you get other problems, e.g. with your patch time_offset handling is broken. I'm not seeing this yet. Any more details? + /* calculate the length of one NTP adjusted second */ + second_length = (u64)(tick_usec * NSEC_PER_USEC * USER_HZ); + second_length += (s64)CLOCK_TICK_ADJUST; + adj_length = (s64)time_freq; + + /* calculate tick length @ HZ*/ + tick_length = (second_length TICK_LENGTH_SHIFT) + + (adj_length (TICK_LENGTH_SHIFT - SHIFT_NSEC)); + do_div(tick_length, HZ); + tick_nsec = tick_length TICK_LENGTH_SHIFT; + + + /* calculate interval_length_base */ + /* XXX - this is broken up to avoid 64bit overlfows */ + interval_length_base = second_length * INTERVAL_LENGTH_NSEC; + interval_length_base = 2; + do_div(interval_length_base, NSEC_PER_SEC); + interval_length_base = TICK_LENGTH_SHIFT-2; You don't have to introduce anything new, it's tick_length that changes and HZ that becomes a variable in this function. So, forgive me for rehashing this, but it seems we're cross talking again. The context here is the dynticks code. Where HZ doesn't change, but we get interrupts at much reduced rates. The problem is, if the interrupts slow to ~1 per second frequencies, we end up spending a ton of time in the main update_wall_time loop, processing that 1 second in 1/HZ chunks. The current code is correct, but just wastes a lot of time. So I'm trying to come up w/ an approach (the earlier, and broken, exponential accumulation patch had the same intention), that allows us to accumulate time in larger chunks. However, in doing so we have to work w/ the ntp.c code which (as Ingo earlier mentioned) has a number of HZ based assumptions. This last patch tries to give NTP and the timekeeping an agreed upon chunk (I chose interval as the term, since tick is somewhat connected to HZ) of time upon which accumulation is done, so we can have courser second updates, or finer grained updates, depending on config settings. In fairness, the patch probably going about this in a less then perfect way, but that's why I'm asking for your feedback and suggestions. What are your thoughts for reducing the time spent in the update_wall_time loop in the context of dynticks? thanks -john - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC] HZ free ntp
Hi, On Wed, 13 Dec 2006, john stultz wrote: The largest possible interval is freq cycles (or 1 second without adjustments). That is the base interval and without redesigning NTP we can't change that. This base interval can be subdivided into smaller intervals for incremental updates. Indeed, larger then 1 second intervals would require the second_overflow code to be reworked too. There isn't much to rework without a complete redesign. You cannot choose arbitrary intervals otherwise you get other problems, e.g. with your patch time_offset handling is broken. I'm not seeing this yet. Any more details? time_offset is scaled to HZ in do_adjtimex, which needs to be changed as well. You don't have to introduce anything new, it's tick_length that changes and HZ that becomes a variable in this function. So, forgive me for rehashing this, but it seems we're cross talking again. The context here is the dynticks code. Where HZ doesn't change, but we get interrupts at much reduced rates. I know and all you have to change in the ntp and some related code is to replace HZ there with a variable, thus make it changable, so you can increase the update interval (i.e. it becomes 1s/hz instead of 1s/HZ). However, in doing so we have to work w/ the ntp.c code which (as Ingo earlier mentioned) has a number of HZ based assumptions. Repeating Ingo's nonsense doesn't make it any more true. :-( bye, Roman - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[RFC] HZ free ntp
On Wed, 2006-12-06 at 15:33 +0100, Roman Zippel wrote: > On Wed, 6 Dec 2006, Ingo Molnar wrote: > > i disagree with you and it's pretty low-impact anyway. There's still > > quite many HZ/tick assumptions all around the time code (NTP being one > > example), we'll deal with those via other patches. > > Why do you pick on the NTP code? That's actually one of the places where > assumptions about HZ are largely gone. NTP state is updated incrementally > and this won't change, but the update frequency can now be easily > disconnected from HZ. Hey Roman, Here's my rough first attempt at doing so. I'd not call it easy, but maybe you have some suggestions for a simpler way? Basically INTERVAL_LENGTH_NSEC defines the NTP interval length that the time code will use to accumulate with. In this patch I've pushed it out to a full second, but it could be set via config (NSEC_PER_SEC/HZ for regular systems, something larger for systems using dynticks). Thoughts? -john diff --git a/include/linux/timex.h b/include/linux/timex.h index db501dc..be13daf 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h @@ -286,9 +286,10 @@ #endif /* !CONFIG_TIME_INTERPOLATION */ #define TICK_LENGTH_SHIFT 32 -/* Returns how long ticks are at present, in ns / 2^(SHIFT_SCALE-10). */ -extern u64 current_tick_length(void); +#define INTERVAL_LENGTH_NSEC (NSEC_PER_SEC) +/* Returns how long NTP intervals are at present, in ns / 2^(SHIFT_SCALE-10).*/ +extern u64 ntp_interval_length(void); extern void second_overflow(void); extern void update_ntp_one_tick(void); extern int do_adjtimex(struct timex *); diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index d0ba190..53979a9 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -127,12 +127,14 @@ EXPORT_SYMBOL_GPL(ktime_get_ts); */ static void hrtimer_get_softirq_time(struct hrtimer_base *base) { + struct timespec ts; ktime_t xtim, tomono; unsigned long seq; do { seq = read_seqbegin(_lock); - xtim = timespec_to_ktime(xtime); + getnstimeofday(); + xtim = timespec_to_ktime(ts); tomono = timespec_to_ktime(wall_to_monotonic); } while (read_seqretry(_lock, seq)); diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 3afeaa3..812c56f 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -20,11 +20,12 @@ #include */ unsigned long tick_usec = TICK_USEC; /* USER_HZ period (usec) */ unsigned long tick_nsec; /* ACTHZ period (nsec) */ -static u64 tick_length, tick_length_base; +static u64 interval_length, interval_length_base; #define MAX_TICKADJ500 /* microsecs */ -#define MAX_TICKADJ_SCALED (((u64)(MAX_TICKADJ * NSEC_PER_USEC) << \ - TICK_LENGTH_SHIFT) / HZ) +#define MAX_TICKADJ_SCALED \ + u64)MAX_TICKADJ * NSEC_PER_USEC * INTERVAL_LENGTH_NSEC) \ + / NSEC_PER_SEC)<< TICK_LENGTH_SHIFT) /* * phase-lock loop variables @@ -44,15 +45,45 @@ #define CLOCK_TICK_OVERFLOW (LATCH * HZ #define CLOCK_TICK_ADJUST (((s64)CLOCK_TICK_OVERFLOW * NSEC_PER_SEC) / \ (s64)CLOCK_TICK_RATE) +/** + * ntp_update_frequency - Calculates base values used for NTP adjustments + * + */ static void ntp_update_frequency(void) { - tick_length_base = (u64)(tick_usec * NSEC_PER_USEC * USER_HZ) << TICK_LENGTH_SHIFT; - tick_length_base += (s64)CLOCK_TICK_ADJUST << TICK_LENGTH_SHIFT; - tick_length_base += (s64)time_freq << (TICK_LENGTH_SHIFT - SHIFT_NSEC); - - do_div(tick_length_base, HZ); - - tick_nsec = tick_length_base >> TICK_LENGTH_SHIFT; + u64 second_length; + s64 adj_length; + u64 tick_length; + int neg = 0; + /* calculate the length of one NTP adjusted second */ + second_length = (u64)(tick_usec * NSEC_PER_USEC * USER_HZ); + second_length += (s64)CLOCK_TICK_ADJUST; + adj_length = (s64)time_freq; + + /* calculate tick length @ HZ*/ + tick_length = (second_length << TICK_LENGTH_SHIFT) + + (adj_length << (TICK_LENGTH_SHIFT - SHIFT_NSEC)); + do_div(tick_length, HZ); + tick_nsec = tick_length >> TICK_LENGTH_SHIFT; + + + /* calculate interval_length_base */ + /* XXX - this is broken up to avoid 64bit overlfows */ + interval_length_base = second_length * INTERVAL_LENGTH_NSEC; + interval_length_base <<= 2; + do_div(interval_length_base, NSEC_PER_SEC); + interval_length_base <<= TICK_LENGTH_SHIFT-2; + + if (adj_length < 0) { + neg = 1; + adj_length = -adj_length; + } + adj_length *= INTERVAL_LENGTH_NSEC; + do_div(adj_length, NSEC_PER_SEC); + adj_length <<= (TICK_LENGTH_SHIFT - SHIFT_NSEC); + if(neg) + adj_length = -adj_length; + interval_length_base += adj_length; } /** @@
[RFC] HZ free ntp
On Wed, 2006-12-06 at 15:33 +0100, Roman Zippel wrote: On Wed, 6 Dec 2006, Ingo Molnar wrote: i disagree with you and it's pretty low-impact anyway. There's still quite many HZ/tick assumptions all around the time code (NTP being one example), we'll deal with those via other patches. Why do you pick on the NTP code? That's actually one of the places where assumptions about HZ are largely gone. NTP state is updated incrementally and this won't change, but the update frequency can now be easily disconnected from HZ. Hey Roman, Here's my rough first attempt at doing so. I'd not call it easy, but maybe you have some suggestions for a simpler way? Basically INTERVAL_LENGTH_NSEC defines the NTP interval length that the time code will use to accumulate with. In this patch I've pushed it out to a full second, but it could be set via config (NSEC_PER_SEC/HZ for regular systems, something larger for systems using dynticks). Thoughts? -john diff --git a/include/linux/timex.h b/include/linux/timex.h index db501dc..be13daf 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h @@ -286,9 +286,10 @@ #endif /* !CONFIG_TIME_INTERPOLATION */ #define TICK_LENGTH_SHIFT 32 -/* Returns how long ticks are at present, in ns / 2^(SHIFT_SCALE-10). */ -extern u64 current_tick_length(void); +#define INTERVAL_LENGTH_NSEC (NSEC_PER_SEC) +/* Returns how long NTP intervals are at present, in ns / 2^(SHIFT_SCALE-10).*/ +extern u64 ntp_interval_length(void); extern void second_overflow(void); extern void update_ntp_one_tick(void); extern int do_adjtimex(struct timex *); diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index d0ba190..53979a9 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -127,12 +127,14 @@ EXPORT_SYMBOL_GPL(ktime_get_ts); */ static void hrtimer_get_softirq_time(struct hrtimer_base *base) { + struct timespec ts; ktime_t xtim, tomono; unsigned long seq; do { seq = read_seqbegin(xtime_lock); - xtim = timespec_to_ktime(xtime); + getnstimeofday(ts); + xtim = timespec_to_ktime(ts); tomono = timespec_to_ktime(wall_to_monotonic); } while (read_seqretry(xtime_lock, seq)); diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 3afeaa3..812c56f 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -20,11 +20,12 @@ #include asm/timex.h */ unsigned long tick_usec = TICK_USEC; /* USER_HZ period (usec) */ unsigned long tick_nsec; /* ACTHZ period (nsec) */ -static u64 tick_length, tick_length_base; +static u64 interval_length, interval_length_base; #define MAX_TICKADJ500 /* microsecs */ -#define MAX_TICKADJ_SCALED (((u64)(MAX_TICKADJ * NSEC_PER_USEC) \ - TICK_LENGTH_SHIFT) / HZ) +#define MAX_TICKADJ_SCALED \ + u64)MAX_TICKADJ * NSEC_PER_USEC * INTERVAL_LENGTH_NSEC) \ + / NSEC_PER_SEC) TICK_LENGTH_SHIFT) /* * phase-lock loop variables @@ -44,15 +45,45 @@ #define CLOCK_TICK_OVERFLOW (LATCH * HZ #define CLOCK_TICK_ADJUST (((s64)CLOCK_TICK_OVERFLOW * NSEC_PER_SEC) / \ (s64)CLOCK_TICK_RATE) +/** + * ntp_update_frequency - Calculates base values used for NTP adjustments + * + */ static void ntp_update_frequency(void) { - tick_length_base = (u64)(tick_usec * NSEC_PER_USEC * USER_HZ) TICK_LENGTH_SHIFT; - tick_length_base += (s64)CLOCK_TICK_ADJUST TICK_LENGTH_SHIFT; - tick_length_base += (s64)time_freq (TICK_LENGTH_SHIFT - SHIFT_NSEC); - - do_div(tick_length_base, HZ); - - tick_nsec = tick_length_base TICK_LENGTH_SHIFT; + u64 second_length; + s64 adj_length; + u64 tick_length; + int neg = 0; + /* calculate the length of one NTP adjusted second */ + second_length = (u64)(tick_usec * NSEC_PER_USEC * USER_HZ); + second_length += (s64)CLOCK_TICK_ADJUST; + adj_length = (s64)time_freq; + + /* calculate tick length @ HZ*/ + tick_length = (second_length TICK_LENGTH_SHIFT) + + (adj_length (TICK_LENGTH_SHIFT - SHIFT_NSEC)); + do_div(tick_length, HZ); + tick_nsec = tick_length TICK_LENGTH_SHIFT; + + + /* calculate interval_length_base */ + /* XXX - this is broken up to avoid 64bit overlfows */ + interval_length_base = second_length * INTERVAL_LENGTH_NSEC; + interval_length_base = 2; + do_div(interval_length_base, NSEC_PER_SEC); + interval_length_base = TICK_LENGTH_SHIFT-2; + + if (adj_length 0) { + neg = 1; + adj_length = -adj_length; + } + adj_length *= INTERVAL_LENGTH_NSEC; + do_div(adj_length, NSEC_PER_SEC); + adj_length = (TICK_LENGTH_SHIFT - SHIFT_NSEC); + if(neg) + adj_length = -adj_length; + interval_length_base += adj_length; } /** @@ -69,7 +100,7