[patch 03/46] [RFC] HZ free ntp

2007-01-23 Thread Thomas Gleixner
>From [EMAIL PROTECTED] Wed Dec 20 02:36:15 2006

Distangle the NTP update from HZ. This is necessary for dynamic tick
enabled kernels.

---
 include/linux/timex.h |7 +++
 kernel/hrtimer.c  |   11 ---
 kernel/time/ntp.c |   30 +++---
 kernel/timer.c|4 ++--
 4 files changed, 36 insertions(+), 16 deletions(-)

Index: linux-2.6.20-rc4-mm1-bo/include/linux/timex.h
===
--- linux-2.6.20-rc4-mm1-bo.orig/include/linux/timex.h
+++ linux-2.6.20-rc4-mm1-bo/include/linux/timex.h
@@ -286,6 +286,13 @@ static inline void time_interpolator_upd
 
 #define TICK_LENGTH_SHIFT  32
 
+#ifdef CONFIG_NO_HZ
+#define NTP_INTERVAL_FREQ  (2)
+#else
+#define NTP_INTERVAL_FREQ  (HZ)
+#endif
+#define NTP_INTERVAL_LENGTH (NSEC_PER_SEC/NTP_INTERVAL_FREQ)
+
 /* Returns how long ticks are at present, in ns / 2^(SHIFT_SCALE-10). */
 extern u64 current_tick_length(void);
 
Index: linux-2.6.20-rc4-mm1-bo/kernel/time/ntp.c
===
--- linux-2.6.20-rc4-mm1-bo.orig/kernel/time/ntp.c
+++ linux-2.6.20-rc4-mm1-bo/kernel/time/ntp.c
@@ -24,7 +24,7 @@ static u64 tick_length, tick_length_base
 
 #define MAX_TICKADJ500 /* microsecs */
 #define MAX_TICKADJ_SCALED (((u64)(MAX_TICKADJ * NSEC_PER_USEC) << \
- TICK_LENGTH_SHIFT) / HZ)
+ TICK_LENGTH_SHIFT) / NTP_INTERVAL_FREQ)
 
 /*
  * phase-lock loop variables
@@ -46,13 +46,17 @@ long time_adjust;
 
 static void ntp_update_frequency(void)
 {
-   tick_length_base = (u64)(tick_usec * NSEC_PER_USEC * USER_HZ) << 
TICK_LENGTH_SHIFT;
-   tick_length_base += (s64)CLOCK_TICK_ADJUST << TICK_LENGTH_SHIFT;
-   tick_length_base += (s64)time_freq << (TICK_LENGTH_SHIFT - SHIFT_NSEC);
+   u64 second_length = (u64)(tick_usec * NSEC_PER_USEC * USER_HZ)
+   << TICK_LENGTH_SHIFT;
+   second_length += (s64)CLOCK_TICK_ADJUST << TICK_LENGTH_SHIFT;
+   second_length += (s64)time_freq << (TICK_LENGTH_SHIFT - SHIFT_NSEC);
 
-   do_div(tick_length_base, HZ);
+   tick_length_base = second_length;
 
-   tick_nsec = tick_length_base >> TICK_LENGTH_SHIFT;
+   do_div(second_length, HZ);
+   tick_nsec = second_length >> TICK_LENGTH_SHIFT;
+
+   do_div(tick_length_base, NTP_INTERVAL_FREQ);
 }
 
 /**
@@ -162,7 +166,7 @@ void second_overflow(void)
tick_length -= MAX_TICKADJ_SCALED;
} else {
tick_length += (s64)(time_adjust * NSEC_PER_USEC /
-HZ) << TICK_LENGTH_SHIFT;
+   NTP_INTERVAL_FREQ) << TICK_LENGTH_SHIFT;
time_adjust = 0;
}
}
@@ -239,7 +243,8 @@ int do_adjtimex(struct timex *txc)
result = -EINVAL;
goto leave;
}
-   time_freq = ((s64)txc->freq * NSEC_PER_USEC) >> (SHIFT_USEC - 
SHIFT_NSEC);
+   time_freq = ((s64)txc->freq * NSEC_PER_USEC)
+   >> (SHIFT_USEC - SHIFT_NSEC);
}
 
if (txc->modes & ADJ_MAXERROR) {
@@ -309,7 +314,8 @@ int do_adjtimex(struct timex *txc)
freq_adj += time_freq;
freq_adj = min(freq_adj, (s64)MAXFREQ_NSEC);
time_freq = max(freq_adj, (s64)-MAXFREQ_NSEC);
-   time_offset = (time_offset / HZ) << SHIFT_UPDATE;
+   time_offset = (time_offset / NTP_INTERVAL_FREQ)
+   << SHIFT_UPDATE;
} /* STA_PLL */
} /* txc->modes & ADJ_OFFSET */
if (txc->modes & ADJ_TICK)
@@ -324,8 +330,10 @@ leave: if ((time_status & (STA_UNSYNC|ST
if ((txc->modes & ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT)
txc->offset= save_adjust;
else
-   txc->offset= shift_right(time_offset, SHIFT_UPDATE) * HZ / 1000;
-   txc->freq  = (time_freq / NSEC_PER_USEC) << (SHIFT_USEC - 
SHIFT_NSEC);
+   txc->offset= shift_right(time_offset, SHIFT_UPDATE)
+   * NTP_INTERVAL_FREQ / 1000;
+   txc->freq  = (time_freq / NSEC_PER_USEC)
+   << (SHIFT_USEC - SHIFT_NSEC);
txc->maxerror  = time_maxerror;
txc->esterror  = time_esterror;
txc->status= time_status;
Index: linux-2.6.20-rc4-mm1-bo/kernel/timer.c
===
--- linux-2.6.20-rc4-mm1-bo.orig/kernel/timer.c
+++ linux-2.6.20-rc4-mm1-bo/kernel/timer.c
@@ -890,7 +890,7 @@ void __init timekeeping_init(void)
ntp_clear();
 
clock = clocksource_get_next();
-   clocksource_calculate_interval(clock, tick_nsec);
+   

[patch 03/46] [RFC] HZ free ntp

2007-01-23 Thread Thomas Gleixner
From [EMAIL PROTECTED] Wed Dec 20 02:36:15 2006

Distangle the NTP update from HZ. This is necessary for dynamic tick
enabled kernels.

---
 include/linux/timex.h |7 +++
 kernel/hrtimer.c  |   11 ---
 kernel/time/ntp.c |   30 +++---
 kernel/timer.c|4 ++--
 4 files changed, 36 insertions(+), 16 deletions(-)

Index: linux-2.6.20-rc4-mm1-bo/include/linux/timex.h
===
--- linux-2.6.20-rc4-mm1-bo.orig/include/linux/timex.h
+++ linux-2.6.20-rc4-mm1-bo/include/linux/timex.h
@@ -286,6 +286,13 @@ static inline void time_interpolator_upd
 
 #define TICK_LENGTH_SHIFT  32
 
+#ifdef CONFIG_NO_HZ
+#define NTP_INTERVAL_FREQ  (2)
+#else
+#define NTP_INTERVAL_FREQ  (HZ)
+#endif
+#define NTP_INTERVAL_LENGTH (NSEC_PER_SEC/NTP_INTERVAL_FREQ)
+
 /* Returns how long ticks are at present, in ns / 2^(SHIFT_SCALE-10). */
 extern u64 current_tick_length(void);
 
Index: linux-2.6.20-rc4-mm1-bo/kernel/time/ntp.c
===
--- linux-2.6.20-rc4-mm1-bo.orig/kernel/time/ntp.c
+++ linux-2.6.20-rc4-mm1-bo/kernel/time/ntp.c
@@ -24,7 +24,7 @@ static u64 tick_length, tick_length_base
 
 #define MAX_TICKADJ500 /* microsecs */
 #define MAX_TICKADJ_SCALED (((u64)(MAX_TICKADJ * NSEC_PER_USEC)  \
- TICK_LENGTH_SHIFT) / HZ)
+ TICK_LENGTH_SHIFT) / NTP_INTERVAL_FREQ)
 
 /*
  * phase-lock loop variables
@@ -46,13 +46,17 @@ long time_adjust;
 
 static void ntp_update_frequency(void)
 {
-   tick_length_base = (u64)(tick_usec * NSEC_PER_USEC * USER_HZ)  
TICK_LENGTH_SHIFT;
-   tick_length_base += (s64)CLOCK_TICK_ADJUST  TICK_LENGTH_SHIFT;
-   tick_length_base += (s64)time_freq  (TICK_LENGTH_SHIFT - SHIFT_NSEC);
+   u64 second_length = (u64)(tick_usec * NSEC_PER_USEC * USER_HZ)
+TICK_LENGTH_SHIFT;
+   second_length += (s64)CLOCK_TICK_ADJUST  TICK_LENGTH_SHIFT;
+   second_length += (s64)time_freq  (TICK_LENGTH_SHIFT - SHIFT_NSEC);
 
-   do_div(tick_length_base, HZ);
+   tick_length_base = second_length;
 
-   tick_nsec = tick_length_base  TICK_LENGTH_SHIFT;
+   do_div(second_length, HZ);
+   tick_nsec = second_length  TICK_LENGTH_SHIFT;
+
+   do_div(tick_length_base, NTP_INTERVAL_FREQ);
 }
 
 /**
@@ -162,7 +166,7 @@ void second_overflow(void)
tick_length -= MAX_TICKADJ_SCALED;
} else {
tick_length += (s64)(time_adjust * NSEC_PER_USEC /
-HZ)  TICK_LENGTH_SHIFT;
+   NTP_INTERVAL_FREQ)  TICK_LENGTH_SHIFT;
time_adjust = 0;
}
}
@@ -239,7 +243,8 @@ int do_adjtimex(struct timex *txc)
result = -EINVAL;
goto leave;
}
-   time_freq = ((s64)txc-freq * NSEC_PER_USEC)  (SHIFT_USEC - 
SHIFT_NSEC);
+   time_freq = ((s64)txc-freq * NSEC_PER_USEC)
+(SHIFT_USEC - SHIFT_NSEC);
}
 
if (txc-modes  ADJ_MAXERROR) {
@@ -309,7 +314,8 @@ int do_adjtimex(struct timex *txc)
freq_adj += time_freq;
freq_adj = min(freq_adj, (s64)MAXFREQ_NSEC);
time_freq = max(freq_adj, (s64)-MAXFREQ_NSEC);
-   time_offset = (time_offset / HZ)  SHIFT_UPDATE;
+   time_offset = (time_offset / NTP_INTERVAL_FREQ)
+SHIFT_UPDATE;
} /* STA_PLL */
} /* txc-modes  ADJ_OFFSET */
if (txc-modes  ADJ_TICK)
@@ -324,8 +330,10 @@ leave: if ((time_status  (STA_UNSYNC|ST
if ((txc-modes  ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT)
txc-offset= save_adjust;
else
-   txc-offset= shift_right(time_offset, SHIFT_UPDATE) * HZ / 1000;
-   txc-freq  = (time_freq / NSEC_PER_USEC)  (SHIFT_USEC - 
SHIFT_NSEC);
+   txc-offset= shift_right(time_offset, SHIFT_UPDATE)
+   * NTP_INTERVAL_FREQ / 1000;
+   txc-freq  = (time_freq / NSEC_PER_USEC)
+(SHIFT_USEC - SHIFT_NSEC);
txc-maxerror  = time_maxerror;
txc-esterror  = time_esterror;
txc-status= time_status;
Index: linux-2.6.20-rc4-mm1-bo/kernel/timer.c
===
--- linux-2.6.20-rc4-mm1-bo.orig/kernel/timer.c
+++ linux-2.6.20-rc4-mm1-bo/kernel/timer.c
@@ -890,7 +890,7 @@ void __init timekeeping_init(void)
ntp_clear();
 
clock = clocksource_get_next();
-   clocksource_calculate_interval(clock, tick_nsec);
+   clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH);

Re: [RFC] HZ free ntp

2007-01-06 Thread Roman Zippel
Hi,

On Tuesday 02 January 2007 20:42, john stultz wrote:

> > tick_nsec doesn't require special treatment, in the middle term it's
> > obsolete anyway, it could be replaced with (current_tick_length() >>
> > TICK_LENGTH_SHIFT) and current_tick_length() being inlined.
>
> If NTP_INTERVAL_FREQ is different then HZ, then tick_nsec still has a
> different meaning then (current_tick_length() >> TICK_LENGTH_SHIFT).
> So since tick_nsec is still used in quite a few places, so I'm hesitant
> to pull it.

The current usage under arch is pretty much bogus and they likely can't use 
dyntick anyway, so it would be easier to disable tick_nsec completely if 
dyntick is enabled.

> > NTP_INTERVAL_FREQ could be a real variable (so it can be initialized at
> > runtime), it's already gone from all important paths.
>
> Wait, so you're suggesting NTP_INTERVAL_FREQ be a dynamic variable
> instead of a constant? Curious, could you give a bit more detail on why?

We already have more than enough config options, where the user has barely any 
idea what to do, so we should try to configure and initialize as much as 
possible at runtime depending on what the hardware is capable of.

That reminds me that the main problem left for a dynamic variable is 
time_offset. It should become a 64 bit value, so SHIFT_UPDATE isn't needed 
anymore. Right now it depends on HZ to maximize the value range.

> > In the short term I'd prefered a clock would store its frequency instead
> > of using NTP_INTERVAL_LENGTH in clocksource_calculate_interval(), so it
> > doesn't has to be derived there.
>
> I don't follow this at all. clocksources do store their own frequency
> (via mult/shift). Could you explain?

mult is not a constant and calculating the frequency like this is not very 
precise.

bye, Roman
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC] HZ free ntp

2007-01-06 Thread Roman Zippel
Hi,

On Tuesday 02 January 2007 21:50, john stultz wrote:

> > > It should be called every NTP_INTERVAL_FREQ times, but occasionally
> > > it's off
>
> Wait, so second_overflow should be called every NTP_INTERVAL_FREQ times
> (instead of every second)? Surely that's not right.

But it is, that's the reason the various adjustment values are divided by it, 
so they are applied to the next NTP_INTERVAL_FREQ times.

BTW I think NTP_INTERVAL_FREQ isn't the right name, CLOCK_UPDATE_FREQ would be 
a better name, currently ntp is the main user, but a clock can also be 
updated via other means (e.g. adjtimex or another clock).

> > > So in this case the loop in update_wall_time() should rather look like
> > > this:
> > >
> > >   while (offset >= clock->cycle_interval) {
> > >   ...
> > >   second_overflow();
> > >   while (clock->xtime_nsec >= (u64)NSEC_PER_SEC << clock->shift) {
> > >   clock->xtime_nsec -= (u64)NSEC_PER_SEC << clock->shift;
> > >   xtime.tv_sec++;
> > >   }
> > >   ...
> > >   }
> > >
> > > (Also note the change from "if" to "while".)
>
> This would assume that clock->cycle_interval would *always* be the
> length of a full second and that isn't what the patch trying to do.
>
> Maybe could you explain this some more?

As I said this was the case for a value of one.
Anyway, to avoid these problems, I'd prefer to keep it at least at 2 or better 
at 4. This would still drastically reduce the time spent in the loop and we 
can revisit the issue later.

bye, Roman
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC] HZ free ntp

2007-01-06 Thread Roman Zippel
Hi,

On Tuesday 02 January 2007 20:42, john stultz wrote:

  tick_nsec doesn't require special treatment, in the middle term it's
  obsolete anyway, it could be replaced with (current_tick_length() 
  TICK_LENGTH_SHIFT) and current_tick_length() being inlined.

 If NTP_INTERVAL_FREQ is different then HZ, then tick_nsec still has a
 different meaning then (current_tick_length()  TICK_LENGTH_SHIFT).
 So since tick_nsec is still used in quite a few places, so I'm hesitant
 to pull it.

The current usage under arch is pretty much bogus and they likely can't use 
dyntick anyway, so it would be easier to disable tick_nsec completely if 
dyntick is enabled.

  NTP_INTERVAL_FREQ could be a real variable (so it can be initialized at
  runtime), it's already gone from all important paths.

 Wait, so you're suggesting NTP_INTERVAL_FREQ be a dynamic variable
 instead of a constant? Curious, could you give a bit more detail on why?

We already have more than enough config options, where the user has barely any 
idea what to do, so we should try to configure and initialize as much as 
possible at runtime depending on what the hardware is capable of.

That reminds me that the main problem left for a dynamic variable is 
time_offset. It should become a 64 bit value, so SHIFT_UPDATE isn't needed 
anymore. Right now it depends on HZ to maximize the value range.

  In the short term I'd prefered a clock would store its frequency instead
  of using NTP_INTERVAL_LENGTH in clocksource_calculate_interval(), so it
  doesn't has to be derived there.

 I don't follow this at all. clocksources do store their own frequency
 (via mult/shift). Could you explain?

mult is not a constant and calculating the frequency like this is not very 
precise.

bye, Roman
-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC] HZ free ntp

2007-01-06 Thread Roman Zippel
Hi,

On Tuesday 02 January 2007 21:50, john stultz wrote:

   It should be called every NTP_INTERVAL_FREQ times, but occasionally
   it's off

 Wait, so second_overflow should be called every NTP_INTERVAL_FREQ times
 (instead of every second)? Surely that's not right.

But it is, that's the reason the various adjustment values are divided by it, 
so they are applied to the next NTP_INTERVAL_FREQ times.

BTW I think NTP_INTERVAL_FREQ isn't the right name, CLOCK_UPDATE_FREQ would be 
a better name, currently ntp is the main user, but a clock can also be 
updated via other means (e.g. adjtimex or another clock).

   So in this case the loop in update_wall_time() should rather look like
   this:
  
 while (offset = clock-cycle_interval) {
 ...
 second_overflow();
 while (clock-xtime_nsec = (u64)NSEC_PER_SEC  clock-shift) {
 clock-xtime_nsec -= (u64)NSEC_PER_SEC  clock-shift;
 xtime.tv_sec++;
 }
 ...
 }
  
   (Also note the change from if to while.)

 This would assume that clock-cycle_interval would *always* be the
 length of a full second and that isn't what the patch trying to do.

 Maybe could you explain this some more?

As I said this was the case for a value of one.
Anyway, to avoid these problems, I'd prefer to keep it at least at 2 or better 
at 4. This would still drastically reduce the time spent in the loop and we 
can revisit the issue later.

bye, Roman
-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC] HZ free ntp

2007-01-02 Thread john stultz
On Tue, 2007-01-02 at 11:46 -0800, john stultz wrote:
> On Mon, 2007-01-01 at 19:29 +0100, Roman Zippel wrote:
> > On Wednesday 20 December 2006 02:54, john stultz wrote:
> > 
> > > And here would be the follow on patch (again *untested*) for
> > > CONFIG_NO_HZ slowing the time accumulation down to once per second.
> > 
> > Changing it to one creates a potential problem with calling 
> > second_overflow().

Wait, at first I thought I understood this, but looking closer, I'm not
so sure I do.

> > It should be called every NTP_INTERVAL_FREQ times, but occasionally it's off

Wait, so second_overflow should be called every NTP_INTERVAL_FREQ times
(instead of every second)? Surely that's not right.

> > by one (when xtime is close to a full second and the tick length is 
> > different
> > from 1sec). At a higher frequency that's not much of a problem, but at one 
> > it
> > means second_overflow() is occasionally called twice a second or skipped for
> > a second. Usually the error should be quite small, but sometimes it can be
> > significant.
> > So in this case the loop in update_wall_time() should rather look like this:
> > 
> > while (offset >= clock->cycle_interval) {
> > ...
> > second_overflow();
> > while (clock->xtime_nsec >= (u64)NSEC_PER_SEC << clock->shift) {
> > clock->xtime_nsec -= (u64)NSEC_PER_SEC << clock->shift;
> > xtime.tv_sec++;
> > }
> > ...
> > }
> > 
> > (Also note the change from "if" to "while".)

This would assume that clock->cycle_interval would *always* be the
length of a full second and that isn't what the patch trying to do.

Maybe could you explain this some more?

thanks
-john


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC] HZ free ntp

2007-01-02 Thread john stultz
On Mon, 2007-01-01 at 19:29 +0100, Roman Zippel wrote:
> On Wednesday 20 December 2006 02:54, john stultz wrote:
> 
> > And here would be the follow on patch (again *untested*) for
> > CONFIG_NO_HZ slowing the time accumulation down to once per second.
> 
> Changing it to one creates a potential problem with calling second_overflow().
> It should be called every NTP_INTERVAL_FREQ times, but occasionally it's off
> by one (when xtime is close to a full second and the tick length is different
> from 1sec). At a higher frequency that's not much of a problem, but at one it
> means second_overflow() is occasionally called twice a second or skipped for
> a second. Usually the error should be quite small, but sometimes it can be
> significant.
> So in this case the loop in update_wall_time() should rather look like this:
> 
>   while (offset >= clock->cycle_interval) {
>   ...
>   second_overflow();
>   while (clock->xtime_nsec >= (u64)NSEC_PER_SEC << clock->shift) {
>   clock->xtime_nsec -= (u64)NSEC_PER_SEC << clock->shift;
>   xtime.tv_sec++;
>   }
>   ...
>   }
> 
> (Also note the change from "if" to "while".)

Ah, good catch! Thank you, I'll add that, retest and send it to akpm.

thanks
-john



-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC] HZ free ntp

2007-01-02 Thread john stultz
On Mon, 2007-01-01 at 17:27 +0100, Roman Zippel wrote:
> On Wednesday 20 December 2006 02:32, john stultz wrote:
> 
> > > I know and all you have to change in the ntp and some related code is to
> > > replace HZ there with a variable, thus make it changable, so you can
> > > increase the update interval (i.e. it becomes 1s/hz instead of 1s/HZ).
> >
> > Untested patch below. Does this vibe better with you are suggesting?
> 
> Yes, thanks.
> tick_nsec doesn't require special treatment, in the middle term it's obsolete
> anyway, it could be replaced with (current_tick_length() >>
> TICK_LENGTH_SHIFT) and current_tick_length() being inlined.

If NTP_INTERVAL_FREQ is different then HZ, then tick_nsec still has a
different meaning then (current_tick_length() >> TICK_LENGTH_SHIFT).
So since tick_nsec is still used in quite a few places, so I'm hesitant
to pull it.

> NTP_INTERVAL_FREQ could be a real variable (so it can be initialized at
> runtime), it's already gone from all important paths.

Wait, so you're suggesting NTP_INTERVAL_FREQ be a dynamic variable
instead of a constant? Curious, could you give a bit more detail on why?

> In the short term I'd prefered a clock would store its frequency instead of
> using NTP_INTERVAL_LENGTH in clocksource_calculate_interval(), so it doesn't
> has to be derived there.

I don't follow this at all. clocksources do store their own frequency
(via mult/shift). Could you explain?

thanks
-john

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC] HZ free ntp

2007-01-02 Thread john stultz
On Mon, 2007-01-01 at 17:27 +0100, Roman Zippel wrote:
 On Wednesday 20 December 2006 02:32, john stultz wrote:
 
   I know and all you have to change in the ntp and some related code is to
   replace HZ there with a variable, thus make it changable, so you can
   increase the update interval (i.e. it becomes 1s/hz instead of 1s/HZ).
 
  Untested patch below. Does this vibe better with you are suggesting?
 
 Yes, thanks.
 tick_nsec doesn't require special treatment, in the middle term it's obsolete
 anyway, it could be replaced with (current_tick_length() 
 TICK_LENGTH_SHIFT) and current_tick_length() being inlined.

If NTP_INTERVAL_FREQ is different then HZ, then tick_nsec still has a
different meaning then (current_tick_length()  TICK_LENGTH_SHIFT).
So since tick_nsec is still used in quite a few places, so I'm hesitant
to pull it.

 NTP_INTERVAL_FREQ could be a real variable (so it can be initialized at
 runtime), it's already gone from all important paths.

Wait, so you're suggesting NTP_INTERVAL_FREQ be a dynamic variable
instead of a constant? Curious, could you give a bit more detail on why?

 In the short term I'd prefered a clock would store its frequency instead of
 using NTP_INTERVAL_LENGTH in clocksource_calculate_interval(), so it doesn't
 has to be derived there.

I don't follow this at all. clocksources do store their own frequency
(via mult/shift). Could you explain?

thanks
-john

-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC] HZ free ntp

2007-01-02 Thread john stultz
On Mon, 2007-01-01 at 19:29 +0100, Roman Zippel wrote:
 On Wednesday 20 December 2006 02:54, john stultz wrote:
 
  And here would be the follow on patch (again *untested*) for
  CONFIG_NO_HZ slowing the time accumulation down to once per second.
 
 Changing it to one creates a potential problem with calling second_overflow().
 It should be called every NTP_INTERVAL_FREQ times, but occasionally it's off
 by one (when xtime is close to a full second and the tick length is different
 from 1sec). At a higher frequency that's not much of a problem, but at one it
 means second_overflow() is occasionally called twice a second or skipped for
 a second. Usually the error should be quite small, but sometimes it can be
 significant.
 So in this case the loop in update_wall_time() should rather look like this:
 
   while (offset = clock-cycle_interval) {
   ...
   second_overflow();
   while (clock-xtime_nsec = (u64)NSEC_PER_SEC  clock-shift) {
   clock-xtime_nsec -= (u64)NSEC_PER_SEC  clock-shift;
   xtime.tv_sec++;
   }
   ...
   }
 
 (Also note the change from if to while.)

Ah, good catch! Thank you, I'll add that, retest and send it to akpm.

thanks
-john



-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC] HZ free ntp

2007-01-02 Thread john stultz
On Tue, 2007-01-02 at 11:46 -0800, john stultz wrote:
 On Mon, 2007-01-01 at 19:29 +0100, Roman Zippel wrote:
  On Wednesday 20 December 2006 02:54, john stultz wrote:
  
   And here would be the follow on patch (again *untested*) for
   CONFIG_NO_HZ slowing the time accumulation down to once per second.
  
  Changing it to one creates a potential problem with calling 
  second_overflow().

Wait, at first I thought I understood this, but looking closer, I'm not
so sure I do.

  It should be called every NTP_INTERVAL_FREQ times, but occasionally it's off

Wait, so second_overflow should be called every NTP_INTERVAL_FREQ times
(instead of every second)? Surely that's not right.

  by one (when xtime is close to a full second and the tick length is 
  different
  from 1sec). At a higher frequency that's not much of a problem, but at one 
  it
  means second_overflow() is occasionally called twice a second or skipped for
  a second. Usually the error should be quite small, but sometimes it can be
  significant.
  So in this case the loop in update_wall_time() should rather look like this:
  
  while (offset = clock-cycle_interval) {
  ...
  second_overflow();
  while (clock-xtime_nsec = (u64)NSEC_PER_SEC  clock-shift) {
  clock-xtime_nsec -= (u64)NSEC_PER_SEC  clock-shift;
  xtime.tv_sec++;
  }
  ...
  }
  
  (Also note the change from if to while.)

This would assume that clock-cycle_interval would *always* be the
length of a full second and that isn't what the patch trying to do.

Maybe could you explain this some more?

thanks
-john


-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC] HZ free ntp

2007-01-01 Thread Roman Zippel
On Wednesday 20 December 2006 02:54, john stultz wrote:

> And here would be the follow on patch (again *untested*) for
> CONFIG_NO_HZ slowing the time accumulation down to once per second.

Changing it to one creates a potential problem with calling second_overflow(). 
It should be called every NTP_INTERVAL_FREQ times, but occasionally it's off 
by one (when xtime is close to a full second and the tick length is different 
from 1sec). At a higher frequency that's not much of a problem, but at one it 
means second_overflow() is occasionally called twice a second or skipped for 
a second. Usually the error should be quite small, but sometimes it can be 
significant.
So in this case the loop in update_wall_time() should rather look like this:

while (offset >= clock->cycle_interval) {
...
second_overflow();
while (clock->xtime_nsec >= (u64)NSEC_PER_SEC << clock->shift) {
clock->xtime_nsec -= (u64)NSEC_PER_SEC << clock->shift;
xtime.tv_sec++;
}
...
}

(Also note the change from "if" to "while".)

Another problem area is that the clock shift value might need adjustments, 
since when you reduce the update frequency, it also increases the error 
adjustment steps and thus influences the jitter. This is especially important 
if we want to synchronize the TSC on multiple cpu's.

bye, Roman
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC] HZ free ntp

2007-01-01 Thread Roman Zippel
Hi,

On Wednesday 20 December 2006 02:32, john stultz wrote:

> > I know and all you have to change in the ntp and some related code is to
> > replace HZ there with a variable, thus make it changable, so you can
> > increase the update interval (i.e. it becomes 1s/hz instead of 1s/HZ).
>
> Untested patch below. Does this vibe better with you are suggesting?

Yes, thanks.
tick_nsec doesn't require special treatment, in the middle term it's obsolete 
anyway, it could be replaced with (current_tick_length() >> 
TICK_LENGTH_SHIFT) and current_tick_length() being inlined.
NTP_INTERVAL_FREQ could be a real variable (so it can be initialized at 
runtime), it's already gone from all important paths.
In the short term I'd prefered a clock would store its frequency instead of 
using NTP_INTERVAL_LENGTH in clocksource_calculate_interval(), so it doesn't 
has to be derived there.

bye, Roman
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC] HZ free ntp

2007-01-01 Thread Roman Zippel
Hi,

On Wednesday 20 December 2006 02:32, john stultz wrote:

  I know and all you have to change in the ntp and some related code is to
  replace HZ there with a variable, thus make it changable, so you can
  increase the update interval (i.e. it becomes 1s/hz instead of 1s/HZ).

 Untested patch below. Does this vibe better with you are suggesting?

Yes, thanks.
tick_nsec doesn't require special treatment, in the middle term it's obsolete 
anyway, it could be replaced with (current_tick_length()  
TICK_LENGTH_SHIFT) and current_tick_length() being inlined.
NTP_INTERVAL_FREQ could be a real variable (so it can be initialized at 
runtime), it's already gone from all important paths.
In the short term I'd prefered a clock would store its frequency instead of 
using NTP_INTERVAL_LENGTH in clocksource_calculate_interval(), so it doesn't 
has to be derived there.

bye, Roman
-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC] HZ free ntp

2007-01-01 Thread Roman Zippel
On Wednesday 20 December 2006 02:54, john stultz wrote:

 And here would be the follow on patch (again *untested*) for
 CONFIG_NO_HZ slowing the time accumulation down to once per second.

Changing it to one creates a potential problem with calling second_overflow(). 
It should be called every NTP_INTERVAL_FREQ times, but occasionally it's off 
by one (when xtime is close to a full second and the tick length is different 
from 1sec). At a higher frequency that's not much of a problem, but at one it 
means second_overflow() is occasionally called twice a second or skipped for 
a second. Usually the error should be quite small, but sometimes it can be 
significant.
So in this case the loop in update_wall_time() should rather look like this:

while (offset = clock-cycle_interval) {
...
second_overflow();
while (clock-xtime_nsec = (u64)NSEC_PER_SEC  clock-shift) {
clock-xtime_nsec -= (u64)NSEC_PER_SEC  clock-shift;
xtime.tv_sec++;
}
...
}

(Also note the change from if to while.)

Another problem area is that the clock shift value might need adjustments, 
since when you reduce the update frequency, it also increases the error 
adjustment steps and thus influences the jitter. This is especially important 
if we want to synchronize the TSC on multiple cpu's.

bye, Roman
-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC] HZ free ntp

2006-12-20 Thread Andrew Morton
On Tue, 19 Dec 2006 17:54:18 -0800
john stultz <[EMAIL PROTECTED]> wrote:

> On Tue, 2006-12-19 at 17:32 -0800, john stultz wrote:
> > On Wed, 2006-12-13 at 21:40 +0100, Roman Zippel wrote:
> > > On Wed, 13 Dec 2006, john stultz wrote:
> > > > > You don't have to introduce anything new, it's tick_length that 
> > > > > changes
> > > > > and HZ that becomes a variable in this function.
> > > >
> > > > So, forgive me for rehashing this, but it seems we're cross talking
> > > > again. The context here is the dynticks code. Where HZ doesn't change,
> > > > but we get interrupts at much reduced rates.
> > > 
> > > I know and all you have to change in the ntp and some related code is to
> > > replace HZ there with a variable, thus make it changable, so you can
> > > increase the update interval (i.e. it becomes 1s/hz instead of 1s/HZ).
> > 
> > Untested patch below. Does this vibe better with you are suggesting?
> 
> And here would be the follow on patch (again *untested*) for
> CONFIG_NO_HZ slowing the time accumulation down to once per second.

I'm still awaiting a final-looking version of this patch, fyi.

I don't understand whether this is a theoretical might-happen thing,
or if NTP problems have actually been observed in the field?

Either way, I'm sure the final changelog will clear that up ;)
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC] HZ free ntp

2006-12-20 Thread Andrew Morton
On Tue, 19 Dec 2006 17:54:18 -0800
john stultz [EMAIL PROTECTED] wrote:

 On Tue, 2006-12-19 at 17:32 -0800, john stultz wrote:
  On Wed, 2006-12-13 at 21:40 +0100, Roman Zippel wrote:
   On Wed, 13 Dec 2006, john stultz wrote:
 You don't have to introduce anything new, it's tick_length that 
 changes
 and HZ that becomes a variable in this function.
   
So, forgive me for rehashing this, but it seems we're cross talking
again. The context here is the dynticks code. Where HZ doesn't change,
but we get interrupts at much reduced rates.
   
   I know and all you have to change in the ntp and some related code is to
   replace HZ there with a variable, thus make it changable, so you can
   increase the update interval (i.e. it becomes 1s/hz instead of 1s/HZ).
  
  Untested patch below. Does this vibe better with you are suggesting?
 
 And here would be the follow on patch (again *untested*) for
 CONFIG_NO_HZ slowing the time accumulation down to once per second.

I'm still awaiting a final-looking version of this patch, fyi.

I don't understand whether this is a theoretical might-happen thing,
or if NTP problems have actually been observed in the field?

Either way, I'm sure the final changelog will clear that up ;)
-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC] HZ free ntp

2006-12-19 Thread john stultz
On Tue, 2006-12-19 at 17:32 -0800, john stultz wrote:
> On Wed, 2006-12-13 at 21:40 +0100, Roman Zippel wrote:
> > On Wed, 13 Dec 2006, john stultz wrote:
> > > > You don't have to introduce anything new, it's tick_length that changes
> > > > and HZ that becomes a variable in this function.
> > >
> > > So, forgive me for rehashing this, but it seems we're cross talking
> > > again. The context here is the dynticks code. Where HZ doesn't change,
> > > but we get interrupts at much reduced rates.
> > 
> > I know and all you have to change in the ntp and some related code is to
> > replace HZ there with a variable, thus make it changable, so you can
> > increase the update interval (i.e. it becomes 1s/hz instead of 1s/HZ).
> 
> Untested patch below. Does this vibe better with you are suggesting?

And here would be the follow on patch (again *untested*) for
CONFIG_NO_HZ slowing the time accumulation down to once per second.

thanks
-john


diff --git a/include/linux/timex.h b/include/linux/timex.h
index 8241e6e..3beb539 100644
--- a/include/linux/timex.h
+++ b/include/linux/timex.h
@@ -286,7 +286,11 @@ #endif /* !CONFIG_TIME_INTERPOLATION */
 
 #define TICK_LENGTH_SHIFT  32
 
+#ifdef CONFIG_NO_HZ
+#define NTP_INTERVAL_FREQ  (1)
+#else
 #define NTP_INTERVAL_FREQ  (HZ)
+#endif
 #define NTP_INTERVAL_LENGTH (NSEC_PER_SEC/NTP_INTERVAL_FREQ)
 
 /* Returns how long ticks are at present, in ns / 2^(SHIFT_SCALE-10). */
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index d0ba190..53979a9 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -127,12 +127,14 @@ EXPORT_SYMBOL_GPL(ktime_get_ts);
  */
 static void hrtimer_get_softirq_time(struct hrtimer_base *base)
 {
+   struct timespec ts;
ktime_t xtim, tomono;
unsigned long seq;
 
do {
seq = read_seqbegin(_lock);
-   xtim = timespec_to_ktime(xtime);
+   getnstimeofday();
+   xtim = timespec_to_ktime(ts);
tomono = timespec_to_ktime(wall_to_monotonic);
 
} while (read_seqretry(_lock, seq));


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC] HZ free ntp

2006-12-19 Thread john stultz
On Wed, 2006-12-13 at 21:40 +0100, Roman Zippel wrote:
> On Wed, 13 Dec 2006, john stultz wrote:
> > > You cannot choose arbitrary intervals otherwise you get other problems,
> > > e.g. with your patch time_offset handling is broken.
> >
> > I'm not seeing this yet. Any more details?
> 
> time_offset is scaled to HZ in do_adjtimex, which needs to be changed as
> well.

Ah, thanks! Fixed.

> > > You don't have to introduce anything new, it's tick_length that changes
> > > and HZ that becomes a variable in this function.
> >
> > So, forgive me for rehashing this, but it seems we're cross talking
> > again. The context here is the dynticks code. Where HZ doesn't change,
> > but we get interrupts at much reduced rates.
> 
> I know and all you have to change in the ntp and some related code is to
> replace HZ there with a variable, thus make it changable, so you can
> increase the update interval (i.e. it becomes 1s/hz instead of 1s/HZ).

Untested patch below. Does this vibe better with you are suggesting?

Any other suggestions or feedback?

thanks
-john


diff --git a/include/linux/timex.h b/include/linux/timex.h
index db501dc..8241e6e 100644
--- a/include/linux/timex.h
+++ b/include/linux/timex.h
@@ -286,6 +286,9 @@ #endif /* !CONFIG_TIME_INTERPOLATION */
 
 #define TICK_LENGTH_SHIFT  32
 
+#define NTP_INTERVAL_FREQ  (HZ)
+#define NTP_INTERVAL_LENGTH (NSEC_PER_SEC/NTP_INTERVAL_FREQ)
+
 /* Returns how long ticks are at present, in ns / 2^(SHIFT_SCALE-10). */
 extern u64 current_tick_length(void);
 
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 3afeaa3..eb12509 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -24,7 +24,7 @@ static u64 tick_length, tick_length_base
 
 #define MAX_TICKADJ500 /* microsecs */
 #define MAX_TICKADJ_SCALED (((u64)(MAX_TICKADJ * NSEC_PER_USEC) << \
- TICK_LENGTH_SHIFT) / HZ)
+ TICK_LENGTH_SHIFT) / NTP_INTERVAL_FREQ)
 
 /*
  * phase-lock loop variables
@@ -46,13 +46,17 @@ #define CLOCK_TICK_ADJUST   (((s64)CLOCK_T
 
 static void ntp_update_frequency(void)
 {
-   tick_length_base = (u64)(tick_usec * NSEC_PER_USEC * USER_HZ) << 
TICK_LENGTH_SHIFT;
-   tick_length_base += (s64)CLOCK_TICK_ADJUST << TICK_LENGTH_SHIFT;
-   tick_length_base += (s64)time_freq << (TICK_LENGTH_SHIFT - SHIFT_NSEC);
+   u64 second_length = (u64)(tick_usec * NSEC_PER_USEC * USER_HZ)
+   << TICK_LENGTH_SHIFT;
+   second_length += (s64)CLOCK_TICK_ADJUST << TICK_LENGTH_SHIFT;
+   second_length += (s64)time_freq << (TICK_LENGTH_SHIFT - SHIFT_NSEC);
 
-   do_div(tick_length_base, HZ);
+   tick_length_base = second_length;
 
-   tick_nsec = tick_length_base >> TICK_LENGTH_SHIFT;
+   do_div(second_length, HZ);
+   tick_nsec = second_length >> TICK_LENGTH_SHIFT;
+
+   do_div(tick_length_base, NTP_INTERVAL_FREQ);
 }
 
 /**
@@ -162,7 +166,7 @@ void second_overflow(void)
tick_length -= MAX_TICKADJ_SCALED;
} else {
tick_length += (s64)(time_adjust * NSEC_PER_USEC /
-HZ) << TICK_LENGTH_SHIFT;
+   NTP_INTERVAL_FREQ) << TICK_LENGTH_SHIFT;
time_adjust = 0;
}
}
@@ -239,7 +243,8 @@ #endif
result = -EINVAL;
goto leave;
}
-   time_freq = ((s64)txc->freq * NSEC_PER_USEC) >> (SHIFT_USEC - 
SHIFT_NSEC);
+   time_freq = ((s64)txc->freq * NSEC_PER_USEC)
+   >> (SHIFT_USEC - SHIFT_NSEC);
}
 
if (txc->modes & ADJ_MAXERROR) {
@@ -309,7 +314,8 @@ #endif
freq_adj += time_freq;
freq_adj = min(freq_adj, (s64)MAXFREQ_NSEC);
time_freq = max(freq_adj, (s64)-MAXFREQ_NSEC);
-   time_offset = (time_offset / HZ) << SHIFT_UPDATE;
+   time_offset = (time_offset / NTP_INTERVAL_FREQ)
+   << SHIFT_UPDATE;
} /* STA_PLL */
} /* txc->modes & ADJ_OFFSET */
if (txc->modes & ADJ_TICK)
@@ -324,8 +330,10 @@ leave: if ((time_status & (STA_UNSYNC|ST
if ((txc->modes & ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT)
txc->offset= save_adjust;
else
-   txc->offset= shift_right(time_offset, SHIFT_UPDATE) * HZ / 1000;
-   txc->freq  = (time_freq / NSEC_PER_USEC) << (SHIFT_USEC - 
SHIFT_NSEC);
+   txc->offset= shift_right(time_offset, SHIFT_UPDATE)
+   * NTP_INTERVAL_FREQ / 1000;
+   txc->freq  = (time_freq / NSEC_PER_USEC)
+   << (SHIFT_USEC - SHIFT_NSEC);
txc->maxerror  = time_maxerror;
txc->esterror  = time_esterror;
txc->status   

Re: [RFC] HZ free ntp

2006-12-19 Thread john stultz
On Wed, 2006-12-13 at 21:40 +0100, Roman Zippel wrote:
 On Wed, 13 Dec 2006, john stultz wrote:
   You cannot choose arbitrary intervals otherwise you get other problems,
   e.g. with your patch time_offset handling is broken.
 
  I'm not seeing this yet. Any more details?
 
 time_offset is scaled to HZ in do_adjtimex, which needs to be changed as
 well.

Ah, thanks! Fixed.

   You don't have to introduce anything new, it's tick_length that changes
   and HZ that becomes a variable in this function.
 
  So, forgive me for rehashing this, but it seems we're cross talking
  again. The context here is the dynticks code. Where HZ doesn't change,
  but we get interrupts at much reduced rates.
 
 I know and all you have to change in the ntp and some related code is to
 replace HZ there with a variable, thus make it changable, so you can
 increase the update interval (i.e. it becomes 1s/hz instead of 1s/HZ).

Untested patch below. Does this vibe better with you are suggesting?

Any other suggestions or feedback?

thanks
-john


diff --git a/include/linux/timex.h b/include/linux/timex.h
index db501dc..8241e6e 100644
--- a/include/linux/timex.h
+++ b/include/linux/timex.h
@@ -286,6 +286,9 @@ #endif /* !CONFIG_TIME_INTERPOLATION */
 
 #define TICK_LENGTH_SHIFT  32
 
+#define NTP_INTERVAL_FREQ  (HZ)
+#define NTP_INTERVAL_LENGTH (NSEC_PER_SEC/NTP_INTERVAL_FREQ)
+
 /* Returns how long ticks are at present, in ns / 2^(SHIFT_SCALE-10). */
 extern u64 current_tick_length(void);
 
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 3afeaa3..eb12509 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -24,7 +24,7 @@ static u64 tick_length, tick_length_base
 
 #define MAX_TICKADJ500 /* microsecs */
 #define MAX_TICKADJ_SCALED (((u64)(MAX_TICKADJ * NSEC_PER_USEC)  \
- TICK_LENGTH_SHIFT) / HZ)
+ TICK_LENGTH_SHIFT) / NTP_INTERVAL_FREQ)
 
 /*
  * phase-lock loop variables
@@ -46,13 +46,17 @@ #define CLOCK_TICK_ADJUST   (((s64)CLOCK_T
 
 static void ntp_update_frequency(void)
 {
-   tick_length_base = (u64)(tick_usec * NSEC_PER_USEC * USER_HZ)  
TICK_LENGTH_SHIFT;
-   tick_length_base += (s64)CLOCK_TICK_ADJUST  TICK_LENGTH_SHIFT;
-   tick_length_base += (s64)time_freq  (TICK_LENGTH_SHIFT - SHIFT_NSEC);
+   u64 second_length = (u64)(tick_usec * NSEC_PER_USEC * USER_HZ)
+TICK_LENGTH_SHIFT;
+   second_length += (s64)CLOCK_TICK_ADJUST  TICK_LENGTH_SHIFT;
+   second_length += (s64)time_freq  (TICK_LENGTH_SHIFT - SHIFT_NSEC);
 
-   do_div(tick_length_base, HZ);
+   tick_length_base = second_length;
 
-   tick_nsec = tick_length_base  TICK_LENGTH_SHIFT;
+   do_div(second_length, HZ);
+   tick_nsec = second_length  TICK_LENGTH_SHIFT;
+
+   do_div(tick_length_base, NTP_INTERVAL_FREQ);
 }
 
 /**
@@ -162,7 +166,7 @@ void second_overflow(void)
tick_length -= MAX_TICKADJ_SCALED;
} else {
tick_length += (s64)(time_adjust * NSEC_PER_USEC /
-HZ)  TICK_LENGTH_SHIFT;
+   NTP_INTERVAL_FREQ)  TICK_LENGTH_SHIFT;
time_adjust = 0;
}
}
@@ -239,7 +243,8 @@ #endif
result = -EINVAL;
goto leave;
}
-   time_freq = ((s64)txc-freq * NSEC_PER_USEC)  (SHIFT_USEC - 
SHIFT_NSEC);
+   time_freq = ((s64)txc-freq * NSEC_PER_USEC)
+(SHIFT_USEC - SHIFT_NSEC);
}
 
if (txc-modes  ADJ_MAXERROR) {
@@ -309,7 +314,8 @@ #endif
freq_adj += time_freq;
freq_adj = min(freq_adj, (s64)MAXFREQ_NSEC);
time_freq = max(freq_adj, (s64)-MAXFREQ_NSEC);
-   time_offset = (time_offset / HZ)  SHIFT_UPDATE;
+   time_offset = (time_offset / NTP_INTERVAL_FREQ)
+SHIFT_UPDATE;
} /* STA_PLL */
} /* txc-modes  ADJ_OFFSET */
if (txc-modes  ADJ_TICK)
@@ -324,8 +330,10 @@ leave: if ((time_status  (STA_UNSYNC|ST
if ((txc-modes  ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT)
txc-offset= save_adjust;
else
-   txc-offset= shift_right(time_offset, SHIFT_UPDATE) * HZ / 1000;
-   txc-freq  = (time_freq / NSEC_PER_USEC)  (SHIFT_USEC - 
SHIFT_NSEC);
+   txc-offset= shift_right(time_offset, SHIFT_UPDATE)
+   * NTP_INTERVAL_FREQ / 1000;
+   txc-freq  = (time_freq / NSEC_PER_USEC)
+(SHIFT_USEC - SHIFT_NSEC);
txc-maxerror  = time_maxerror;
txc-esterror  = time_esterror;
txc-status= time_status;
diff --git a/kernel/timer.c b/kernel/timer.c
index 

Re: [RFC] HZ free ntp

2006-12-19 Thread john stultz
On Tue, 2006-12-19 at 17:32 -0800, john stultz wrote:
 On Wed, 2006-12-13 at 21:40 +0100, Roman Zippel wrote:
  On Wed, 13 Dec 2006, john stultz wrote:
You don't have to introduce anything new, it's tick_length that changes
and HZ that becomes a variable in this function.
  
   So, forgive me for rehashing this, but it seems we're cross talking
   again. The context here is the dynticks code. Where HZ doesn't change,
   but we get interrupts at much reduced rates.
  
  I know and all you have to change in the ntp and some related code is to
  replace HZ there with a variable, thus make it changable, so you can
  increase the update interval (i.e. it becomes 1s/hz instead of 1s/HZ).
 
 Untested patch below. Does this vibe better with you are suggesting?

And here would be the follow on patch (again *untested*) for
CONFIG_NO_HZ slowing the time accumulation down to once per second.

thanks
-john


diff --git a/include/linux/timex.h b/include/linux/timex.h
index 8241e6e..3beb539 100644
--- a/include/linux/timex.h
+++ b/include/linux/timex.h
@@ -286,7 +286,11 @@ #endif /* !CONFIG_TIME_INTERPOLATION */
 
 #define TICK_LENGTH_SHIFT  32
 
+#ifdef CONFIG_NO_HZ
+#define NTP_INTERVAL_FREQ  (1)
+#else
 #define NTP_INTERVAL_FREQ  (HZ)
+#endif
 #define NTP_INTERVAL_LENGTH (NSEC_PER_SEC/NTP_INTERVAL_FREQ)
 
 /* Returns how long ticks are at present, in ns / 2^(SHIFT_SCALE-10). */
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index d0ba190..53979a9 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -127,12 +127,14 @@ EXPORT_SYMBOL_GPL(ktime_get_ts);
  */
 static void hrtimer_get_softirq_time(struct hrtimer_base *base)
 {
+   struct timespec ts;
ktime_t xtim, tomono;
unsigned long seq;
 
do {
seq = read_seqbegin(xtime_lock);
-   xtim = timespec_to_ktime(xtime);
+   getnstimeofday(ts);
+   xtim = timespec_to_ktime(ts);
tomono = timespec_to_ktime(wall_to_monotonic);
 
} while (read_seqretry(xtime_lock, seq));


-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC] HZ free ntp

2006-12-13 Thread Roman Zippel
Hi,

On Wed, 13 Dec 2006, john stultz wrote:

> > The largest possible interval is freq cycles (or 1 second without
> > adjustments). That is the base interval and without redesigning NTP we
> > can't change that. This base interval can be subdivided into smaller
> > intervals for incremental updates.
> 
> Indeed, larger then 1 second intervals would require the second_overflow
> code to be reworked too.

There isn't much to rework without a complete redesign.

> > You cannot choose arbitrary intervals otherwise you get other problems,
> > e.g. with your patch time_offset handling is broken.
> 
> I'm not seeing this yet. Any more details? 

time_offset is scaled to HZ in do_adjtimex, which needs to be changed as 
well.

> > You don't have to introduce anything new, it's tick_length that changes
> > and HZ that becomes a variable in this function.
> 
> So, forgive me for rehashing this, but it seems we're cross talking
> again. The context here is the dynticks code. Where HZ doesn't change,
> but we get interrupts at much reduced rates.

I know and all you have to change in the ntp and some related code is to 
replace HZ there with a variable, thus make it changable, so you can 
increase the update interval (i.e. it becomes 1s/hz instead of 1s/HZ).

> However, in doing so we have to
> work w/ the ntp.c code which (as Ingo earlier mentioned) has a number of
> HZ based assumptions.

Repeating Ingo's nonsense doesn't make it any more true. :-(

bye, Roman
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC] HZ free ntp

2006-12-13 Thread john stultz
On Wed, 2006-12-13 at 14:47 +0100, Roman Zippel wrote:
> Hi,
> 
> On Tue, 12 Dec 2006, john stultz wrote:
> 
> > Basically INTERVAL_LENGTH_NSEC defines the NTP interval length that the
> > time code will use to accumulate with. In this patch I've pushed it out
> > to a full second, but it could be set via config (NSEC_PER_SEC/HZ for
> > regular systems, something larger for systems using dynticks).
> 
> Why do you want to use such an interval? This makes everything only more
> complicated.
> The largest possible interval is freq cycles (or 1 second without
> adjustments). That is the base interval and without redesigning NTP we
> can't change that. This base interval can be subdivided into smaller
> intervals for incremental updates.

Indeed, larger then 1 second intervals would require the second_overflow
code to be reworked too. However, I'm not proposing larger then 1 second
intervals at this point. I'm just allowing for non-HZ intervals.

> You cannot choose arbitrary intervals otherwise you get other problems,
> e.g. with your patch time_offset handling is broken.

I'm not seeing this yet. Any more details? 


> > +   /* calculate the length of one NTP adjusted second */
> > +   second_length = (u64)(tick_usec * NSEC_PER_USEC * USER_HZ);
> > +   second_length += (s64)CLOCK_TICK_ADJUST;
> > +   adj_length = (s64)time_freq;
> > +
> > +   /* calculate tick length @ HZ*/
> > +   tick_length = (second_length << TICK_LENGTH_SHIFT)
> > +   + (adj_length << (TICK_LENGTH_SHIFT - SHIFT_NSEC));
> > +   do_div(tick_length, HZ);
> > +   tick_nsec = tick_length >> TICK_LENGTH_SHIFT;
> > +
> > +
> > +   /* calculate interval_length_base */
> > +   /* XXX - this is broken up to avoid 64bit overlfows */
> > +   interval_length_base = second_length * INTERVAL_LENGTH_NSEC;
> > +   interval_length_base <<= 2;
> > +   do_div(interval_length_base, NSEC_PER_SEC);
> > +   interval_length_base <<= TICK_LENGTH_SHIFT-2;
> 
> You don't have to introduce anything new, it's tick_length that changes
> and HZ that becomes a variable in this function.

So, forgive me for rehashing this, but it seems we're cross talking
again. The context here is the dynticks code. Where HZ doesn't change,
but we get interrupts at much reduced rates. The problem is, if the
interrupts slow to ~1 per second frequencies, we end up spending a ton
of time in the main update_wall_time loop, processing that 1 second in
1/HZ chunks. The current code is correct, but just wastes a lot of time.

So I'm trying to come up w/ an approach (the earlier, and broken,
exponential accumulation patch had the same intention), that allows us
to accumulate time in larger chunks. However, in doing so we have to
work w/ the ntp.c code which (as Ingo earlier mentioned) has a number of
HZ based assumptions.

This last patch tries to give NTP and the timekeeping an agreed upon
chunk (I chose "interval" as the term, since "tick" is somewhat
connected to HZ) of time upon which accumulation is done, so we can have
courser second updates, or finer grained updates, depending on config
settings.

In fairness, the patch probably going about this in a less then perfect
way, but that's why I'm asking for your feedback and suggestions. What
are your thoughts for reducing the time spent in the update_wall_time
loop in the context of dynticks?

thanks
-john

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC] HZ free ntp

2006-12-13 Thread john stultz
On Wed, 2006-12-13 at 10:51 +0100, Ingo Molnar wrote:
> * john stultz <[EMAIL PROTECTED]> wrote:
> 
> > On Wed, 2006-12-06 at 15:33 +0100, Roman Zippel wrote:
> > > On Wed, 6 Dec 2006, Ingo Molnar wrote:
> > > > i disagree with you and it's pretty low-impact anyway. There's still
> > > > quite many HZ/tick assumptions all around the time code (NTP being one
> > > > example), we'll deal with those via other patches.
> > >
> > > Why do you pick on the NTP code? That's actually one of the places where
> > > assumptions about HZ are largely gone. NTP state is updated incrementally
> > > and this won't change, but the update frequency can now be easily
> > > disconnected from HZ.
> >
> > Hey Roman,
> > Here's my rough first attempt at doing so. I'd not call it easy, but
> > maybe you have some suggestions for a simpler way?
> >
> > Basically INTERVAL_LENGTH_NSEC defines the NTP interval length that
> > the time code will use to accumulate with. In this patch I've pushed
> > it out to a full second, but it could be set via config
> > (NSEC_PER_SEC/HZ for regular systems, something larger for systems
> > using dynticks).
> 
> cool! I'll give this one a go in -rt, combined with the exponential
> second-overflow patch. (that one is now algorithmically safe, right?)

No, this one will replace the exponential accumulation patch. So we'll
accumulate on second intervals, which should be far enough apart that we
won't be spinning in that loop for long.

thanks
-john


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC] HZ free ntp

2006-12-13 Thread Roman Zippel
Hi,

On Tue, 12 Dec 2006, john stultz wrote:

> Basically INTERVAL_LENGTH_NSEC defines the NTP interval length that the
> time code will use to accumulate with. In this patch I've pushed it out
> to a full second, but it could be set via config (NSEC_PER_SEC/HZ for
> regular systems, something larger for systems using dynticks).

Why do you want to use such an interval? This makes everything only more 
complicated.
The largest possible interval is freq cycles (or 1 second without 
adjustments). That is the base interval and without redesigning NTP we 
can't change that. This base interval can be subdivided into smaller
intervals for incremental updates.
You cannot choose arbitrary intervals otherwise you get other problems, 
e.g. with your patch time_offset handling is broken.

> + /* calculate the length of one NTP adjusted second */
> + second_length = (u64)(tick_usec * NSEC_PER_USEC * USER_HZ);
> + second_length += (s64)CLOCK_TICK_ADJUST;
> + adj_length = (s64)time_freq;
> +
> + /* calculate tick length @ HZ*/
> + tick_length = (second_length << TICK_LENGTH_SHIFT)
> + + (adj_length << (TICK_LENGTH_SHIFT - SHIFT_NSEC));
> + do_div(tick_length, HZ);
> + tick_nsec = tick_length >> TICK_LENGTH_SHIFT;
> +
> +
> + /* calculate interval_length_base */
> + /* XXX - this is broken up to avoid 64bit overlfows */
> + interval_length_base = second_length * INTERVAL_LENGTH_NSEC;
> + interval_length_base <<= 2;
> + do_div(interval_length_base, NSEC_PER_SEC);
> + interval_length_base <<= TICK_LENGTH_SHIFT-2;

You don't have to introduce anything new, it's tick_length that changes 
and HZ that becomes a variable in this function.

bye, Roman
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC] HZ free ntp

2006-12-13 Thread Ingo Molnar

* john stultz <[EMAIL PROTECTED]> wrote:

> On Wed, 2006-12-06 at 15:33 +0100, Roman Zippel wrote:
> > On Wed, 6 Dec 2006, Ingo Molnar wrote:
> > > i disagree with you and it's pretty low-impact anyway. There's still
> > > quite many HZ/tick assumptions all around the time code (NTP being one
> > > example), we'll deal with those via other patches.
> > 
> > Why do you pick on the NTP code? That's actually one of the places where
> > assumptions about HZ are largely gone. NTP state is updated incrementally
> > and this won't change, but the update frequency can now be easily
> > disconnected from HZ.
> 
> Hey Roman,
>   Here's my rough first attempt at doing so. I'd not call it easy, but
> maybe you have some suggestions for a simpler way?
> 
> Basically INTERVAL_LENGTH_NSEC defines the NTP interval length that 
> the time code will use to accumulate with. In this patch I've pushed 
> it out to a full second, but it could be set via config 
> (NSEC_PER_SEC/HZ for regular systems, something larger for systems 
> using dynticks).

cool! I'll give this one a go in -rt, combined with the exponential 
second-overflow patch. (that one is now algorithmically safe, right?)

Ingo
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC] HZ free ntp

2006-12-13 Thread Ingo Molnar

* john stultz [EMAIL PROTECTED] wrote:

 On Wed, 2006-12-06 at 15:33 +0100, Roman Zippel wrote:
  On Wed, 6 Dec 2006, Ingo Molnar wrote:
   i disagree with you and it's pretty low-impact anyway. There's still
   quite many HZ/tick assumptions all around the time code (NTP being one
   example), we'll deal with those via other patches.
  
  Why do you pick on the NTP code? That's actually one of the places where
  assumptions about HZ are largely gone. NTP state is updated incrementally
  and this won't change, but the update frequency can now be easily
  disconnected from HZ.
 
 Hey Roman,
   Here's my rough first attempt at doing so. I'd not call it easy, but
 maybe you have some suggestions for a simpler way?
 
 Basically INTERVAL_LENGTH_NSEC defines the NTP interval length that 
 the time code will use to accumulate with. In this patch I've pushed 
 it out to a full second, but it could be set via config 
 (NSEC_PER_SEC/HZ for regular systems, something larger for systems 
 using dynticks).

cool! I'll give this one a go in -rt, combined with the exponential 
second-overflow patch. (that one is now algorithmically safe, right?)

Ingo
-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC] HZ free ntp

2006-12-13 Thread Roman Zippel
Hi,

On Tue, 12 Dec 2006, john stultz wrote:

 Basically INTERVAL_LENGTH_NSEC defines the NTP interval length that the
 time code will use to accumulate with. In this patch I've pushed it out
 to a full second, but it could be set via config (NSEC_PER_SEC/HZ for
 regular systems, something larger for systems using dynticks).

Why do you want to use such an interval? This makes everything only more 
complicated.
The largest possible interval is freq cycles (or 1 second without 
adjustments). That is the base interval and without redesigning NTP we 
can't change that. This base interval can be subdivided into smaller
intervals for incremental updates.
You cannot choose arbitrary intervals otherwise you get other problems, 
e.g. with your patch time_offset handling is broken.

 + /* calculate the length of one NTP adjusted second */
 + second_length = (u64)(tick_usec * NSEC_PER_USEC * USER_HZ);
 + second_length += (s64)CLOCK_TICK_ADJUST;
 + adj_length = (s64)time_freq;
 +
 + /* calculate tick length @ HZ*/
 + tick_length = (second_length  TICK_LENGTH_SHIFT)
 + + (adj_length  (TICK_LENGTH_SHIFT - SHIFT_NSEC));
 + do_div(tick_length, HZ);
 + tick_nsec = tick_length  TICK_LENGTH_SHIFT;
 +
 +
 + /* calculate interval_length_base */
 + /* XXX - this is broken up to avoid 64bit overlfows */
 + interval_length_base = second_length * INTERVAL_LENGTH_NSEC;
 + interval_length_base = 2;
 + do_div(interval_length_base, NSEC_PER_SEC);
 + interval_length_base = TICK_LENGTH_SHIFT-2;

You don't have to introduce anything new, it's tick_length that changes 
and HZ that becomes a variable in this function.

bye, Roman
-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC] HZ free ntp

2006-12-13 Thread john stultz
On Wed, 2006-12-13 at 10:51 +0100, Ingo Molnar wrote:
 * john stultz [EMAIL PROTECTED] wrote:
 
  On Wed, 2006-12-06 at 15:33 +0100, Roman Zippel wrote:
   On Wed, 6 Dec 2006, Ingo Molnar wrote:
i disagree with you and it's pretty low-impact anyway. There's still
quite many HZ/tick assumptions all around the time code (NTP being one
example), we'll deal with those via other patches.
  
   Why do you pick on the NTP code? That's actually one of the places where
   assumptions about HZ are largely gone. NTP state is updated incrementally
   and this won't change, but the update frequency can now be easily
   disconnected from HZ.
 
  Hey Roman,
  Here's my rough first attempt at doing so. I'd not call it easy, but
  maybe you have some suggestions for a simpler way?
 
  Basically INTERVAL_LENGTH_NSEC defines the NTP interval length that
  the time code will use to accumulate with. In this patch I've pushed
  it out to a full second, but it could be set via config
  (NSEC_PER_SEC/HZ for regular systems, something larger for systems
  using dynticks).
 
 cool! I'll give this one a go in -rt, combined with the exponential
 second-overflow patch. (that one is now algorithmically safe, right?)

No, this one will replace the exponential accumulation patch. So we'll
accumulate on second intervals, which should be far enough apart that we
won't be spinning in that loop for long.

thanks
-john


-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC] HZ free ntp

2006-12-13 Thread john stultz
On Wed, 2006-12-13 at 14:47 +0100, Roman Zippel wrote:
 Hi,
 
 On Tue, 12 Dec 2006, john stultz wrote:
 
  Basically INTERVAL_LENGTH_NSEC defines the NTP interval length that the
  time code will use to accumulate with. In this patch I've pushed it out
  to a full second, but it could be set via config (NSEC_PER_SEC/HZ for
  regular systems, something larger for systems using dynticks).
 
 Why do you want to use such an interval? This makes everything only more
 complicated.
 The largest possible interval is freq cycles (or 1 second without
 adjustments). That is the base interval and without redesigning NTP we
 can't change that. This base interval can be subdivided into smaller
 intervals for incremental updates.

Indeed, larger then 1 second intervals would require the second_overflow
code to be reworked too. However, I'm not proposing larger then 1 second
intervals at this point. I'm just allowing for non-HZ intervals.

 You cannot choose arbitrary intervals otherwise you get other problems,
 e.g. with your patch time_offset handling is broken.

I'm not seeing this yet. Any more details? 


  +   /* calculate the length of one NTP adjusted second */
  +   second_length = (u64)(tick_usec * NSEC_PER_USEC * USER_HZ);
  +   second_length += (s64)CLOCK_TICK_ADJUST;
  +   adj_length = (s64)time_freq;
  +
  +   /* calculate tick length @ HZ*/
  +   tick_length = (second_length  TICK_LENGTH_SHIFT)
  +   + (adj_length  (TICK_LENGTH_SHIFT - SHIFT_NSEC));
  +   do_div(tick_length, HZ);
  +   tick_nsec = tick_length  TICK_LENGTH_SHIFT;
  +
  +
  +   /* calculate interval_length_base */
  +   /* XXX - this is broken up to avoid 64bit overlfows */
  +   interval_length_base = second_length * INTERVAL_LENGTH_NSEC;
  +   interval_length_base = 2;
  +   do_div(interval_length_base, NSEC_PER_SEC);
  +   interval_length_base = TICK_LENGTH_SHIFT-2;
 
 You don't have to introduce anything new, it's tick_length that changes
 and HZ that becomes a variable in this function.

So, forgive me for rehashing this, but it seems we're cross talking
again. The context here is the dynticks code. Where HZ doesn't change,
but we get interrupts at much reduced rates. The problem is, if the
interrupts slow to ~1 per second frequencies, we end up spending a ton
of time in the main update_wall_time loop, processing that 1 second in
1/HZ chunks. The current code is correct, but just wastes a lot of time.

So I'm trying to come up w/ an approach (the earlier, and broken,
exponential accumulation patch had the same intention), that allows us
to accumulate time in larger chunks. However, in doing so we have to
work w/ the ntp.c code which (as Ingo earlier mentioned) has a number of
HZ based assumptions.

This last patch tries to give NTP and the timekeeping an agreed upon
chunk (I chose interval as the term, since tick is somewhat
connected to HZ) of time upon which accumulation is done, so we can have
courser second updates, or finer grained updates, depending on config
settings.

In fairness, the patch probably going about this in a less then perfect
way, but that's why I'm asking for your feedback and suggestions. What
are your thoughts for reducing the time spent in the update_wall_time
loop in the context of dynticks?

thanks
-john

-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC] HZ free ntp

2006-12-13 Thread Roman Zippel
Hi,

On Wed, 13 Dec 2006, john stultz wrote:

  The largest possible interval is freq cycles (or 1 second without
  adjustments). That is the base interval and without redesigning NTP we
  can't change that. This base interval can be subdivided into smaller
  intervals for incremental updates.
 
 Indeed, larger then 1 second intervals would require the second_overflow
 code to be reworked too.

There isn't much to rework without a complete redesign.

  You cannot choose arbitrary intervals otherwise you get other problems,
  e.g. with your patch time_offset handling is broken.
 
 I'm not seeing this yet. Any more details? 

time_offset is scaled to HZ in do_adjtimex, which needs to be changed as 
well.

  You don't have to introduce anything new, it's tick_length that changes
  and HZ that becomes a variable in this function.
 
 So, forgive me for rehashing this, but it seems we're cross talking
 again. The context here is the dynticks code. Where HZ doesn't change,
 but we get interrupts at much reduced rates.

I know and all you have to change in the ntp and some related code is to 
replace HZ there with a variable, thus make it changable, so you can 
increase the update interval (i.e. it becomes 1s/hz instead of 1s/HZ).

 However, in doing so we have to
 work w/ the ntp.c code which (as Ingo earlier mentioned) has a number of
 HZ based assumptions.

Repeating Ingo's nonsense doesn't make it any more true. :-(

bye, Roman
-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[RFC] HZ free ntp

2006-12-12 Thread john stultz
On Wed, 2006-12-06 at 15:33 +0100, Roman Zippel wrote:
> On Wed, 6 Dec 2006, Ingo Molnar wrote:
> > i disagree with you and it's pretty low-impact anyway. There's still
> > quite many HZ/tick assumptions all around the time code (NTP being one
> > example), we'll deal with those via other patches.
> 
> Why do you pick on the NTP code? That's actually one of the places where
> assumptions about HZ are largely gone. NTP state is updated incrementally
> and this won't change, but the update frequency can now be easily
> disconnected from HZ.

Hey Roman,
Here's my rough first attempt at doing so. I'd not call it easy, but
maybe you have some suggestions for a simpler way?

Basically INTERVAL_LENGTH_NSEC defines the NTP interval length that the
time code will use to accumulate with. In this patch I've pushed it out
to a full second, but it could be set via config (NSEC_PER_SEC/HZ for
regular systems, something larger for systems using dynticks).

Thoughts?

-john


diff --git a/include/linux/timex.h b/include/linux/timex.h
index db501dc..be13daf 100644
--- a/include/linux/timex.h
+++ b/include/linux/timex.h
@@ -286,9 +286,10 @@ #endif /* !CONFIG_TIME_INTERPOLATION */
 
 #define TICK_LENGTH_SHIFT  32
 
-/* Returns how long ticks are at present, in ns / 2^(SHIFT_SCALE-10). */
-extern u64 current_tick_length(void);
+#define INTERVAL_LENGTH_NSEC (NSEC_PER_SEC)
 
+/* Returns how long NTP intervals are at present, in ns / 2^(SHIFT_SCALE-10).*/
+extern u64 ntp_interval_length(void);
 extern void second_overflow(void);
 extern void update_ntp_one_tick(void);
 extern int do_adjtimex(struct timex *);
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index d0ba190..53979a9 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -127,12 +127,14 @@ EXPORT_SYMBOL_GPL(ktime_get_ts);
  */
 static void hrtimer_get_softirq_time(struct hrtimer_base *base)
 {
+   struct timespec ts;
ktime_t xtim, tomono;
unsigned long seq;
 
do {
seq = read_seqbegin(_lock);
-   xtim = timespec_to_ktime(xtime);
+   getnstimeofday();
+   xtim = timespec_to_ktime(ts);
tomono = timespec_to_ktime(wall_to_monotonic);
 
} while (read_seqretry(_lock, seq));
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 3afeaa3..812c56f 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -20,11 +20,12 @@ #include 
  */
 unsigned long tick_usec = TICK_USEC;   /* USER_HZ period (usec) */
 unsigned long tick_nsec;   /* ACTHZ period (nsec) */
-static u64 tick_length, tick_length_base;
+static u64 interval_length, interval_length_base;
 
 #define MAX_TICKADJ500 /* microsecs */
-#define MAX_TICKADJ_SCALED (((u64)(MAX_TICKADJ * NSEC_PER_USEC) << \
- TICK_LENGTH_SHIFT) / HZ)
+#define MAX_TICKADJ_SCALED \
+   u64)MAX_TICKADJ * NSEC_PER_USEC * INTERVAL_LENGTH_NSEC) \
+   / NSEC_PER_SEC)<< TICK_LENGTH_SHIFT)
 
 /*
  * phase-lock loop variables
@@ -44,15 +45,45 @@ #define CLOCK_TICK_OVERFLOW (LATCH * HZ 
 #define CLOCK_TICK_ADJUST  (((s64)CLOCK_TICK_OVERFLOW * NSEC_PER_SEC) / \
(s64)CLOCK_TICK_RATE)
 
+/**
+ * ntp_update_frequency - Calculates base values used for NTP adjustments
+ *
+ */
 static void ntp_update_frequency(void)
 {
-   tick_length_base = (u64)(tick_usec * NSEC_PER_USEC * USER_HZ) << 
TICK_LENGTH_SHIFT;
-   tick_length_base += (s64)CLOCK_TICK_ADJUST << TICK_LENGTH_SHIFT;
-   tick_length_base += (s64)time_freq << (TICK_LENGTH_SHIFT - SHIFT_NSEC);
-
-   do_div(tick_length_base, HZ);
-
-   tick_nsec = tick_length_base >> TICK_LENGTH_SHIFT;
+   u64 second_length;
+   s64 adj_length;
+   u64 tick_length;
+   int neg = 0;
+   /* calculate the length of one NTP adjusted second */
+   second_length = (u64)(tick_usec * NSEC_PER_USEC * USER_HZ);
+   second_length += (s64)CLOCK_TICK_ADJUST;
+   adj_length = (s64)time_freq;
+
+   /* calculate tick length @ HZ*/
+   tick_length = (second_length << TICK_LENGTH_SHIFT)
+   + (adj_length << (TICK_LENGTH_SHIFT - SHIFT_NSEC));
+   do_div(tick_length, HZ);
+   tick_nsec = tick_length >> TICK_LENGTH_SHIFT;
+
+
+   /* calculate interval_length_base */
+   /* XXX - this is broken up to avoid 64bit overlfows */
+   interval_length_base = second_length * INTERVAL_LENGTH_NSEC;
+   interval_length_base <<= 2;
+   do_div(interval_length_base, NSEC_PER_SEC);
+   interval_length_base <<= TICK_LENGTH_SHIFT-2;
+
+   if (adj_length < 0) {
+   neg = 1;
+   adj_length = -adj_length;
+   }
+   adj_length *= INTERVAL_LENGTH_NSEC;
+   do_div(adj_length, NSEC_PER_SEC);
+   adj_length <<= (TICK_LENGTH_SHIFT - SHIFT_NSEC);
+   if(neg)
+   adj_length = -adj_length;
+   interval_length_base += adj_length;
 }
 
 /**
@@ 

[RFC] HZ free ntp

2006-12-12 Thread john stultz
On Wed, 2006-12-06 at 15:33 +0100, Roman Zippel wrote:
 On Wed, 6 Dec 2006, Ingo Molnar wrote:
  i disagree with you and it's pretty low-impact anyway. There's still
  quite many HZ/tick assumptions all around the time code (NTP being one
  example), we'll deal with those via other patches.
 
 Why do you pick on the NTP code? That's actually one of the places where
 assumptions about HZ are largely gone. NTP state is updated incrementally
 and this won't change, but the update frequency can now be easily
 disconnected from HZ.

Hey Roman,
Here's my rough first attempt at doing so. I'd not call it easy, but
maybe you have some suggestions for a simpler way?

Basically INTERVAL_LENGTH_NSEC defines the NTP interval length that the
time code will use to accumulate with. In this patch I've pushed it out
to a full second, but it could be set via config (NSEC_PER_SEC/HZ for
regular systems, something larger for systems using dynticks).

Thoughts?

-john


diff --git a/include/linux/timex.h b/include/linux/timex.h
index db501dc..be13daf 100644
--- a/include/linux/timex.h
+++ b/include/linux/timex.h
@@ -286,9 +286,10 @@ #endif /* !CONFIG_TIME_INTERPOLATION */
 
 #define TICK_LENGTH_SHIFT  32
 
-/* Returns how long ticks are at present, in ns / 2^(SHIFT_SCALE-10). */
-extern u64 current_tick_length(void);
+#define INTERVAL_LENGTH_NSEC (NSEC_PER_SEC)
 
+/* Returns how long NTP intervals are at present, in ns / 2^(SHIFT_SCALE-10).*/
+extern u64 ntp_interval_length(void);
 extern void second_overflow(void);
 extern void update_ntp_one_tick(void);
 extern int do_adjtimex(struct timex *);
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index d0ba190..53979a9 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -127,12 +127,14 @@ EXPORT_SYMBOL_GPL(ktime_get_ts);
  */
 static void hrtimer_get_softirq_time(struct hrtimer_base *base)
 {
+   struct timespec ts;
ktime_t xtim, tomono;
unsigned long seq;
 
do {
seq = read_seqbegin(xtime_lock);
-   xtim = timespec_to_ktime(xtime);
+   getnstimeofday(ts);
+   xtim = timespec_to_ktime(ts);
tomono = timespec_to_ktime(wall_to_monotonic);
 
} while (read_seqretry(xtime_lock, seq));
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 3afeaa3..812c56f 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -20,11 +20,12 @@ #include asm/timex.h
  */
 unsigned long tick_usec = TICK_USEC;   /* USER_HZ period (usec) */
 unsigned long tick_nsec;   /* ACTHZ period (nsec) */
-static u64 tick_length, tick_length_base;
+static u64 interval_length, interval_length_base;
 
 #define MAX_TICKADJ500 /* microsecs */
-#define MAX_TICKADJ_SCALED (((u64)(MAX_TICKADJ * NSEC_PER_USEC)  \
- TICK_LENGTH_SHIFT) / HZ)
+#define MAX_TICKADJ_SCALED \
+   u64)MAX_TICKADJ * NSEC_PER_USEC * INTERVAL_LENGTH_NSEC) \
+   / NSEC_PER_SEC) TICK_LENGTH_SHIFT)
 
 /*
  * phase-lock loop variables
@@ -44,15 +45,45 @@ #define CLOCK_TICK_OVERFLOW (LATCH * HZ 
 #define CLOCK_TICK_ADJUST  (((s64)CLOCK_TICK_OVERFLOW * NSEC_PER_SEC) / \
(s64)CLOCK_TICK_RATE)
 
+/**
+ * ntp_update_frequency - Calculates base values used for NTP adjustments
+ *
+ */
 static void ntp_update_frequency(void)
 {
-   tick_length_base = (u64)(tick_usec * NSEC_PER_USEC * USER_HZ)  
TICK_LENGTH_SHIFT;
-   tick_length_base += (s64)CLOCK_TICK_ADJUST  TICK_LENGTH_SHIFT;
-   tick_length_base += (s64)time_freq  (TICK_LENGTH_SHIFT - SHIFT_NSEC);
-
-   do_div(tick_length_base, HZ);
-
-   tick_nsec = tick_length_base  TICK_LENGTH_SHIFT;
+   u64 second_length;
+   s64 adj_length;
+   u64 tick_length;
+   int neg = 0;
+   /* calculate the length of one NTP adjusted second */
+   second_length = (u64)(tick_usec * NSEC_PER_USEC * USER_HZ);
+   second_length += (s64)CLOCK_TICK_ADJUST;
+   adj_length = (s64)time_freq;
+
+   /* calculate tick length @ HZ*/
+   tick_length = (second_length  TICK_LENGTH_SHIFT)
+   + (adj_length  (TICK_LENGTH_SHIFT - SHIFT_NSEC));
+   do_div(tick_length, HZ);
+   tick_nsec = tick_length  TICK_LENGTH_SHIFT;
+
+
+   /* calculate interval_length_base */
+   /* XXX - this is broken up to avoid 64bit overlfows */
+   interval_length_base = second_length * INTERVAL_LENGTH_NSEC;
+   interval_length_base = 2;
+   do_div(interval_length_base, NSEC_PER_SEC);
+   interval_length_base = TICK_LENGTH_SHIFT-2;
+
+   if (adj_length  0) {
+   neg = 1;
+   adj_length = -adj_length;
+   }
+   adj_length *= INTERVAL_LENGTH_NSEC;
+   do_div(adj_length, NSEC_PER_SEC);
+   adj_length = (TICK_LENGTH_SHIFT - SHIFT_NSEC);
+   if(neg)
+   adj_length = -adj_length;
+   interval_length_base += adj_length;
 }
 
 /**
@@ -69,7 +100,7