* john stultz ([EMAIL PROTECTED]) wrote:
> On Wed, 2008-01-16 at 18:39 -0500, Mathieu Desnoyers wrote:
> > I would disable preemption in clocksource_get_basecycles. We would not
> > want to be scheduled out while we hold a pointer to the old array
> > element.
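(To make the hazard concrete, here is the interleaving that disabling
preemption closes; an illustrative sketch, not code from the patch:)

	/*
	 * Reader (preemptible)           Writer (under xtime_lock)
	 * ---------------------------    ---------------------------
	 * num = cs->base_num;  (num==0)
	 *   ... preempted for two update periods ...
	 *                                fills base[1]; base_num = 1;
	 *                                fills base[0]; base_num = 0;
	 * reads cs->base[0]    (element was rewritten while we slept)
	 */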
> > 
> > > + int num = cs->base_num;
> > 
> > Since you deal with base_num in a shared manner (not per-cpu), you will
> > need an smp_read_barrier_depends() here after the cs->base_num read.
> > 
> > You should think about reading cs->base_num first, and only _after_
> > that reading the real clocksource. Here, the clocksource value is
> > passed in as a parameter, which means it may have been read in the
> > previous RCU window.
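(To restate the required order with the names used in the patch below;
an illustrative sketch, not code from the patch:)

	/* Wrong: the counter is sampled before the index is read, so
	 * "now" may predate the window that base[num] describes:
	 *
	 *	now = clocksource_read(cs);
	 *	num = cs->base_num;
	 *
	 * Right: read the index, order the dependent reads, then sample
	 * the counter inside the same window:
	 *
	 *	num = cs->base_num;
	 *	smp_read_barrier_depends();
	 *	now = clocksource_read(cs);
	 */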
> 
> Here's an updated version of the patch w/ the suggested memory barrier
> changes and favored (1-x) inversion change. ;)  Let me know if you see
> any other holes, or have any other suggestions or ideas.
> 
> Still un-tested (my test box will free up soon, I promise!), but builds.
> 
> Signed-off-by: John Stultz <[EMAIL PROTECTED]>
> 
> Index: monotonic-cleanup/include/linux/clocksource.h
> ===================================================================
> --- monotonic-cleanup.orig/include/linux/clocksource.h        2008-01-16 12:22:04.000000000 -0800
> +++ monotonic-cleanup/include/linux/clocksource.h     2008-01-16 18:12:53.000000000 -0800
> @@ -87,9 +87,17 @@
>        * more than one cache line.
>        */
>       struct {
> -             cycle_t cycle_last, cycle_accumulated, cycle_raw;
> -     } ____cacheline_aligned_in_smp;
> +             cycle_t cycle_last, cycle_accumulated;
>  
> +             /* base structure provides lock-free read
> +              * access to a virtualized 64bit counter
> +              * Uses RCU-like update.
> +              */
> +             struct {
> +                     cycle_t cycle_base_last, cycle_base;
> +             } base[2];
> +             int base_num;
> +     } ____cacheline_aligned_in_smp;
>       u64 xtime_nsec;
>       s64 error;
>  
> @@ -175,19 +183,29 @@
>  }
>  
>  /**
> - * clocksource_get_cycles: - Access the clocksource's accumulated cycle value
> + * clocksource_get_basecycles: - get the clocksource's accumulated cycle value
>   * @cs:              pointer to clocksource being read
>   * @now:     current cycle value
>   *
>   * Uses the clocksource to return the current cycle_t value.
>   * NOTE!!!: This is different from clocksource_read, because it
> - * returns the accumulated cycle value! Must hold xtime lock!
> + * returns a 64bit wide accumulated value.
>   */
>  static inline cycle_t
> -clocksource_get_cycles(struct clocksource *cs, cycle_t now)
> +clocksource_get_basecycles(struct clocksource *cs)
>  {
> -     cycle_t offset = (now - cs->cycle_last) & cs->mask;
> -     offset += cs->cycle_accumulated;
> +     int num;
> +     cycle_t now, offset;
> +
> +     preempt_disable();
> +     num = cs->base_num;
> +     smp_read_barrier_depends();
> +     now = clocksource_read(cs);
> +     offset = (now - cs->base[num].cycle_base_last);
> +     offset &= cs->mask;
> +     offset += cs->base[num].cycle_base;
> +     preempt_enable();
> +
>       return offset;
>  }
>  
> @@ -197,14 +215,26 @@
>   * @now:     current cycle value
>   *
>   * Used to avoid clocksource hardware overflow by periodically
> - * accumulating the current cycle delta. Must hold xtime write lock!
> + * accumulating the current cycle delta. Uses RCU-like update, but
> + * ***still requires that the xtime_lock be held for writing!***
>   */
>  static inline void clocksource_accumulate(struct clocksource *cs, cycle_t now)
>  {
> -     cycle_t offset = (now - cs->cycle_last) & cs->mask;
> +     /* First update the monotonic base portion.
> +      * The dual array update method allows for lock-free reading.
> +      */
> +     int num = 1 - cs->base_num;

(nitpick)
Right here, you could probably express 1-num as cs->base_num, since we
are the only ones supposed to touch it.

> +     cycle_t offset = (now - cs->base[1-num].cycle_base_last);
> +     offset &= cs->mask;

here too.
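I.e., something along these lines (untested sketch, same semantics,
since num == 1 - cs->base_num at both spots):

	cycle_t offset = (now - cs->base[cs->base_num].cycle_base_last);
	offset &= cs->mask;
	cs->base[num].cycle_base = cs->base[cs->base_num].cycle_base + offset;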

> +     cs->base[num].cycle_base = cs->base[1-num].cycle_base + offset;
> +     cs->base[num].cycle_base_last = now;
> +     wmb();

As I just emailed: smp_wmb() *should* be enough. I don't see which
architecture could reorder writes wrt local interrupts. (Please tell me
if I am grossly mistaken.)
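To spell out the publish sequence I have in mind (write side only, an
illustrative sketch using the names from the patch):

	/* Fill the inactive element, then publish the index that points
	 * to it. smp_wmb() keeps the data writes ordered before the
	 * index write on SMP; on UP it degrades to a compiler barrier,
	 * which is all that ordering against local interrupts needs.
	 */
	cs->base[num].cycle_base = cs->base[1-num].cycle_base + offset;
	cs->base[num].cycle_base_last = now;
	smp_wmb();		/* order data before index publish */
	cs->base_num = num;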

Mathieu

> +     cs->base_num = num;
> +
> +     /* Now update the cycle_accumulated portion */
> +     offset = (now - cs->cycle_last) & cs->mask;
>       cs->cycle_last = now;
>       cs->cycle_accumulated += offset;
> -     cs->cycle_raw += offset;
>  }
>  
>  /**
> Index: monotonic-cleanup/kernel/time/timekeeping.c
> ===================================================================
> --- monotonic-cleanup.orig/kernel/time/timekeeping.c  2008-01-16 12:21:46.000000000 -0800
> +++ monotonic-cleanup/kernel/time/timekeeping.c       2008-01-16 17:51:50.000000000 -0800
> @@ -71,10 +71,12 @@
>   */
>  static inline s64 __get_nsec_offset(void)
>  {
> -     cycle_t cycle_delta;
> +     cycle_t now, cycle_delta;
>       s64 ns_offset;
>  
> -     cycle_delta = clocksource_get_cycles(clock, clocksource_read(clock));
> +     now = clocksource_read(clock);
> +     cycle_delta = (now - clock->cycle_last) & clock->mask;
> +     cycle_delta += clock->cycle_accumulated;
>       ns_offset = cyc2ns(clock, cycle_delta);
>  
>       return ns_offset;
> @@ -105,35 +107,7 @@
>  
>  cycle_t notrace get_monotonic_cycles(void)
>  {
> -     cycle_t cycle_now, cycle_delta, cycle_raw, cycle_last;
> -
> -     do {
> -             /*
> -              * cycle_raw and cycle_last can change on
> -              * another CPU and we need the delta calculation
> -              * of cycle_now and cycle_last happen atomic, as well
> -              * as the adding to cycle_raw. We don't need to grab
> -              * any locks, we just keep trying until get all the
> -              * calculations together in one state.
> -              *
> -              * In fact, we __cant__ grab any locks. This
> -              * function is called from the latency_tracer which can
> -              * be called anywhere. To grab any locks (including
> -              * seq_locks) we risk putting ourselves into a deadlock.
> -              */
> -             cycle_raw = clock->cycle_raw;
> -             cycle_last = clock->cycle_last;
> -
> -             /* read clocksource: */
> -             cycle_now = clocksource_read(clock);
> -
> -             /* calculate the delta since the last update_wall_time: */
> -             cycle_delta = (cycle_now - cycle_last) & clock->mask;
> -
> -     } while (cycle_raw != clock->cycle_raw ||
> -              cycle_last != clock->cycle_last);
> -
> -     return cycle_raw + cycle_delta;
> +     return clocksource_get_basecycles(clock);
>  }
>  
>  unsigned long notrace cycles_to_usecs(cycle_t cycles)
> 
> 
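Finally, for anyone who wants to poke at the algorithm outside the
kernel, a stand-alone userspace model of the dual-array scheme. This is
an illustrative sketch only: the smp_* barriers are approximated with
compiler barriers, a single writer is assumed, and a narrow 16-bit
"hardware" mask forces frequent wraps.

	#include <stdint.h>
	#include <stdio.h>

	typedef uint64_t cycle_t;

	#define MASK 0xffffULL		/* narrow counter: wraps often */
	#define barrier() __asm__ __volatile__("" ::: "memory")

	static struct {
		cycle_t cycle_base_last, cycle_base;
	} base[2];
	static int base_num;
	static cycle_t hw;		/* stands in for the hardware counter */

	static cycle_t read_hw(void) { return hw & MASK; }

	/* Reader: lock-free, mirrors clocksource_get_basecycles() */
	static cycle_t get_basecycles(void)
	{
		int num = base_num;
		barrier();		/* ~ smp_read_barrier_depends() */
		cycle_t now = read_hw();
		return base[num].cycle_base +
			((now - base[num].cycle_base_last) & MASK);
	}

	/* Writer: mirrors clocksource_accumulate(); must run at least
	 * once per counter wrap, or the masked delta is ambiguous. */
	static void accumulate(void)
	{
		int num = 1 - base_num;
		cycle_t now = read_hw();
		cycle_t offset = (now - base[1 - num].cycle_base_last) & MASK;

		base[num].cycle_base = base[1 - num].cycle_base + offset;
		base[num].cycle_base_last = now;
		barrier();		/* ~ smp_wmb() */
		base_num = num;
	}

	int main(void)
	{
		for (int i = 0; i < 10; i++) {
			hw += 40000;	/* jump past a 16-bit wrap each time */
			accumulate();
			printf("%llu\n", (unsigned long long)get_basecycles());
		}
		return 0;
	}

This prints 40000, 80000, ..., 400000: the "hardware" counter wraps at
65536, but the virtualized 64-bit base keeps counting monotonically.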

-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68