This is patch 3 of 3 for the ia64 clocksource. I still have some unfinished business.

Please refer:
> [PATCH] ia64: Scalability improvement of gettimeofday with jitter compensation
> http://lkml.org/lkml/2007/6/11/402

So this is a repost of the above patch, rebased on the clocksource code.

One additional change is:
 - remove the "when holding the xtime write lock..." section in
   itc_get_cycles(), since it can let itc_lastcycle go backwards.
   (A rough userspace C sketch of the resulting cmpxchg fallback is
   appended after the patch.)

The following results show that:

> # separatejitter : default
> CPU 0: 1.50 (usecs) (0 errors / 6677159 iterations)
> CPU 1: 1.49 (usecs) (0 errors / 6697159 iterations)
> CPU 2: 1.50 (usecs) (0 errors / 6664672 iterations)
> CPU 3: 1.50 (usecs) (0 errors / 6668999 iterations)
> # separatejitter : nojitter
> CPU 0: 0.14 (usecs) (0 errors / 70580221 iterations)
> CPU 1: 0.14 (usecs) (0 errors / 71275618 iterations)
> CPU 2: 0.14 (usecs) (0 errors / 70626121 iterations)
> CPU 3: 0.14 (usecs) (0 errors / 70603364 iterations)
> # separatejitter : nolwsys
> CPU 0: 2.26 (usecs) (0 errors / 4417197 iterations)
> CPU 1: 2.26 (usecs) (0 errors / 4415829 iterations)
> CPU 2: 2.27 (usecs) (0 errors / 4402768 iterations)
> CPU 3: 2.27 (usecs) (0 errors / 4406101 iterations)

the scalability of gettimeofday is clearly improved:

> # clocksource (fixed) : default
> CPU 0: 1.33 (usecs) (0 errors / 7507837 iterations)
> CPU 1: 1.31 (usecs) (0 errors / 7621659 iterations)
> CPU 2: 1.27 (usecs) (0 errors / 7865412 iterations)
> CPU 3: 1.27 (usecs) (0 errors / 7863362 iterations)
> # clocksource (fixed) : nojitter
> CPU 0: 0.14 (usecs) (0 errors / 69608888 iterations)
> CPU 1: 0.14 (usecs) (0 errors / 70277433 iterations)
> CPU 2: 0.14 (usecs) (0 errors / 69632925 iterations)
> CPU 3: 0.14 (usecs) (0 errors / 69606531 iterations)
> # clocksource (fixed) : nolwsys
> CPU 0: 1.48 (usecs) (0 errors / 6770870 iterations)
> CPU 1: 1.48 (usecs) (0 errors / 6777897 iterations)
> CPU 2: 1.49 (usecs) (0 errors / 6728101 iterations)
> CPU 3: 1.49 (usecs) (0 errors / 6703961 iterations)

Thanks,
H.Seto

Signed-off-by: Hidetoshi Seto <[EMAIL PROTECTED]>

-----
 arch/ia64/kernel/fsys.S |   22 ++++++++++++----------
 arch/ia64/kernel/time.c |   39 +++++++++++++++++----------------------
 2 files changed, 29 insertions(+), 32 deletions(-)

Index: linux-2.6.22/arch/ia64/kernel/fsys.S
===================================================================
--- linux-2.6.22.orig/arch/ia64/kernel/fsys.S
+++ linux-2.6.22/arch/ia64/kernel/fsys.S
@@ -231,7 +231,8 @@
 	add r26 = IA64_CLKSRC_CYCLE_LAST_OFFSET,r20	// clksrc_cycle_last
 	cmp.ne p6, p0 = 0, r2	// Fallback if work is scheduled
 (p6)	br.cond.spnt.many fsys_fallback_syscall
-	;;	// get lock.seq here new code, outer loop2!
+	;;
+	// Begin critical section
 .time_redo:
 	ld4.acq r28 = [r20]	// gtod_lock.sequence, Must take first
 	;;
@@ -252,8 +253,7 @@
 	ld4 r23 = [r23]		// clocksource shift value
 	ld8 r24 = [r26]		// get clksrc_cycle_last value
 (p9)	cmp.eq p13,p0 = 0,r30	// if mmio_ptr, clear p13 jitter control
-	;;	// old position for lock seq, new inner loop1!
-.cmpxchg_redo:
+	;;
 	.pred.rel.mutex p8,p9
 (p8)	mov r2 = ar.itc		// CPU_TIMER. 36 clocks latency!!!
 (p9)	ld8 r2 = [r30]		// MMIO_TIMER. Could also have latency issues..
@@ -270,19 +270,21 @@
 (p6)	sub r10 = r25,r24	// time we got was less than last_cycle
 (p7)	mov ar.ccv = r25	// more than last_cycle. Prep for cmpxchg
 	;;
+(p7)	cmpxchg8.rel r3 = [r19],r2,ar.ccv
+	;;
+(p7)	cmp.ne p7,p0 = r25,r3	// if cmpxchg not successful
+	;;
+(p7)	sub r10 = r3,r24	// then use new last_cycle instead
+	;;
 	and r10 = r10,r14	// Apply mask
 	;;
 	setf.sig f8 = r10
 	nop.i 123
 	;;
-(p7)	cmpxchg8.rel r3 = [r19],r2,ar.ccv
 			// fault check takes 5 cycles and we have spare time
 EX(.fail_efault, probe.w.fault r31, 3)
 	xmpy.l f8 = f8,f7	// nsec_per_cyc*(counter-last_counter)
 	;;
-	// End cmpxchg critical section loop1
-(p7)	cmp.ne p7,p0 = r25,r3	// if cmpxchg not successful redo
-(p7)	br.cond.dpnt.few .cmpxchg_redo	// inner loop1
 	// simulate tbit.nz.or p7,p0 = r28,0
 	getf.sig r2 = f8
 	mf
@@ -290,10 +292,10 @@
 	ld4 r10 = [r20]		// gtod_lock.sequence
 	shr.u r2 = r2,r23	// shift by factor
 	;;		// overloaded 3 bundles!
-	// End critical section.
 	add r8 = r8,r2		// Add xtime.nsecs
-	cmp4.ne.or p7,p0 = r28,r10
-(p7)	br.cond.dpnt.few .time_redo	// sequence number changed, outer loop2
+	cmp4.ne p7,p0 = r28,r10
+(p7)	br.cond.dpnt.few .time_redo	// sequence number changed, redo
+	// End critical section.
 	// Now r8=tv->tv_nsec and r9=tv->tv_sec
 	mov r10 = r0
 	movl r2 = 1000000000
Index: linux-2.6.22/arch/ia64/kernel/time.c
===================================================================
--- linux-2.6.22.orig/arch/ia64/kernel/time.c
+++ linux-2.6.22/arch/ia64/kernel/time.c
@@ -257,31 +257,26 @@
 static cycle_t itc_get_cycles()
 {
-	u64 lcycle;
-	u64 now;
+	u64 lcycle, now, ret;
 
 	if (!itc_jitter_data.itc_jitter)
 		return get_cycles();
-	do {
-		lcycle = itc_jitter_data.itc_lastcycle;
-		now = get_cycles();
-		if (lcycle && time_after(lcycle, now))
-			return lcycle;
-
-		/* When holding the xtime write lock, there's no need
-		 * to add the overhead of the cmpxchg.  Readers are
-		 * force to retry until the write lock is released.
-		 */
-		if (spin_is_locked(&xtime_lock.lock)) {
-			itc_jitter_data.itc_lastcycle = now;
-			return now;
-		}
-		/* Keep track of the last timer value returned.
-		 * The use of cmpxchg here will cause contention in
-		 * an SMP environment.
-		 */
-	} while (likely(cmpxchg(&itc_jitter_data.itc_lastcycle,
-			lcycle, now) != lcycle));
+
+	lcycle = itc_jitter_data.itc_lastcycle;
+	now = get_cycles();
+	if (lcycle && time_after(lcycle, now))
+		return lcycle;
+
+	/*
+	 * Keep track of the last timer value returned.
+	 * In an SMP environment, you could lose out in contention of
+	 * cmpxchg. If so, your cmpxchg returns new value which the
+	 * winner of contention updated to. Use the new value instead.
+	 */
+	ret = cmpxchg(&itc_jitter_data.itc_lastcycle, lcycle, now);
+	if (unlikely(ret != lcycle))
+		return ret;
+	return now;
 }
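
P.S.
Below is a rough, userspace-only C sketch of the cmpxchg fallback used in
itc_get_cycles() above. It is not part of the patch, and the names
(last_cycle, read_counter, get_cycles_compensated) are made up for
illustration. The point it demonstrates is the one in the new comment:
when the compare-and-swap loses a race, it hands back the value the winner
just published, so that value can be returned directly instead of retrying.

/* sketch.c - illustrative only, not kernel code.  Build: cc -std=c11 sketch.c */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static _Atomic uint64_t last_cycle;	/* plays the role of itc_lastcycle */

/* Stand-in for ar.itc / get_cycles(): a fake, monotonic counter. */
static uint64_t read_counter(void)
{
	static _Atomic uint64_t fake;
	return atomic_fetch_add(&fake, 1) + 1;
}

static uint64_t get_cycles_compensated(void)
{
	uint64_t lcycle = atomic_load(&last_cycle);
	uint64_t now = read_counter();

	/* The counter appears to be behind the last published value:
	 * return the published value instead.  (The kernel uses
	 * time_after() here so that counter wraparound is handled.) */
	if (lcycle && lcycle > now)
		return lcycle;

	/* Try to publish 'now'.  If the CAS fails, 'lcycle' now holds the
	 * value another thread published in the meantime; return that
	 * instead of looping, as the new itc_get_cycles() does. */
	if (!atomic_compare_exchange_strong(&last_cycle, &lcycle, now))
		return lcycle;

	return now;
}

int main(void)
{
	for (int i = 0; i < 4; i++)
		printf("%llu\n", (unsigned long long)get_cycles_compensated());
	return 0;
}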
