On Tue, 2017-07-25 at 12:26 +0530, Santosh Sivaraj wrote:

> +static notrace void kernel_get_tspec(struct timespec *tp,
> +                                  struct vdso_data *vdata, u32 *wtom_sec,
> +                                  u32 *wtom_nsec)
> +{
> +     u64 tb;
> +     u32 update_count;

This is broken:

> +     do {
> +             /* check for update count & load values */
> +             update_count = vdata->tb_update_count;
> +
> +             /* Get TB, offset it and scale result */
> +             tb = mulhdu((get_tb() - vdata->tb_orig_stamp) << 12,
> +                         vdata->tb_to_xs) + vdata->stamp_sec_fraction;
> +             tp->tv_sec = vdata->stamp_xtime.tv_sec;
> +             if (wtom_sec)
> +                     *wtom_sec = vdata->wtom_clock_sec;
> +             if (wtom_nsec)
> +                     *wtom_nsec = vdata->wtom_clock_nsec;
> +     } while (update_count != vdata->tb_update_count);

The assembly code is carefully crafted to create a chain of data
dependencies in order to enforce the ordering in this function;
you completely broke it.

I.e. the pointer used to access tb_orig_stamp etc. depends on the
initial update count, and the final read of the update count depends
on all the previously read values (or should), thus ordering those
loads. Without that you'll need more expensive lwsyncs.
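
To illustrate, a C version would have to launder the loaded values into
the pointer used for the final counter re-read, something along these
lines (a rough sketch only, not the actual vdso helpers; the helper name
and the inline asm are mine):

	static notrace u32 reread_update_count(struct vdso_data *vdata, u64 dep)
	{
		unsigned long zero;

		/*
		 * xor the loaded data with itself in asm so the compiler
		 * cannot fold the zero away, then add it to the data-page
		 * pointer: the counter re-read now carries an address
		 * dependency on the previously loaded values, which is
		 * what orders the loads without an lwsync.
		 */
		asm ("xor %0,%1,%1" : "=r" (zero) : "r" (dep));
		vdata = (struct vdso_data *)((unsigned long)vdata + zero);
		return vdata->tb_update_count;
	}

and the loop exit would then be something like
"} while (update_count != reread_update_count(vdata, tb | tp->tv_sec));"
so that the re-read is ordered after every value it is supposed to check.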

Additionally, you broke another semantic of the seqlock, which is
to spin on the first read of the update count if it has an odd value.
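
In other words the reader needs to look roughly like the usual seqlock
read side (again just a sketch using generic kernel primitives; in the
real thing the barriers would be replaced by the dependency trick
above):

	u32 seq;

	do {
		/* an odd count means an update is in progress: wait it out */
		while ((seq = vdata->tb_update_count) & 1)
			cpu_relax();
		smp_rmb();	/* order the data loads after the count read */

		tp->tv_sec  = vdata->stamp_xtime.tv_sec;
		tp->tv_nsec = vdata->stamp_xtime.tv_nsec;

		smp_rmb();	/* order the re-read after the data loads */
	} while (seq != vdata->tb_update_count);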

The same problem exists in all your other implementations.

I am really NOT a fan of this attempt at converting to C. The code is
hand-crafted assembly for a number of reasons, including the ones above
and maximum performance.

As it is, it's deeply broken.

> +
> +     tp->tv_nsec = ((u64)mulhwu(tb, NSEC_PER_SEC) << 32) >> 32;
> +     tp->tv_sec += (tb >> 32);
> +}
> +
> +static notrace int clock_get_realtime(struct timespec *tp,
> +                                   struct vdso_data *vdata)
> +{
> +     kernel_get_tspec(tp, vdata, NULL, NULL);
> +
> +     return 0;
> +}
> +
> +static notrace int clock_get_monotonic(struct timespec *tp,
> +                                    struct vdso_data *vdata)
> +{
> +     __s32 wtom_sec, wtom_nsec;
> +     u64 nsec;
> +
> +     kernel_get_tspec(tp, vdata, &wtom_sec, &wtom_nsec);
> +
> +     tp->tv_sec += wtom_sec;
> +
> +     nsec = tp->tv_nsec;
> +     tp->tv_nsec = 0;
> +     timespec_add_ns(tp, nsec + wtom_nsec);
> +
> +     return 0;
> +}
> +
> +static notrace int clock_realtime_coarse(struct timespec *tp,
> +                                      struct vdso_data *vdata)
> +{
> +     u32 update_count;
> +
> +     do {
> +             /* check for update count & load values */
> +             update_count = vdata->tb_update_count;
> +
> +             tp->tv_sec = vdata->stamp_xtime.tv_sec;
> +             tp->tv_nsec = vdata->stamp_xtime.tv_nsec;
> +     } while (update_count != vdata->tb_update_count);
> +
> +     return 0;
> +}
> +
> +static notrace int clock_monotonic_coarse(struct timespec *tp,
> +                                       struct vdso_data *vdata)
> +{
> +     __s32 wtom_sec, wtom_nsec;
> +     u64 nsec;
> +     u32 update_count;
> +
> +     do {
> +             /* check for update count & load values */
> +             update_count = vdata->tb_update_count;
> +
> +             tp->tv_sec = vdata->stamp_xtime.tv_sec;
> +             tp->tv_nsec = vdata->stamp_xtime.tv_nsec;
> +             wtom_sec = vdata->wtom_clock_sec;
> +             wtom_nsec = vdata->wtom_clock_nsec;
> +     } while (update_count != vdata->tb_update_count);
> +
> +     tp->tv_sec += wtom_sec;
> +     nsec = tp->tv_nsec;
> +     tp->tv_nsec = 0;
> +     timespec_add_ns(tp, nsec + wtom_nsec);
> +
> +     return 0;
> +}
> +
> +notrace int kernel_clock_gettime(clockid_t clk_id, struct timespec *tp)
> +{
> +     int ret;
> +     struct vdso_data *vdata = __get_datapage();
> +
> +     if (!tp || !vdata)
> +             return -EBADR;
> +
> +     switch (clk_id) {
> +     case CLOCK_REALTIME:
> +             ret = clock_get_realtime(tp, vdata);
> +             break;
> +     case CLOCK_MONOTONIC:
> +             ret = clock_get_monotonic(tp, vdata);
> +             break;
> +     case CLOCK_REALTIME_COARSE:
> +             ret = clock_realtime_coarse(tp, vdata);
> +             break;
> +     case CLOCK_MONOTONIC_COARSE:
> +             ret = clock_monotonic_coarse(tp, vdata);
> +             break;
> +     default:
> +             /* fallback to syscall */
> +             ret = -1;
> +             break;
> +     }
> +
> +     return ret;
> +}
> diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S b/arch/powerpc/kernel/vdso64/gettimeofday.S
> index 3820213..c3f6b24 100644
> --- a/arch/powerpc/kernel/vdso64/gettimeofday.S
> +++ b/arch/powerpc/kernel/vdso64/gettimeofday.S
> @@ -16,6 +16,8 @@
>  #include <asm/asm-offsets.h>
>  #include <asm/unistd.h>
>  
> +.global      kernel_clock_gettime
> +
>       .text
>  /*
>   * Exact prototype of gettimeofday
> @@ -60,71 +62,23 @@ V_FUNCTION_END(__kernel_gettimeofday)
>   */
>  V_FUNCTION_BEGIN(__kernel_clock_gettime)
>    .cfi_startproc
> -     /* Check for supported clock IDs */
> -     cmpwi   cr0,r3,CLOCK_REALTIME
> -     cmpwi   cr1,r3,CLOCK_MONOTONIC
> -     cror    cr0*4+eq,cr0*4+eq,cr1*4+eq
> -     bne     cr0,99f
> -
> -     mflr    r12                     /* r12 saves lr */
> -  .cfi_register lr,r12
> -     mr      r11,r4                  /* r11 saves tp */
> -     bl      V_LOCAL_FUNC(__get_datapage)    /* get data page */
> -     lis     r7,NSEC_PER_SEC@h       /* want nanoseconds */
> -     ori     r7,r7,NSEC_PER_SEC@l
> -50:  bl      V_LOCAL_FUNC(__do_get_tspec)    /* get time from tb & kernel */
> -     bne     cr1,80f                 /* if not monotonic, all done */
> -
> -     /*
> -      * CLOCK_MONOTONIC
> -      */
> -
> -     /* now we must fixup using wall to monotonic. We need to snapshot
> -      * that value and do the counter trick again. Fortunately, we still
> -      * have the counter value in r8 that was returned by __do_get_tspec.
> -      * At this point, r4,r5 contain our sec/nsec values.
> -      */
> -
> -     lwa     r6,WTOM_CLOCK_SEC(r3)
> -     lwa     r9,WTOM_CLOCK_NSEC(r3)
> -
> -     /* We now have our result in r6,r9. We create a fake dependency
> -      * on that result and re-check the counter
> -      */
> -     or      r0,r6,r9
> -     xor     r0,r0,r0
> -     add     r3,r3,r0
> -     ld      r0,CFG_TB_UPDATE_COUNT(r3)
> -        cmpld   cr0,r0,r8            /* check if updated */
> -     bne-    50b
> -
> -     /* Add wall->monotonic offset and check for overflow or underflow.
> -      */
> -     add     r4,r4,r6
> -     add     r5,r5,r9
> -     cmpd    cr0,r5,r7
> -     cmpdi   cr1,r5,0
> -     blt     1f
> -     subf    r5,r7,r5
> -     addi    r4,r4,1
> -1:   bge     cr1,80f
> -     addi    r4,r4,-1
> -     add     r5,r5,r7
> -
> -80:  std     r4,TSPC64_TV_SEC(r11)
> -     std     r5,TSPC64_TV_NSEC(r11)
> -
> -     mtlr    r12
> +     mflr    r6                      /* r12 saves lr */
> +     stwu    r1,-112(r1)
> +  .cfi_register lr,r6
> +     std     r6,24(r1)
> +     std     r3,32(r1)
> +     std     r4,40(r1)
>       crclr   cr0*4+so
> -     li      r3,0
> -     blr
> -
> -     /*
> -      * syscall fallback
> -      */
> -99:
> +     bl      V_LOCAL_FUNC(kernel_clock_gettime)
> +     cmpwi   r3,0
> +     beq     77f
>       li      r0,__NR_clock_gettime
> +     ld      r3,32(r1)
> +     ld      r4,40(r1)
>       sc
> +77:  ld      r6,24(r1)
> +     addi    r1,r1,112
> +     mtlr    r6
>       blr
>    .cfi_endproc
>  V_FUNCTION_END(__kernel_clock_gettime)
