__get_datapage() is only a few instructions to retrieve the
address of the page where the kernel stores data to the VDSO.

By inlining this function into its users, a bl/blr pair and
a mflr/mtlr pair is avoided, plus a few reg moves.

clock-gettime-monotonic: syscall: 514 nsec/call  396 nsec/call
clock-gettime-monotonic:    libc: 25 nsec/call   24 nsec/call
clock-gettime-monotonic:    vdso: 20 nsec/call   20 nsec/call
clock-getres-monotonic: syscall: 347 nsec/call   372 nsec/call
clock-getres-monotonic:    libc: 19 nsec/call    19 nsec/call
clock-getres-monotonic:    vdso: 10 nsec/call    10 nsec/call
clock-gettime-monotonic-coarse: syscall: 511 nsec/call   396 nsec/call
clock-gettime-monotonic-coarse:    libc: 23 nsec/call    21 nsec/call
clock-gettime-monotonic-coarse:    vdso: 15 nsec/call    13 nsec/call
clock-gettime-realtime: syscall: 526 nsec/call   405 nsec/call
clock-gettime-realtime:    libc: 24 nsec/call    23 nsec/call
clock-gettime-realtime:    vdso: 18 nsec/call    18 nsec/call
clock-getres-realtime: syscall: 342 nsec/call    372 nsec/call
clock-getres-realtime:    libc: 19 nsec/call     19 nsec/call
clock-getres-realtime:    vdso: 10 nsec/call     10 nsec/call
clock-gettime-realtime-coarse: syscall: 515 nsec/call    373 nsec/call
clock-gettime-realtime-coarse:    libc: 23 nsec/call     22 nsec/call
clock-gettime-realtime-coarse:    vdso: 14 nsec/call     13 nsec/call

Based on the patch by Christophe Leroy <christophe.le...@c-s.fr> for vdso32.

Signed-off-by: Santosh Sivaraj <sant...@fossix.org>
---

except for a couple of calls (1 or 2 nsec reduction), there are no
improvements in the call times. Or is 10 nsec the minimum granularity??

So I don't know if its even worth updating vdso64 except to keep vdso32 and
vdso64 equal.


 arch/powerpc/kernel/vdso64/cacheflush.S   | 10 ++++----
 arch/powerpc/kernel/vdso64/datapage.S     | 29 ++++-------------------
 arch/powerpc/kernel/vdso64/datapage.h     | 10 ++++++++
 arch/powerpc/kernel/vdso64/gettimeofday.S |  8 ++++---
 4 files changed, 24 insertions(+), 33 deletions(-)
 create mode 100644 arch/powerpc/kernel/vdso64/datapage.h

diff --git a/arch/powerpc/kernel/vdso64/cacheflush.S 
b/arch/powerpc/kernel/vdso64/cacheflush.S
index 3f92561a64c4..30e8b0d29bea 100644
--- a/arch/powerpc/kernel/vdso64/cacheflush.S
+++ b/arch/powerpc/kernel/vdso64/cacheflush.S
@@ -10,6 +10,8 @@
 #include <asm/vdso.h>
 #include <asm/asm-offsets.h>
 
+#include "datapage.h"
+
        .text
 
 /*
@@ -24,14 +26,12 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache)
   .cfi_startproc
        mflr    r12
   .cfi_register lr,r12
-       mr      r11,r3
-       bl      V_LOCAL_FUNC(__get_datapage)
+       get_datapage    r11, r0
        mtlr    r12
-       mr      r10,r3
 
        lwz     r7,CFG_DCACHE_BLOCKSZ(r10)
        addi    r5,r7,-1
-       andc    r6,r11,r5               /* round low to line bdy */
+       andc    r6,r3,r5                /* round low to line bdy */
        subf    r8,r6,r4                /* compute length */
        add     r8,r8,r5                /* ensure we get enough */
        lwz     r9,CFG_DCACHE_LOGBLOCKSZ(r10)
@@ -48,7 +48,7 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache)
 
        lwz     r7,CFG_ICACHE_BLOCKSZ(r10)
        addi    r5,r7,-1
-       andc    r6,r11,r5               /* round low to line bdy */
+       andc    r6,r3,r5                /* round low to line bdy */
        subf    r8,r6,r4                /* compute length */
        add     r8,r8,r5
        lwz     r9,CFG_ICACHE_LOGBLOCKSZ(r10)
diff --git a/arch/powerpc/kernel/vdso64/datapage.S 
b/arch/powerpc/kernel/vdso64/datapage.S
index dc84f5ae3802..8712f57c931c 100644
--- a/arch/powerpc/kernel/vdso64/datapage.S
+++ b/arch/powerpc/kernel/vdso64/datapage.S
@@ -11,34 +11,13 @@
 #include <asm/unistd.h>
 #include <asm/vdso.h>
 
+#include "datapage.h"
+
        .text
 .global        __kernel_datapage_offset;
 __kernel_datapage_offset:
        .long   0
 
-V_FUNCTION_BEGIN(__get_datapage)
-  .cfi_startproc
-       /* We don't want that exposed or overridable as we want other objects
-        * to be able to bl directly to here
-        */
-       .protected __get_datapage
-       .hidden __get_datapage
-
-       mflr    r0
-  .cfi_register lr,r0
-
-       bcl     20,31,data_page_branch
-data_page_branch:
-       mflr    r3
-       mtlr    r0
-       addi    r3, r3, __kernel_datapage_offset-data_page_branch
-       lwz     r0,0(r3)
-  .cfi_restore lr
-       add     r3,r0,r3
-       blr
-  .cfi_endproc
-V_FUNCTION_END(__get_datapage)
-
 /*
  * void *__kernel_get_syscall_map(unsigned int *syscall_count) ;
  *
@@ -53,7 +32,7 @@ V_FUNCTION_BEGIN(__kernel_get_syscall_map)
        mflr    r12
   .cfi_register lr,r12
        mr      r4,r3
-       bl      V_LOCAL_FUNC(__get_datapage)
+       get_datapage    r3, r0
        mtlr    r12
        addi    r3,r3,CFG_SYSCALL_MAP64
        cmpldi  cr0,r4,0
@@ -75,7 +54,7 @@ V_FUNCTION_BEGIN(__kernel_get_tbfreq)
   .cfi_startproc
        mflr    r12
   .cfi_register lr,r12
-       bl      V_LOCAL_FUNC(__get_datapage)
+       get_datapage    r3, r0
        ld      r3,CFG_TB_TICKS_PER_SEC(r3)
        mtlr    r12
        crclr   cr0*4+so
diff --git a/arch/powerpc/kernel/vdso64/datapage.h 
b/arch/powerpc/kernel/vdso64/datapage.h
new file mode 100644
index 000000000000..f2f0da0f65f3
--- /dev/null
+++ b/arch/powerpc/kernel/vdso64/datapage.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+.macro get_datapage ptr, tmp
+       bcl     20,31,888f
+888:
+       mflr    \ptr
+       addi    \ptr, \ptr, __kernel_datapage_offset - 888b
+       lwz     \tmp, 0(\ptr)
+       add     \ptr, \tmp, \ptr
+.endm
diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S 
b/arch/powerpc/kernel/vdso64/gettimeofday.S
index 07bfe33fe874..7bcc879392cc 100644
--- a/arch/powerpc/kernel/vdso64/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso64/gettimeofday.S
@@ -12,6 +12,8 @@
 #include <asm/asm-offsets.h>
 #include <asm/unistd.h>
 
+#include "datapage.h"
+
        .text
 /*
  * Exact prototype of gettimeofday
@@ -26,7 +28,7 @@ V_FUNCTION_BEGIN(__kernel_gettimeofday)
 
        mr      r11,r3                  /* r11 holds tv */
        mr      r10,r4                  /* r10 holds tz */
-       bl      V_LOCAL_FUNC(__get_datapage)    /* get data page */
+       get_datapage    r3, r0
        cmpldi  r11,0                   /* check if tv is NULL */
        beq     2f
        lis     r7,1000000@ha           /* load up USEC_PER_SEC */
@@ -71,7 +73,7 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
        mflr    r12                     /* r12 saves lr */
   .cfi_register lr,r12
        mr      r11,r4                  /* r11 saves tp */
-       bl      V_LOCAL_FUNC(__get_datapage)    /* get data page */
+       get_datapage    r3, r0          /* get data page */
        lis     r7,NSEC_PER_SEC@h       /* want nanoseconds */
        ori     r7,r7,NSEC_PER_SEC@l
        beq     cr5,70f
@@ -218,7 +220,7 @@ V_FUNCTION_BEGIN(__kernel_time)
   .cfi_register lr,r12
 
        mr      r11,r3                  /* r11 holds t */
-       bl      V_LOCAL_FUNC(__get_datapage)
+       get_datapage    r3, r0
 
        ld      r4,STAMP_XTIME+TSPC64_TV_SEC(r3)
 
-- 
2.21.0

Reply via email to