Ciao,

Il Sab, 10 Agosto 2019 7:01 pm, Torbjörn Granlund ha scritto:
> We might provide several gcd_11 function variants to accommodate the
> internal uses you bring up.
>
> gcd_1o1o - two odd limbs
> gcd_1o1  - one odd and one odd/even limb
> gcd_11   - two odd/even limbs

This would be a rich set of entry points...

This means we are currently working on the _1o1o variants.

May I propose a small latency micro-optimisation for two of the
just-proposed x86_64 variants? The idea is not to use the register %r10 at
all, and to keep the value of v0 directly in %rax, so that it is already in
place when the function returns.

-----------------------------------------------
diff -Nrc6 gmp.b252c7e4f9b6/mpn/x86_64/bd2/gcd_11.asm
gmp/mpn/x86_64/bd2/gcd_11.asm
*** gmp.b252c7e4f9b6/mpn/x86_64/bd2/gcd_11.asm  2019-08-08
16:29:36.000000000 +0200
--- gmp/mpn/x86_64/bd2/gcd_11.asm       2019-08-13 02:55:44.287847000 +0200
***************
*** 69,93 ****

  ASM_START()
        TEXT
        ALIGN(16)
  PROLOGUE(mpn_gcd_11)
        FUNC_ENTRY(2)
!       mov     v0, %r10        C
!       sub     u0, %r10        C
        jz      L(end)          C

        ALIGN(16)               C              K10 BD1 BD2 ZEN CNR NHM SBR
! L(top):       rep;bsf %r10, %rcx      C tzcnt!       3   3   3   2   6   5   5
        mov     u0, %r9         C              2   2   2   2   3   3   4
!       sub     v0, u0          C              2   2   2   2   4   3   4
!       cmovc   %r10, u0        C if x-y < 0   0,3 0,3 0,3 0,3 0,6 0,5 0,5
!       cmovc   %r9, v0         C use x,y-x    0,3 0,3 0,3 0,3 2,8 1,7 1,7
        shr     R8(%rcx), u0    C              1,7 1,6 1,5 1,4 2,8 2,8 2,8
!       mov     v0, %r10        C              1   1   1   1   4   3   3
!       sub     u0, %r10        C              2   2   2   1   5   4   4
        jnz     L(top)          C

! L(end):       mov     v0, %rax
!       FUNC_EXIT()
        ret
  EPILOGUE()
--- 69,92 ----

  ASM_START()
        TEXT
        ALIGN(16)
  PROLOGUE(mpn_gcd_11)
        FUNC_ENTRY(2)
!       mov     v0, %rax        C
!       sub     u0, v0          C
        jz      L(end)          C

        ALIGN(16)               C              K10 BD1 BD2 ZEN CNR NHM SBR
! L(top):       rep;bsf v0, %rcx        C tzcnt!       3   3   3   2   6   5   5
        mov     u0, %r9         C              2   2   2   2   3   3   4
!       sub     %rax, u0        C              2   2   2   2   4   3   4
!       cmovc   v0, u0          C if x-y < 0   0,3 0,3 0,3 0,3 0,6 0,5 0,5
!       cmovc   %r9, %rax       C use x,y-x    0,3 0,3 0,3 0,3 2,8 1,7 1,7
        shr     R8(%rcx), u0    C              1,7 1,6 1,5 1,4 2,8 2,8 2,8
!       mov     %rax, v0        C              1   1   1   1   4   3   3
!       sub     u0, v0          C              2   2   2   1   5   4   4
        jnz     L(top)          C

! L(end):       FUNC_EXIT()
        ret
  EPILOGUE()
diff -Nrc6 gmp.b252c7e4f9b6/mpn/x86_64/core2/gcd_11.asm
gmp/mpn/x86_64/core2/gcd_11.asm
*** gmp.b252c7e4f9b6/mpn/x86_64/core2/gcd_11.asm        2019-08-08
16:29:36.000000000 +0200
--- gmp/mpn/x86_64/core2/gcd_11.asm     2019-08-13 02:55:44.287847000 +0200
***************
*** 69,93 ****

  ASM_START()
        TEXT
        ALIGN(16)
  PROLOGUE(mpn_gcd_11)
        FUNC_ENTRY(2)
!       mov     v0, %r10        C
!       sub     u0, %r10        C
        jz      L(end)          C

        ALIGN(16)               C              K10 BD1 CNR NHM SBR
! L(top):       bsf     %r10, %rcx      C              3   3   6   5   5
        mov     u0, %r9         C              2   2   3   3   4
!       sub     v0, u0          C              2   2   4   3   4
!       cmovc   %r10, u0        C if x-y < 0   0,3 0,3 0,6 0,5 0,5
!       cmovc   %r9, v0         C use x,y-x    0,3 0,3 2,8 1,7 1,7
        shr     R8(%rcx), u0    C              1,7 1,6 2,8 2,8 2,8
!       mov     v0, %r10        C              1   1   4   3   3
!       sub     u0, %r10        C              2   2   5   4   4
        jnz     L(top)          C

! L(end):       mov     v0, %rax
!       FUNC_EXIT()
        ret
  EPILOGUE()
--- 69,92 ----

  ASM_START()
        TEXT
        ALIGN(16)
  PROLOGUE(mpn_gcd_11)
        FUNC_ENTRY(2)
!       mov     v0, %rax        C
!       sub     u0, v0          C
        jz      L(end)          C

        ALIGN(16)               C              K10 BD1 CNR NHM SBR
! L(top):       bsf     v0, %rcx        C              3   3   6   5   5
        mov     u0, %r9         C              2   2   3   3   4
!       sub     %rax, u0        C              2   2   4   3   4
!       cmovc   v0, u0          C if x-y < 0   0,3 0,3 0,6 0,5 0,5
!       cmovc   %r9, %rax       C use x,y-x    0,3 0,3 2,8 1,7 1,7
        shr     R8(%rcx), u0    C              1,7 1,6 2,8 2,8 2,8
!       mov     %rax, v0        C              1   1   4   3   3
!       sub     u0, v0          C              2   2   5   4   4
        jnz     L(top)          C

! L(end):       FUNC_EXIT()
        ret
  EPILOGUE()
-----------------------------------------------

Ĝis,
m

-- 
http://bodrato.it/papers/

_______________________________________________
gmp-devel mailing list
gmp-devel@gmplib.org
https://gmplib.org/mailman/listinfo/gmp-devel

Reply via email to