2013-01-03  David S. Miller  <da...@davemloft.net>

        * mpn/sparc32/ultrasparct1/mul_1.asm (mpn_mul_1): Unroll main loop
        one time, align code on 32-byte boundary, add T2/T3/T4 timings.
        * mpn/sparc32/ultrasparct1/addmul_1.asm (mpn_addmul_1): Likewise.
        * mpn/sparc32/ultrasparct1/submul_1.asm (mpn_submul_1): Likewise.

diff --git a/mpn/sparc32/ultrasparct1/addmul_1.asm b/mpn/sparc32/ultrasparct1/addmul_1.asm
index 5001726..98df2bb 100644
--- a/mpn/sparc32/ultrasparct1/addmul_1.asm
+++ b/mpn/sparc32/ultrasparct1/addmul_1.asm
@@ -1,6 +1,6 @@
 dnl  SPARC T1 32-bit mpn_addmul_1.
 
-dnl  Copyright 2010 Free Software Foundation, Inc.
+dnl  Copyright 2010, 2013 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
 
@@ -20,33 +20,60 @@ dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 include(`../config.m4')
 
 C                 cycles/limb
-C UltraSPARC T1:       27
+C UltraSPARC T1:       24
+C UltraSPARC T2:       19
+C UltraSPARC T3:       19
+C UltraSPARC T4:       5
 
 C INPUT PARAMETERS
-define(`rp',   `%o0')
-define(`up',   `%o1')
-define(`n',    `%o2')
-define(`v0',   `%o3')
+define(`rp',   `%i0')                  C result limbs, updated in place
+define(`up',   `%i1')                  C source limbs
+define(`n',    `%i2')                  C limb count
+define(`v0',   `%i3')                  C multiplier, low 32 bits used
 
 ASM_START()
+       ALIGN(32)                       C start on a 32-byte boundary
 PROLOGUE(mpn_addmul_1)
-       mov     0, %g4
+       save    %sp, -96, %sp           C new window, args now in %i regs
+       srl     n, 0, %o4               C o4 = n, zero-extended
        srl     v0, 0, v0
-       srl     n, 0, n
-       dec     n                       C n--
-
-L(top):        lduw    [up+0], %g1
-       add     up, 4, up               C up++
-       mulx    %g1, v0, %g3
-       lduw    [rp+0], %g2
-       add     %g2, %g3, %g3
-       add     %g4, %g3, %g3
+       subcc   %o4, 1, %o4             C o4 = n-1, Z set when n is 1
+       be      L(final_one)            C one limb only: skip paired loop
+        clr    %o5                     C delay slot: carry limb = 0
+
+L(top):                                C two limbs per pass
+       lduw    [up+0], %l0             C l0 = up[0]
+       lduw    [rp+0], %l2             C l2 = rp[0]
+       lduw    [up+4], %l1             C l1 = up[1]
+       lduw    [rp+4], %l3             C l3 = rp[1]
+       mulx    %l0, v0, %g3            C g3 = up[0] * v0
+       add     up, 8, up               C up += 2 limbs
+       mulx    %l1, v0, %o3            C o3 = up[1] * v0
+       sub     %o4, 2, %o4             C o4 = limbs left - 1
+       add     rp, 8, rp               C rp += 2 limbs
+       add     %l2, %g3, %g3           C + old rp[0]
+       add     %o5, %g3, %g3           C + carry in
+       stw     %g3, [rp-8]             C low word -> first limb
+       srlx    %g3, 32, %o5            C high word = carry
+       add     %l3, %o3, %o3           C + old rp[1]
+       add     %o5, %o3, %o3           C + carry from first limb
+       stw     %o3, [rp-4]             C low word -> second limb
+       brgz    %o4, L(top)             C loop while 2+ limbs remain
+        srlx   %o3, 32, %o5            C delay slot: carry out
+
+       brlz,pt %o4, L(done)            C none left: n was even
+        nop                            C delay slot
+
+L(final_one):                          C exactly one limb left
+       lduw    [up+0], %l0             C l0 = last source limb
+       lduw    [rp+0], %l2             C l2 = last result limb
+       mulx    %l0, v0, %g3            C g3 = limb * v0
+       add     %l2, %g3, %g3           C + old result limb
+       add     %o5, %g3, %g3           C + carry in
        stw     %g3, [rp+0]
-       add     rp, 4, rp               C rp++
-       srlx    %g3, 32, %g4
-       brnz    n, L(top)
-       dec     n                       C n--
+       srlx    %g3, 32, %o5            C final carry limb
 
-       retl
-       mov     %g4, %o0                C return value
+L(done):                               C o5 = carry limb to return
+       ret
+        restore %o5, 0, %o0            C delay slot: return carry in o0
 EPILOGUE()
diff --git a/mpn/sparc32/ultrasparct1/mul_1.asm b/mpn/sparc32/ultrasparct1/mul_1.asm
index fcde0c7..a002292 100644
--- a/mpn/sparc32/ultrasparct1/mul_1.asm
+++ b/mpn/sparc32/ultrasparct1/mul_1.asm
@@ -1,6 +1,6 @@
 dnl  SPARC T1 32-bit mpn_mul_1.
 
-dnl  Copyright 2010 Free Software Foundation, Inc.
+dnl  Copyright 2010, 2013 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
 
@@ -20,7 +20,10 @@ dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 include(`../config.m4')
 
 C                 cycles/limb
-C UltraSPARC T1:       23
+C UltraSPARC T1:       20
+C UltraSPARC T2:       18
+C UltraSPARC T3:       18
+C UltraSPARC T4:       4
 
 C INPUT PARAMETERS
 define(`rp',   `%o0')
@@ -29,22 +32,41 @@ define(`n', `%o2')
 define(`v0',   `%o3')
 
 ASM_START()
+       ALIGN(32)                       C start on a 32-byte boundary
 PROLOGUE(mpn_mul_1)
-       mov     0, %g4
-       srl     v0, 0, v0
        srl     n, 0, n
-       dec     n                       C n--
+       srl     v0, 0, v0               C keep low 32 bits of multiplier
+       subcc   n, 1, n                 C n = n-1, Z set when n was 1
+       be      L(final_one)            C one limb only: skip paired loop
+        clr    %o5                     C delay slot: carry limb = 0
+
+L(top):                                C two limbs per pass
+       lduw    [up+0], %g1             C g1 = up[0]
+       lduw    [up+4], %g2             C g2 = up[1]
+       mulx    %g1, v0, %g3            C g3 = up[0] * v0
+       add     up, 8, up               C up += 2 limbs
+       mulx    %g2, v0, %o4            C o4 = up[1] * v0
+       sub     n, 2, n                 C n = limbs left - 1
+       add     rp, 8, rp               C rp += 2 limbs
+       add     %o5, %g3, %g3           C + carry in
+       stw     %g3, [rp-8]             C low word -> first limb
+       srlx    %g3, 32, %o5            C high word = carry
+       add     %o5, %o4, %o4           C + carry from first limb
+       stw     %o4, [rp-4]             C low word -> second limb
+       brgz    n, L(top)               C loop while 2+ limbs remain
+        srlx   %o4, 32, %o5            C delay slot: carry out
+
+       brlz,pt n, L(done)              C none left: n was even
+        nop                            C delay slot
 
-L(top):        lduw    [up+0], %g1
-       add     up, 4, up               C up++
+L(final_one):                          C exactly one limb left
+       lduw    [up+0], %g1             C g1 = last source limb
        mulx    %g1, v0, %g3
-       add     %g4, %g3, %g3
+       add     %o5, %g3, %g3           C + carry in
        stw     %g3, [rp+0]
-       add     rp, 4, rp               C rp++
-       srlx    %g3, 32, %g4
-       brnz    n, L(top)
-       dec     n                       C n--
+       srlx    %g3, 32, %o5            C final carry limb
 
+L(done):                               C o5 = carry limb to return
        retl
-       mov     %g4, %o0                C return value
+        mov    %o5, %o0                C delay slot: return value
 EPILOGUE()
diff --git a/mpn/sparc32/ultrasparct1/submul_1.asm b/mpn/sparc32/ultrasparct1/submul_1.asm
index 605a882..084c61c 100644
--- a/mpn/sparc32/ultrasparct1/submul_1.asm
+++ b/mpn/sparc32/ultrasparct1/submul_1.asm
@@ -1,6 +1,6 @@
 dnl  SPARC T1 32-bit mpn_submul_1.
 
-dnl  Copyright 2010 Free Software Foundation, Inc.
+dnl  Copyright 2010, 2013 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
 
@@ -20,33 +20,61 @@ dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 include(`../config.m4')
 
 C                 cycles/limb
-C UltraSPARC T1:       27
+C UltraSPARC T1:       24
+C UltraSPARC T2:       19
+C UltraSPARC T3:       19
+C UltraSPARC T4:       5
 
 C INPUT PARAMETERS
-define(`rp',   `%o0')
-define(`up',   `%o1')
-define(`n',    `%o2')
-define(`v0',   `%o3')
+define(`rp',   `%i0')                  C result limbs, updated in place
+define(`up',   `%i1')                  C source limbs
+define(`n',    `%i2')                  C limb count
+define(`v0',   `%i3')                  C multiplier, low 32 bits used
 
 ASM_START()
+       ALIGN(32)                       C start on a 32-byte boundary
 PROLOGUE(mpn_submul_1)
-       subcc   %g0, %g0, %g4           C clear CF and g4
+       save    %sp, -96, %sp           C new window, args now in %i regs
+       srl     n, 0, %o4               C o4 = n, zero-extended
        srl     v0, 0, v0
-       srl     n, 0, n
-       dec     n                       C n--
-
-L(top):        lduw    [up+0], %g1
-       add     up, 4, up               C up++
-       mulx    %g1, v0, %g3
-       lduw    [rp+0], %g2
-       addx    %g4, %g3, %g3
-       srlx    %g3, 32, %g4
-       subcc   %g2, %g3, %g3
+       subcc   %o4, 1, %o4             C o4 = n-1, Z set when n is 1
+       be      L(final_one)            C one limb only: skip paired loop
+        subcc  %g0, 0, %o5             C delay slot: o5 = 0, carry clear
+
+L(top):                                C two limbs per pass
+       lduw    [up+0], %l0             C l0 = up[0]
+       lduw    [rp+0], %l2             C l2 = rp[0]
+       lduw    [up+4], %l1             C l1 = up[1]
+       lduw    [rp+4], %l3             C l3 = rp[1]
+       mulx    %l0, v0, %g3            C g3 = up[0] * v0
+       add     up, 8, up               C up += 2 limbs
+       mulx    %l1, v0, %o3            C o3 = up[1] * v0
+       sub     %o4, 2, %o4             C no cc update, borrow flag kept
+       add     rp, 8, rp               C rp += 2 limbs
+       addx    %o5, %g3, %g3           C + carry limb + borrow flag
+       srlx    %g3, 32, %o5            C high word = next carry limb
+       subcc   %l2, %g3, %g3           C rp[0] - low word, sets borrow
+       stw     %g3, [rp-8]             C low word -> first limb
+       addx    %o5, %o3, %o3           C + carry limb + borrow flag
+       srlx    %o3, 32, %o5            C high word = next carry limb
+       subcc   %l3, %o3, %o3           C rp[1] - low word, sets borrow
+       brgz    %o4, L(top)             C loop while 2+ limbs remain
+        stw    %o3, [rp-4]             C delay slot: store second limb
+
+       brlz,pt %o4, L(done)            C none left: n was even
+        nop                            C delay slot
+
+L(final_one):                          C exactly one limb left
+       lduw    [up+0], %l0             C l0 = last source limb
+       lduw    [rp+0], %l2             C l2 = last result limb
+       mulx    %l0, v0, %g3            C g3 = limb * v0
+       addx    %o5, %g3, %g3           C + carry limb + borrow flag
+       srlx    %g3, 32, %o5            C high word = carry limb
+       subcc   %l2, %g3, %g3           C result - low word, sets borrow
        stw     %g3, [rp+0]
-       add     rp, 4, rp               C rp++
-       brnz    n, L(top)
-       dec     n                       C n--
 
-       retl
-       addx    %g4, 0, %o0             C return value
+L(done):                               C o5 = carry limb, flag = borrow
+       addx    %o5, 0, %o5             C fold last borrow into carry
+       ret
+        restore %o5, 0, %o0            C delay slot: return value in o0
 EPILOGUE()
-- 
1.7.10.4

_______________________________________________
gmp-devel mailing list
gmp-devel@gmplib.org
http://gmplib.org/mailman/listinfo/gmp-devel

Reply via email to