2013-01-03  David S. Miller  <da...@davemloft.net>

	* mpn/sparc32/ultrasparct1/mul_1.asm (mpn_mul_1): Unroll main loop one
	time, align code on 32-byte boundary, add T2/T3/T4 timings.
	* mpn/sparc32/ultrasparct1/addmul_1.asm (mpn_addmul_1): Likewise.
	* mpn/sparc32/ultrasparct1/submul_1.asm (mpn_submul_1): Likewise.

diff --git a/mpn/sparc32/ultrasparct1/addmul_1.asm b/mpn/sparc32/ultrasparct1/addmul_1.asm index 5001726..98df2bb 100644 --- a/mpn/sparc32/ultrasparct1/addmul_1.asm +++ b/mpn/sparc32/ultrasparct1/addmul_1.asm @@ -1,6 +1,6 @@ dnl SPARC T1 32-bit mpn_addmul_1. -dnl Copyright 2010 Free Software Foundation, Inc. +dnl Copyright 2010, 2013 Free Software Foundation, Inc. dnl This file is part of the GNU MP Library. @@ -20,33 +20,60 @@ dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') C cycles/limb -C UltraSPARC T1: 27 +C UltraSPARC T1: 24 +C UltraSPARC T2: 19 +C UltraSPARC T3: 19 +C UltraSPARC T4: 5 C INPUT PARAMETERS -define(`rp', `%o0') -define(`up', `%o1') -define(`n', `%o2') -define(`v0', `%o3') +define(`rp', `%i0') +define(`up', `%i1') +define(`n', `%i2') +define(`v0', `%i3') ASM_START() + ALIGN(32) PROLOGUE(mpn_addmul_1) - mov 0, %g4 + save %sp, -96, %sp + srl n, 0, %o4 srl v0, 0, v0 - srl n, 0, n - dec n C n-- - -L(top): lduw [up+0], %g1 - add up, 4, up C up++ - mulx %g1, v0, %g3 - lduw [rp+0], %g2 - add %g2, %g3, %g3 - add %g4, %g3, %g3 + subcc %o4, 1, %o4 + be L(final_one) + clr %o5 + +L(top): + lduw [up+0], %l0 + lduw [rp+0], %l2 + lduw [up+4], %l1 + lduw [rp+4], %l3 + mulx %l0, v0, %g3 + add up, 8, up + mulx %l1, v0, %o3 + sub %o4, 2, %o4 + add rp, 8, rp + add %l2, %g3, %g3 + add %o5, %g3, %g3 + stw %g3, [rp-8] + srlx %g3, 32, %o5 + add %l3, %o3, %o3 + add %o5, %o3, %o3 + stw %o3, [rp-4] + brgz %o4, L(top) + srlx %o3, 32, %o5 + + brlz,pt %o4, L(done) + nop + +L(final_one): + lduw [up+0], %l0 + lduw [rp+0], %l2 + mulx %l0, v0, %g3 + add %l2, %g3, %g3 + add %o5, %g3, %g3 stw %g3, [rp+0] - add rp, 4, rp C rp++ - srlx %g3, 32, %g4 - brnz n, L(top) - dec n C n-- + srlx %g3, 32, %o5 - retl - mov %g4, %o0 C return value +L(done): + ret + restore %o5, 0, %o0 EPILOGUE() diff --git a/mpn/sparc32/ultrasparct1/mul_1.asm b/mpn/sparc32/ultrasparct1/mul_1.asm index fcde0c7..a002292 100644 --- 
a/mpn/sparc32/ultrasparct1/mul_1.asm +++ b/mpn/sparc32/ultrasparct1/mul_1.asm @@ -1,6 +1,6 @@ dnl SPARC T1 32-bit mpn_mul_1. -dnl Copyright 2010 Free Software Foundation, Inc. +dnl Copyright 2010, 2013 Free Software Foundation, Inc. dnl This file is part of the GNU MP Library. @@ -20,7 +20,10 @@ dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') C cycles/limb -C UltraSPARC T1: 23 +C UltraSPARC T1: 20 +C UltraSPARC T2: 18 +C UltraSPARC T3: 18 +C UltraSPARC T4: 4 C INPUT PARAMETERS define(`rp', `%o0') @@ -29,22 +32,41 @@ define(`n', `%o2') define(`v0', `%o3') ASM_START() + ALIGN(32) PROLOGUE(mpn_mul_1) - mov 0, %g4 - srl v0, 0, v0 srl n, 0, n - dec n C n-- + srl v0, 0, v0 + subcc n, 1, n + be L(final_one) + clr %o5 + +L(top): + lduw [up+0], %g1 + lduw [up+4], %g2 + mulx %g1, v0, %g3 + add up, 8, up + mulx %g2, v0, %o4 + sub n, 2, n + add rp, 8, rp + add %o5, %g3, %g3 + stw %g3, [rp-8] + srlx %g3, 32, %o5 + add %o5, %o4, %o4 + stw %o4, [rp-4] + brgz n, L(top) + srlx %o4, 32, %o5 + + brlz,pt n, L(done) + nop -L(top): lduw [up+0], %g1 - add up, 4, up C up++ +L(final_one): + lduw [up+0], %g1 mulx %g1, v0, %g3 - add %g4, %g3, %g3 + add %o5, %g3, %g3 stw %g3, [rp+0] - add rp, 4, rp C rp++ - srlx %g3, 32, %g4 - brnz n, L(top) - dec n C n-- + srlx %g3, 32, %o5 +L(done): retl - mov %g4, %o0 C return value + mov %o5, %o0 EPILOGUE() diff --git a/mpn/sparc32/ultrasparct1/submul_1.asm b/mpn/sparc32/ultrasparct1/submul_1.asm index 605a882..084c61c 100644 --- a/mpn/sparc32/ultrasparct1/submul_1.asm +++ b/mpn/sparc32/ultrasparct1/submul_1.asm @@ -1,6 +1,6 @@ dnl SPARC T1 32-bit mpn_submul_1. -dnl Copyright 2010 Free Software Foundation, Inc. +dnl Copyright 2010, 2013 Free Software Foundation, Inc. dnl This file is part of the GNU MP Library. @@ -20,33 +20,61 @@ dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. 
include(`../config.m4') C cycles/limb -C UltraSPARC T1: 27 +C UltraSPARC T1: 24 +C UltraSPARC T2: 19 +C UltraSPARC T3: 19 +C UltraSPARC T4: 5 C INPUT PARAMETERS -define(`rp', `%o0') -define(`up', `%o1') -define(`n', `%o2') -define(`v0', `%o3') +define(`rp', `%i0') +define(`up', `%i1') +define(`n', `%i2') +define(`v0', `%i3') ASM_START() + ALIGN(32) PROLOGUE(mpn_submul_1) - subcc %g0, %g0, %g4 C clear CF and g4 + save %sp, -96, %sp + srl n, 0, %o4 srl v0, 0, v0 - srl n, 0, n - dec n C n-- - -L(top): lduw [up+0], %g1 - add up, 4, up C up++ - mulx %g1, v0, %g3 - lduw [rp+0], %g2 - addx %g4, %g3, %g3 - srlx %g3, 32, %g4 - subcc %g2, %g3, %g3 + subcc %o4, 1, %o4 + be L(final_one) + subcc %g0, 0, %o5 + +L(top): + lduw [up+0], %l0 + lduw [rp+0], %l2 + lduw [up+4], %l1 + lduw [rp+4], %l3 + mulx %l0, v0, %g3 + add up, 8, up + mulx %l1, v0, %o3 + sub %o4, 2, %o4 + add rp, 8, rp + addx %o5, %g3, %g3 + srlx %g3, 32, %o5 + subcc %l2, %g3, %g3 + stw %g3, [rp-8] + addx %o5, %o3, %o3 + srlx %o3, 32, %o5 + subcc %l3, %o3, %o3 + brgz %o4, L(top) + stw %o3, [rp-4] + + brlz,pt %o4, L(done) + nop + +L(final_one): + lduw [up+0], %l0 + lduw [rp+0], %l2 + mulx %l0, v0, %g3 + addx %o5, %g3, %g3 + srlx %g3, 32, %o5 + subcc %l2, %g3, %g3 stw %g3, [rp+0] - add rp, 4, rp C rp++ - brnz n, L(top) - dec n C n-- - retl - addx %g4, 0, %o0 C return value +L(done): + addx %o5, 0, %o5 + ret + restore %o5, 0, %o0 EPILOGUE() -- 1.7.10.4 _______________________________________________ gmp-devel mailing list gmp-devel@gmplib.org http://gmplib.org/mailman/listinfo/gmp-devel