From: Torbjorn Granlund <t...@gmplib.org>
Date: Sun, 06 Jan 2013 01:25:10 +0100

> For sub_n, I suppose
> 
>     ldx
>     ldx
>     xnor (with %g0)
>     addxcc
>     stx
> 
> would be the right mix.  This should run in 2.5 + epsilon c/l, if
> properly software pipelined.  4x should give 2.75 c/l, unless they stick
> some pipeline bubbles for taken branches.
> 
> For add_n, things should run 0.5 c/l faster.
> 
> (I am assuming it is a 2-way pipeline.)

It is a 2-way pipeline, but you can get 3-way in certain circumstances
if one of the instructions is a store.  My Sparc-T4 pipeline description
in the GCC sparc backend shows how it works.

I have an add_n implementation and at 2-way unrolling it runs at 3.5
cycles per limb.

BTW are you going to get me the necessary paperwork so that I can
have all of this work I'm doing installed at some point?

====================
[PATCH] Optimize mpn_add_n for sparc T3 and later.

        * mpn/sparc64/ultrasparct3/add_n.asm: New file.
---
 ChangeLog                          |    4 ++
 mpn/sparc64/ultrasparct3/add_n.asm |   73 ++++++++++++++++++++++++++++++++++++
 2 files changed, 77 insertions(+)
 create mode 100644 mpn/sparc64/ultrasparct3/add_n.asm

diff --git a/ChangeLog b/ChangeLog
index 8cceb64..e622e7e 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,7 @@
+2013-01-04  David S. Miller  <da...@davemloft.net>
+
+       * mpn/sparc64/ultrasparct3/add_n.asm: New file.
+
 2013-01-02  David S. Miller  <da...@davemloft.net>
 
        * config.guess: Recognize UltraSparc T4 under Linux.
diff --git a/mpn/sparc64/ultrasparct3/add_n.asm b/mpn/sparc64/ultrasparct3/add_n.asm
new file mode 100644
index 0000000..16bd0c4
--- /dev/null
+++ b/mpn/sparc64/ultrasparct3/add_n.asm
@@ -0,0 +1,73 @@
+dnl  SPARC v9 mpn_add_n for T3/T4.
+
+dnl  Copyright 2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                 cycles/limb
+C UltraSPARC T3:        9
+C UltraSPARC T4:        3.5
+
+C INPUT PARAMETERS
+define(`rp', `%o0')                    C destination pointer: sums are stored through it
+define(`up', `%o1')                    C first source pointer: limbs are loaded from it
+define(`vp', `%o2')                    C second source pointer: limbs are loaded from it
+define(`n',  `%o3')                    C limb count, counted down by the code below
+define(`cy', `%o4')                    C carry-in; only nonzero/zero is tested, then %o4 is reused as a temporary
+
+ASM_START()
+       REGISTER(%g2,#scratch)          C %g2 is used as a temporary below; macro comes from config.m4, presumably a .register declaration
+       REGISTER(%g3,#scratch)          C likewise for %g3
+       ALIGN(32)                       C presumably 32-byte alignment of the first entry point; confirm against config.m4
+PROLOGUE(mpn_add_nc)                   C add with carry-in: same as mpn_add_n but cy (%o4) is supplied by the caller
+       b,a     L(ent)                  C annulled unconditional branch (no delay slot executed) into mpn_add_n, past its cy clear
+EPILOGUE()
+PROLOGUE(mpn_add_n)                    C rp[] = up[] + vp[] over n limbs; carry-out returned in %o0
+       mov     0, cy                   C plain add_n entry: start with no carry-in
+L(ent):                                C mpn_add_nc joins here with cy supplied by its caller
+       subcc   n, 1, n                 C n = n - 1; zero means exactly one limb to add
+       be,pn   %xcc, L(final_one)      C use the 64-bit flags: n is 64 bits wide, icc would only look at the low 32 bits of n - 1
+        cmp    %g0, cy                 C delay slot, executed on both paths: 0 - cy sets the carry flag iff cy != 0
+L(top):                                C main loop: two limbs per pass, n drops by 2
+       ldx     [up+0], %o4             C %o4 is also cy; safe to overwrite, cy already lives in the carry flag
+       add     up, 16, up              C add/sub (no cc suffix) leave the carry flag untouched
+       ldx     [vp+0], %o5
+       add     vp, 16, vp
+       ldx     [up-8], %g1             C second limb pair, addressed relative to the advanced pointers
+       add     rp, 16, rp
+       ldx     [vp-8], %g2
+       sub     n, 2, n
+       addxccc %o4, %o5, %g3           C first sum plus incoming carry; produces the next carry
+       stx     %g3, [rp-16]
+       addxccc %g1, %g2, %g2           C second sum, chained through the carry flag
+       brgz    n, L(top)               C branch on the register value, not the flags: loop while n > 0
+        stx    %g2, [rp-8]             C delay slot: store the second sum on every pass
+C Loop exit: n < 0 means no limb is left, n == 0 means one limb is left.
+       brlz,pt n, L(done)
+        nop
+C Tail: add the single remaining limb (also reached directly when the count is one).
+L(final_one):
+       ldx     [up+0], %o4
+       ldx     [vp+0], %o5
+       addxccc %o4, %o5, %g3
+       stx     %g3, [rp+0]
+
+L(done):
+       retl
+        addxc  %g0, %g0, %o0           C delay slot: %o0 = 0 + 0 + carry, i.e. the carry-out
+EPILOGUE()
-- 
1.7.10.4

_______________________________________________
gmp-devel mailing list
gmp-devel@gmplib.org
http://gmplib.org/mailman/listinfo/gmp-devel

Reply via email to