On Sunday 15 February 2009 14:13:23 David Harvey wrote:
> Hi, I'm curious to try this new divide-by-3 code, but I can't find it
> in the repo. Where do I look? How many c/l is it?
>
> david

About 2.3 c/l; I expect it could be tweaked some more.
I never put it in the repo because I never got around to proving it. I'll
take a look at it in the next few days.

Brian did put a Windows version in, though; did you prove it?

Jason

--~--~---------~--~----~------------~-------~--~----~
You received this message because you are subscribed to the Google Groups 
"mpir-devel" group.
To post to this group, send email to mpir-devel@googlegroups.com
To unsubscribe from this group, send email to 
mpir-devel+unsubscr...@googlegroups.com
For more options, visit this group at 
http://groups.google.com/group/mpir-devel?hl=en
-~----------~----~----~----~------~----~------~--~---

dnl  AMD64 mpn_divexact_by3c

dnl  Copyright 2009 Jason Moxham

dnl  This file is part of the MPIR Library.

dnl  The MPIR Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 2.1 of the License, or (at
dnl  your option) any later version.

dnl  The MPIR Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.

dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the MPIR Library; see the file COPYING.LIB.  If not, write
dnl  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
dnl  Boston, MA 02110-1301, USA.

include(`../config.m4')

C       (rdi,rdx)=(rsi,rdx)/3  rcx=carry in
C       rax=carry out

C   NOTE: could take 0x5555...5555 as a further parameter, so that this
C   would become mpn_divexact_by_ff_over_c

ASM_START()
PROLOGUE(mpn_divexact_by3c)
# Version 1.0.4
C  Exact division by 3 of the n-limb number at (rsi), least significant
C  limb first; the n quotient limbs are stored at (rdi).
C
C  In:   rdi = quotient pointer
C        rsi = source pointer
C        rdx = n, the limb count (n = 0 reads and writes nothing)
C        rcx = carry in, which must be 0, 1 or 2
C  Out:  rax = carry out, 0, 1 or 2; it is 0 when the division was exact
C  Clobbers rcx, rdx, rsi, rdi, r8, r9 and the flags.  No stack is used.
C
C  Method: let B = 2^64 and M = (B-1)/3 = 0x5555555555555555, so that
C  3*M = -1 mod B and -M is the inverse of 3 mod B.  rcx carries the value
C  a = c*M, where c is the borrow in its 0/1/2 form.  For each source
C  limb s, with hi:lo = s*M,
C       q = a - lo mod B          (the quotient limb)
C       a = q - hi - borrow       (equals c*M for the outgoing borrow c)
C  The carry is multiplied by M on entry and mapped back with
C  c = -3*a mod B on exit, so callers see the documented 0/1/2 carry of
C  mpn_divexact_by3c (c*B^n + src - carry_in = 3*quotient).  Without these
C  two conversions a carry in of 1 or 2 gave a wrong quotient, and a
C  non-exact division returned M or 2*M instead of 1 or 2.

        mov     $3,%r9d
C  Bias both pointers to limb n-3 and index with r9 = 3-n, so that
C  (reg,%r9,8) addresses limb 0 and r9 counts upwards towards 0..3.
        lea     -24(%rsi,%rdx,8),%rsi
        lea     -24(%rdi,%rdx,8),%rdi
        mov     $0x5555555555555555,%r8
C  Convert the 0/1/2 carry in to the internal form a = c*M.  This must
C  stay ahead of the sub below, whose carry flag the jnc consumes.
        imul    %r8,%rcx
        sub     %rdx,%r9
C  No borrow means n <= 3: only the tail code is needed.
        jnc     skiploop
        ALIGN(16)
C  Main loop, four limbs per pass.  rdx is free for mul here because n
C  has already been folded into r9 and the pointers.
loop:
        mov     (%rsi,%r9,8),%rax
        mul     %r8
        sub     %rax,%rcx
        mov     %rcx,(%rdi,%r9,8)
        sbb     %rdx,%rcx
        mov     8(%rsi,%r9,8),%rax
        mul     %r8
        sub     %rax,%rcx
        mov     %rcx,8(%rdi,%r9,8)
        sbb     %rdx,%rcx
        mov     16(%rsi,%r9,8),%rax
        mul     %r8
        sub     %rax,%rcx
        mov     %rcx,16(%rdi,%r9,8)
        sbb     %rdx,%rcx
        mov     24(%rsi,%r9,8),%rax
        mul     %r8
        sub     %rax,%rcx
        mov     %rcx,24(%rdi,%r9,8)
        sbb     %rdx,%rcx
C  The add carries exactly when r9 wraps from negative into 0..3.
        add     $4,%r9
        jnc     loop
skiploop:
C  Here r9 is in 0..3 and 3-r9 limbs are left to do.
C  Bit 1 of r9 clear (r9 = 0 or 1): at least two limbs remain.
        test    $2,%r9
        jnz     skip
        mov     (%rsi,%r9,8),%rax
        mul     %r8
        sub     %rax,%rcx
        mov     %rcx,(%rdi,%r9,8)
        sbb     %rdx,%rcx
        mov     8(%rsi,%r9,8),%rax
        mul     %r8
        sub     %rax,%rcx
        mov     %rcx,8(%rdi,%r9,8)
        sbb     %rdx,%rcx
        add     $2,%r9
skip:
C  Bit 0 of r9 clear (r9 = 2): exactly one limb remains.
        test    $1,%r9
        jnz     end
        mov     (%rsi,%r9,8),%rax
        mul     %r8
        sub     %rax,%rcx
        mov     %rcx,(%rdi,%r9,8)
        sbb     %rdx,%rcx
end:
C  Map the internal carry a = c*M back to c: -3*c*M = c mod 2^64.
        imul    $-3,%rcx,%rax
        ret
EPILOGUE()

Reply via email to