Author: ed
Date: Thu Jan 12 17:55:22 2012
New Revision: 230025
URL: http://svn.freebsd.org/changeset/base/230025

Log:
  Add SPARC64 version of div/mod written in assembly.
  
  This version is similar to the code shipped with libgcc. It is based on
  the code from the SPARC64 architecture manual, provided without any
  restrictions.
  
  Tested by:    flo@

Added:
  head/contrib/compiler-rt/lib/sparc64/
  head/contrib/compiler-rt/lib/sparc64/divmod.m4
  head/contrib/compiler-rt/lib/sparc64/divsi3.S
  head/contrib/compiler-rt/lib/sparc64/generate.sh
  head/contrib/compiler-rt/lib/sparc64/modsi3.S
  head/contrib/compiler-rt/lib/sparc64/udivsi3.S
  head/contrib/compiler-rt/lib/sparc64/umodsi3.S

Added: head/contrib/compiler-rt/lib/sparc64/divmod.m4
==============================================================================
--- /dev/null   00:00:00 1970   (empty, because file is newly added)
+++ head/contrib/compiler-rt/lib/sparc64/divmod.m4      Thu Jan 12 17:55:22 2012        (r230025)
@@ -0,0 +1,250 @@
+/*
+ * This m4 code has been taken from The SPARC Architecture Manual Version 8.
+ */
+
+/*
+ * Division/Remainder
+ *
+ * Input is:
+ *   dividend -- the thing being divided
+ *   divisor -- how many ways to divide it
+ * Important parameters:
+ *   N -- how many bits per iteration we try to get
+ *        as our current guess: define(N, 4) define(TWOSUPN, 16)
+ *   WORDSIZE -- how many bits altogether we're talking about:
+ *               obviously: define(WORDSIZE, 32)
+ * A derived constant:
+ *   TOPBITS -- how many bits are in the top "decade" of a number:
+ *        define(TOPBITS, eval( WORDSIZE - N*((WORDSIZE-1)/N) ) )
+ * Important variables are:
+ *   Q -- the partial quotient under development -- initially 0
+ *   R -- the remainder so far -- initially == the dividend
+ *   ITER -- number of iterations of the main division loop which will
+ *           be required. Equal to CEIL( lg2(quotient)/N )
+ *           Note that this is log_base_(2^N) of the quotient.
+ *   V -- the current comparand -- initially divisor*2^(ITER*N-1)
+ * Cost:
+ *   current estimate for non-large dividend is
+ *        CEIL( lg2(quotient) / N ) x ( 10 + 7*N/2 ) + C
+ *   a large dividend is one greater than 2^(31-TOPBITS) and takes a
+ *   different path, as the upper bits of the quotient must be developed
+ *   one bit at a time.
+ *   This uses the m4 and cpp macro preprocessors.
+ */
+
+define(dividend, `%o0')dnl first argument: the number being divided
+define(divisor,`%o1')dnl second argument: the number to divide by
+define(Q, `%o2')dnl partial quotient under development, starts at 0
+define(R, `%o3')dnl remainder so far, starts as the dividend
+define(ITER, `%o4')dnl count of main-loop iterations still to run
+define(V, `%o5')dnl current comparand, the scaled divisor
+define(SIGN, `%g3')dnl its sign bit records whether the answer is negative
+define(T, `%g1')dnl scratch: threshold used to detect a large dividend
+define(SC,`%g2')dnl count of single-bit steps on the large-dividend path
+/*
+ * This is the recursive definition of how we develop quotient digits.
+ * It takes three important parameters:
+ *   $1 -- the current depth, 1<=$1<=N
+ *   $2 -- the current accumulation of quotient bits
+ *   N -- max depth
+ * We add a new bit to $2 and either recurse or insert the bits in the quotient.
+ * Dynamic input:
+ *   R -- current remainder
+ *   Q -- current quotient
+ *   V -- current comparand
+ *   cc -- set on current value of R
+ * Dynamic output:
+ *   R', Q', V', cc'
+ */
+
+#include "../assembly.h"
+
+.text
+       .align 4
+
+dnl DEVELOP_QUOTIENT_BITS(depth, bits) expands into one level of a binary
+dnl decision tree and recurses until depth reaches N. Each level halves V,
+dnl then subtracts it from R when R is nonnegative or adds it back when R is
+dnl negative, folding +1 or -1 into the accumulated bits. Every negative-side
+dnl label is named L.depth.(TWOSUPN+bits) so that it is unique within the
+dnl tree. Each leaf adds its accumulated value to Q in the delay slot of a
+dnl branch to the local label 9, which is emitted once, by the depth 1 call.
+dnl These notes are dnl lines so that they never reach the generated files.
+define(DEVELOP_QUOTIENT_BITS,
+`      !depth $1, accumulated bits $2
+       bl      L.$1.eval(TWOSUPN+$2)
+       srl     V,1,V
+       ! remainder is nonnegative
+       subcc   R,V,R
+       ifelse( $1, N,
+       `       b       9f
+               add     Q, ($2*2+1), Q
+       ',` DEVELOP_QUOTIENT_BITS( incr($1), `eval(2*$2+1)')
+       ')
+L.$1.eval(TWOSUPN+$2):
+       ! remainder is negative
+       addcc   R,V,R
+       ifelse( $1, N,
+       `       b       9f
+               add     Q, ($2*2-1), Q
+       ',` DEVELOP_QUOTIENT_BITS( incr($1), `eval(2*$2-1)')
+       ')
+       ifelse( $1, 1, `9:')
+')
+ifelse( ANSWER, `quotient', `
+DEFINE_COMPILERRT_FUNCTION(__udivsi3)
+       ! leaf routine: arguments are used in place, no register window is allocated
+       b       divide
+       mov     0,SIGN                  ! result always nonnegative
+DEFINE_COMPILERRT_FUNCTION(__divsi3)
+       ! leaf routine: arguments are used in place, no register window is allocated
+       orcc    divisor,dividend,%g0    ! are either dividend or divisor negative
+       bge     divide                  ! if not, skip this junk
+       xor     divisor,dividend,SIGN   ! record sign of result in sign of SIGN
+       tst     divisor
+       bge     2f
+       tst     dividend
+       ! divisor < 0
+       bge     divide
+       neg     divisor                 ! delay slot: executed on both paths
+       2:
+       ! dividend < 0
+       neg     dividend
+       ! FALL THROUGH
+',`
+DEFINE_COMPILERRT_FUNCTION(__umodsi3)
+       ! leaf routine: arguments are used in place, no register window is allocated
+       b       divide
+       mov     0,SIGN                  ! result always nonnegative
+DEFINE_COMPILERRT_FUNCTION(__modsi3)
+       ! leaf routine: arguments are used in place, no register window is allocated
+       orcc    divisor,dividend,%g0    ! are either dividend or divisor negative
+       bge     divide                  ! if not, skip this junk
+       mov     dividend,SIGN           ! record sign of result in sign of SIGN
+       tst     divisor
+       bge     2f
+       tst     dividend
+       ! divisor < 0
+       bge     divide
+       neg     divisor                 ! delay slot: executed on both paths
+       2:
+       ! dividend < 0
+       neg     dividend
+       ! FALL THROUGH
+')
+
+divide:
+       ! Compute size of quotient, scale comparand.
+       orcc    divisor,%g0,V           ! movcc divisor,V
+       te      2                       ! if divisor = 0
+       mov     dividend,R
+       mov     0,Q
+       sethi   %hi(1<<(WORDSIZE-TOPBITS-1)),T
+       cmp     R,T
+       blu     not_really_big
+       mov     0,ITER
+       !
+       ! Here, the dividend is >= 2^(31-N) or so. We must be careful here,
+       ! as our usual N-at-a-shot divide step will cause overflow and havoc.
+       ! The total number of bits in the result here is N*ITER+SC, where
+       ! SC <= N.
+       ! Compute ITER in an unorthodox manner: know we need to Shift V into
+! the top decade: so don't even bother to compare to R.
+1:
+       cmp     V,T
+       bgeu    3f
+       mov     1,SC
+       sll     V,N,V
+       b       1b
+       inc     ITER
+! Now compute SC
+2:     addcc   V,V,V
+       bcc     not_too_big
+       add     SC,1,SC
+               ! We're here if the divisor overflowed when Shifting.
+               ! This means that R has the high-order bit set.
+               ! Restore V and subtract from R.
+               sll     T,TOPBITS,T     ! high order bit
+               srl     V,1,V           ! rest of V
+               add     V,T,V
+               b       do_single_div
+               dec     SC
+not_too_big:
+3:     cmp     V,R
+       blu     2b
+       nop
+       be      do_single_div
+       nop
+! V > R: went too far: back up 1 step
+!     srl V,1,V
+!      dec SC
+! do single-bit divide steps
+!
+! We have to be careful here. We know that R >= V, so we can do the
+! first divide step without thinking. BUT, the others are conditional,
+! and are only done if R >= 0. Because both R and V may have the high-
+! order bit set in the first step, just falling into the regular
+! division loop will mess up the first time around.
+! So we unroll slightly...
+do_single_div:
+       deccc   SC
+       bl      end_regular_divide
+       nop
+       sub     R,V,R
+       mov     1,Q
+       b       end_single_divloop
+       nop
+single_divloop:
+       sll     Q,1,Q
+       bl      1f
+       srl     V,1,V
+       ! R >= 0
+               sub     R,V,R
+               b       2f
+               inc     Q
+       1:      ! R < 0
+               add     R,V,R
+               dec     Q
+       2:
+       end_single_divloop:
+               deccc   SC
+               bge     single_divloop
+               tst     R
+               b       end_regular_divide
+               nop
+
+not_really_big:
+1:
+       sll     V,N,V
+       cmp     V,R
+       bleu    1b
+       inccc   ITER
+       be      got_result
+       dec     ITER
+do_regular_divide:
+       ! Do the main division iteration
+       tst     R
+       ! Fall through into divide loop
+divloop:
+       sll     Q,N,Q
+       DEVELOP_QUOTIENT_BITS( 1, 0 )
+end_regular_divide:
+       deccc   ITER
+       bge     divloop
+       tst     R
+       bge     got_result
+       nop
+       ! non-restoring fixup here
+ifelse( ANSWER, `quotient',
+`      dec     Q
+',`    add     R,divisor,R
+')
+
+got_result:
+       tst     SIGN
+       bge     1f
+       nop                             ! delay slot; nothing to restore in a leaf routine
+       ! answer < 0
+       retl                            ! leaf-routine return
+ifelse( ANSWER, `quotient',
+`      neg     Q,%o0                   ! quotient <- -Q
+',`    neg     R,%o0                   ! remainder <- -R
+')
+1:
+       retl                            ! leaf-routine return
+ifelse( ANSWER, `quotient',
+`      mov     Q,%o0                   ! quotient <- Q
+',`    mov     R,%o0                   ! remainder <- R
+')

Added: head/contrib/compiler-rt/lib/sparc64/divsi3.S
==============================================================================
--- /dev/null   00:00:00 1970   (empty, because file is newly added)
+++ head/contrib/compiler-rt/lib/sparc64/divsi3.S       Thu Jan 12 17:55:22 2012        (r230025)
@@ -0,0 +1,333 @@
+/*
+ * This m4 code has been taken from The SPARC Architecture Manual Version 8.
+ */
+/*
+ * Division/Remainder
+ *
+ * Input is:
+ *   dividend -- the thing being divided
+ *   divisor -- how many ways to divide it
+ * Important parameters:
+ *   N -- how many bits per iteration we try to get
+ *        as our current guess:
+ *   WORDSIZE -- how many bits altogether we're talking about:
+ *               obviously:
+ * A derived constant:
+ *   TOPBITS -- how many bits are in the top "decade" of a number:
+ *
+ * Important variables are:
+ *   Q -- the partial quotient under development -- initially 0
+ *   R -- the remainder so far -- initially == the dividend
+ *   ITER -- number of iterations of the main division loop which will
+ *           be required. Equal to CEIL( lg2(quotient)/4 )
+ *           Note that this is log_base_(2^4) of the quotient.
+ *   V -- the current comparand -- initially divisor*2^(ITER*4-1)
+ * Cost:
+ *   current estimate for non-large dividend is
+ *        CEIL( lg2(quotient) / 4 ) x ( 10 + 7*4/2 ) + C
+ *   a large dividend is one greater than 2^(31-4 ) and takes a
+ *   different path, as the upper bits of the quotient must be developed
+ *   one bit at a time.
+ *   This uses the m4 and cpp macro preprocessors.
+ */
+/*
+ * This is the recursive definition of how we develop quotient digits.
+ * It takes three important parameters:
+ *   $1 -- the current depth, 1<=$1<=4
+ *   $2 -- the current accumulation of quotient bits
+ *   4 -- max depth
+ * We add a new bit to $2 and either recurse or insert the bits in the quotient.
+ * Dynamic input:
+ *   %o3 -- current remainder
+ *   %o2 -- current quotient
+ *   %o5 -- current comparand
+ *   cc -- set on current value of %o3
+ * Dynamic output:
+ *   %o3', %o2', %o5', cc'
+ */
+#include "../assembly.h"
+.text
+       .align 4
+DEFINE_COMPILERRT_FUNCTION(__udivsi3)
+       ! leaf routine: arguments are used in place, no register window is allocated
+       b       divide
+       mov     0,%g3                   ! result always nonnegative
+DEFINE_COMPILERRT_FUNCTION(__divsi3)
+       ! leaf routine: arguments are used in place, no register window is allocated
+       orcc    %o1,%o0,%g0     ! are either %o0 or %o1 negative
+       bge     divide                  ! if not, skip this junk
+       xor     %o1,%o0,%g3     ! record sign of result in sign of %g3
+       tst     %o1
+       bge     2f
+       tst     %o0
+       ! %o1 < 0
+       bge     divide
+       neg     %o1                     ! delay slot: executed on both paths
+       2:
+       ! %o0 < 0
+       neg     %o0
+       ! FALL THROUGH
+divide:
+       ! Compute size of quotient, scale comparand.
+       orcc    %o1,%g0,%o5             ! movcc %o1,%o5
+       te      2                       ! if %o1 = 0
+       mov     %o0,%o3
+       mov     0,%o2
+       sethi   %hi(1<<(32-4 -1)),%g1
+       cmp     %o3,%g1
+       blu     not_really_big
+       mov     0,%o4
+       !
+       ! Here, the %o0 is >= 2^(31-4) or so. We must be careful here,
+       ! as our usual 4-at-a-shot divide step will cause overflow and havoc.
+       ! The total number of bits in the result here is 4*%o4+%g2, where
+       ! %g2 <= 4.
+       ! Compute %o4 in an unorthodox manner: know we need to Shift %o5 into
+! the top decade: so don't even bother to compare to %o3.
+1:
+       cmp     %o5,%g1
+       bgeu    3f
+       mov     1,%g2
+       sll     %o5,4,%o5
+       b       1b
+       inc     %o4
+! Now compute %g2
+2:     addcc   %o5,%o5,%o5
+       bcc     not_too_big
+       add     %g2,1,%g2
+               ! We're here if the %o1 overflowed when Shifting.
+               ! This means that %o3 has the high-order bit set.
+               ! Restore %o5 and subtract from %o3.
+               sll     %g1,4 ,%g1      ! high order bit
+               srl     %o5,1,%o5               ! rest of %o5
+               add     %o5,%g1,%o5
+               b       do_single_div
+               dec     %g2
+not_too_big:
+3:     cmp     %o5,%o3
+       blu     2b
+       nop
+       be      do_single_div
+       nop
+! %o5 > %o3: went too far: back up 1 step
+!     srl %o5,1,%o5
+!      dec %g2
+! do single-bit divide steps
+!
+! We have to be careful here. We know that %o3 >= %o5, so we can do the
+! first divide step without thinking. BUT, the others are conditional,
+! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high-
+! order bit set in the first step, just falling into the regular
+! division loop will mess up the first time around.
+! So we unroll slightly...
+do_single_div:
+       deccc   %g2
+       bl      end_regular_divide
+       nop
+       sub     %o3,%o5,%o3
+       mov     1,%o2
+       b       end_single_divloop
+       nop
+single_divloop:
+       sll     %o2,1,%o2
+       bl      1f
+       srl     %o5,1,%o5
+       ! %o3 >= 0
+               sub     %o3,%o5,%o3
+               b       2f
+               inc     %o2
+       1:      ! %o3 < 0
+               add     %o3,%o5,%o3
+               dec     %o2
+       2:
+       end_single_divloop:
+               deccc   %g2
+               bge     single_divloop
+               tst     %o3
+               b       end_regular_divide
+               nop
+not_really_big:
+1:
+       sll     %o5,4,%o5
+       cmp     %o5,%o3
+       bleu    1b
+       inccc   %o4
+       be      got_result
+       dec     %o4
+do_regular_divide:
+       ! Do the main division iteration
+       tst     %o3
+       ! Fall through into divide loop
+divloop:
+       sll     %o2,4,%o2
+               !depth 1, accumulated bits 0
+       bl      L.1.16
+       srl     %o5,1,%o5
+       ! remainder is nonnegative
+       subcc   %o3,%o5,%o3
+               !depth 2, accumulated bits 1
+       bl      L.2.17
+       srl     %o5,1,%o5
+       ! remainder is nonnegative
+       subcc   %o3,%o5,%o3
+               !depth 3, accumulated bits 3
+       bl      L.3.19
+       srl     %o5,1,%o5
+       ! remainder is nonnegative
+       subcc   %o3,%o5,%o3
+               !depth 4, accumulated bits 7
+       bl      L.4.23
+       srl     %o5,1,%o5
+       ! remainder is nonnegative
+       subcc   %o3,%o5,%o3
+               b       9f
+               add     %o2, (7*2+1), %o2
+L.4.23:
+       ! remainder is negative
+       addcc   %o3,%o5,%o3
+               b       9f
+               add     %o2, (7*2-1), %o2
+L.3.19:
+       ! remainder is negative
+       addcc   %o3,%o5,%o3
+               !depth 4, accumulated bits 5
+       bl      L.4.21
+       srl     %o5,1,%o5
+       ! remainder is nonnegative
+       subcc   %o3,%o5,%o3
+               b       9f
+               add     %o2, (5*2+1), %o2
+L.4.21:
+       ! remainder is negative
+       addcc   %o3,%o5,%o3
+               b       9f
+               add     %o2, (5*2-1), %o2
+L.2.17:
+       ! remainder is negative
+       addcc   %o3,%o5,%o3
+               !depth 3, accumulated bits 1
+       bl      L.3.17
+       srl     %o5,1,%o5
+       ! remainder is nonnegative
+       subcc   %o3,%o5,%o3
+               !depth 4, accumulated bits 3
+       bl      L.4.19
+       srl     %o5,1,%o5
+       ! remainder is nonnegative
+       subcc   %o3,%o5,%o3
+               b       9f
+               add     %o2, (3*2+1), %o2
+L.4.19:
+       ! remainder is negative
+       addcc   %o3,%o5,%o3
+               b       9f
+               add     %o2, (3*2-1), %o2
+L.3.17:
+       ! remainder is negative
+       addcc   %o3,%o5,%o3
+               !depth 4, accumulated bits 1
+       bl      L.4.17
+       srl     %o5,1,%o5
+       ! remainder is nonnegative
+       subcc   %o3,%o5,%o3
+               b       9f
+               add     %o2, (1*2+1), %o2
+L.4.17:
+       ! remainder is negative
+       addcc   %o3,%o5,%o3
+               b       9f
+               add     %o2, (1*2-1), %o2
+L.1.16:
+       ! remainder is negative
+       addcc   %o3,%o5,%o3
+               !depth 2, accumulated bits -1
+       bl      L.2.15
+       srl     %o5,1,%o5
+       ! remainder is nonnegative
+       subcc   %o3,%o5,%o3
+               !depth 3, accumulated bits -1
+       bl      L.3.15
+       srl     %o5,1,%o5
+       ! remainder is nonnegative
+       subcc   %o3,%o5,%o3
+               !depth 4, accumulated bits -1
+       bl      L.4.15
+       srl     %o5,1,%o5
+       ! remainder is nonnegative
+       subcc   %o3,%o5,%o3
+               b       9f
+               add     %o2, (-1*2+1), %o2
+L.4.15:
+       ! remainder is negative
+       addcc   %o3,%o5,%o3
+               b       9f
+               add     %o2, (-1*2-1), %o2
+L.3.15:
+       ! remainder is negative
+       addcc   %o3,%o5,%o3
+               !depth 4, accumulated bits -3
+       bl      L.4.13
+       srl     %o5,1,%o5
+       ! remainder is nonnegative
+       subcc   %o3,%o5,%o3
+               b       9f
+               add     %o2, (-3*2+1), %o2
+L.4.13:
+       ! remainder is negative
+       addcc   %o3,%o5,%o3
+               b       9f
+               add     %o2, (-3*2-1), %o2
+L.2.15:
+       ! remainder is negative
+       addcc   %o3,%o5,%o3
+               !depth 3, accumulated bits -3
+       bl      L.3.13
+       srl     %o5,1,%o5
+       ! remainder is nonnegative
+       subcc   %o3,%o5,%o3
+               !depth 4, accumulated bits -5
+       bl      L.4.11
+       srl     %o5,1,%o5
+       ! remainder is nonnegative
+       subcc   %o3,%o5,%o3
+               b       9f
+               add     %o2, (-5*2+1), %o2
+L.4.11:
+       ! remainder is negative
+       addcc   %o3,%o5,%o3
+               b       9f
+               add     %o2, (-5*2-1), %o2
+L.3.13:
+       ! remainder is negative
+       addcc   %o3,%o5,%o3
+               !depth 4, accumulated bits -7
+       bl      L.4.9
+       srl     %o5,1,%o5
+       ! remainder is nonnegative
+       subcc   %o3,%o5,%o3
+               b       9f
+               add     %o2, (-7*2+1), %o2
+L.4.9:
+       ! remainder is negative
+       addcc   %o3,%o5,%o3
+               b       9f
+               add     %o2, (-7*2-1), %o2
+       9:
+end_regular_divide:
+       deccc   %o4
+       bge     divloop
+       tst     %o3
+       bge     got_result
+       nop
+       ! non-restoring fixup here
+       dec     %o2
+got_result:
+       tst     %g3
+       bge     1f
+       nop                             ! delay slot; nothing to restore in a leaf routine
+       ! answer < 0
+       retl                            ! leaf-routine return
+       neg     %o2,%o0                 ! quotient <- -%o2
+1:
+       retl                            ! leaf-routine return
+       mov     %o2,%o0                 ! quotient <- %o2

Added: head/contrib/compiler-rt/lib/sparc64/generate.sh
==============================================================================
--- /dev/null   00:00:00 1970   (empty, because file is newly added)
+++ head/contrib/compiler-rt/lib/sparc64/generate.sh    Thu Jan 12 17:55:22 2012        (r230025)
@@ -0,0 +1,6 @@
+#!/bin/sh
+# Regenerate the checked-in assembly from divmod.m4; run from this directory.
+m4 divmod.m4 | sed -e 's/[[:space:]]*$//' | grep -v '^$' > modsi3.S
+m4 -DANSWER=quotient divmod.m4 | sed -e 's/[[:space:]]*$//' | grep -v '^$' > divsi3.S
+echo '! This file intentionally left blank' > umodsi3.S  # __umodsi3 is emitted into modsi3.S
+echo '! This file intentionally left blank' > udivsi3.S  # __udivsi3 is emitted into divsi3.S

Added: head/contrib/compiler-rt/lib/sparc64/modsi3.S
==============================================================================
--- /dev/null   00:00:00 1970   (empty, because file is newly added)
+++ head/contrib/compiler-rt/lib/sparc64/modsi3.S       Thu Jan 12 17:55:22 2012        (r230025)
@@ -0,0 +1,333 @@
+/*
+ * This m4 code has been taken from The SPARC Architecture Manual Version 8.
+ */
+/*
+ * Division/Remainder
+ *
+ * Input is:
+ *   dividend -- the thing being divided
+ *   divisor -- how many ways to divide it
+ * Important parameters:
+ *   N -- how many bits per iteration we try to get
+ *        as our current guess:
+ *   WORDSIZE -- how many bits altogether we're talking about:
+ *               obviously:
+ * A derived constant:
+ *   TOPBITS -- how many bits are in the top "decade" of a number:
+ *
+ * Important variables are:
+ *   Q -- the partial quotient under development -- initially 0
+ *   R -- the remainder so far -- initially == the dividend
+ *   ITER -- number of iterations of the main division loop which will
+ *           be required. Equal to CEIL( lg2(quotient)/4 )
+ *           Note that this is log_base_(2^4) of the quotient.
+ *   V -- the current comparand -- initially divisor*2^(ITER*4-1)
+ * Cost:
+ *   current estimate for non-large dividend is
+ *        CEIL( lg2(quotient) / 4 ) x ( 10 + 7*4/2 ) + C
+ *   a large dividend is one greater than 2^(31-4 ) and takes a
+ *   different path, as the upper bits of the quotient must be developed
+ *   one bit at a time.
+ *   This uses the m4 and cpp macro preprocessors.
+ */
+/*
+ * This is the recursive definition of how we develop quotient digits.
+ * It takes three important parameters:
+ *   $1 -- the current depth, 1<=$1<=4
+ *   $2 -- the current accumulation of quotient bits
+ *   4 -- max depth
+ * We add a new bit to $2 and either recurse or insert the bits in the quotient.
+ * Dynamic input:
+ *   %o3 -- current remainder
+ *   %o2 -- current quotient
+ *   %o5 -- current comparand
+ *   cc -- set on current value of %o3
+ * Dynamic output:
+ *   %o3', %o2', %o5', cc'
+ */
+#include "../assembly.h"
+.text
+       .align 4
+DEFINE_COMPILERRT_FUNCTION(__umodsi3)
+       ! leaf routine: arguments are used in place, no register window is allocated
+       b       divide
+       mov     0,%g3                   ! result always nonnegative
+DEFINE_COMPILERRT_FUNCTION(__modsi3)
+       ! leaf routine: arguments are used in place, no register window is allocated
+       orcc    %o1,%o0,%g0     ! are either %o0 or %o1 negative
+       bge     divide                  ! if not, skip this junk
+       mov     %o0,%g3         ! record sign of result in sign of %g3
+       tst     %o1
+       bge     2f
+       tst     %o0
+       ! %o1 < 0
+       bge     divide
+       neg     %o1                     ! delay slot: executed on both paths
+       2:
+       ! %o0 < 0
+       neg     %o0
+       ! FALL THROUGH
+divide:
+       ! Compute size of quotient, scale comparand.
+       orcc    %o1,%g0,%o5             ! movcc %o1,%o5
+       te      2                       ! if %o1 = 0
+       mov     %o0,%o3
+       mov     0,%o2
+       sethi   %hi(1<<(32-4 -1)),%g1
+       cmp     %o3,%g1
+       blu     not_really_big
+       mov     0,%o4
+       !
+       ! Here, the %o0 is >= 2^(31-4) or so. We must be careful here,
+       ! as our usual 4-at-a-shot divide step will cause overflow and havoc.
+       ! The total number of bits in the result here is 4*%o4+%g2, where
+       ! %g2 <= 4.
+       ! Compute %o4 in an unorthodox manner: know we need to Shift %o5 into
+! the top decade: so don't even bother to compare to %o3.
+1:
+       cmp     %o5,%g1
+       bgeu    3f
+       mov     1,%g2
+       sll     %o5,4,%o5
+       b       1b
+       inc     %o4
+! Now compute %g2
+2:     addcc   %o5,%o5,%o5
+       bcc     not_too_big
+       add     %g2,1,%g2
+               ! We're here if the %o1 overflowed when Shifting.
+               ! This means that %o3 has the high-order bit set.
+               ! Restore %o5 and subtract from %o3.
+               sll     %g1,4 ,%g1      ! high order bit
+               srl     %o5,1,%o5               ! rest of %o5
+               add     %o5,%g1,%o5
+               b       do_single_div
+               dec     %g2
+not_too_big:
+3:     cmp     %o5,%o3
+       blu     2b
+       nop
+       be      do_single_div
+       nop
+! %o5 > %o3: went too far: back up 1 step
+!     srl %o5,1,%o5
+!      dec %g2
+! do single-bit divide steps
+!
+! We have to be careful here. We know that %o3 >= %o5, so we can do the
+! first divide step without thinking. BUT, the others are conditional,
+! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high-
+! order bit set in the first step, just falling into the regular
+! division loop will mess up the first time around.
+! So we unroll slightly...
+do_single_div:
+       deccc   %g2
+       bl      end_regular_divide
+       nop
+       sub     %o3,%o5,%o3
+       mov     1,%o2
+       b       end_single_divloop
+       nop
+single_divloop:
+       sll     %o2,1,%o2
+       bl      1f
+       srl     %o5,1,%o5
+       ! %o3 >= 0
+               sub     %o3,%o5,%o3
+               b       2f
+               inc     %o2
+       1:      ! %o3 < 0
+               add     %o3,%o5,%o3
+               dec     %o2
+       2:
+       end_single_divloop:
+               deccc   %g2
+               bge     single_divloop
+               tst     %o3
+               b       end_regular_divide
+               nop
+not_really_big:
+1:
+       sll     %o5,4,%o5
+       cmp     %o5,%o3
+       bleu    1b
+       inccc   %o4
+       be      got_result
+       dec     %o4
+do_regular_divide:
+       ! Do the main division iteration
+       tst     %o3
+       ! Fall through into divide loop
+divloop:
+       sll     %o2,4,%o2
+               !depth 1, accumulated bits 0
+       bl      L.1.16
+       srl     %o5,1,%o5
+       ! remainder is nonnegative
+       subcc   %o3,%o5,%o3
+               !depth 2, accumulated bits 1
+       bl      L.2.17
+       srl     %o5,1,%o5
+       ! remainder is nonnegative
+       subcc   %o3,%o5,%o3
+               !depth 3, accumulated bits 3
+       bl      L.3.19
+       srl     %o5,1,%o5
+       ! remainder is nonnegative
+       subcc   %o3,%o5,%o3
+               !depth 4, accumulated bits 7
+       bl      L.4.23
+       srl     %o5,1,%o5
+       ! remainder is nonnegative
+       subcc   %o3,%o5,%o3
+               b       9f
+               add     %o2, (7*2+1), %o2
+L.4.23:
+       ! remainder is negative
+       addcc   %o3,%o5,%o3
+               b       9f
+               add     %o2, (7*2-1), %o2
+L.3.19:
+       ! remainder is negative
+       addcc   %o3,%o5,%o3
+               !depth 4, accumulated bits 5
+       bl      L.4.21
+       srl     %o5,1,%o5
+       ! remainder is nonnegative
+       subcc   %o3,%o5,%o3
+               b       9f
+               add     %o2, (5*2+1), %o2
+L.4.21:
+       ! remainder is negative
+       addcc   %o3,%o5,%o3
+               b       9f
+               add     %o2, (5*2-1), %o2
+L.2.17:
+       ! remainder is negative
+       addcc   %o3,%o5,%o3
+               !depth 3, accumulated bits 1
+       bl      L.3.17
+       srl     %o5,1,%o5
+       ! remainder is nonnegative
+       subcc   %o3,%o5,%o3
+               !depth 4, accumulated bits 3
+       bl      L.4.19
+       srl     %o5,1,%o5
+       ! remainder is nonnegative
+       subcc   %o3,%o5,%o3
+               b       9f
+               add     %o2, (3*2+1), %o2
+L.4.19:
+       ! remainder is negative
+       addcc   %o3,%o5,%o3
+               b       9f
+               add     %o2, (3*2-1), %o2
+L.3.17:
+       ! remainder is negative
+       addcc   %o3,%o5,%o3
+               !depth 4, accumulated bits 1
+       bl      L.4.17
+       srl     %o5,1,%o5
+       ! remainder is nonnegative
+       subcc   %o3,%o5,%o3
+               b       9f
+               add     %o2, (1*2+1), %o2
+L.4.17:
+       ! remainder is negative
+       addcc   %o3,%o5,%o3
+               b       9f
+               add     %o2, (1*2-1), %o2
+L.1.16:
+       ! remainder is negative
+       addcc   %o3,%o5,%o3
+               !depth 2, accumulated bits -1
+       bl      L.2.15
+       srl     %o5,1,%o5
+       ! remainder is nonnegative
+       subcc   %o3,%o5,%o3
+               !depth 3, accumulated bits -1
+       bl      L.3.15
+       srl     %o5,1,%o5
+       ! remainder is nonnegative
+       subcc   %o3,%o5,%o3
+               !depth 4, accumulated bits -1
+       bl      L.4.15
+       srl     %o5,1,%o5
+       ! remainder is nonnegative
+       subcc   %o3,%o5,%o3
+               b       9f
+               add     %o2, (-1*2+1), %o2
+L.4.15:
+       ! remainder is negative
+       addcc   %o3,%o5,%o3
+               b       9f
+               add     %o2, (-1*2-1), %o2
+L.3.15:
+       ! remainder is negative
+       addcc   %o3,%o5,%o3
+               !depth 4, accumulated bits -3
+       bl      L.4.13
+       srl     %o5,1,%o5
+       ! remainder is nonnegative
+       subcc   %o3,%o5,%o3
+               b       9f
+               add     %o2, (-3*2+1), %o2
+L.4.13:
+       ! remainder is negative
+       addcc   %o3,%o5,%o3
+               b       9f
+               add     %o2, (-3*2-1), %o2
+L.2.15:
+       ! remainder is negative
+       addcc   %o3,%o5,%o3
+               !depth 3, accumulated bits -3
+       bl      L.3.13
+       srl     %o5,1,%o5
+       ! remainder is nonnegative
+       subcc   %o3,%o5,%o3
+               !depth 4, accumulated bits -5
+       bl      L.4.11
+       srl     %o5,1,%o5
+       ! remainder is nonnegative
+       subcc   %o3,%o5,%o3
+               b       9f
+               add     %o2, (-5*2+1), %o2
+L.4.11:
+       ! remainder is negative
+       addcc   %o3,%o5,%o3
+               b       9f
+               add     %o2, (-5*2-1), %o2
+L.3.13:
+       ! remainder is negative
+       addcc   %o3,%o5,%o3
+               !depth 4, accumulated bits -7
+       bl      L.4.9
+       srl     %o5,1,%o5
+       ! remainder is nonnegative
+       subcc   %o3,%o5,%o3
+               b       9f
+               add     %o2, (-7*2+1), %o2
+L.4.9:
+       ! remainder is negative
+       addcc   %o3,%o5,%o3
+               b       9f
+               add     %o2, (-7*2-1), %o2
+       9:
+end_regular_divide:
+       deccc   %o4
+       bge     divloop
+       tst     %o3
+       bge     got_result
+       nop
+       ! non-restoring fixup here
+       add     %o3,%o1,%o3
+got_result:
+       tst     %g3
+       bge     1f
+       nop                             ! delay slot; nothing to restore in a leaf routine
+       ! answer < 0
+       retl                            ! leaf-routine return
+       neg     %o3,%o0                 ! remainder <- -%o3
+1:
+       retl                            ! leaf-routine return
+       mov     %o3,%o0                 ! remainder <- %o3

Added: head/contrib/compiler-rt/lib/sparc64/udivsi3.S
==============================================================================
--- /dev/null   00:00:00 1970   (empty, because file is newly added)
+++ head/contrib/compiler-rt/lib/sparc64/udivsi3.S      Thu Jan 12 17:55:22 2012        (r230025)
@@ -0,0 +1 @@
+! This file intentionally left blank

Added: head/contrib/compiler-rt/lib/sparc64/umodsi3.S
==============================================================================
--- /dev/null   00:00:00 1970   (empty, because file is newly added)
+++ head/contrib/compiler-rt/lib/sparc64/umodsi3.S      Thu Jan 12 17:55:22 2012        (r230025)
@@ -0,0 +1 @@
+! This file intentionally left blank
_______________________________________________
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"

Reply via email to