Re: Alpha/Linux FP denormal processing

Richard Henderson Tue, 19 Sep 2000 19:26:35 -0700

On Tue, Sep 19, 2000 at 09:52:15AM -0700, Richard Henderson wrote:
> > Instead of *= 0.5, try /= 2.0
> 
> Yes indeed you've found a bug in the kernel's FP emulation.
> I'll see about fixing it.

Rather than fix the old udiv128 function, which was trying to do
128/128 bit division, I've pulled in a subroutine from libgcc
that does 128/64 bit division, which is all we need here.

So it should be a bit faster than the old routine, for whatever
that's worth to someone who is already trapping into the kernel...

r~

diff -ruNp linux/arch/alpha/math-emu/Makefile 2.4.0-9-4/arch/alpha/math-emu/Makefile
--- linux/arch/alpha/math-emu/Makefile  Thu Dec  2 15:28:54 1999
+++ 2.4.0-9-4/arch/alpha/math-emu/Makefile      Tue Sep 19 18:45:17 2000
@@ -8,7 +8,7 @@
 # Note 2! The CFLAGS definition is now in the main makefile...
 
 O_TARGET := math-emu.o
-O_OBJS   := math.o
+O_OBJS   := math.o qrnnd.o
 CFLAGS += -I. -I$(TOPDIR)/include/math-emu -w
 
 ifeq ($(CONFIG_MATHEMU),m)
diff -ruNp linux/arch/alpha/math-emu/math.c 2.4.0-9-4/arch/alpha/math-emu/math.c
--- linux/arch/alpha/math-emu/math.c    Tue Mar 21 10:47:06 2000
+++ 2.4.0-9-4/arch/alpha/math-emu/math.c        Tue Sep 19 18:26:38 2000
@@ -84,66 +84,6 @@ void cleanup_module(void)
 
 #endif /* MODULE */
 
-/* For 128-bit division.  */
-
-void
-udiv128(unsigned long divisor_f0, unsigned long divisor_f1,
-       unsigned long dividend_f0, unsigned long dividend_f1,
-       unsigned long *quot, unsigned long *remd)
-{
-       _FP_FRAC_DECL_2(quo);
-       _FP_FRAC_DECL_2(rem);
-       _FP_FRAC_DECL_2(tmp);
-       unsigned long i, num_bits, bit;
-
-       _FP_FRAC_SET_2(rem, _FP_ZEROFRAC_2);
-       _FP_FRAC_SET_2(quo, _FP_ZEROFRAC_2);
-
-       if (_FP_FRAC_ZEROP_2(divisor))
-               goto out;
-
-       if (_FP_FRAC_GT_2(divisor, dividend)) {
-               _FP_FRAC_COPY_2(rem, dividend);
-               goto out;
-       }
-
-       if (_FP_FRAC_EQ_2(divisor, dividend)) {
-               __FP_FRAC_SET_2(quo, 0, 1);
-               goto out;
-       }
-
-       num_bits = 128;
-       while (1) {
-               bit = _FP_FRAC_NEGP_2(dividend);
-               _FP_FRAC_COPY_2(tmp, rem);
-               _FP_FRAC_SLL_2(tmp, 1);
-               _FP_FRAC_LOW_2(tmp) |= bit;
-               if (! _FP_FRAC_GE_2(tmp, divisor))
-                       break;
-               _FP_FRAC_COPY_2(rem, tmp);
-               _FP_FRAC_SLL_2(dividend, 1);
-               num_bits--;
-       }
-
-       for (i = 0; i < num_bits; i++) {
-               bit = _FP_FRAC_NEGP_2(dividend);
-               _FP_FRAC_SLL_2(rem, 1);
-               _FP_FRAC_LOW_2(rem) |= bit;
-               _FP_FRAC_SUB_2(tmp, rem, divisor);
-               bit = _FP_FRAC_NEGP_2(tmp);
-               _FP_FRAC_SLL_2(dividend, 1);
-               _FP_FRAC_SLL_2(quo, 1);
-               if (!bit) {
-                       _FP_FRAC_LOW_2(quo) |= 1;
-                       _FP_FRAC_COPY_2(rem, tmp);
-               }
-       }
-
-out:
-       *quot = quo_f1;
-       *remd = rem_f1;
-       return;
-}
 
 /*
  * Emulate the floating point instruction at address PC.  Returns 0 if
diff -ruNp linux/arch/alpha/math-emu/qrnnd.S 2.4.0-9-4/arch/alpha/math-emu/qrnnd.S
--- linux/arch/alpha/math-emu/qrnnd.S   Wed Dec 31 16:00:00 1969
+++ 2.4.0-9-4/arch/alpha/math-emu/qrnnd.S       Tue Sep 19 18:41:26 2000
@@ -0,0 +1,163 @@
+ # Alpha 21064 __udiv_qrnnd
+ # Copyright (C) 1992, 1994, 1995, 2000 Free Software Foundation, Inc.
+
+ # This file is part of GCC.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU General Public License as published by
+ # the Free Software Foundation; either version 2 of the License, or (at your
+ # option) any later version.
+
+ # In addition to the permissions in the GNU General Public License, the
+ # Free Software Foundation gives you unlimited permission to link the
+ # compiled version of this file with other programs, and to distribute
+ # those programs without any restriction coming from the use of this
+ # file.  (The General Public License restrictions do apply in other
+ # respects; for example, they cover modification of the file, and
+ # distribution when not linked into another program.)
+
+ # This file is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU General Public License
+ # along with GCC; see the file COPYING.  If not, write to the 
+ # Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+        .set noreorder
+        .set noat
+
+       .text
+
+       .globl __udiv_qrnnd     /* 128-by-64-bit unsigned divide: quotient returned in $0, remainder stored through rem_ptr */
+       .ent __udiv_qrnnd
+__udiv_qrnnd:  /* NOTE(review): nothing here checks d != 0 or n1 < d; presumably the caller guarantees both -- confirm */
+       .frame $30,0,$26,0
+       .prologue 0
+
+#define cnt    $2      /* loop counter */
+#define tmp    $3      /* scratch */
+#define rem_ptr        $16     /* address the remainder is stored to */
+#define n1     $17     /* high word of the dividend; holds the running remainder */
+#define n0     $18     /* low word of the dividend; quotient bits are shifted in at bit 0 */
+#define d      $19     /* divisor */
+#define qb     $20     /* quotient bit produced by the current step */
+#define AT     $at     /* second scratch register, used only on the $Odd path */
+
+       ldiq    cnt,16          /* 16 passes x 4 unrolled steps = 64 shift-subtract steps */
+       blt     d,$largedivisor /* d negative as a signed value, i.e. top bit set: use the halved-operand path */
+
+$loop1:        cmplt   n0,0,tmp        /* step 1: tmp = top bit of n0 */
+       addq    n1,n1,n1        /* n1 <<= 1 */
+       bis     n1,tmp,n1       /* shift n0's old top bit into bit 0 of n1 */
+       addq    n0,n0,n0        /* n0 <<= 1, freeing bit 0 for the quotient bit */
+       cmpule  d,n1,qb         /* qb = (d <= n1), unsigned */
+       subq    n1,d,tmp        /* tmp = n1 - d */
+       cmovne  qb,tmp,n1       /* keep the difference only when qb is set */
+       bis     n0,qb,n0        /* record the quotient bit in n0 */
+       cmplt   n0,0,tmp        /* step 2: same sequence as step 1 */
+       addq    n1,n1,n1
+       bis     n1,tmp,n1
+       addq    n0,n0,n0
+       cmpule  d,n1,qb
+       subq    n1,d,tmp
+       cmovne  qb,tmp,n1
+       bis     n0,qb,n0
+       cmplt   n0,0,tmp        /* step 3: same sequence as step 1 */
+       addq    n1,n1,n1
+       bis     n1,tmp,n1
+       addq    n0,n0,n0
+       cmpule  d,n1,qb
+       subq    n1,d,tmp
+       cmovne  qb,tmp,n1
+       bis     n0,qb,n0
+       cmplt   n0,0,tmp        /* step 4: same sequence as step 1 */
+       addq    n1,n1,n1
+       bis     n1,tmp,n1
+       addq    n0,n0,n0
+       cmpule  d,n1,qb
+       subq    n1,d,tmp
+       cmovne  qb,tmp,n1
+       bis     n0,qb,n0
+       subq    cnt,1,cnt
+       bgt     cnt,$loop1      /* repeat until cnt reaches 0 */
+       stq     n1,0(rem_ptr)   /* *rem_ptr = n1 (remainder) */
+       bis     $31,n0,$0       /* return value $0 = n0 (quotient) */
+       ret     $31,($26),1
+
+$largedivisor:
+       and     n0,1,$4         /* $4 = low bit of the dividend, which the shift below drops */
+
+       srl     n0,1,n0         /* shift the two-word dividend n1:n0 right by one bit... */
+       sll     n1,63,tmp
+       or      tmp,n0,n0       /* ...moving n1's low bit into n0's top bit */
+       srl     n1,1,n1
+
+       and     d,1,$6          /* $6 = 1 when d is odd */
+       srl     d,1,$5
+       addq    $5,$6,$5        /* $5 = (d >> 1) + (d & 1), i.e. d/2 rounded up */
+
+$loop2:        cmplt   n0,0,tmp        /* step 1: same shift-subtract step as $loop1, dividing by $5 instead of d */
+       addq    n1,n1,n1
+       bis     n1,tmp,n1
+       addq    n0,n0,n0
+       cmpule  $5,n1,qb
+       subq    n1,$5,tmp
+       cmovne  qb,tmp,n1
+       bis     n0,qb,n0
+       cmplt   n0,0,tmp        /* step 2 */
+       addq    n1,n1,n1
+       bis     n1,tmp,n1
+       addq    n0,n0,n0
+       cmpule  $5,n1,qb
+       subq    n1,$5,tmp
+       cmovne  qb,tmp,n1
+       bis     n0,qb,n0
+       cmplt   n0,0,tmp        /* step 3 */
+       addq    n1,n1,n1
+       bis     n1,tmp,n1
+       addq    n0,n0,n0
+       cmpule  $5,n1,qb
+       subq    n1,$5,tmp
+       cmovne  qb,tmp,n1
+       bis     n0,qb,n0
+       cmplt   n0,0,tmp        /* step 4 */
+       addq    n1,n1,n1
+       bis     n1,tmp,n1
+       addq    n0,n0,n0
+       cmpule  $5,n1,qb
+       subq    n1,$5,tmp
+       cmovne  qb,tmp,n1
+       bis     n0,qb,n0
+       subq    cnt,1,cnt
+       bgt     cnt,$loop2      /* repeat until cnt reaches 0 */
+
+       addq    n1,n1,n1        /* double the remainder left by the halved division... */
+       addq    $4,n1,n1        /* ...and add back the dividend bit saved in $4 */
+       bne     $6,$Odd         /* odd d: n0 and n1 are corrected at $Odd before returning */
+       stq     n1,0(rem_ptr)   /* even d: store n1 as the remainder and return n0 unchanged */
+       bis     $31,n0,$0
+       ret     $31,($26),1
+
+$Odd:
+       /* q' in n0. r' in n1 */
+       addq    n1,n0,n1        /* n1 = r' + q' */
+
+       cmpult  n1,n0,tmp       # tmp := carry from addq
+       subq    n1,d,AT         /* AT = n1 - d, used only if the add carried */
+       addq    n0,tmp,n0       /* quotient += carry */
+       cmovne  tmp,AT,n1       /* on carry, n1 = n1 - d */
+
+       cmpult  n1,d,tmp        /* tmp = (n1 < d), unsigned */
+       addq    n0,1,AT
+       cmoveq  tmp,AT,n0       /* if n1 >= d: quotient += 1... */
+       subq    n1,d,AT
+       cmoveq  tmp,AT,n1       /* ...and n1 -= d */
+
+       stq     n1,0(rem_ptr)   /* *rem_ptr = n1 (remainder) */
+       bis     $31,n0,$0       /* return value $0 = n0 (quotient) */
+       ret     $31,($26),1
+
+       .end    __udiv_qrnnd
diff -ruNp linux/arch/alpha/math-emu/sfp-util.h 2.4.0-9-4/arch/alpha/math-emu/sfp-util.h
--- linux/arch/alpha/math-emu/sfp-util.h        Thu Dec  2 15:28:54 1999
+++ 2.4.0-9-4/arch/alpha/math-emu/sfp-util.h    Tue Sep 19 18:41:05 2000
@@ -17,18 +17,13 @@
           : "r" ((UDItype)(u)),                \
             "r" ((UDItype)(v)))
 
-extern void udiv128(unsigned long, unsigned long,
-                   unsigned long, unsigned long,
-                   unsigned long *,
-                   unsigned long *);
-
-#define udiv_qrnnd(q, r, n1, n0, d)            \
-  do {                                         \
-    unsigned long xr, xi;                      \
-    udiv128((n0), (n1), 0, (d), &xr, &xi);     \
-    (q) = xr;                                  \
-    (r) = xi;                                  \
+#define udiv_qrnnd(q, r, n1, n0, d) /* q = quotient, r = remainder of n1:n0 / d */ \
+  do { unsigned long __r; /* receives the remainder from the helper */ \
+    (q) = __udiv_qrnnd (&__r, (n1), (n0), (d));                        \
+    (r) = __r;                                                 \
   } while (0)
+extern unsigned long __udiv_qrnnd (unsigned long *, unsigned long, /* returns the quotient; the remainder is stored through the first argument */
+                                  unsigned long , unsigned long);
 
 #define UDIV_NEEDS_NORMALIZATION 1

Re: Alpha/Linux FP denormal processing

Reply via email to