On 24/11/13 02:14, Ian Lance Taylor wrote: > Kugan <kugan.vivekanandara...@linaro.org> writes: > >> This RFC patch series implements a simple align divisor shift dividend >> method. >> >> Regression tested on arm-none-linux-gnueabi with no issues. >> >> OK? >> >> Thanks, >> Kugan >> >> +2013-11-22 Kugan Vivekanandarajah <kug...@linaro.org> >> + >> + * libgcc/libgcc2.c (__udivmoddi4): Define new implementation when >> + HAVE_NO_HW_DIVIDE is defined, for processors without any divide >> + instructions. > > > The code looks fine to me. > > You should document HAVE_NO_HW_DIVIDE in gcc/doc/tm.texi in the Library > Calls section. The macro should probably be something like > TARGET_HAS_NO_HW_DIVIDE. > Thanks for the review. Is this OK for trunk now?
+2013-11-26 Kugan Vivekanandarajah <kug...@linaro.org> + + * libgcc/libgcc2.c (__udivmoddi4): Define new implementation when + TARGET_HAS_NO_HW_DIVIDE is defined, for processors without any divide + instructions. + +2013-11-26 Kugan Vivekanandarajah <kug...@linaro.org> + + * doc/tm.texi.in (TARGET_HAS_NO_HW_DIVIDE): Define. + * doc/tm.texi (TARGET_HAS_NO_HW_DIVIDE): Regenerate. + Thanks, Kugan
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi index 925d93f..c9697f1 100644 --- a/gcc/doc/tm.texi +++ b/gcc/doc/tm.texi @@ -5365,6 +5365,14 @@ If this macro evaluates to @code{false} the comparison functions return in @file{libgcc.a}, you do not need to define this macro. @end defmac +@defmac TARGET_HAS_NO_HW_DIVIDE +This macro should be defined if the target has no hardware divide +instructions. If this macro is defined, GCC will use an algorithm which +make use of simple logical and arithmetic operations for 64-bit +division. If the macro is not defined, GCC will use an algorithm which +make use of a 64-bit by 32-bit divide primitive. +@end defmac + @cindex @code{EDOM}, implicit usage @findex matherr @defmac TARGET_EDOM diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in index edca600..03e6662 100644 --- a/gcc/doc/tm.texi.in +++ b/gcc/doc/tm.texi.in @@ -4205,6 +4205,14 @@ If this macro evaluates to @code{false} the comparison functions return in @file{libgcc.a}, you do not need to define this macro. @end defmac +@defmac TARGET_HAS_NO_HW_DIVIDE +This macro should be defined if the target has no hardware divide +instructions. If this macro is defined, GCC will use an algorithm which +make use of simple logical and arithmetic operations for 64-bit +division. If the macro is not defined, GCC will use an algorithm which +make use of a 64-bit by 32-bit divide primitive. +@end defmac + @cindex @code{EDOM}, implicit usage @findex matherr @defmac TARGET_EDOM diff --git a/libgcc/libgcc2.c b/libgcc/libgcc2.c index bec411b..8c4cc6a 100644 --- a/libgcc/libgcc2.c +++ b/libgcc/libgcc2.c @@ -934,6 +934,74 @@ __parityDI2 (UDWtype x) #endif #ifdef L_udivmoddi4 +#ifdef TARGET_HAS_NO_HW_DIVIDE + +#if (defined (L_udivdi3) || defined (L_divdi3) || \ + defined (L_umoddi3) || defined (L_moddi3)) +static inline __attribute__ ((__always_inline__)) +#endif +UDWtype +__udivmoddi4 (UDWtype n, UDWtype d, UDWtype *rp) +{ + UDWtype q = 0, r = n, y = d; + UWtype lz1, lz2, i, k; + + /* Implements align divisor shift dividend method. This algorithm + aligns the divisor under the dividend and then perform number of + test-subtract iterations which shift the dividend left. Number of + iterations is k + 1 where k is the number of bit positions the + divisor must be shifted left to align it under the dividend. + quotient bits can be saved in the rightmost positions of the dividend + as it shifts left on each test-subtract iteration. */ + + if (y <= r) + { + lz1 = __builtin_clzll (d); + lz2 = __builtin_clzll (n); + + k = lz1 - lz2; + y = (y << k); + + /* Dividend can exceed 2 ^ (width − 1) − 1 but still be less than the + aligned divisor. Normal iteration can drops the high order bit + of the dividend. Therefore, first test-subtract iteration is a + special case, saving its quotient bit in a separate location and + not shifting the dividend. */ + if (r >= y) + { + r = r - y; + q = (1ULL << k); + } + + if (k > 0) + { + y = y >> 1; + + /* k additional iterations where k regular test subtract shift + dividend iterations are done. */ + i = k; + do + { + if (r >= y) + r = ((r - y) << 1) + 1; + else + r = (r << 1); + i = i - 1; + } while (i != 0); + + /* First quotient bit is combined with the quotient bits resulting + from the k regular iterations. */ + q = q + r; + r = r >> k; + q = q - (r << k); + } + } + + if (rp) + *rp = r; + return q; +} +#else #if (defined (L_udivdi3) || defined (L_divdi3) || \ defined (L_umoddi3) || defined (L_moddi3)) @@ -1152,6 +1220,7 @@ __udivmoddi4 (UDWtype n, UDWtype d, UDWtype *rp) return ww.ll; } #endif +#endif #ifdef L_divdi3 DWtype