Re: [Patch, AVR]: Better 32=16*16 widening multiplication

2011-06-28 Thread Denis Chertykov
2011/6/28 Georg-Johann Lay :
> This implements "mulhisi3" and "umulhisi3" widening multiplication
> insns if AVR_HAVE_MUL.
>
> I chose the interface as r25:r22 = r19:r18 * r21:r20, which is OK
> because only the avr-gcc backend (BE) will call the respective __*
> support functions in libgcc.
>
> Tested without regression and hand-tested assembler code.
>
> Johann
>
>        * config/avr/t-avr (LIB1ASMFUNCS): Add _mulhisi3, _umulhisi3,
>        _xmulhisi3_exit.
>        * config/avr/libgcc.S (_xmulhisi3_exit): New Function.
>        (__mulhisi3): Optimize if have MUL*.  Use XJMP instead of rjmp.
>        (__umulhisi3): Ditto.
>        * config/avr/avr.md (mulhisi3): New insn expander.
>        (umulhisi3): New insn expander.
>        (*mulhisi3_call): New insn.
>        (*umulhisi3_call): New insn.
>

Approved.

Denis.


[Patch, AVR]: Better 32=16*16 widening multiplication

2011-06-28 Thread Georg-Johann Lay
This implements "mulhisi3" and "umulhisi3" widening multiplication
insns if AVR_HAVE_MUL.

I chose the interface as r25:r22 = r19:r18 * r21:r20, which is OK
because only the avr-gcc backend (BE) will call the respective __*
support functions in libgcc.

Tested without regression and hand-tested assembler code.

Johann

* config/avr/t-avr (LIB1ASMFUNCS): Add _mulhisi3, _umulhisi3,
_xmulhisi3_exit.
* config/avr/libgcc.S (_xmulhisi3_exit): New Function.
(__mulhisi3): Optimize if have MUL*.  Use XJMP instead of rjmp.
(__umulhisi3): Ditto.
* config/avr/avr.md (mulhisi3): New insn expander.
(umulhisi3): New insn expander.
(*mulhisi3_call): New insn.
(*umulhisi3_call): New insn.
Index: config/avr/libgcc.S
===================================================================
--- config/avr/libgcc.S	(revision 175574)
+++ config/avr/libgcc.S	(working copy)
@@ -178,10 +178,57 @@ __mulhi3_exit:
 #endif /* defined (L_mulhi3) */
 #endif /* !defined (__AVR_HAVE_MUL__) */
 
+/***
+  Widening Multiplication  32 = 16 x 16
+***/
+  
 #if defined (L_mulhisi3)
-	.global	__mulhisi3
-	.func	__mulhisi3
-__mulhisi3:
+DEFUN __mulhisi3
+#if defined (__AVR_HAVE_MUL__)
+;; Signed widening multiply 32 = 16 x 16, built from four 8 x 8 hardware multiplies.
+;; r25:r22 = r19:r18 * r21:r20   (C = r25:r22, A = r19:r18, B = r21:r20; index 0 = low byte)
+
+#define A0 18
+#define B0 20
+#define C0 22
+
+#define A1 A0+1
+#define B1 B0+1
+#define C1 C0+1
+#define C2 C0+2
+#define C3 C0+3
+ 
+; C = (signed)A1 * (signed)B1   -- high x high partial product, stored in C3:C2 (weight 2^16)
+muls  A1, B1                    ; signed x signed, 16-bit product in R1:R0
+movw  C2, R0                    ; C3:C2 = R1:R0
+
+; C += A0 * B0                  -- low x low partial product, written straight into C1:C0
+mul   A0, B0                    ; unsigned x unsigned, 16-bit product in R1:R0
+movw  C0, R0                    ; C1:C0 = R1:R0
+
+; C += (signed)A1 * B0          -- first cross product, added at byte offset 1
+mulsu A1, B0                    ; signed A1 x unsigned B0; C flag = bit 15 (sign) of the product
+sbci  C3, 0                     ; C3 -= C flag: sign-extend a negative cross product into the top byte
+add   C1, R0                    ; C2:C1 += R1:R0 ...
+adc   C2, R1
+clr   __zero_reg__              ; R1 is the avr-gcc zero register and was clobbered by the multiply; clr keeps the C flag
+adc   C3, __zero_reg__          ; ... and propagate the carry into C3
+
+; C += (signed)B1 * A0          -- second cross product; the addition itself is done by the shared exit helper
+mulsu B1, A0                    ; signed B1 x unsigned A0; C flag = bit 15 (sign) of the product
+sbci  C3, 0                     ; sign-extend into the top byte, as above
+XJMP  __xmulhisi3_exit          ; tail jump: helper adds R1:R0 into C2:C1, carries into C3, clears __zero_reg__, returns
+
+#undef A0
+#undef A1
+#undef B0
+#undef B1
+#undef C0
+#undef C1
+#undef C2
+#undef C3
+
+#else /* !__AVR_HAVE_MUL__ */
 	mov_l	r18, r24
 	mov_h	r19, r25
 	clr	r24
@@ -192,24 +239,91 @@ __mulhisi3:
 	sbrc	r19, 7
 	dec	r20
 	mov	r21, r20
-	rjmp	__mulsi3
-	.endfunc
+	XJMP	__mulsi3
+#endif /* __AVR_HAVE_MUL__ */
+ENDF __mulhisi3
 #endif /* defined (L_mulhisi3) */
 
 #if defined (L_umulhisi3)
-	.global	__umulhisi3
-	.func	__umulhisi3
-__umulhisi3:
+DEFUN __umulhisi3
+#if defined (__AVR_HAVE_MUL__)
+;; Unsigned widening multiply 32 = 16 x 16, built from four 8 x 8 hardware multiplies.
+;; r25:r22 = r19:r18 * r21:r20   (C = r25:r22, A = r19:r18, B = r21:r20; index 0 = low byte)
+
+#define A0 18
+#define B0 20
+#define C0 22
+
+#define A1 A0+1
+#define B1 B0+1
+#define C1 C0+1
+#define C2 C0+2
+#define C3 C0+3
+
+; C = A1 * B1                   -- high x high partial product, stored in C3:C2 (weight 2^16)
+mul   A1, B1                    ; unsigned x unsigned, 16-bit product in R1:R0
+movw  C2, R0                    ; C3:C2 = R1:R0
+
+; C += A0 * B0                  -- low x low partial product, written straight into C1:C0
+mul   A0, B0
+movw  C0, R0                    ; C1:C0 = R1:R0
+
+; C += A1 * B0                  -- first cross product, added at byte offset 1
+mul   A1, B0
+add   C1, R0                    ; C2:C1 += R1:R0 ...
+adc   C2, R1
+clr   __zero_reg__              ; R1 is the avr-gcc zero register and was clobbered by the multiply; clr keeps the C flag
+adc   C3, __zero_reg__          ; ... and propagate the carry into C3
+
+; C += B1 * A0                  -- second cross product; the addition itself is done by the shared exit helper
+mul   B1, A0
+XJMP  __xmulhisi3_exit          ; tail jump: helper adds R1:R0 into C2:C1, carries into C3, clears __zero_reg__, returns
+
+#undef A0
+#undef A1
+#undef B0
+#undef B1
+#undef C0
+#undef C1
+#undef C2
+#undef C3
+
+#else /* !__AVR_HAVE_MUL__ */
 	mov_l	r18, r24
 	mov_h	r19, r25
 	clr	r24
 	clr	r25
 	clr	r20
 	clr	r21
-	rjmp	__mulsi3
-	.endfunc
+	XJMP	__mulsi3
+#endif /* __AVR_HAVE_MUL__ */
+ENDF __umulhisi3
 #endif /* defined (L_umulhisi3) */
 
+#if defined (L_xmulhisi3_exit)
+
+;;; Helper for __mulhisi3 resp. __umulhisi3: shared tail that adds the last cross product (in R1:R0) into C at byte offset 1.
+
+#define C0 22
+#define C1 C0+1
+#define C2 C0+2
+#define C3 C0+3
+
+DEFUN __xmulhisi3_exit
+add   C1, R0                    ; C2:C1 += R1:R0 ...
+adc   C2, R1
+clr   __zero_reg__              ; restore the zero register (R1 held the product high byte); clr keeps the C flag
+adc   C3, __zero_reg__          ; ... and propagate the carry into C3
+ret                             ; result is left in C3:C0 = r25:r22
+ENDF __xmulhisi3_exit
+
+#undef C0
+#undef C1
+#undef C2
+#undef C3
+
+#endif /* defined (L_xmulhisi3_exit) */
+
 #if defined (L_mulsi3)
 /***
Multiplication  32 x 32
Index: config/avr/avr.md
===================================================================
--- config/avr/avr.md	(revision 175574)
+++ config/avr/avr.md	(working copy)
@@ -1056,6 +1056,50 @@ (define_insn "*mulsi3_call"
   [(set_attr "type" "xcall")
(set_attr "cc" "clobber")])
 
+(define_expand "mulhisi3"      ;; signed widening multiply SI = HI x HI through fixed hard registers
+  [(set (reg:HI 18)            ;; operand 1 -> r19:r18
+(match_operand:HI 1 "register_operand" ""))
+   (set (reg:HI 20)            ;; operand 2 -> r21:r20
+(match_operand:HI 2 "register_operand" ""))
+   (set (reg:SI 22) 
+(mult:SI (sign_extend:SI (reg:HI 18))
+ (sign_extend:SI (reg:HI 20))))
+   (set (match_operand:SI 0 "register_operand" "") 
+(reg:SI 22))]                  ;; copy the 32-bit product r25:r22 to operand 0
+  "AVR_HAVE_MUL"
+  "")
+
+(define_expand "umulhisi3"     ;; unsigned widening multiply SI = HI x HI through fixed hard registers
+  [(set (reg:HI 18)            ;; operand 1 -> r19:r18
+(match_operand:HI 1 "register_operand" ""))
+   (set (reg:HI 20)            ;; operand 2 -> r21:r20
+(match_operand:HI 2 "register_operand" ""))
+   (set (reg:SI 22) 
+(mult:SI (zero_extend:SI (reg:HI 18))
+ (zero_extend:SI (reg:HI 20))))
+   (set (match_operand:SI 0 "register_operand" "") 
+(reg:SI 22))]                  ;; copy the 32-bit product r25:r22 to operand 0
+  "AVR_HAVE_MUL"
+  "")
+
+(define_insn "*mulhisi3_call"
+  [(set (reg:SI 22) 
+(mult:SI (sign_extend:SI (reg:HI 18))
+ (sign_extend:SI (reg:HI 20]
+  "AVR_HAVE_MUL"
+  "%~call __mulhisi3"
+  [(set_attr "type" "x