This is the second part of a better widening multiply for AVR,
namely widening to 32 bits when MUL instructions are available.

This is a bit more complicated than the 16-bit case because the
multiplications are emitted as implicit libgcc calls and involve
hard registers.  Thus, all splits and expansions have to be done
before register allocation.

The patch includes widening multiply from QI to SI, too.
If a QI is involved the extension is done in two steps:
An explicit, inlined QI->HI extension and an implicit HI->SI
extension in the library routine.

The __mulsi3 routine is rewritten; it now runs a bit slower and needs a
bit more code because __umulhisi3 and __muluhisi3 are factored out
to facilitate code reuse.  In particular, multiplication
with a small constant (i.e. 17-bit signed, -65536...65535) performs
better and will reuse these functions.

Eric, can you review the assembler routines and say if such reuse is ok
or if you'd prefer a speed-optimized version of __mulsi3 like in the current 
libgcc?

The new multiplication routines aim at a minimal register usage footprint:
No registers need to be clobbered except R26/R27 for __mulsi3.

The patch passes without regressions, of course.

Moreover, I ran individual tests of the routines against the old
implementation before integrating them into libgcc to run regression tests.

Ok to install?

Johann


        PR target/49687
        * config/avr/t-avr (LIB1ASMFUNCS): Remove _xmulhisi3_exit.
        Add _muluhisi3, _mulshisi3, _usmulhisi3.
        * config/avr/libgcc.S (__mulsi3): Rewrite.
        (__mulhisi3): Rewrite.
        (__umulhisi3): Rewrite.
        (__usmulhisi3): New.
        (__muluhisi3): New.
        (__mulshisi3): New.
        (__mulohisi3): New.
        (__mulqi3, __mulqihi3, __umulqihi3, __mulhi3): Use DEFUN/ENDF to
        declare.
        * config/avr/predicates.md (pseudo_register_operand): Rewrite.
        (pseudo_register_or_const_int_operand): New.
        (combine_pseudo_register_operand): New.
        (u16_operand): New.
        (s16_operand): New.
        (o16_operand): New.
        * config/avr/avr.c (avr_rtx_costs): Handle costs for mult:SI.
        * config/avr/avr.md (QIHI, QIHI2): New mode iterators.
        (any_extend, any_extend2): New code iterators.
        (extend_prefix): New code attribute.
        (mulsi3): Rewrite. Turn insn to expander.
        (mulhisi3): Ditto.
        (umulhisi3): Ditto.
        (usmulhisi3): New expander.
        (*mulsi3): New insn-and-split.
        (mulu<mode>si3): New insn-and-split.
        (muls<mode>si3): New insn-and-split.
        (mulohisi3): New insn-and-split.
        (*uumulqihisi3, *uumulhiqisi3, *uumulhihisi3, *uumulqiqisi3,
        *usmulqihisi3, *usmulhiqisi3, *usmulhihisi3, *usmulqiqisi3,
        *sumulqihisi3, *sumulhiqisi3, *sumulhihisi3, *sumulqiqisi3,
        *ssmulqihisi3, *ssmulhiqisi3, *ssmulhihisi3, *ssmulqiqisi3): New
        insn-and-split.
        (*mulsi3_call): Rewrite.
        (*mulhisi3_call): Rewrite.
        (*umulhisi3_call): Rewrite.
        (*usmulhisi3_call): New insn.
        (*muluhisi3_call): New insn.
        (*mulshisi3_call): New insn.
        (*mulohisi3_call): New insn.
        (extendqihi2): Use combine_pseudo_register_operand as predicate
        for operand 1.
        (extendqisi2): Ditto.
        (zero_extendqihi2): Ditto.
        (zero_extendqisi2): Ditto.
        (zero_extendhisi2): Ditto.
        (extendhisi2): Ditto. Don't early-clobber operand 0.

Index: config/avr/predicates.md
===================================================================
--- config/avr/predicates.md	(revision 176624)
+++ config/avr/predicates.md	(working copy)
@@ -155,10 +155,34 @@ (define_predicate "call_insn_operand"
        (ior (match_test "register_operand (XEXP (op, 0), mode)")
             (match_test "CONSTANT_ADDRESS_P (XEXP (op, 0))"))))
 
+;; For some insns we must ensure that no hard register is inserted
+;; into their operands because the insns are split and the split
+;; involves hard registers.  Examples are divmod insns, which are
+;; split into insns that represent implicit library calls.
+
 ;; True for register that is pseudo register.
 (define_predicate "pseudo_register_operand"
-  (and (match_code "reg")
-       (match_test "!HARD_REGISTER_P (op)")))
+  (and (match_operand 0 "register_operand")
+       (not (and (match_code "reg")
+                 (match_test "HARD_REGISTER_P (op)")))))
+
+;; True for operand that is pseudo register or CONST_INT.
+(define_predicate "pseudo_register_or_const_int_operand"
+  (ior (match_operand 0 "const_int_operand")
+       (match_operand 0 "pseudo_register_operand")))
+
+;; We keep combiner from inserting hard registers into the input of sign- and
+;; zero-extends.  A hard register in the input operand is not wanted because
+;; 32-bit multiply patterns clobber some hard registers and extends with a
+;; hard register that overlaps these clobbers won't combine to a widening
+;; multiplication.  There is no need for combine to propagate or insert
+;; hard registers, register allocation can do it just as well.
+
+;; True for operand that is pseudo register at combine time.
+(define_predicate "combine_pseudo_register_operand"
+  (ior (match_operand 0 "pseudo_register_operand")
+       (and (match_operand 0 "register_operand")
+            (match_test "reload_completed || reload_in_progress"))))
 
 ;; Return true if OP is a constant integer that is either
 ;; 8 or 16 or 24.
@@ -189,3 +213,18 @@ (define_predicate "s9_operand"
 (define_predicate "register_or_s9_operand"
   (ior (match_operand 0 "register_operand")
        (match_operand 0 "s9_operand")))
+
+;; Unsigned CONST_INT that fits in 16 bits, i.e. 0..65535.
+(define_predicate "u16_operand"
+  (and (match_code "const_int")
+       (match_test "IN_RANGE (INTVAL (op), 0, (1<<16)-1)")))
+
+;; Signed CONST_INT that fits in 16 bits, i.e. -32768..32767.
+(define_predicate "s16_operand"
+  (and (match_code "const_int")
+       (match_test "IN_RANGE (INTVAL (op), -(1<<15), (1<<15)-1)")))
+
+;; One-extended CONST_INT that fits in 16 bits, i.e. -65536..-1.
+(define_predicate "o16_operand"
+  (and (match_code "const_int")
+       (match_test "IN_RANGE (INTVAL (op), -(1<<16), -1)")))
Index: config/avr/libgcc.S
===================================================================
--- config/avr/libgcc.S	(revision 176624)
+++ config/avr/libgcc.S	(working copy)
@@ -72,10 +72,11 @@ see the files COPYING3 and COPYING.RUNTI
 .endm
 
 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 /* Note: mulqi3, mulhi3 are open-coded on the enhanced core.  */
 #if !defined (__AVR_HAVE_MUL__)
 /*******************************************************
-               Multiplication  8 x 8
+    Multiplication  8 x 8  without MUL
 *******************************************************/
 #if defined (L_mulqi3)
 
@@ -83,9 +84,7 @@ see the files COPYING3 and COPYING.RUNTI
 #define	r_arg1 	r24		/* multiplier */
 #define r_res	__tmp_reg__	/* result */
 
-	.global	__mulqi3
-	.func	__mulqi3
-__mulqi3:
+DEFUN __mulqi3
 	clr	r_res		; clear result
 __mulqi3_loop:
 	sbrc	r_arg1,0
@@ -97,18 +96,16 @@ __mulqi3_loop:
 __mulqi3_exit:	
 	mov	r_arg1,r_res	; result to return register
 	ret
+ENDF __mulqi3
 
 #undef r_arg2  
 #undef r_arg1  
 #undef r_res   
 	
-.endfunc
 #endif 	/* defined (L_mulqi3) */
 
 #if defined (L_mulqihi3)
-	.global	__mulqihi3
-	.func	__mulqihi3
-__mulqihi3:
+DEFUN __mulqihi3
 	clr	r25
 	sbrc	r24, 7
 	dec	r25
@@ -116,21 +113,19 @@ __mulqihi3:
 	sbrc	r22, 7
 	dec	r22
 	rjmp	__mulhi3
-	.endfunc
+ENDF __mulqihi3:
 #endif /* defined (L_mulqihi3) */
 
 #if defined (L_umulqihi3)
-	.global	__umulqihi3
-	.func	__umulqihi3
-__umulqihi3:
+DEFUN __umulqihi3
 	clr	r25
 	clr	r23
 	rjmp	__mulhi3
-	.endfunc
+ENDF __umulqihi3
 #endif /* defined (L_umulqihi3) */
 
 /*******************************************************
-               Multiplication  16 x 16
+    Multiplication  16 x 16  without MUL
 *******************************************************/
 #if defined (L_mulhi3)
 #define	r_arg1L	r24		/* multiplier Low */
@@ -140,9 +135,7 @@ __umulqihi3:
 #define r_resL	__tmp_reg__	/* result Low */
 #define r_resH  r21		/* result High */
 
-	.global	__mulhi3
-	.func	__mulhi3
-__mulhi3:
+DEFUN __mulhi3
 	clr	r_resH		; clear result
 	clr	r_resL		; clear result
 __mulhi3_loop:
@@ -166,6 +159,7 @@ __mulhi3_exit:
 	mov	r_arg1H,r_resH	; result to return register
 	mov	r_arg1L,r_resL
 	ret
+ENDF __mulhi3
 
 #undef r_arg1L
 #undef r_arg1H
@@ -174,168 +168,51 @@ __mulhi3_exit:
 #undef r_resL 	
 #undef r_resH 
 
-.endfunc
 #endif /* defined (L_mulhi3) */
-#endif /* !defined (__AVR_HAVE_MUL__) */
 
 /*******************************************************
-      Widening Multiplication  32 = 16 x 16
+    Widening Multiplication  32 = 16 x 16  without MUL
 *******************************************************/
-                              
+
 #if defined (L_mulhisi3)
 DEFUN __mulhisi3
-#if defined (__AVR_HAVE_MUL__)
-
-;; r25:r22 = r19:r18 * r21:r20
-
-#define A0 18
-#define B0 20
-#define C0 22
-
-#define A1 A0+1
-#define B1 B0+1
-#define C1 C0+1
-#define C2 C0+2
-#define C3 C0+3
- 
-    ; C = (signed)A1 * (signed)B1
-    muls  A1, B1
-    movw  C2, R0
-
-    ; C += A0 * B0
-    mul   A0, B0
-    movw  C0, R0
-
-    ; C += (signed)A1 * B0
-    mulsu A1, B0
-    sbci  C3, 0
-    add   C1, R0
-    adc   C2, R1
-    clr   __zero_reg__
-    adc   C3, __zero_reg__
-
-    ; C += (signed)B1 * A0
-    mulsu B1, A0
-    sbci  C3, 0
-    XJMP  __xmulhisi3_exit
-
-#undef A0
-#undef A1
-#undef B0
-#undef B1
-#undef C0
-#undef C1
-#undef C2
-#undef C3
-
-#else /* !__AVR_HAVE_MUL__ */
 ;;; FIXME: This is dead code (noone calls it)
-	mov_l	r18, r24
-	mov_h	r19, r25
-	clr	r24
-	sbrc	r23, 7
-	dec	r24
-	mov	r25, r24
-	clr	r20
-	sbrc	r19, 7
-	dec	r20
-	mov	r21, r20
-	XJMP	__mulsi3
-#endif /* __AVR_HAVE_MUL__ */
+    mov_l   r18, r24
+    mov_h   r19, r25
+    clr     r24
+    sbrc    r23, 7
+    dec     r24
+    mov     r25, r24
+    clr     r20
+    sbrc    r19, 7
+    dec     r20
+    mov     r21, r20
+    XJMP    __mulsi3
 ENDF __mulhisi3
 #endif /* defined (L_mulhisi3) */
 
 #if defined (L_umulhisi3)
 DEFUN __umulhisi3
-#if defined (__AVR_HAVE_MUL__)
-
-;; r25:r22 = r19:r18 * r21:r20
-
-#define A0 18
-#define B0 20
-#define C0 22
-
-#define A1 A0+1
-#define B1 B0+1
-#define C1 C0+1
-#define C2 C0+2
-#define C3 C0+3
-
-    ; C = A1 * B1
-    mul   A1, B1
-    movw  C2, R0
-
-    ; C += A0 * B0
-    mul   A0, B0
-    movw  C0, R0
-
-    ; C += A1 * B0
-    mul   A1, B0
-    add   C1, R0
-    adc   C2, R1
-    clr   __zero_reg__
-    adc   C3, __zero_reg__
-
-    ; C += B1 * A0
-    mul   B1, A0
-    XJMP  __xmulhisi3_exit
-
-#undef A0
-#undef A1
-#undef B0
-#undef B1
-#undef C0
-#undef C1
-#undef C2
-#undef C3
-
-#else /* !__AVR_HAVE_MUL__ */
 ;;; FIXME: This is dead code (noone calls it)
-	mov_l	r18, r24
-	mov_h	r19, r25
-	clr	r24
-	clr	r25
-	clr	r20
-	clr	r21
-	XJMP	__mulsi3
-#endif /* __AVR_HAVE_MUL__ */
+    mov_l   r18, r24
+    mov_h   r19, r25
+    clr     r24
+    clr     r25
+    mov_l   r20, r24
+    mov_h   r21, r25
+    XJMP    __mulsi3
 ENDF __umulhisi3
 #endif /* defined (L_umulhisi3) */
 
-#if defined (L_xmulhisi3_exit)
-
-;;; Helper for __mulhisi3 resp. __umulhisi3.
-
-#define C0 22
-#define C1 C0+1
-#define C2 C0+2
-#define C3 C0+3
-
-DEFUN __xmulhisi3_exit
-    add   C1, R0
-    adc   C2, R1
-    clr   __zero_reg__
-    adc   C3, __zero_reg__
-    ret
-ENDF __xmulhisi3_exit
-
-#undef C0
-#undef C1
-#undef C2
-#undef C3
-
-#endif /* defined (L_xmulhisi3_exit) */
-
 #if defined (L_mulsi3)
 /*******************************************************
-               Multiplication  32 x 32
+    Multiplication  32 x 32  without MUL
 *******************************************************/
 #define r_arg1L  r22		/* multiplier Low */
 #define r_arg1H  r23
 #define	r_arg1HL r24
 #define	r_arg1HH r25		/* multiplier High */
 
-
 #define	r_arg2L  r18		/* multiplicand Low */
 #define	r_arg2H  r19	
 #define	r_arg2HL r20
@@ -346,43 +223,7 @@ ENDF __xmulhisi3_exit
 #define r_resHL	 r30
 #define r_resHH  r31		/* result High */
 
-	
-	.global	__mulsi3
-	.func	__mulsi3
-__mulsi3:
-#if defined (__AVR_HAVE_MUL__)
-	mul	r_arg1L, r_arg2L
-	movw	r_resL, r0
-	mul	r_arg1H, r_arg2H
-	movw	r_resHL, r0
-	mul	r_arg1HL, r_arg2L
-	add	r_resHL, r0
-	adc	r_resHH, r1
-	mul	r_arg1L, r_arg2HL
-	add	r_resHL, r0
-	adc	r_resHH, r1
-	mul	r_arg1HH, r_arg2L
-	add	r_resHH, r0
-	mul	r_arg1HL, r_arg2H
-	add	r_resHH, r0
-	mul	r_arg1H, r_arg2HL
-	add	r_resHH, r0
-	mul	r_arg1L, r_arg2HH
-	add	r_resHH, r0
-	clr	r_arg1HH	; use instead of __zero_reg__ to add carry
-	mul	r_arg1H, r_arg2L
-	add	r_resH, r0
-	adc	r_resHL, r1
-	adc	r_resHH, r_arg1HH ; add carry
-	mul	r_arg1L, r_arg2H
-	add	r_resH, r0
-	adc	r_resHL, r1
-	adc	r_resHH, r_arg1HH ; add carry
-	movw	r_arg1L, r_resL
-	movw	r_arg1HL, r_resHL
-	clr	r1		; __zero_reg__ clobbered by "mul"
-	ret
-#else
+DEFUN __mulsi3
 	clr	r_resHH		; clear result
 	clr	r_resHL		; clear result
 	clr	r_resH		; clear result
@@ -414,13 +255,13 @@ __mulsi3_exit:
 	mov_h	r_arg1H,r_resH
 	mov_l	r_arg1L,r_resL
 	ret
-#endif /* defined (__AVR_HAVE_MUL__) */
+ENDF __mulsi3
+
 #undef r_arg1L 
 #undef r_arg1H 
 #undef r_arg1HL
 #undef r_arg1HH
              
-             
 #undef r_arg2L 
 #undef r_arg2H 
 #undef r_arg2HL
@@ -431,9 +272,183 @@ __mulsi3_exit:
 #undef r_resHL 
 #undef r_resHH 
 
-.endfunc
 #endif /* defined (L_mulsi3) */
+
+#endif /* !defined (__AVR_HAVE_MUL__) */
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+#if defined (__AVR_HAVE_MUL__)    
+#define A0 26
+#define B0 18
+#define C0 22
+
+#define A1 A0+1
+
+#define B1 B0+1
+#define B2 B0+2
+#define B3 B0+3
+
+#define C1 C0+1
+#define C2 C0+2
+#define C3 C0+3
+
+/*******************************************************
+    Widening Multiplication  32 = 16 x 16
+*******************************************************/
+                              
+#if defined (L_mulhisi3)
+;;; R25:R22 = (signed long) R27:R26 * (signed long) R19:R18
+;;; C3:C0   = (signed long) A1:A0   * (signed long) B1:B0
+;;; Clobbers: __tmp_reg__
+DEFUN __mulhisi3
+    XCALL   __umulhisi3
+    ;; Sign-extend B
+    tst     B1
+    brpl    1f
+    sub     C2, A0
+    sbc     C3, A1
+1:  ;; Sign-extend A
+    XJMP __usmulhisi3_tail
+ENDF __mulhisi3
+#endif /* L_mulhisi3 */
+
+#if defined (L_usmulhisi3)
+;;; R25:R22 = (signed long) R27:R26 * (unsigned long) R19:R18
+;;; C3:C0   = (signed long) A1:A0   * (unsigned long) B1:B0
+;;; Clobbers: __tmp_reg__
+DEFUN __usmulhisi3
+    XCALL   __umulhisi3
+    ;; FALLTHRU
+ENDF __usmulhisi3
+
+DEFUN __usmulhisi3_tail
+    ;; Sign-extend A
+    sbrs    A1, 7
+    ret
+    sub     C2, B0
+    sbc     C3, B1
+    ret
+ENDF __usmulhisi3_tail
+#endif /* L_usmulhisi3 */
+
+#if defined (L_umulhisi3)
+;;; R25:R22 = (unsigned long) R27:R26 * (unsigned long) R19:R18
+;;; C3:C0   = (unsigned long) A1:A0   * (unsigned long) B1:B0
+;;; Clobbers: __tmp_reg__
+DEFUN __umulhisi3
+    mul     A0, B0
+    movw    C0, r0
+    mul     A1, B1
+    movw    C2, r0
+    mul     A0, B1
+    add     C1, r0
+    adc     C2, r1
+    clr     __zero_reg__
+    adc     C3, __zero_reg__
+    mul     A1, B0
+    add     C1, r0
+    adc     C2, r1
+    clr     __zero_reg__
+    adc     C3, __zero_reg__
+    ret
+ENDF __umulhisi3
+#endif /* L_umulhisi3 */
+
+/*******************************************************
+    Widening Multiplication  32 = 16 x 32
+*******************************************************/
+
+#if defined (L_mulshisi3)
+;;; R25:R22 = (signed long) R27:R26 * R21:R18
+;;; (C3:C0) = (signed long) A1:A0   * B3:B0
+;;; Clobbers: __tmp_reg__
+DEFUN __mulshisi3
+#ifdef __AVR_HAVE_JMP_CALL__
+    ;; Some cores have problems skipping a 2-word instruction
+    tst     A1
+    brmi    __mulohisi3
+#else
+    sbrs    A1, 7
+#endif /* __AVR_HAVE_JMP_CALL__ */
+    XJMP    __muluhisi3
+    ;; FALLTHRU
+ENDF __mulshisi3
+    
+;;; R25:R22 = (one-extended long) R27:R26 * R21:R18
+;;; (C3:C0) = (one-extended long) A1:A0   * B3:B0
+;;; Clobbers: __tmp_reg__
+DEFUN __mulohisi3
+    XCALL   __muluhisi3
+    ;; One-extend R27:R26 (A1:A0)
+    sub     C2, B0
+    sbc     C3, B1
+    ret
+ENDF __mulohisi3
+#endif /* L_mulshisi3 */
+
+#if defined (L_muluhisi3)
+;;; R25:R22 = (unsigned long) R27:R26 * R21:R18
+;;; (C3:C0) = (unsigned long) A1:A0   * B3:B0
+;;; Clobbers: __tmp_reg__
+DEFUN __muluhisi3
+    XCALL   __umulhisi3
+    mul     A0, B3
+    add     C3, r0
+    mul     A1, B2
+    add     C3, r0
+    mul     A0, B2
+    add     C2, r0
+    adc     C3, r1
+    clr     __zero_reg__
+    ret
+ENDF __muluhisi3
+#endif /* L_muluhisi3 */
+
+/*******************************************************
+    Multiplication  32 x 32
+*******************************************************/
+
+#if defined (L_mulsi3)
+;;; R25:R22 = R25:R22 * R21:R18
+;;; (C3:C0) = C3:C0   * B3:B0
+;;; Clobbers: R26, R27, __tmp_reg__
+DEFUN __mulsi3
+    movw    A0, C0
+    push    C2
+    push    C3
+    XCALL   __muluhisi3
+    pop     A1
+    pop     A0
+    ;; A1:A0 now contains the high word of A
+    mul     A0, B0
+    add     C2, r0
+    adc     C3, r1
+    mul     A0, B1
+    add     C3, r0
+    mul     A1, B0
+    add     C3, r0
+    clr     __zero_reg__
+    ret
+#endif /* L_mulsi3 */
+
+#undef A0
+#undef A1
+
+#undef B0
+#undef B1
+#undef B2
+#undef B3
+
+#undef C0
+#undef C1
+#undef C2
+#undef C3
+
+#endif /* __AVR_HAVE_MUL__ */
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 	
+
 /*******************************************************
        Division 8 / 8 => (result + remainder)
 *******************************************************/
Index: config/avr/avr.md
===================================================================
--- config/avr/avr.md	(revision 176624)
+++ config/avr/avr.md	(working copy)
@@ -126,12 +126,25 @@ (define_attr "length" ""
 		       (const_int 2))]
         (const_int 2)))
 
-;; Define mode iterator
+;; Define mode iterators
+(define_mode_iterator QIHI  [(QI "") (HI "")])
+(define_mode_iterator QIHI2 [(QI "") (HI "")])
 (define_mode_iterator QISI [(QI "") (HI "") (SI "")])
 (define_mode_iterator QIDI [(QI "") (HI "") (SI "") (DI "")])
 (define_mode_iterator HIDI [(HI "") (SI "") (DI "")])
 (define_mode_iterator HISI [(HI "") (SI "")])
 
+;; Define code iterators
+;; Define two incarnations so that we can build the cross product.
+(define_code_iterator any_extend  [sign_extend zero_extend])
+(define_code_iterator any_extend2 [sign_extend zero_extend])
+
+;; Define code attributes
+(define_code_attr extend_prefix
+  [(sign_extend "s")
+   (zero_extend "u")])
+
+
 ;;========================================================================
 ;; The following is used by nonlocal_goto and setjmp.
 ;; The receiver pattern will create no instructions since internally
@@ -1349,69 +1362,310 @@ (define_insn "*mulhi3_call"
 
 ;; Operand 2 (reg:SI 18) not clobbered on the enhanced core.
 ;; All call-used registers clobbered otherwise - normal library call.
+;;    To support widening multiplication with constant we postpone
+;; expanding to the implicit library call until post combine and
+;; prior to register allocation.  Clobber all hard registers that
+;; might be used by the (widening) multiply until it is split and
+;; its final register footprint is worked out.
+
 (define_expand "mulsi3"
-  [(set (reg:SI 22) (match_operand:SI 1 "register_operand" ""))
-   (set (reg:SI 18) (match_operand:SI 2 "register_operand" ""))
-   (parallel [(set (reg:SI 22) (mult:SI (reg:SI 22) (reg:SI 18)))
-	      (clobber (reg:HI 26))
-	      (clobber (reg:HI 30))])
-   (set (match_operand:SI 0 "register_operand" "") (reg:SI 22))]
+  [(parallel [(set (match_operand:SI 0 "register_operand" "")
+                   (mult:SI (match_operand:SI 1 "register_operand" "")
+                            (match_operand:SI 2 "nonmemory_operand" "")))
+              (clobber (reg:DI 18))])]
   "AVR_HAVE_MUL"
-  "")
+  {
+    if (u16_operand (operands[2], SImode))
+      {
+        operands[2] = force_reg (HImode, gen_int_mode (INTVAL (operands[2]), HImode));
+        emit_insn (gen_muluhisi3 (operands[0], operands[2], operands[1]));
+        DONE;
+      }
 
-(define_insn "*mulsi3_call"
-  [(set (reg:SI 22) (mult:SI (reg:SI 22) (reg:SI 18)))
-   (clobber (reg:HI 26))
-   (clobber (reg:HI 30))]
-  "AVR_HAVE_MUL"
-  "%~call __mulsi3"
-  [(set_attr "type" "xcall")
-   (set_attr "cc" "clobber")])
+    if (o16_operand (operands[2], SImode))
+      {
+        operands[2] = force_reg (HImode, gen_int_mode (INTVAL (operands[2]), HImode));
+        emit_insn (gen_mulohisi3 (operands[0], operands[2], operands[1]));
+        DONE;
+      }
+  })
 
-(define_expand "mulhisi3"
-  [(set (reg:HI 18)
-        (match_operand:HI 1 "register_operand" ""))
-   (set (reg:HI 20)
-        (match_operand:HI 2 "register_operand" ""))
+(define_insn_and_split "*mulsi3"
+  [(set (match_operand:SI 0 "pseudo_register_operand"                      "=r")
+        (mult:SI (match_operand:SI 1 "pseudo_register_operand"              "r")
+                 (match_operand:SI 2 "pseudo_register_or_const_int_operand" "rn")))
+   (clobber (reg:DI 18))]
+  "AVR_HAVE_MUL && !reload_completed"
+  { gcc_unreachable(); }
+  "&& 1"
+  [(set (reg:SI 18)
+        (match_dup 1))
    (set (reg:SI 22) 
-        (mult:SI (sign_extend:SI (reg:HI 18))
-                 (sign_extend:SI (reg:HI 20))))
-   (set (match_operand:SI 0 "register_operand" "") 
+        (match_dup 2))
+   (parallel [(set (reg:SI 22)
+                   (mult:SI (reg:SI 22)
+                            (reg:SI 18)))
+              (clobber (reg:HI 26))])
+   (set (match_dup 0)
+        (reg:SI 22))]
+  {
+    if (u16_operand (operands[2], SImode))
+      {
+        operands[2] = force_reg (HImode, gen_int_mode (INTVAL (operands[2]), HImode));
+        emit_insn (gen_muluhisi3 (operands[0], operands[2], operands[1]));
+        DONE;
+      }
+
+    if (o16_operand (operands[2], SImode))
+      {
+        operands[2] = force_reg (HImode, gen_int_mode (INTVAL (operands[2]), HImode));
+        emit_insn (gen_mulohisi3 (operands[0], operands[2], operands[1]));
+        DONE;
+      }
+  })
+
+;; "muluqisi3"
+;; "muluhisi3"
+(define_insn_and_split "mulu<mode>si3"
+  [(set (match_operand:SI 0 "pseudo_register_operand"                           "=r")
+        (mult:SI (zero_extend:SI (match_operand:QIHI 1 "pseudo_register_operand" "r"))
+                 (match_operand:SI 2 "pseudo_register_or_const_int_operand"      "rn")))
+   (clobber (reg:DI 18))]
+  "AVR_HAVE_MUL && !reload_completed"
+  { gcc_unreachable(); }
+  "&& 1"
+  [(set (reg:HI 26)
+        (match_dup 1))
+   (set (reg:SI 18)
+        (match_dup 2))
+   (set (reg:SI 22)
+        (mult:SI (zero_extend:SI (reg:HI 26))
+                 (reg:SI 18)))
+   (set (match_dup 0)
+        (reg:SI 22))]
+  {
+    /* Do the QI -> HI extension explicitly before the multiplication.  */
+    /* Do the HI -> SI extension implicitly and after the multiplication.  */
+       
+    if (QImode == <MODE>mode)
+      operands[1] = gen_rtx_ZERO_EXTEND (HImode, operands[1]);
+
+    if (u16_operand (operands[2], SImode))
+      {
+        operands[1] = force_reg (HImode, operands[1]);
+        operands[2] = force_reg (HImode, gen_int_mode (INTVAL (operands[2]), HImode));
+        emit_insn (gen_umulhisi3 (operands[0], operands[1], operands[2]));
+        DONE;
+      }
+  })
+
+;; "mulsqisi3"
+;; "mulshisi3"
+(define_insn_and_split "muls<mode>si3"
+  [(set (match_operand:SI 0 "pseudo_register_operand"                           "=r")
+        (mult:SI (sign_extend:SI (match_operand:QIHI 1 "pseudo_register_operand" "r"))
+                 (match_operand:SI 2 "pseudo_register_or_const_int_operand"      "rn")))
+   (clobber (reg:DI 18))]
+  "AVR_HAVE_MUL && !reload_completed"
+  { gcc_unreachable(); }
+  "&& 1"
+  [(set (reg:HI 26)
+        (match_dup 1))
+   (set (reg:SI 18)
+        (match_dup 2))
+   (set (reg:SI 22)
+        (mult:SI (sign_extend:SI (reg:HI 26))
+                 (reg:SI 18)))
+   (set (match_dup 0)
+        (reg:SI 22))]
+  {
+    /* Do the QI -> HI extension explicitly before the multiplication.  */
+    /* Do the HI -> SI extension implicitly and after the multiplication.  */
+       
+    if (QImode == <MODE>mode)
+      operands[1] = gen_rtx_SIGN_EXTEND (HImode, operands[1]);
+
+    if (u16_operand (operands[2], SImode)
+        || s16_operand (operands[2], SImode))
+      {
+        rtx xop2 = force_reg (HImode, gen_int_mode (INTVAL (operands[2]), HImode));
+
+        operands[1] = force_reg (HImode, operands[1]);
+
+        if (u16_operand (operands[2], SImode))
+          emit_insn (gen_usmulhisi3 (operands[0], xop2, operands[1]));
+        else
+          emit_insn (gen_mulhisi3 (operands[0], operands[1], xop2));
+
+        DONE;
+      }
+  })
+
+;; One-extend operand 1
+
+(define_insn_and_split "mulohisi3"
+  [(set (match_operand:SI 0 "pseudo_register_operand"                          "=r")
+        (mult:SI (not:SI (zero_extend:SI 
+                          (not:HI (match_operand:HI 1 "pseudo_register_operand" "r"))))
+                 (match_operand:SI 2 "pseudo_register_or_const_int_operand"     "rn")))
+   (clobber (reg:DI 18))]
+  "AVR_HAVE_MUL && !reload_completed"
+  { gcc_unreachable(); }
+  "&& 1"
+  [(set (reg:HI 26)
+        (match_dup 1))
+   (set (reg:SI 18)
+        (match_dup 2))
+   (set (reg:SI 22)
+        (mult:SI (not:SI (zero_extend:SI (not:HI (reg:HI 26))))
+                 (reg:SI 18)))
+   (set (match_dup 0)
         (reg:SI 22))]
+  "")
+
+(define_expand "mulhisi3"
+  [(parallel [(set (match_operand:SI 0 "register_operand" "")
+                   (mult:SI (sign_extend:SI (match_operand:HI 1 "register_operand" ""))
+                            (sign_extend:SI (match_operand:HI 2 "register_operand" ""))))
+              (clobber (reg:DI 18))])]
   "AVR_HAVE_MUL"
   "")
 
 (define_expand "umulhisi3"
+  [(parallel [(set (match_operand:SI 0 "register_operand" "")
+                   (mult:SI (zero_extend:SI (match_operand:HI 1 "register_operand" ""))
+                            (zero_extend:SI (match_operand:HI 2 "register_operand" ""))))
+              (clobber (reg:DI 18))])]
+  "AVR_HAVE_MUL"
+  "")
+
+(define_expand "usmulhisi3"
+  [(parallel [(set (match_operand:SI 0 "register_operand" "")
+                   (mult:SI (zero_extend:SI (match_operand:HI 1 "register_operand" ""))
+                            (sign_extend:SI (match_operand:HI 2 "register_operand" ""))))
+              (clobber (reg:DI 18))])]
+  "AVR_HAVE_MUL"
+  "")
+
+;; "*uumulqihisi3" "*uumulhiqisi3" "*uumulhihisi3" "*uumulqiqisi3"
+;; "*usmulqihisi3" "*usmulhiqisi3" "*usmulhihisi3" "*usmulqiqisi3"
+;; "*sumulqihisi3" "*sumulhiqisi3" "*sumulhihisi3" "*sumulqiqisi3"
+;; "*ssmulqihisi3" "*ssmulhiqisi3" "*ssmulhihisi3" "*ssmulqiqisi3"
+(define_insn_and_split
+  "*<any_extend:extend_prefix><any_extend2:extend_prefix>mul<QIHI:mode><QIHI2:mode>si3"
+  [(set (match_operand:SI 0 "pseudo_register_operand"                            "=r")
+        (mult:SI (any_extend:SI (match_operand:QIHI 1 "pseudo_register_operand"   "r"))
+                 (any_extend2:SI (match_operand:QIHI2 2 "pseudo_register_operand" "r"))))
+   (clobber (reg:DI 18))]
+  "AVR_HAVE_MUL && !reload_completed"
+  { gcc_unreachable(); }
+  "&& 1"
   [(set (reg:HI 18)
-        (match_operand:HI 1 "register_operand" ""))
-   (set (reg:HI 20)
-        (match_operand:HI 2 "register_operand" ""))
-   (set (reg:SI 22) 
-        (mult:SI (zero_extend:SI (reg:HI 18))
-                 (zero_extend:SI (reg:HI 20))))
-   (set (match_operand:SI 0 "register_operand" "") 
+        (match_dup 1))
+   (set (reg:HI 26)
+        (match_dup 2))
+   (set (reg:SI 22)
+        (mult:SI (match_dup 3)
+                 (match_dup 4)))
+   (set (match_dup 0)
         (reg:SI 22))]
+  {
+    rtx xop1 = operands[1];
+    rtx xop2 = operands[2];
+
+    /* Do the QI -> HI extension explicitly before the multiplication.  */
+    /* Do the HI -> SI extension implicitly and after the multiplication.  */
+       
+    if (QImode == <QIHI:MODE>mode)
+      xop1 = gen_rtx_fmt_e (<any_extend:CODE>, HImode, xop1);
+
+    if (QImode == <QIHI2:MODE>mode)
+      xop2 = gen_rtx_fmt_e (<any_extend2:CODE>, HImode, xop2);
+
+    if (<any_extend:CODE> == <any_extend2:CODE>
+        || <any_extend:CODE> == ZERO_EXTEND)
+      {
+        operands[1] = xop1;
+        operands[2] = xop2;
+        operands[3] = gen_rtx_fmt_e (<any_extend:CODE>, SImode, gen_rtx_REG (HImode, 18));
+        operands[4] = gen_rtx_fmt_e (<any_extend2:CODE>, SImode, gen_rtx_REG (HImode, 26));
+      }
+    else
+      {
+        /* <any_extend:CODE>  = SIGN_EXTEND */
+        /* <any_extend2:CODE> = ZERO_EXTEND */
+
+        operands[1] = xop2;
+        operands[2] = xop1;
+        operands[3] = gen_rtx_ZERO_EXTEND (SImode, gen_rtx_REG (HImode, 18));
+        operands[4] = gen_rtx_SIGN_EXTEND (SImode, gen_rtx_REG (HImode, 26));
+      }
+  })
+
+(define_insn "*mulsi3_call"
+  [(set (reg:SI 22)
+        (mult:SI (reg:SI 22)
+                 (reg:SI 18)))
+   (clobber (reg:HI 26))]
   "AVR_HAVE_MUL"
-  "")
+  "%~call __mulsi3"
+  [(set_attr "type" "xcall")
+   (set_attr "cc" "clobber")])
 
 (define_insn "*mulhisi3_call"
-  [(set (reg:SI 22) 
+  [(set (reg:SI 22)
         (mult:SI (sign_extend:SI (reg:HI 18))
-                 (sign_extend:SI (reg:HI 20))))]
+                 (sign_extend:SI (reg:HI 26))))]
   "AVR_HAVE_MUL"
   "%~call __mulhisi3"
   [(set_attr "type" "xcall")
    (set_attr "cc" "clobber")])
 
 (define_insn "*umulhisi3_call"
-  [(set (reg:SI 22) 
+  [(set (reg:SI 22)
         (mult:SI (zero_extend:SI (reg:HI 18))
-                 (zero_extend:SI (reg:HI 20))))]
+                 (zero_extend:SI (reg:HI 26))))]
   "AVR_HAVE_MUL"
   "%~call __umulhisi3"
   [(set_attr "type" "xcall")
    (set_attr "cc" "clobber")])
 
+(define_insn "*usmulhisi3_call"
+  [(set (reg:SI 22)
+        (mult:SI (zero_extend:SI (reg:HI 18))
+                 (sign_extend:SI (reg:HI 26))))]
+  "AVR_HAVE_MUL"
+  "%~call __usmulhisi3"
+  [(set_attr "type" "xcall")
+   (set_attr "cc" "clobber")])
+
+(define_insn "*muluhisi3_call"
+  [(set (reg:SI 22)
+        (mult:SI (zero_extend:SI (reg:HI 26))
+                 (reg:SI 18)))]
+  "AVR_HAVE_MUL"
+  "%~call __muluhisi3"
+  [(set_attr "type" "xcall")
+   (set_attr "cc" "clobber")])
+
+(define_insn "*mulshisi3_call"
+  [(set (reg:SI 22)
+        (mult:SI (sign_extend:SI (reg:HI 26))
+                 (reg:SI 18)))]
+  "AVR_HAVE_MUL"
+  "%~call __mulshisi3"
+  [(set_attr "type" "xcall")
+   (set_attr "cc" "clobber")])
+
+(define_insn "*mulohisi3_call"
+  [(set (reg:SI 22)
+        (mult:SI (not:SI (zero_extend:SI (not:HI (reg:HI 26))))
+                 (reg:SI 18)))]
+  "AVR_HAVE_MUL"
+  "%~call __mulohisi3"
+  [(set_attr "type" "xcall")
+   (set_attr "cc" "clobber")])
+
 ; / % / % / % / % / % / % / % / % / % / % / % / % / % / % / % / % / % / % / %
 ; divmod
 
@@ -2399,9 +2653,16 @@ (define_insn "one_cmplsi2"
 ;; xx<---x xx<---x xx<---x xx<---x xx<---x xx<---x xx<---x xx<---x xx<---x
 ;; sign extend
 
+;; We keep combiner from inserting hard registers into the input of sign- and
+;; zero-extends.  A hard register in the input operand is not wanted because
+;; 32-bit multiply patterns clobber some hard registers and extends with a
+;; hard register that overlaps these clobbers won't be combined to a widening
+;; multiplication.  There is no need for combine to propagate hard registers,
+;; register allocation can do it just as well.
+
 (define_insn "extendqihi2"
   [(set (match_operand:HI 0 "register_operand" "=r,r")
-        (sign_extend:HI (match_operand:QI 1 "register_operand" "0,*r")))]
+        (sign_extend:HI (match_operand:QI 1 "combine_pseudo_register_operand" "0,*r")))]
   ""
   "@
 	clr %B0\;sbrc %0,7\;com %B0
@@ -2411,7 +2672,7 @@ (define_insn "extendqihi2"
 
 (define_insn "extendqisi2"
   [(set (match_operand:SI 0 "register_operand" "=r,r")
-        (sign_extend:SI (match_operand:QI 1 "register_operand" "0,*r")))]
+        (sign_extend:SI (match_operand:QI 1 "combine_pseudo_register_operand" "0,*r")))]
   ""
   "@
 	clr %B0\;sbrc %A0,7\;com %B0\;mov %C0,%B0\;mov %D0,%B0
@@ -2420,8 +2681,8 @@ (define_insn "extendqisi2"
    (set_attr "cc" "set_n,set_n")])
 
 (define_insn "extendhisi2"
-  [(set (match_operand:SI 0 "register_operand"               "=r,&r")
-        (sign_extend:SI (match_operand:HI 1 "register_operand" "0,*r")))]
+  [(set (match_operand:SI 0 "register_operand"                               "=r,r")
+        (sign_extend:SI (match_operand:HI 1 "combine_pseudo_register_operand" "0,*r")))]
   ""
   "@
 	clr %C0\;sbrc %B0,7\;com %C0\;mov %D0,%C0
@@ -2438,7 +2699,7 @@ (define_insn "extendhisi2"
 
 (define_insn_and_split "zero_extendqihi2"
   [(set (match_operand:HI 0 "register_operand" "=r")
-        (zero_extend:HI (match_operand:QI 1 "register_operand" "r")))]
+        (zero_extend:HI (match_operand:QI 1 "combine_pseudo_register_operand" "r")))]
   ""
   "#"
   "reload_completed"
@@ -2454,7 +2715,7 @@ (define_insn_and_split "zero_extendqihi2
 
 (define_insn_and_split "zero_extendqisi2"
   [(set (match_operand:SI 0 "register_operand" "=r")
-        (zero_extend:SI (match_operand:QI 1 "register_operand" "r")))]
+        (zero_extend:SI (match_operand:QI 1 "combine_pseudo_register_operand" "r")))]
   ""
   "#"
   "reload_completed"
@@ -2469,8 +2730,8 @@ (define_insn_and_split "zero_extendqisi2
 })
 
 (define_insn_and_split "zero_extendhisi2"
-  [(set (match_operand:SI 0 "register_operand" "=r")
-        (zero_extend:SI (match_operand:HI 1 "register_operand" "r")))]
+  [(set (match_operand:SI 0 "register_operand"                               "=r")
+        (zero_extend:SI (match_operand:HI 1 "combine_pseudo_register_operand" "r")))]
   ""
   "#"
   "reload_completed"
Index: config/avr/t-avr
===================================================================
--- config/avr/t-avr	(revision 176624)
+++ config/avr/t-avr	(working copy)
@@ -41,7 +41,9 @@ LIB1ASMFUNCS = \
 	_mulhi3 \
 	_mulhisi3 \
 	_umulhisi3 \
-	_xmulhisi3_exit \
+	_usmulhisi3 \
+	_muluhisi3 \
+	_mulshisi3 \
 	_mulsi3 \
 	_udivmodqi4 \
 	_divmodqi4 \
Index: config/avr/avr.c
===================================================================
--- config/avr/avr.c	(revision 176624)
+++ config/avr/avr.c	(working copy)
@@ -5512,6 +5512,34 @@ avr_rtx_costs (rtx x, int codearg, int o
 	    return false;
 	  break;
 
+	case SImode:
+	  if (AVR_HAVE_MUL)
+            {
+              if (!speed)
+                {
+                  /* Add some additional costs besides CALL like moves etc.  */
+
+                  *total = COSTS_N_INSNS (AVR_HAVE_JMP_CALL ? 5 : 4);
+                }
+              else
+                {
+                  /* Just a rough estimate.  Even with -O2 we don't want bulky
+                     code expanded inline.  */
+
+                  *total = COSTS_N_INSNS (25);
+                }
+            }
+          else
+            {
+              if (speed)
+                *total = COSTS_N_INSNS (300);
+              else
+                /* Add some additional costs besides CALL like moves etc.  */
+                *total = COSTS_N_INSNS (AVR_HAVE_JMP_CALL ? 5 : 4);
+            }
+          
+          return true;
+          
 	default:
 	  return false;
 	}

Reply via email to