Hi, All !
Here is the source for the __muldi3 for MCUs with HW multiplier:
-------------------------------Source begins--------------------------------
/*
 * Register aliases for __muldi3:  res = arg1 * arg2.
 * Each 64-bit quantity is held as four 16-bit words, named from the
 * least significant (ll) to the most significant (hh).
 */

/* res: product being built up */
#define r_resll   r8
#define r_reslh   r9
#define r_reshl   r10
#define r_reshh   r11

/* arg1 */
#define r_arg1ll  r12
#define r_arg1lh  r13
#define r_arg1hl  r14
#define r_arg1hh  r15

/* arg2 */
#define r_arg2ll  r4
#define r_arg2lh  r5
#define r_arg2hl  r6
#define r_arg2hh  r7
.global __muldi3
.func __muldi3
;; ---------------------------------------------------------------------
;; __muldi3 -- 64 x 64 -> 64 bit multiply on the 16x16 hardware multiplier
;;
;; In:   arg1 in r15:r14:r13:r12
;;       arg2 as four words on the stack, lowest word first, directly
;;       above the return address
;; Out:  r15:r14:r13:r12 = low 64 bits of arg1 * arg2
;; r4-r11 are saved on entry and restored on exit.
;;
;; Method: schoolbook multiplication on 16-bit limbs.  Only the ten
;; partial products that reach into bits 0..63 are computed; they are
;; summed column by column in the multiplier accumulator RESHI:RESLO.
;;
;; All partial products MUST be unsigned (MPY/MAC).  The limbs of a
;; multi-word number are plain magnitudes, so the signed modes
;; (MPYS/MACS) give a wrong high half whenever a limb has bit 15 set,
;; and they turn SUMEXT into a sign extension instead of a 0/1 carry.
;; The low 64 bits of the product are the same for signed and unsigned
;; operands, so this routine serves both.
;;
;; Writing OP2 starts a multiplication with whatever OP1 and mode were
;; selected last, which is why some products below only write OP2.
;;
;; NOTE(review): the multiplier registers are shared state.  If an
;; interrupt handler can also use the multiplier, this sequence needs
;; interrupts disabled around it -- confirm for the target build.
;; ---------------------------------------------------------------------
__muldi3:
	push	r4
	push	r5
	push	r6
	push	r7
	push	r8
	push	r9
	push	r10
	push	r11

	;; Fetch arg2.  Offset 18 = 8 saved registers (16 bytes) plus the
	;; 2-byte return address.
	mov	18+0(r1), r_arg2ll
	mov	18+2(r1), r_arg2lh
	mov	18+4(r1), r_arg2hl
	mov	18+6(r1), r_arg2hh

	;; r15:r14:r13:r12 * r7:r6:r5:r4 -> r11:r10:r9:r8

	;; ---- result word 0 -------------------------------------------
	mov	r_arg1ll, &__MPY
	mov	r_arg2ll, &__OP2	; LL1 x LL2
	mov	&__RESLO, r_resll
	;; Shift the accumulator down one word.
	mov	&__RESHI, &__RESLO
	mov	&__SUMEXT, &__RESHI	; SUMEXT is 0 after MPY: clears RESHI

	;; ---- result word 1 -------------------------------------------
	;; The accumulator holds at most 0xFFFE here, so the first MAC
	;; cannot carry out of 32 bits; SUMEXT after the second MAC is
	;; therefore the complete carry into word 3.
	mov	r_arg1ll, &__MAC
	mov	r_arg2lh, &__OP2	; LL1 x LH2
	mov	r_arg1lh, &__MAC
	mov	r_arg2ll, &__OP2	; LH1 x LL2
	mov	&__RESLO, r_reslh
	mov	&__RESHI, &__RESLO
	mov	&__SUMEXT, &__RESHI	; carry (0 or 1) becomes word 3 seed

	;; ---- result word 2 -------------------------------------------
	;; Carries out of the accumulator from here on belong to bit 64
	;; and above, so SUMEXT is no longer needed.
	mov	r_arg2lh, &__OP2	; LH1 x LH2 (OP1 still holds LH1)
	mov	r_arg1ll, &__MAC
	mov	r_arg2hl, &__OP2	; LL1 x HL2
	mov	r_arg1hl, &__MAC
	mov	r_arg2ll, &__OP2	; HL1 x LL2
	mov	&__RESLO, r_reshl
	mov	&__RESHI, &__RESLO

	;; ---- result word 3 -------------------------------------------
	;; Only RESLO is read afterwards; the stale RESHI is harmless.
	mov	r_arg2lh, &__OP2	; HL1 x LH2 (OP1 still holds HL1)
	mov	r_arg1ll, &__MAC
	mov	r_arg2hh, &__OP2	; LL1 x HH2
	mov	r_arg1lh, &__MAC
	mov	r_arg2hl, &__OP2	; LH1 x HL2
	mov	r_arg1hh, &__MAC
	mov	r_arg2ll, &__OP2	; HH1 x LL2
	mov	&__RESLO, r_reshh

	;; Move the product into the return registers.
	mov	r_resll, r12
	mov	r_reslh, r13
	mov	r_reshl, r14
	mov	r_reshh, r15

	;; Restore saved registers in reverse push order.
	pop	r11
	pop	r10
	pop	r9
	pop	r8
	pop	r7
	pop	r6
	pop	r5
	pop	r4
	ret
.endfunc
-------------------------------Source ends----------------------------------
I have run one small test: 1234567890 * 1234567890 = 1524157875019052100.
I have not done much more testing, since I have found a simpler and better
solution to my problem.
So now I do not need 64-bit arithmetic :) (I use a little inline assembly
and 32-bit arithmetic).
All the best !
Oleg.