J. Mayer wrote: > > On Wed, 2007-10-24 at 12:20 +0200, Fabrice Bellard wrote: > > I strongly suggest to reuse my code which was in target-i386/helper.c > > revision 1.80 which was far easier to validate. Moreover, integer > > divisions from target-i386/helper.c should be put in the same file. > > I fully agree with this. I still use the same code in the PowerPC > op_helper.c file because I never convinced myself that the host_utils > version was bug-free. I would likely switch to the common version if I > could be sure it cannot lead to any regression.
Like this? Questions/Comments I have: - Is the BSD-style copyright still ok for this version? - The x86-64 assembler is untested for this version, could you check it works for you? - SPARC and Alpha look like they will break on 32bit hosts, they should do multiplications the same way as the other 64bit targets. Thiemo Index: qemu-cvs/host-utils.c =================================================================== --- qemu-cvs.orig/host-utils.c +++ qemu-cvs/host-utils.c @@ -1,6 +1,8 @@ /* * Utility compute operations used by translated code. * + * Copyright (c) 2003 Fabrice Bellard + * Copyright (c) 2003-2007 Jocelyn Mayer * Copyright (c) 2007 Aurelien Jarno * * Permission is hereby granted, free of charge, to any person obtaining a copy @@ -24,54 +26,90 @@ #include "vl.h" -/* Signed 64x64 -> 128 multiplication */ +#define DEBUG_MULDEV -void muls64(int64_t *phigh, int64_t *plow, int64_t a, int64_t b) +/* Long integer helpers */ +static void add128 (uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b) { -#if defined(__x86_64__) - __asm__ ("imul %0\n\t" - : "=d" (*phigh), "=a" (*plow) - : "a" (a), "0" (b) - ); -#else - int64_t ph; - uint64_t pm1, pm2, pl; + *plow += a; + /* carry test */ + if (*plow < a) + (*phigh)++; + *phigh += b; +} + +static void neg128 (uint64_t *plow, uint64_t *phigh) +{ + *plow = ~*plow; + *phigh = ~*phigh; + add128(plow, phigh, 1, 0); +} - pl = (uint64_t)((uint32_t)a) * (uint64_t)((uint32_t)b); - pm1 = (a >> 32) * (uint32_t)b; - pm2 = (uint32_t)a * (b >> 32); - ph = (a >> 32) * (b >> 32); - - ph += (int64_t)pm1 >> 32; - ph += (int64_t)pm2 >> 32; - pm1 = (uint64_t)((uint32_t)pm1) + (uint64_t)((uint32_t)pm2) + (pl >> 32); +static void mul64 (uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b) +{ + uint32_t a0, a1, b0, b1; + uint64_t v; - *phigh = ph + ((int64_t)pm1 >> 32); - *plow = (pm1 << 32) + (uint32_t)pl; -#endif + a0 = a; + a1 = a >> 32; + + b0 = b; + b1 = b >> 32; + + v = (uint64_t)a0 * (uint64_t)b0; + *plow = v; + *phigh = 0; + 
+ v = (uint64_t)a0 * (uint64_t)b1; + add128(plow, phigh, v << 32, v >> 32); + + v = (uint64_t)a1 * (uint64_t)b0; + add128(plow, phigh, v << 32, v >> 32); + + v = (uint64_t)a1 * (uint64_t)b1; + *phigh += v; } + /* Unsigned 64x64 -> 128 multiplication */ -void mulu64(uint64_t *phigh, uint64_t *plow, uint64_t a, uint64_t b) +void mulu64 (uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b) { #if defined(__x86_64__) __asm__ ("mul %0\n\t" : "=d" (*phigh), "=a" (*plow) - : "a" (a), "0" (b) - ); + : "a" (a), "0" (b)); #else - uint64_t ph, pm1, pm2, pl; + mul64(plow, phigh, a, b); +#endif +#if defined(DEBUG_MULDIV) + printf("mulu64: 0x%016llx * 0x%016llx = 0x%016llx%016llx\n", + a, b, *phigh, *plow); +#endif +} - pl = (uint64_t)((uint32_t)a) * (uint64_t)((uint32_t)b); - pm1 = (a >> 32) * (uint32_t)b; - pm2 = (uint32_t)a * (b >> 32); - ph = (a >> 32) * (b >> 32); - - ph += pm1 >> 32; - ph += pm2 >> 32; - pm1 = (uint64_t)((uint32_t)pm1) + (uint64_t)((uint32_t)pm2) + (pl >> 32); +/* Signed 64x64 -> 128 multiplication */ +void muls64 (uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b) +{ +#if defined(__x86_64__) + __asm__ ("imul %0\n\t" + : "=d" (*phigh), "=a" (*plow) + : "a" (a), "0" (b)); +#else + int sa, sb; - *phigh = ph + (pm1 >> 32); - *plow = (pm1 << 32) + (uint32_t)pl; + sa = (a < 0); + if (sa) + a = -a; + sb = (b < 0); + if (sb) + b = -b; + mul64(plow, phigh, a, b); + if (sa ^ sb) { + neg128(plow, phigh); + } +#endif +#if defined(DEBUG_MULDIV) + printf("muls64: 0x%016llx * 0x%016llx = 0x%016llx%016llx\n", + a, b, *phigh, *plow); #endif } Index: qemu-cvs/target-ppc/op_helper.c =================================================================== --- qemu-cvs.orig/target-ppc/op_helper.c +++ qemu-cvs/target-ppc/op_helper.c @@ -217,75 +217,14 @@ /*****************************************************************************/ /* Fixed point operations helpers */ #if defined(TARGET_PPC64) -static void add128 (uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b) -{ 
- *plow += a; - /* carry test */ - if (*plow < a) - (*phigh)++; - *phigh += b; -} - -static void neg128 (uint64_t *plow, uint64_t *phigh) -{ - *plow = ~*plow; - *phigh = ~*phigh; - add128(plow, phigh, 1, 0); -} - -static void mul64 (uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b) -{ - uint32_t a0, a1, b0, b1; - uint64_t v; - - a0 = a; - a1 = a >> 32; - - b0 = b; - b1 = b >> 32; - - v = (uint64_t)a0 * (uint64_t)b0; - *plow = v; - *phigh = 0; - - v = (uint64_t)a0 * (uint64_t)b1; - add128(plow, phigh, v << 32, v >> 32); - - v = (uint64_t)a1 * (uint64_t)b0; - add128(plow, phigh, v << 32, v >> 32); - - v = (uint64_t)a1 * (uint64_t)b1; - *phigh += v; -#if defined(DEBUG_MULDIV) - printf("mul: 0x%016llx * 0x%016llx = 0x%016llx%016llx\n", - a, b, *phigh, *plow); -#endif -} - void do_mul64 (uint64_t *plow, uint64_t *phigh) { - mul64(plow, phigh, T0, T1); -} - -static void imul64 (uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b) -{ - int sa, sb; - - sa = (a < 0); - if (sa) - a = -a; - sb = (b < 0); - if (sb) - b = -b; - mul64(plow, phigh, a, b); - if (sa ^ sb) { - neg128(plow, phigh); - } + mulu64(plow, phigh, T0, T1); } void do_imul64 (uint64_t *plow, uint64_t *phigh) { - imul64(plow, phigh, T0, T1); + muls64(plow, phigh, T0, T1); } #endif Index: qemu-cvs/exec-all.h =================================================================== --- qemu-cvs.orig/exec-all.h +++ qemu-cvs/exec-all.h @@ -91,8 +91,8 @@ extern FILE *logfile; extern int loglevel; -void muls64(int64_t *phigh, int64_t *plow, int64_t a, int64_t b); -void mulu64(uint64_t *phigh, uint64_t *plow, uint64_t a, uint64_t b); +void muls64(int64_t *plow, int64_t *phigh, int64_t a, int64_t b); +void mulu64(uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b); int gen_intermediate_code(CPUState *env, struct TranslationBlock *tb); int gen_intermediate_code_pc(CPUState *env, struct TranslationBlock *tb); Index: qemu-cvs/target-i386/helper.c =================================================================== 
--- qemu-cvs.orig/target-i386/helper.c +++ qemu-cvs/target-i386/helper.c @@ -3735,7 +3735,7 @@ { uint64_t r0, r1; - muls64(&r1, &r0, EAX, T0); + muls64(&r0, &r1, EAX, T0); EAX = r0; EDX = r1; CC_DST = r0; @@ -3746,7 +3746,7 @@ { uint64_t r0, r1; - muls64(&r1, &r0, T0, T1); + muls64(&r0, &r1, T0, T1); T0 = r0; CC_DST = r0; CC_SRC = ((int64_t)r1 != ((int64_t)r0 >> 63)); Index: qemu-cvs/target-mips/op.c =================================================================== --- qemu-cvs.orig/target-mips/op.c +++ qemu-cvs/target-mips/op.c @@ -876,13 +876,13 @@ #if defined(TARGET_MIPSN32) || defined(TARGET_MIPS64) void op_dmult (void) { - CALL_FROM_TB4(muls64, &(env->HI[0][env->current_tc]), &(env->LO[0][env->current_tc]), T0, T1); + CALL_FROM_TB4(muls64, &(env->LO[0][env->current_tc]), &(env->HI[0][env->current_tc]), T0, T1); RETURN(); } void op_dmultu (void) { - CALL_FROM_TB4(mulu64, &(env->HI[0][env->current_tc]), &(env->LO[0][env->current_tc]), T0, T1); + CALL_FROM_TB4(mulu64, &(env->LO[0][env->current_tc]), &(env->HI[0][env->current_tc]), T0, T1); RETURN(); } #endif