On Sun, 2007-11-04 at 02:24 +0000, Jocelyn Mayer wrote:
> CVSROOT:      /sources/qemu
> Module name:  qemu
> Changes by:   Jocelyn Mayer <j_mayer> 07/11/04 02:24:58
> 
> Modified files:
>       .              : exec-all.h host-utils.c host-utils.h 
>       target-alpha   : op.c 
>       target-i386    : helper.c 
> 
> Log message:
>       For consistency, move muls64 / mulu64 prototypes to host-utils.h
>       Make x86_64 optimized versions inline.

Following this patch, I also wrote optimized versions of muls64 / mulu64 /
clz64 for PowerPC 64 hosts and of clz32 for PowerPC 32 hosts.
They seem like they could be useful...

-- 
J. Mayer <[EMAIL PROTECTED]>
Never organized
Index: host-utils.h
===================================================================
RCS file: /sources/qemu/qemu/host-utils.h,v
retrieving revision 1.3
diff -u -d -d -p -r1.3 host-utils.h
--- host-utils.h	4 Nov 2007 02:24:57 -0000	1.3
+++ host-utils.h	4 Nov 2007 02:26:34 -0000
@@ -40,6 +40,25 @@ static always_inline void muls64 (uint64
              : "=d" (*phigh), "=a" (*plow)
              : "a" (a), "0" (b));
 }
+#elif defined(__powerpc64__)
+#define __HAVE_FAST_MULU64__
+static always_inline void mulu64 (uint64_t *plow, uint64_t *phigh,
+                                  uint64_t a, uint64_t b)
+{
+    __asm__ ("mulld %1, %2, %3  \n\t"      /* *plow  = low 64 bits of a * b  */
+             "mulhdu %0, %2, %3 \n\t"      /* *phigh = high 64 bits (unsigned) */
+             : "=r"(*phigh), "=&r"(*plow)  /* %1 is written before %2/%3 are last read: earlyclobber */
+             : "r"(a), "r"(b));
+}
+#define __HAVE_FAST_MULS64__
+static always_inline void muls64 (uint64_t *plow, uint64_t *phigh,
+                                  uint64_t a, uint64_t b)
+{
+    __asm__ ("mulld %1, %2, %3 \n\t"       /* *plow  = low 64 bits of a * b  */
+             "mulhd %0, %2, %3 \n\t"       /* *phigh = high 64 bits (signed) */
+             : "=r"(*phigh), "=&r"(*plow)  /* %1 is written before %2/%3 are last read: earlyclobber */
+             : "r"(a), "r"(b));
+}
 #else
 void muls64(int64_t *phigh, int64_t *plow, int64_t a, int64_t b);
 void mulu64(uint64_t *phigh, uint64_t *plow, uint64_t a, uint64_t b);
@@ -50,7 +69,19 @@ void mulu64(uint64_t *phigh, uint64_t *p
    cope with that. */
 
 /* Binary search for leading zeros.  */
+#if defined(__powerpc__)
+#define __HAVE_FAST_CLZ32__
+static always_inline int clz32 (uint32_t val) /* count leading zeros of a 32-bit word */
+{
+    int cnt;
+
+    __asm__ ("cntlzw %0, %1 \n\t" /* PowerPC count-leading-zeros word; yields 32 for val == 0 */
+             : "=r"(cnt)
+             : "r"(val));

+    return cnt;
+}
+#else
 static always_inline int clz32(uint32_t val)
 {
     int cnt = 0;
@@ -80,12 +111,26 @@ static always_inline int clz32(uint32_t 
     }
     return cnt;
 }
+#endif
 
 static always_inline int clo32(uint32_t val)
 {
     return clz32(~val);
 }
 
+#if defined(__powerpc64__)
+#define __HAVE_FAST_CLZ64__
+static always_inline int clz64 (uint64_t val) /* was uint32_t: truncated the operand before cntlzd */
+{
+    int cnt;
+
+    __asm__ ("cntlzd %0, %1 \n\t" /* count-leading-zeros doubleword; yields 64 for val == 0 */
+             : "=r"(cnt)
+             : "r"(val));
+
+    return cnt;
+}
+#else
 static always_inline int clz64(uint64_t val)
 {
     int cnt = 0;
@@ -98,6 +143,7 @@ static always_inline int clz64(uint64_t 
 
     return cnt + clz32(val);
 }
+#endif
 
 static always_inline int clo64(uint64_t val)
 {

Reply via email to