On 2/5/24 18:55, Richard Henderson wrote:
The unit operation for fmul8x16 and friends is described in the
manual as "MS16b".  Split that out for clarity.  Improve rounding
with an unconditional addition of 0.5 as a fixed-point integer.

Signed-off-by: Richard Henderson <richard.hender...@linaro.org>
---
  target/sparc/vis_helper.c | 78 ++++++++++++---------------------------
  1 file changed, 24 insertions(+), 54 deletions(-)


@@ -150,23 +138,14 @@ uint64_t helper_fmul8x16a(uint32_t src1, int32_t src2)
  uint64_t helper_fmul8sux16(uint64_t src1, uint64_t src2)
  {
      VIS64 s, d;
-    uint32_t tmp;
s.ll = src1;
      d.ll = src2;
-#define PMUL(r) \
-    tmp = (int32_t)d.VIS_SW64(r) * ((int32_t)s.VIS_SW64(r) >> 8);       \
-    if ((tmp & 0xff) > 0x7f) {                                          \
-        tmp += 0x100;                                                   \
-    }                                                                   \
-    d.VIS_W64(r) = tmp >> 8;
-
-    PMUL(0);
-    PMUL(1);
-    PMUL(2);
-    PMUL(3);
-#undef PMUL
+    d.VIS_W64(0) = do_ms16b(s.VIS_SB64(1), d.VIS_SW64(0));

s.VIS_SB64(1) = upper bit, OK.

+    d.VIS_W64(1) = do_ms16b(s.VIS_SB64(3), d.VIS_SW64(1));
+    d.VIS_W64(2) = do_ms16b(s.VIS_SB64(5), d.VIS_SW64(2));
+    d.VIS_W64(3) = do_ms16b(s.VIS_SB64(7), d.VIS_SW64(3));
return d.ll;
  }
@@ -174,23 +153,14 @@ uint64_t helper_fmul8sux16(uint64_t src1, uint64_t src2)
  uint64_t helper_fmul8ulx16(uint64_t src1, uint64_t src2)
  {
      VIS64 s, d;
-    uint32_t tmp;
s.ll = src1;
      d.ll = src2;
-#define PMUL(r) \
-    tmp = (int32_t)d.VIS_SW64(r) * ((uint32_t)s.VIS_B64(r * 2));        \
-    if ((tmp & 0xff) > 0x7f) {                                          \
-        tmp += 0x100;                                                   \
-    }                                                                   \
-    d.VIS_W64(r) = tmp >> 8;
-
-    PMUL(0);
-    PMUL(1);
-    PMUL(2);
-    PMUL(3);
-#undef PMUL
+    d.VIS_W64(0) = do_ms16b(s.VIS_B64(0), d.VIS_SW64(0));

s.VIS_B64(0) for lower bit, OK.

+    d.VIS_W64(1) = do_ms16b(s.VIS_B64(2), d.VIS_SW64(1));
+    d.VIS_W64(2) = do_ms16b(s.VIS_B64(4), d.VIS_SW64(2));
+    d.VIS_W64(3) = do_ms16b(s.VIS_B64(6), d.VIS_SW64(3));
return d.ll;
  }

Maybe add a comment for high/low bits in fmul8sux16/fmul8ulx16,
as it was not obvious at first. Otherwise,

Reviewed-by: Philippe Mathieu-Daudé <phi...@linaro.org>


Reply via email to