On 2/5/24 18:55, Richard Henderson wrote:
The unit operation for fmul8x16 and friends is described in the
manual as "MS16b". Split that out for clarity. Improve rounding
with an unconditional addition of 0.5 as a fixed-point integer.
Signed-off-by: Richard Henderson <richard.hender...@linaro.org>
---
target/sparc/vis_helper.c | 78 ++++++++++++---------------------------
1 file changed, 24 insertions(+), 54 deletions(-)
@@ -150,23 +138,14 @@ uint64_t helper_fmul8x16a(uint32_t src1, int32_t src2)
uint64_t helper_fmul8sux16(uint64_t src1, uint64_t src2)
{
VIS64 s, d;
- uint32_t tmp;
s.ll = src1;
d.ll = src2;
-#define PMUL(r) \
- tmp = (int32_t)d.VIS_SW64(r) * ((int32_t)s.VIS_SW64(r) >> 8); \
- if ((tmp & 0xff) > 0x7f) { \
- tmp += 0x100; \
- } \
- d.VIS_W64(r) = tmp >> 8;
-
- PMUL(0);
- PMUL(1);
- PMUL(2);
- PMUL(3);
-#undef PMUL
+ d.VIS_W64(0) = do_ms16b(s.VIS_SB64(1), d.VIS_SW64(0));
s.VIS_SB64(1) = upper bit, OK.
+ d.VIS_W64(1) = do_ms16b(s.VIS_SB64(3), d.VIS_SW64(1));
+ d.VIS_W64(2) = do_ms16b(s.VIS_SB64(5), d.VIS_SW64(2));
+ d.VIS_W64(3) = do_ms16b(s.VIS_SB64(7), d.VIS_SW64(3));
return d.ll;
}
@@ -174,23 +153,14 @@ uint64_t helper_fmul8sux16(uint64_t src1, uint64_t src2)
uint64_t helper_fmul8ulx16(uint64_t src1, uint64_t src2)
{
VIS64 s, d;
- uint32_t tmp;
s.ll = src1;
d.ll = src2;
-#define PMUL(r) \
- tmp = (int32_t)d.VIS_SW64(r) * ((uint32_t)s.VIS_B64(r * 2)); \
- if ((tmp & 0xff) > 0x7f) { \
- tmp += 0x100; \
- } \
- d.VIS_W64(r) = tmp >> 8;
-
- PMUL(0);
- PMUL(1);
- PMUL(2);
- PMUL(3);
-#undef PMUL
+ d.VIS_W64(0) = do_ms16b(s.VIS_B64(0), d.VIS_SW64(0));
s.VIS_B64(0) for lower bit, OK.
+ d.VIS_W64(1) = do_ms16b(s.VIS_B64(2), d.VIS_SW64(1));
+ d.VIS_W64(2) = do_ms16b(s.VIS_B64(4), d.VIS_SW64(2));
+ d.VIS_W64(3) = do_ms16b(s.VIS_B64(6), d.VIS_SW64(3));
return d.ll;
}
Maybe add a comment for high/low bits in fmul8sux16/fmul8ulx16,
as it was not obvious at first. Otherwise,
Reviewed-by: Philippe Mathieu-Daudé <phi...@linaro.org>