In both cases, we can sink the write-back and perform the accumulate into the normal destination temps.
Signed-off-by: Richard Henderson <richard.hender...@linaro.org> --- target/arm/translate-neon.c.inc | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/target/arm/translate-neon.c.inc b/target/arm/translate-neon.c.inc index 7a0269970c..7cd41c79ec 100644 --- a/target/arm/translate-neon.c.inc +++ b/target/arm/translate-neon.c.inc @@ -2037,17 +2037,14 @@ static bool do_long_3d(DisasContext *s, arg_3diff *a, if (accfn) { tmp = tcg_temp_new_i64(); read_neon_element64(tmp, a->vd, 0, MO_64); - accfn(tmp, tmp, rd0); - write_neon_element64(tmp, a->vd, 0, MO_64); + accfn(rd0, tmp, rd0); read_neon_element64(tmp, a->vd, 1, MO_64); - accfn(tmp, tmp, rd1); - write_neon_element64(tmp, a->vd, 1, MO_64); + accfn(rd1, tmp, rd1); tcg_temp_free_i64(tmp); - } else { - write_neon_element64(rd0, a->vd, 0, MO_64); - write_neon_element64(rd1, a->vd, 1, MO_64); } + write_neon_element64(rd0, a->vd, 0, MO_64); + write_neon_element64(rd1, a->vd, 1, MO_64); tcg_temp_free_i64(rd0); tcg_temp_free_i64(rd1); @@ -2670,16 +2667,14 @@ static bool do_2scalar_long(DisasContext *s, arg_2scalar *a, if (accfn) { TCGv_i64 t64 = tcg_temp_new_i64(); read_neon_element64(t64, a->vd, 0, MO_64); - accfn(t64, t64, rn0_64); - write_neon_element64(t64, a->vd, 0, MO_64); + accfn(rn0_64, t64, rn0_64); read_neon_element64(t64, a->vd, 1, MO_64); - accfn(t64, t64, rn1_64); - write_neon_element64(t64, a->vd, 1, MO_64); + accfn(rn1_64, t64, rn1_64); tcg_temp_free_i64(t64); - } else { - write_neon_element64(rn0_64, a->vd, 0, MO_64); - write_neon_element64(rn1_64, a->vd, 1, MO_64); } + + write_neon_element64(rn0_64, a->vd, 0, MO_64); + write_neon_element64(rn1_64, a->vd, 1, MO_64); tcg_temp_free_i64(rn0_64); tcg_temp_free_i64(rn1_64); return true; -- 2.25.1