It is easy enough to use mov instead of or-with-zero and relying on the optimizer to fold away the or.
Signed-off-by: Richard Henderson <richard.hender...@linaro.org> --- target/arm/translate-a64.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index 1726ec622d..16aa71451c 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -8459,7 +8459,7 @@ static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q, tcg_rn = tcg_temp_new_i64(); tcg_rd = tcg_temp_new_i64(); tcg_rd_narrowed = tcg_temp_new_i32(); - tcg_final = tcg_const_i64(0); + tcg_final = tcg_temp_new_i64(); if (round) { tcg_round = tcg_constant_i64(1ULL << (shift - 1)); @@ -8473,7 +8473,11 @@ static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q, false, is_u_shift, size+1, shift); narrowfn(tcg_rd_narrowed, cpu_env, tcg_rd); tcg_gen_extu_i32_i64(tcg_rd, tcg_rd_narrowed); - tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize); + if (i == 0) { + tcg_gen_mov_i64(tcg_final, tcg_rd); + } else { + tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize); + } } if (!is_q) { -- 2.34.1