Richard Henderson <r...@twiddle.net> writes:

> On 09/27/2016 10:31 PM, Nikunj A Dadhania wrote:
>> +DEF_HELPER_1(bswap16x4, i64, i64)
>
> DEF_HELPER_FLAGS_1(bswap16x4, TCG_CALL_NO_RWG_SE, i64, i64)
>
>> +    uint64_t m = 0x00ff00ff00ff00ffull;
>> +    return ((x & m) << 8) | ((x >> 8) & m);
>
> ... although I suppose this is only 5 instructions, and could reasonably be
> done inline too.  Especially if you shared the one 64-bit constant across the
> two bswaps.

Something like this:

static void gen_bswap16x4(TCGv_i64 val)
{
    TCGv_i64 mask = tcg_const_i64(0x00FF00FF00FF00FF);
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t1 = tcg_temp_new_i64();

    /* val = ((val & mask) << 8) | ((val >> 8) & mask) */
    tcg_gen_and_i64(t0, val, mask);
    tcg_gen_shli_i64(t0, t0, 8);
    tcg_gen_shri_i64(t1, val, 8);
    tcg_gen_and_i64(t1, t1, mask);
    tcg_gen_or_i64(val, t0, t1);

    tcg_temp_free_i64(t0);
    tcg_temp_free_i64(t1);
    tcg_temp_free_i64(mask);
}

>
>
>> +    if (ctx->le_mode) {
>> +        tcg_gen_qemu_ld_i64(xth, EA, ctx->mem_idx, MO_BEQ);
>> +        gen_helper_bswap16x4(xth, xth);
>> +        tcg_gen_addi_tl(EA, EA, 8);
>> +        tcg_gen_qemu_ld_i64(xtl, EA, ctx->mem_idx, MO_BEQ);
>> +        gen_helper_bswap16x4(xtl, xtl);
>> +    } else {
>> +        tcg_gen_qemu_ld_i64(xth, EA, ctx->mem_idx, MO_BEQ);
>> +        tcg_gen_addi_tl(EA, EA, 8);
>> +        tcg_gen_qemu_ld_i64(xtl, EA, ctx->mem_idx, MO_BEQ);
>> +    }
>
> Better to not duplicate this.
>
>     tcg_gen_qemu_ld_i64(xth, EA, ctx->mem_idx, MO_BEQ);
>     tcg_gen_addi_tl(EA, EA, 8);
>     tcg_gen_qemu_ld_i64(xtl, EA, ctx->mem_idx, MO_BEQ);
>     if (ctx->le_mode) {
>         gen_helper_bswap16x4(xth, xth);
>         gen_helper_bswap16x4(xtl, xtl);
>     }

Sure, much better, thanks.

Regards
Nikunj