x86 cannot provide an optimized generic deposit implementation. However, for a few special cases, namely writing bits 0..7, 8..15, and 0..15, a version using only a single instruction is feasible. Introducing such helpers improves the emulation of 16-bit x86 code on x86, and also helps the rarer cases where 32-bit or 64-bit code accesses bytes or words.
Signed-off-by: Jan Kiszka <jan.kis...@siemens.com> --- tcg/i386/tcg-target.c | 25 +++++++++++++++++++++++++ tcg/i386/tcg-target.h | 6 ++++++ tcg/tcg-op.h | 12 ++++++++++++ tcg/tcg-opc.h | 6 ++++++ tcg/tcg.h | 3 +++ 5 files changed, 52 insertions(+), 0 deletions(-) diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c index 281f87d..3f9d0ad 100644 --- a/tcg/i386/tcg-target.c +++ b/tcg/i386/tcg-target.c @@ -1747,6 +1747,19 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, break; #endif + OP_32_64(deposit8l): + tcg_out_modrm(s, OPC_MOVB_EvGv | P_REXB_R | P_REXB_RM, + args[2], args[0]); + break; + + OP_32_64(deposit8h): + tcg_out_modrm(s, OPC_MOVB_EvGv, args[2], args[0] + 4); + break; + + OP_32_64(deposit16l): + tcg_out_modrm(s, OPC_MOVL_EvGv | P_DATA16, args[2], args[0]); + break; + default: tcg_abort(); } @@ -1802,6 +1815,14 @@ static const TCGTargetOpDef x86_op_defs[] = { { INDEX_op_setcond_i32, { "q", "r", "ri" } }, +#if TCG_TARGET_REG_BITS == 64 + { INDEX_op_deposit8l_i32, { "r", "0", "r" } }, +#else + { INDEX_op_deposit8l_i32, { "abcd", "0", "abcd" } }, +#endif + { INDEX_op_deposit8h_i32, { "abcd", "0", "abcd" } }, + { INDEX_op_deposit16l_i32, { "r", "0", "r" } }, + #if TCG_TARGET_REG_BITS == 32 { INDEX_op_mulu2_i32, { "a", "d", "a", "r" } }, { INDEX_op_add2_i32, { "r", "r", "0", "1", "ri", "ri" } }, @@ -1853,6 +1874,10 @@ static const TCGTargetOpDef x86_op_defs[] = { { INDEX_op_ext8u_i64, { "r", "r" } }, { INDEX_op_ext16u_i64, { "r", "r" } }, { INDEX_op_ext32u_i64, { "r", "r" } }, + + { INDEX_op_deposit8l_i64, { "r", "0", "r" } }, + { INDEX_op_deposit8h_i64, { "abcd", "0", "abcd" } }, + { INDEX_op_deposit16l_i64, { "r", "0", "r" } }, #endif #if TCG_TARGET_REG_BITS == 64 diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h index 5088e47..54b2f60 100644 --- a/tcg/i386/tcg-target.h +++ b/tcg/i386/tcg-target.h @@ -91,6 +91,9 @@ enum { #define TCG_TARGET_HAS_nand_i32 0 #define TCG_TARGET_HAS_nor_i32 0 #define TCG_TARGET_HAS_deposit_i32 0 
+#define TCG_TARGET_HAS_deposit8l_i32 1 +#define TCG_TARGET_HAS_deposit8h_i32 1 +#define TCG_TARGET_HAS_deposit16l_i32 1 #if TCG_TARGET_REG_BITS == 64 #define TCG_TARGET_HAS_div2_i64 1 @@ -112,6 +115,9 @@ enum { #define TCG_TARGET_HAS_nand_i64 0 #define TCG_TARGET_HAS_nor_i64 0 #define TCG_TARGET_HAS_deposit_i64 0 +#define TCG_TARGET_HAS_deposit8l_i64 1 +#define TCG_TARGET_HAS_deposit8h_i64 1 +#define TCG_TARGET_HAS_deposit16l_i64 1 #endif #define TCG_TARGET_HAS_GUEST_BASE diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h index 404b637..80ffccb 100644 --- a/tcg/tcg-op.h +++ b/tcg/tcg-op.h @@ -2047,6 +2047,12 @@ static inline void tcg_gen_deposit_i32(TCGv_i32 ret, TCGv_i32 arg1, { if (TCG_TARGET_HAS_deposit_i32) { tcg_gen_op5ii_i32(INDEX_op_deposit_i32, ret, arg1, arg2, ofs, len); + } else if (ofs == 0 && len == 8 && TCG_TARGET_HAS_deposit8l_i32) { + tcg_gen_op3_i32(INDEX_op_deposit8l_i32, ret, arg1, arg2); + } else if (ofs == 8 && len == 8 && TCG_TARGET_HAS_deposit8h_i32) { + tcg_gen_op3_i32(INDEX_op_deposit8h_i32, ret, arg1, arg2); + } else if (ofs == 0 && len == 16 && TCG_TARGET_HAS_deposit16l_i32) { + tcg_gen_op3_i32(INDEX_op_deposit16l_i32, ret, arg1, arg2); } else { uint32_t mask = (1u << len) - 1; TCGv_i32 t1 = tcg_temp_new_i32 (); @@ -2066,6 +2072,12 @@ static inline void tcg_gen_deposit_i64(TCGv_i64 ret, TCGv_i64 arg1, { if (TCG_TARGET_HAS_deposit_i64) { tcg_gen_op5ii_i64(INDEX_op_deposit_i64, ret, arg1, arg2, ofs, len); + } else if (ofs == 0 && len == 8 && TCG_TARGET_HAS_deposit8l_i64) { + tcg_gen_op3_i64(INDEX_op_deposit8l_i64, ret, arg1, arg2); + } else if (ofs == 8 && len == 8 && TCG_TARGET_HAS_deposit8h_i64) { + tcg_gen_op3_i64(INDEX_op_deposit8h_i64, ret, arg1, arg2); + } else if (ofs == 0 && len == 16 && TCG_TARGET_HAS_deposit16l_i64) { + tcg_gen_op3_i64(INDEX_op_deposit16l_i64, ret, arg1, arg2); } else { uint64_t mask = (1ull << len) - 1; TCGv_i64 t1 = tcg_temp_new_i64 (); diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h index 8e06d03..0871d15 100644 --- 
a/tcg/tcg-opc.h +++ b/tcg/tcg-opc.h @@ -80,6 +80,9 @@ DEF(sar_i32, 1, 2, 0, 0) DEF(rotl_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_rot_i32)) DEF(rotr_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_rot_i32)) DEF(deposit_i32, 1, 2, 2, IMPL(TCG_TARGET_HAS_deposit_i32)) +DEF(deposit8l_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_deposit8l_i32)) +DEF(deposit8h_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_deposit8h_i32)) +DEF(deposit16l_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_deposit16l_i32)) DEF(brcond_i32, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS) @@ -139,6 +142,9 @@ DEF(sar_i64, 1, 2, 0, IMPL64) DEF(rotl_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_rot_i64)) DEF(rotr_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_rot_i64)) DEF(deposit_i64, 1, 2, 2, IMPL64 | IMPL(TCG_TARGET_HAS_deposit_i64)) +DEF(deposit8l_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_deposit8l_i64)) +DEF(deposit8h_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_deposit8h_i64)) +DEF(deposit16l_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_deposit16l_i64)) DEF(brcond_i64, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS | IMPL64) DEF(ext8s_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext8s_i64)) diff --git a/tcg/tcg.h b/tcg/tcg.h index dc5e9c9..9a95d2b 100644 --- a/tcg/tcg.h +++ b/tcg/tcg.h @@ -69,6 +69,9 @@ typedef uint64_t TCGRegSet; #define TCG_TARGET_HAS_nand_i64 0 #define TCG_TARGET_HAS_nor_i64 0 #define TCG_TARGET_HAS_deposit_i64 0 +#define TCG_TARGET_HAS_deposit8l_i64 0 +#define TCG_TARGET_HAS_deposit8h_i64 0 +#define TCG_TARGET_HAS_deposit16l_i64 0 #endif /* Only one of DIV or DIV2 should be defined. */ -- 1.7.3.4