Re: [PATCH 6/6] target/mips: Convert Loongson [D]MULT[U].G opcodes to decodetree
On 31/8/23 21:12, Philippe Mathieu-Daudé wrote: On 21/1/21 21:06, Richard Henderson wrote: On 1/12/21 11:55 AM, Philippe Mathieu-Daudé wrote: Convert the following opcodes to decodetree: - MULT.G - multiply 32-bit signed integers - MULTU.G - multiply 32-bit unsigned integers - DMULT.G - multiply 64-bit signed integers - DMULTU.G - multiply 64-bit unsigned integers Now that all opcodes from the extension have been converted, we can remove completely gen_loongson_integer() and its 2 calls in decode_opc_special2_legacy() and decode_opc_special3_legacy(). Signed-off-by: Philippe Mathieu-Daudé --- target/mips/godson2.decode | 5 ++ target/mips/loong-ext.decode | 5 ++ target/mips/loong_translate.c | 58 ++ target/mips/translate.c | 92 +-- 4 files changed, 70 insertions(+), 90 deletions(-) +static bool gen_lext_MULT_G(DisasContext *s, int rd, int rs, int rt, + bool is_double, bool is_unsigned) +{ + TCGv t0, t1; + + if (is_double) { + if (TARGET_LONG_BITS != 64) { + return false; + } + check_mips_64(s); + } + + if (rd == 0) { + /* Treat as NOP. */ + return true; + } + + t0 = tcg_temp_new(); + t1 = tcg_temp_new(); + + gen_load_gpr(t0, rs); + gen_load_gpr(t1, rt); + + if (is_unsigned && !is_double) { + tcg_gen_ext32u_tl(t0, t0); + tcg_gen_ext32u_tl(t1, t1); + } While this is a faithful conversion of the existing code, these extensions make no difference to the result. They are redundant with + tcg_gen_mul_tl(cpu_gpr[rd], t0, t1); + if (!is_double) { + tcg_gen_ext32s_tl(cpu_gpr[rd], cpu_gpr[rd]); this one, which discards any bit that might have been set by the input bits that are cleared. I see. There is no actual difference between MULT.G and MULTU.G, or DMULT.G and DMULTU.G, because they don't record the most significant bits of the infinite result in any way. Right. 
+static bool trans_MULT_G(DisasContext *s, arg_muldiv *a) +{ + return gen_lext_MULT_G(s, a->rt, a->rs, a->rd, false, false); +} + +static bool trans_MULTU_G(DisasContext *s, arg_muldiv *a) +{ + return gen_lext_MULT_G(s, a->rt, a->rs, a->rd, false, true); +} + +static bool trans_DMULT_G(DisasContext *s, arg_muldiv *a) +{ + return gen_lext_MULT_G(s, a->rt, a->rs, a->rd, true, false); +} + +static bool trans_DMULTU_G(DisasContext *s, arg_muldiv *a) +{ + return gen_lext_MULT_G(s, a->rt, a->rs, a->rd, true, true); +} So... if you want to clean this up afterward, or before is up to you. "before" ended up being way simpler :>
Re: [PATCH 6/6] target/mips: Convert Loongson [D]MULT[U].G opcodes to decodetree
On 21/1/21 21:06, Richard Henderson wrote: On 1/12/21 11:55 AM, Philippe Mathieu-Daudé wrote: Convert the following opcodes to decodetree: - MULT.G - multiply 32-bit signed integers - MULTU.G - multiply 32-bit unsigned integers - DMULT.G - multiply 64-bit signed integers - DMULTU.G - multiply 64-bit unsigned integers Now that all opcodes from the extension have been converted, we can remove completely gen_loongson_integer() and its 2 calls in decode_opc_special2_legacy() and decode_opc_special3_legacy(). Signed-off-by: Philippe Mathieu-Daudé --- target/mips/godson2.decode| 5 ++ target/mips/loong-ext.decode | 5 ++ target/mips/loong_translate.c | 58 ++ target/mips/translate.c | 92 +-- 4 files changed, 70 insertions(+), 90 deletions(-) diff --git a/target/mips/godson2.decode b/target/mips/godson2.decode index 805452fa975..cf12d9072ec 100644 --- a/target/mips/godson2.decode +++ b/target/mips/godson2.decode @@ -13,6 +13,11 @@ @rs_rt_rd .. rs:5 rt:5 rd:5 . .. &muldiv +MULT.G 01 . . . 0 011000 @rs_rt_rd +MULTU.G 01 . . . 0 011001 @rs_rt_rd +DMULT.G 01 . . . 0 011100 @rs_rt_rd +DMULTU.G01 . . . 0 011101 @rs_rt_rd + DIV.G 01 . . . 0 011010 @rs_rt_rd DIVU.G 01 . . . 0 011011 @rs_rt_rd DDIV.G 01 . . . 0 00 @rs_rt_rd diff --git a/target/mips/loong-ext.decode b/target/mips/loong-ext.decode index b0715894ee1..2281afaad95 100644 --- a/target/mips/loong-ext.decode +++ b/target/mips/loong-ext.decode @@ -14,6 +14,11 @@ @rs_rt_rd .. rs:5 rt:5 rd:5 . .. &muldiv +MULT.G 011100 . . . 0 01 @rs_rt_rd +DMULT.G 011100 . . . 0 010001 @rs_rt_rd +MULTU.G 011100 . . . 0 010010 @rs_rt_rd +DMULTU.G011100 . . . 0 010011 @rs_rt_rd + DIV.G 011100 . . . 0 010100 @rs_rt_rd DDIV.G 011100 . . . 0 010101 @rs_rt_rd DIVU.G 011100 . . . 
0 010110 @rs_rt_rd diff --git a/target/mips/loong_translate.c b/target/mips/loong_translate.c index 50609ce4178..2af94535921 100644 --- a/target/mips/loong_translate.c +++ b/target/mips/loong_translate.c @@ -263,6 +263,64 @@ static bool trans_DMODU_G(DisasContext *s, arg_muldiv *a) return gen_lext_MODU_G(s, a->rt, a->rs, a->rd, true); } +static bool gen_lext_MULT_G(DisasContext *s, int rd, int rs, int rt, +bool is_double, bool is_unsigned) +{ +TCGv t0, t1; + +if (is_double) { +if (TARGET_LONG_BITS != 64) { +return false; +} +check_mips_64(s); +} + +if (rd == 0) { +/* Treat as NOP. */ +return true; +} + +t0 = tcg_temp_new(); +t1 = tcg_temp_new(); + +gen_load_gpr(t0, rs); +gen_load_gpr(t1, rt); + +if (is_unsigned && !is_double) { +tcg_gen_ext32u_tl(t0, t0); +tcg_gen_ext32u_tl(t1, t1); +} While this is a faithful conversion of the existing code, these extensions make no difference to the result. They are redundant with +tcg_gen_mul_tl(cpu_gpr[rd], t0, t1); +if (!is_double) { +tcg_gen_ext32s_tl(cpu_gpr[rd], cpu_gpr[rd]); this one, which discards any bit that might have been set by the input bits that are cleared. I see. There is no actual difference between MULT.G and MULTU.G, or DMULT.G and DMULTU.G, because they don't record the most significant bits of the infinite result in any way. Right. +static bool trans_MULT_G(DisasContext *s, arg_muldiv *a) +{ +return gen_lext_MULT_G(s, a->rt, a->rs, a->rd, false, false); +} + +static bool trans_MULTU_G(DisasContext *s, arg_muldiv *a) +{ +return gen_lext_MULT_G(s, a->rt, a->rs, a->rd, false, true); +} + +static bool trans_DMULT_G(DisasContext *s, arg_muldiv *a) +{ +return gen_lext_MULT_G(s, a->rt, a->rs, a->rd, true, false); +} + +static bool trans_DMULTU_G(DisasContext *s, arg_muldiv *a) +{ +return gen_lext_MULT_G(s, a->rt, a->rs, a->rd, true, true); +} So... if you want to clean this up afterward, or before is up to you. 
IIUC you are suggesting this simplification: -- >8 -- diff --git a/target/mips/tcg/godson2.decode b/target/mips/tcg/godson2.decode index 4fb8fdba9c..86015ac8e5 100644 --- a/target/mips/tcg/godson2.decode +++ b/target/mips/tcg/godson2.decode @@ -15,6 +15,4 @@ -MULT_G 01 . . . 0 011000 @rs_rt_rd -MULTU_G 01 . . . 0 011001 @rs_rt_rd -DMULT_G 01 . . . 0 011100 @rs_rt_rd -DMULTU_G 01 . . . 0 011101 @rs_rt_rd +MULTu_G 01 . . . 0 01100- @rs_rt_rd +DMULTu_G
Re: [PATCH 6/6] target/mips: Convert Loongson [D]MULT[U].G opcodes to decodetree
On 1/12/21 11:55 AM, Philippe Mathieu-Daudé wrote: > Convert the following opcodes to decodetree: > > - MULT.G - multiply 32-bit signed integers > - MULTU.G - multiply 32-bit unsigned integers > - DMULT.G - multiply 64-bit signed integers > - DMULTU.G - multiply 64-bit unsigned integers > > Now that all opcodes from the extension have been converted, we > can remove completely gen_loongson_integer() and its 2 calls in > decode_opc_special2_legacy() and decode_opc_special3_legacy(). > > Signed-off-by: Philippe Mathieu-Daudé > --- > target/mips/godson2.decode| 5 ++ > target/mips/loong-ext.decode | 5 ++ > target/mips/loong_translate.c | 58 ++ > target/mips/translate.c | 92 +-- > 4 files changed, 70 insertions(+), 90 deletions(-) > > diff --git a/target/mips/godson2.decode b/target/mips/godson2.decode > index 805452fa975..cf12d9072ec 100644 > --- a/target/mips/godson2.decode > +++ b/target/mips/godson2.decode > @@ -13,6 +13,11 @@ > > @rs_rt_rd .. rs:5 rt:5 rd:5 . .. &muldiv > > +MULT.G 01 . . . 0 011000 @rs_rt_rd > +MULTU.G 01 . . . 0 011001 @rs_rt_rd > +DMULT.G 01 . . . 0 011100 @rs_rt_rd > +DMULTU.G01 . . . 0 011101 @rs_rt_rd > + > DIV.G 01 . . . 0 011010 @rs_rt_rd > DIVU.G 01 . . . 0 011011 @rs_rt_rd > DDIV.G 01 . . . 0 00 @rs_rt_rd > diff --git a/target/mips/loong-ext.decode b/target/mips/loong-ext.decode > index b0715894ee1..2281afaad95 100644 > --- a/target/mips/loong-ext.decode > +++ b/target/mips/loong-ext.decode > @@ -14,6 +14,11 @@ > > @rs_rt_rd .. rs:5 rt:5 rd:5 . .. &muldiv > > +MULT.G 011100 . . . 0 01 @rs_rt_rd > +DMULT.G 011100 . . . 0 010001 @rs_rt_rd > +MULTU.G 011100 . . . 0 010010 @rs_rt_rd > +DMULTU.G011100 . . . 0 010011 @rs_rt_rd > + > DIV.G 011100 . . . 0 010100 @rs_rt_rd > DDIV.G 011100 . . . 0 010101 @rs_rt_rd > DIVU.G 011100 . . . 
0 010110 @rs_rt_rd > diff --git a/target/mips/loong_translate.c b/target/mips/loong_translate.c > index 50609ce4178..2af94535921 100644 > --- a/target/mips/loong_translate.c > +++ b/target/mips/loong_translate.c > @@ -263,6 +263,64 @@ static bool trans_DMODU_G(DisasContext *s, arg_muldiv *a) > return gen_lext_MODU_G(s, a->rt, a->rs, a->rd, true); > } > > +static bool gen_lext_MULT_G(DisasContext *s, int rd, int rs, int rt, > +bool is_double, bool is_unsigned) > +{ > +TCGv t0, t1; > + > +if (is_double) { > +if (TARGET_LONG_BITS != 64) { > +return false; > +} > +check_mips_64(s); > +} > + > +if (rd == 0) { > +/* Treat as NOP. */ > +return true; > +} > + > +t0 = tcg_temp_new(); > +t1 = tcg_temp_new(); > + > +gen_load_gpr(t0, rs); > +gen_load_gpr(t1, rt); > + > +if (is_unsigned && !is_double) { > +tcg_gen_ext32u_tl(t0, t0); > +tcg_gen_ext32u_tl(t1, t1); > +} While this is a faithful conversion of the existing code, these extensions make no difference to the result. They are redundant with > +tcg_gen_mul_tl(cpu_gpr[rd], t0, t1); > +if (!is_double) { > +tcg_gen_ext32s_tl(cpu_gpr[rd], cpu_gpr[rd]); this one, which discards any bit that might have been set by the input bits that are cleared. There is no actual difference between MULT.G and MULTU.G, or DMULT.G and DMULTU.G, because they don't record the most significant bits of the infinite result in any way. > +static bool trans_MULT_G(DisasContext *s, arg_muldiv *a) > +{ > +return gen_lext_MULT_G(s, a->rt, a->rs, a->rd, false, false); > +} > + > +static bool trans_MULTU_G(DisasContext *s, arg_muldiv *a) > +{ > +return gen_lext_MULT_G(s, a->rt, a->rs, a->rd, false, true); > +} > + > +static bool trans_DMULT_G(DisasContext *s, arg_muldiv *a) > +{ > +return gen_lext_MULT_G(s, a->rt, a->rs, a->rd, true, false); > +} > + > +static bool trans_DMULTU_G(DisasContext *s, arg_muldiv *a) > +{ > +return gen_lext_MULT_G(s, a->rt, a->rs, a->rd, true, true); > +} So... if you want to clean this up afterward, or before is up to you. r~
[PATCH 6/6] target/mips: Convert Loongson [D]MULT[U].G opcodes to decodetree
Convert the following opcodes to decodetree: - MULT.G - multiply 32-bit signed integers - MULTU.G - multiply 32-bit unsigned integers - DMULT.G - multiply 64-bit signed integers - DMULTU.G - multiply 64-bit unsigned integers Now that all opcodes from the extension have been converted, we can remove completely gen_loongson_integer() and its 2 calls in decode_opc_special2_legacy() and decode_opc_special3_legacy(). Signed-off-by: Philippe Mathieu-Daudé --- target/mips/godson2.decode| 5 ++ target/mips/loong-ext.decode | 5 ++ target/mips/loong_translate.c | 58 ++ target/mips/translate.c | 92 +-- 4 files changed, 70 insertions(+), 90 deletions(-) diff --git a/target/mips/godson2.decode b/target/mips/godson2.decode index 805452fa975..cf12d9072ec 100644 --- a/target/mips/godson2.decode +++ b/target/mips/godson2.decode @@ -13,6 +13,11 @@ @rs_rt_rd .. rs:5 rt:5 rd:5 . .. &muldiv +MULT.G 01 . . . 0 011000 @rs_rt_rd +MULTU.G 01 . . . 0 011001 @rs_rt_rd +DMULT.G 01 . . . 0 011100 @rs_rt_rd +DMULTU.G01 . . . 0 011101 @rs_rt_rd + DIV.G 01 . . . 0 011010 @rs_rt_rd DIVU.G 01 . . . 0 011011 @rs_rt_rd DDIV.G 01 . . . 0 00 @rs_rt_rd diff --git a/target/mips/loong-ext.decode b/target/mips/loong-ext.decode index b0715894ee1..2281afaad95 100644 --- a/target/mips/loong-ext.decode +++ b/target/mips/loong-ext.decode @@ -14,6 +14,11 @@ @rs_rt_rd .. rs:5 rt:5 rd:5 . .. &muldiv +MULT.G 011100 . . . 0 01 @rs_rt_rd +DMULT.G 011100 . . . 0 010001 @rs_rt_rd +MULTU.G 011100 . . . 0 010010 @rs_rt_rd +DMULTU.G011100 . . . 0 010011 @rs_rt_rd + DIV.G 011100 . . . 0 010100 @rs_rt_rd DDIV.G 011100 . . . 0 010101 @rs_rt_rd DIVU.G 011100 . . . 
0 010110 @rs_rt_rd diff --git a/target/mips/loong_translate.c b/target/mips/loong_translate.c index 50609ce4178..2af94535921 100644 --- a/target/mips/loong_translate.c +++ b/target/mips/loong_translate.c @@ -263,6 +263,64 @@ static bool trans_DMODU_G(DisasContext *s, arg_muldiv *a) return gen_lext_MODU_G(s, a->rt, a->rs, a->rd, true); } +static bool gen_lext_MULT_G(DisasContext *s, int rd, int rs, int rt, +bool is_double, bool is_unsigned) +{ +TCGv t0, t1; + +if (is_double) { +if (TARGET_LONG_BITS != 64) { +return false; +} +check_mips_64(s); +} + +if (rd == 0) { +/* Treat as NOP. */ +return true; +} + +t0 = tcg_temp_new(); +t1 = tcg_temp_new(); + +gen_load_gpr(t0, rs); +gen_load_gpr(t1, rt); + +if (is_unsigned && !is_double) { +tcg_gen_ext32u_tl(t0, t0); +tcg_gen_ext32u_tl(t1, t1); +} +tcg_gen_mul_tl(cpu_gpr[rd], t0, t1); +if (!is_double) { +tcg_gen_ext32s_tl(cpu_gpr[rd], cpu_gpr[rd]); +} + +tcg_temp_free(t0); +tcg_temp_free(t1); + +return true; +} + +static bool trans_MULT_G(DisasContext *s, arg_muldiv *a) +{ +return gen_lext_MULT_G(s, a->rt, a->rs, a->rd, false, false); +} + +static bool trans_MULTU_G(DisasContext *s, arg_muldiv *a) +{ +return gen_lext_MULT_G(s, a->rt, a->rs, a->rd, false, true); +} + +static bool trans_DMULT_G(DisasContext *s, arg_muldiv *a) +{ +return gen_lext_MULT_G(s, a->rt, a->rs, a->rd, true, false); +} + +static bool trans_DMULTU_G(DisasContext *s, arg_muldiv *a) +{ +return gen_lext_MULT_G(s, a->rt, a->rs, a->rd, true, true); +} + bool decode_loongson(DisasContext *ctx, uint32_t insn) { if ((ctx->insn_flags & INSN_LOONGSON2E) diff --git a/target/mips/translate.c b/target/mips/translate.c index 144e51b063a..5b5fe31c534 100644 --- a/target/mips/translate.c +++ b/target/mips/translate.c @@ -337,11 +337,6 @@ enum { OPC_MUL = 0x02 | OPC_SPECIAL2, OPC_MSUB = 0x04 | OPC_SPECIAL2, OPC_MSUBU= 0x05 | OPC_SPECIAL2, -/* Loongson 2F */ -OPC_MULT_G_2F = 0x10 | OPC_SPECIAL2, -OPC_DMULT_G_2F = 0x11 | OPC_SPECIAL2, -OPC_MULTU_G_2F = 0x12 | OPC_SPECIAL2, 
-OPC_DMULTU_G_2F = 0x13 | OPC_SPECIAL2, /* Misc */ OPC_CLZ = 0x20 | OPC_SPECIAL2, OPC_CLO = 0x21 | OPC_SPECIAL2, @@ -370,12 +365,6 @@ enum { OPC_RDHWR= 0x3B | OPC_SPECIAL3, OPC_GINV = 0x3D | OPC_SPECIAL3, -/* Loongson 2E */ -OPC_MULT_G_2E = 0x18 | OPC_SPECIAL3, -OPC_MULTU_G_2E = 0x19 | OPC_SPECIAL3, -OPC_DMULT_G_2E = 0x1C | OPC_SPECIAL3, -OPC_DMULTU_G_2E = 0x1D | OPC_SPECIAL3, - /* MIPS DSP Load */ OPC_LX_DSP = 0x0A | OPC_SPECIAL3, /* MIPS DSP Arithmetic */ @@ -4962,65 +49