Re: [PATCH 6/6] target/mips: Convert Loongson [D]MULT[U].G opcodes to decodetree

2023-08-31 Thread Philippe Mathieu-Daudé

On 31/8/23 21:12, Philippe Mathieu-Daudé wrote:

On 21/1/21 21:06, Richard Henderson wrote:

On 1/12/21 11:55 AM, Philippe Mathieu-Daudé wrote:

Convert the following opcodes to decodetree:

- MULT.G - multiply 32-bit signed integers
- MULTU.G - multiply 32-bit unsigned integers
- DMULT.G - multiply 64-bit signed integers
- DMULTU.G - multiply 64-bit unsigned integers

Now that all opcodes from the extension have been converted, we
can remove completely gen_loongson_integer() and its 2 calls in
decode_opc_special2_legacy() and decode_opc_special3_legacy().

Signed-off-by: Philippe Mathieu-Daudé 
---
  target/mips/godson2.decode    |  5 ++
  target/mips/loong-ext.decode  |  5 ++
  target/mips/loong_translate.c | 58 ++
  target/mips/translate.c   | 92 +--
  4 files changed, 70 insertions(+), 90 deletions(-)




+static bool gen_lext_MULT_G(DisasContext *s, int rd, int rs, int rt,
+    bool is_double, bool is_unsigned)
+{
+    TCGv t0, t1;
+
+    if (is_double) {
+    if (TARGET_LONG_BITS != 64) {
+    return false;
+    }
+    check_mips_64(s);
+    }
+
+    if (rd == 0) {
+    /* Treat as NOP. */
+    return true;
+    }
+
+    t0 = tcg_temp_new();
+    t1 = tcg_temp_new();
+
+    gen_load_gpr(t0, rs);
+    gen_load_gpr(t1, rt);
+
+    if (is_unsigned && !is_double) {
+    tcg_gen_ext32u_tl(t0, t0);
+    tcg_gen_ext32u_tl(t1, t1);
+    }


While this is a faithful conversion of the existing code, these extensions make
no difference to the result.  They are redundant with


+    tcg_gen_mul_tl(cpu_gpr[rd], t0, t1);
+    if (!is_double) {
+    tcg_gen_ext32s_tl(cpu_gpr[rd], cpu_gpr[rd]);


this one, which discards any bit that might have been set by the input bits
that are cleared.


I see.


There is no actual difference between MULT.G and MULTU.G, or DMULT.G and
DMULTU.G, because they don't record the most significant bits of the infinite
result in any way.


Right.


+static bool trans_MULT_G(DisasContext *s, arg_muldiv *a)
+{
+    return gen_lext_MULT_G(s, a->rt, a->rs, a->rd, false, false);
+}
+
+static bool trans_MULTU_G(DisasContext *s, arg_muldiv *a)
+{
+    return gen_lext_MULT_G(s, a->rt, a->rs, a->rd, false, true);
+}
+
+static bool trans_DMULT_G(DisasContext *s, arg_muldiv *a)
+{
+    return gen_lext_MULT_G(s, a->rt, a->rs, a->rd, true, false);
+}
+
+static bool trans_DMULTU_G(DisasContext *s, arg_muldiv *a)
+{
+    return gen_lext_MULT_G(s, a->rt, a->rs, a->rd, true, true);
+}


So... if you want to clean this up afterward, or before is up to you.


"before" ended being way simpler :>




Re: [PATCH 6/6] target/mips: Convert Loongson [D]MULT[U].G opcodes to decodetree

2023-08-31 Thread Philippe Mathieu-Daudé

On 21/1/21 21:06, Richard Henderson wrote:

On 1/12/21 11:55 AM, Philippe Mathieu-Daudé wrote:

Convert the following opcodes to decodetree:

- MULT.G - multiply 32-bit signed integers
- MULTU.G - multiply 32-bit unsigned integers
- DMULT.G - multiply 64-bit signed integers
- DMULTU.G - multiply 64-bit unsigned integers

Now that all opcodes from the extension have been converted, we
can remove completely gen_loongson_integer() and its 2 calls in
decode_opc_special2_legacy() and decode_opc_special3_legacy().

Signed-off-by: Philippe Mathieu-Daudé 
---
  target/mips/godson2.decode|  5 ++
  target/mips/loong-ext.decode  |  5 ++
  target/mips/loong_translate.c | 58 ++
  target/mips/translate.c   | 92 +--
  4 files changed, 70 insertions(+), 90 deletions(-)

diff --git a/target/mips/godson2.decode b/target/mips/godson2.decode
index 805452fa975..cf12d9072ec 100644
--- a/target/mips/godson2.decode
+++ b/target/mips/godson2.decode
@@ -13,6 +13,11 @@
  
  @rs_rt_rd   .. rs:5  rt:5  rd:5  . ..   &muldiv
  
+MULT.G  01 . . . 0 011000   @rs_rt_rd

+MULTU.G 01 . . . 0 011001   @rs_rt_rd
+DMULT.G 01 . . . 0 011100   @rs_rt_rd
+DMULTU.G01 . . . 0 011101   @rs_rt_rd
+
  DIV.G   01 . . . 0 011010   @rs_rt_rd
  DIVU.G  01 . . . 0 011011   @rs_rt_rd
  DDIV.G  01 . . . 0 00   @rs_rt_rd
diff --git a/target/mips/loong-ext.decode b/target/mips/loong-ext.decode
index b0715894ee1..2281afaad95 100644
--- a/target/mips/loong-ext.decode
+++ b/target/mips/loong-ext.decode
@@ -14,6 +14,11 @@
  
  @rs_rt_rd   .. rs:5  rt:5  rd:5  . ..   &muldiv
  
+MULT.G  011100 . . . 0 01   @rs_rt_rd

+DMULT.G 011100 . . . 0 010001   @rs_rt_rd
+MULTU.G 011100 . . . 0 010010   @rs_rt_rd
+DMULTU.G011100 . . . 0 010011   @rs_rt_rd
+
  DIV.G   011100 . . . 0 010100   @rs_rt_rd
  DDIV.G  011100 . . . 0 010101   @rs_rt_rd
  DIVU.G  011100 . . . 0 010110   @rs_rt_rd
diff --git a/target/mips/loong_translate.c b/target/mips/loong_translate.c
index 50609ce4178..2af94535921 100644
--- a/target/mips/loong_translate.c
+++ b/target/mips/loong_translate.c
@@ -263,6 +263,64 @@ static bool trans_DMODU_G(DisasContext *s, arg_muldiv *a)
  return gen_lext_MODU_G(s, a->rt, a->rs, a->rd, true);
  }
  
+static bool gen_lext_MULT_G(DisasContext *s, int rd, int rs, int rt,

+bool is_double, bool is_unsigned)
+{
+TCGv t0, t1;
+
+if (is_double) {
+if (TARGET_LONG_BITS != 64) {
+return false;
+}
+check_mips_64(s);
+}
+
+if (rd == 0) {
+/* Treat as NOP. */
+return true;
+}
+
+t0 = tcg_temp_new();
+t1 = tcg_temp_new();
+
+gen_load_gpr(t0, rs);
+gen_load_gpr(t1, rt);
+
+if (is_unsigned && !is_double) {
+tcg_gen_ext32u_tl(t0, t0);
+tcg_gen_ext32u_tl(t1, t1);
+}


While this is a faithful conversion of the existing code, these extensions make
no difference to the result.  They are redundant with


+tcg_gen_mul_tl(cpu_gpr[rd], t0, t1);
+if (!is_double) {
+tcg_gen_ext32s_tl(cpu_gpr[rd], cpu_gpr[rd]);


this one, which discards any bit that might have been set by the input bits
that are cleared.


I see.


There is no actual difference between MULT.G and MULTU.G, or DMULT.G and
DMULTU.G, because they don't record the most significant bits of the infinite
result in any way.


Right.


+static bool trans_MULT_G(DisasContext *s, arg_muldiv *a)
+{
+return gen_lext_MULT_G(s, a->rt, a->rs, a->rd, false, false);
+}
+
+static bool trans_MULTU_G(DisasContext *s, arg_muldiv *a)
+{
+return gen_lext_MULT_G(s, a->rt, a->rs, a->rd, false, true);
+}
+
+static bool trans_DMULT_G(DisasContext *s, arg_muldiv *a)
+{
+return gen_lext_MULT_G(s, a->rt, a->rs, a->rd, true, false);
+}
+
+static bool trans_DMULTU_G(DisasContext *s, arg_muldiv *a)
+{
+return gen_lext_MULT_G(s, a->rt, a->rs, a->rd, true, true);
+}


So... if you want to clean this up afterward, or before is up to you.


IIUC you are suggesting this simplification:

-- >8 --
diff --git a/target/mips/tcg/godson2.decode b/target/mips/tcg/godson2.decode
index 4fb8fdba9c..86015ac8e5 100644
--- a/target/mips/tcg/godson2.decode
+++ b/target/mips/tcg/godson2.decode
@@ -15,6 +15,4 @@

-MULT_G  01 . . . 0 011000   @rs_rt_rd
-MULTU_G 01 . . . 0 011001   @rs_rt_rd
-DMULT_G 01 . . . 0 011100   @rs_rt_rd
-DMULTU_G01 . . . 0 011101   @rs_rt_rd
+MULTu_G 01 . . . 0 01100-   @rs_rt_rd
+DMULTu_G   

Re: [PATCH 6/6] target/mips: Convert Loongson [D]MULT[U].G opcodes to decodetree

2021-01-21 Thread Richard Henderson
On 1/12/21 11:55 AM, Philippe Mathieu-Daudé wrote:
> Convert the following opcodes to decodetree:
> 
> - MULT.G - multiply 32-bit signed integers
> - MULTU.G - multiply 32-bit unsigned integers
> - DMULT.G - multiply 64-bit signed integers
> - DMULTU.G - multiply 64-bit unsigned integers
> 
> Now that all opcodes from the extension have been converted, we
> can remove completely gen_loongson_integer() and its 2 calls in
> decode_opc_special2_legacy() and decode_opc_special3_legacy().
> 
> Signed-off-by: Philippe Mathieu-Daudé 
> ---
>  target/mips/godson2.decode|  5 ++
>  target/mips/loong-ext.decode  |  5 ++
>  target/mips/loong_translate.c | 58 ++
>  target/mips/translate.c   | 92 +--
>  4 files changed, 70 insertions(+), 90 deletions(-)
> 
> diff --git a/target/mips/godson2.decode b/target/mips/godson2.decode
> index 805452fa975..cf12d9072ec 100644
> --- a/target/mips/godson2.decode
> +++ b/target/mips/godson2.decode
> @@ -13,6 +13,11 @@
>  
>  @rs_rt_rd   .. rs:5  rt:5  rd:5  . ..   &muldiv
>  
> +MULT.G  01 . . . 0 011000   @rs_rt_rd
> +MULTU.G 01 . . . 0 011001   @rs_rt_rd
> +DMULT.G 01 . . . 0 011100   @rs_rt_rd
> +DMULTU.G01 . . . 0 011101   @rs_rt_rd
> +
>  DIV.G   01 . . . 0 011010   @rs_rt_rd
>  DIVU.G  01 . . . 0 011011   @rs_rt_rd
>  DDIV.G  01 . . . 0 00   @rs_rt_rd
> diff --git a/target/mips/loong-ext.decode b/target/mips/loong-ext.decode
> index b0715894ee1..2281afaad95 100644
> --- a/target/mips/loong-ext.decode
> +++ b/target/mips/loong-ext.decode
> @@ -14,6 +14,11 @@
>  
>  @rs_rt_rd   .. rs:5  rt:5  rd:5  . ..   &muldiv
>  
> +MULT.G  011100 . . . 0 01   @rs_rt_rd
> +DMULT.G 011100 . . . 0 010001   @rs_rt_rd
> +MULTU.G 011100 . . . 0 010010   @rs_rt_rd
> +DMULTU.G011100 . . . 0 010011   @rs_rt_rd
> +
>  DIV.G   011100 . . . 0 010100   @rs_rt_rd
>  DDIV.G  011100 . . . 0 010101   @rs_rt_rd
>  DIVU.G  011100 . . . 0 010110   @rs_rt_rd
> diff --git a/target/mips/loong_translate.c b/target/mips/loong_translate.c
> index 50609ce4178..2af94535921 100644
> --- a/target/mips/loong_translate.c
> +++ b/target/mips/loong_translate.c
> @@ -263,6 +263,64 @@ static bool trans_DMODU_G(DisasContext *s, arg_muldiv *a)
>  return gen_lext_MODU_G(s, a->rt, a->rs, a->rd, true);
>  }
>  
> +static bool gen_lext_MULT_G(DisasContext *s, int rd, int rs, int rt,
> +bool is_double, bool is_unsigned)
> +{
> +TCGv t0, t1;
> +
> +if (is_double) {
> +if (TARGET_LONG_BITS != 64) {
> +return false;
> +}
> +check_mips_64(s);
> +}
> +
> +if (rd == 0) {
> +/* Treat as NOP. */
> +return true;
> +}
> +
> +t0 = tcg_temp_new();
> +t1 = tcg_temp_new();
> +
> +gen_load_gpr(t0, rs);
> +gen_load_gpr(t1, rt);
> +
> +if (is_unsigned && !is_double) {
> +tcg_gen_ext32u_tl(t0, t0);
> +tcg_gen_ext32u_tl(t1, t1);
> +}

While this is a faithful conversion of the existing code, these extensions make
no difference to the result.  They are redundant with

> +tcg_gen_mul_tl(cpu_gpr[rd], t0, t1);
> +if (!is_double) {
> +tcg_gen_ext32s_tl(cpu_gpr[rd], cpu_gpr[rd]);

this one, which discards any bit that might have been set by the input bits
that are cleared.

There is no actual difference between MULT.G and MULTU.G, or DMULT.G and
DMULTU.G, because they don't record the most significant bits of the infinite
result in any way.

> +static bool trans_MULT_G(DisasContext *s, arg_muldiv *a)
> +{
> +return gen_lext_MULT_G(s, a->rt, a->rs, a->rd, false, false);
> +}
> +
> +static bool trans_MULTU_G(DisasContext *s, arg_muldiv *a)
> +{
> +return gen_lext_MULT_G(s, a->rt, a->rs, a->rd, false, true);
> +}
> +
> +static bool trans_DMULT_G(DisasContext *s, arg_muldiv *a)
> +{
> +return gen_lext_MULT_G(s, a->rt, a->rs, a->rd, true, false);
> +}
> +
> +static bool trans_DMULTU_G(DisasContext *s, arg_muldiv *a)
> +{
> +return gen_lext_MULT_G(s, a->rt, a->rs, a->rd, true, true);
> +}

So... if you want to clean this up afterward, or before is up to you.


r~



[PATCH 6/6] target/mips: Convert Loongson [D]MULT[U].G opcodes to decodetree

2021-01-12 Thread Philippe Mathieu-Daudé
Convert the following opcodes to decodetree:

- MULT.G - multiply 32-bit signed integers
- MULTU.G - multiply 32-bit unsigned integers
- DMULT.G - multiply 64-bit signed integers
- DMULTU.G - multiply 64-bit unsigned integers

Now that all opcodes from the extension have been converted, we
can remove completely gen_loongson_integer() and its 2 calls in
decode_opc_special2_legacy() and decode_opc_special3_legacy().

Signed-off-by: Philippe Mathieu-Daudé 
---
 target/mips/godson2.decode|  5 ++
 target/mips/loong-ext.decode  |  5 ++
 target/mips/loong_translate.c | 58 ++
 target/mips/translate.c   | 92 +--
 4 files changed, 70 insertions(+), 90 deletions(-)

diff --git a/target/mips/godson2.decode b/target/mips/godson2.decode
index 805452fa975..cf12d9072ec 100644
--- a/target/mips/godson2.decode
+++ b/target/mips/godson2.decode
@@ -13,6 +13,11 @@
 
 @rs_rt_rd   .. rs:5  rt:5  rd:5  . ..   &muldiv
 
+MULT.G  01 . . . 0 011000   @rs_rt_rd
+MULTU.G 01 . . . 0 011001   @rs_rt_rd
+DMULT.G 01 . . . 0 011100   @rs_rt_rd
+DMULTU.G01 . . . 0 011101   @rs_rt_rd
+
 DIV.G   01 . . . 0 011010   @rs_rt_rd
 DIVU.G  01 . . . 0 011011   @rs_rt_rd
 DDIV.G  01 . . . 0 00   @rs_rt_rd
diff --git a/target/mips/loong-ext.decode b/target/mips/loong-ext.decode
index b0715894ee1..2281afaad95 100644
--- a/target/mips/loong-ext.decode
+++ b/target/mips/loong-ext.decode
@@ -14,6 +14,11 @@
 
 @rs_rt_rd   .. rs:5  rt:5  rd:5  . ..   &muldiv
 
+MULT.G  011100 . . . 0 01   @rs_rt_rd
+DMULT.G 011100 . . . 0 010001   @rs_rt_rd
+MULTU.G 011100 . . . 0 010010   @rs_rt_rd
+DMULTU.G011100 . . . 0 010011   @rs_rt_rd
+
 DIV.G   011100 . . . 0 010100   @rs_rt_rd
 DDIV.G  011100 . . . 0 010101   @rs_rt_rd
 DIVU.G  011100 . . . 0 010110   @rs_rt_rd
diff --git a/target/mips/loong_translate.c b/target/mips/loong_translate.c
index 50609ce4178..2af94535921 100644
--- a/target/mips/loong_translate.c
+++ b/target/mips/loong_translate.c
@@ -263,6 +263,64 @@ static bool trans_DMODU_G(DisasContext *s, arg_muldiv *a)
 return gen_lext_MODU_G(s, a->rt, a->rs, a->rd, true);
 }
 
+static bool gen_lext_MULT_G(DisasContext *s, int rd, int rs, int rt,
+bool is_double, bool is_unsigned)
+{
+TCGv t0, t1;
+
+if (is_double) {
+if (TARGET_LONG_BITS != 64) {
+return false;
+}
+check_mips_64(s);
+}
+
+if (rd == 0) {
+/* Treat as NOP. */
+return true;
+}
+
+t0 = tcg_temp_new();
+t1 = tcg_temp_new();
+
+gen_load_gpr(t0, rs);
+gen_load_gpr(t1, rt);
+
+if (is_unsigned && !is_double) {
+tcg_gen_ext32u_tl(t0, t0);
+tcg_gen_ext32u_tl(t1, t1);
+}
+tcg_gen_mul_tl(cpu_gpr[rd], t0, t1);
+if (!is_double) {
+tcg_gen_ext32s_tl(cpu_gpr[rd], cpu_gpr[rd]);
+}
+
+tcg_temp_free(t0);
+tcg_temp_free(t1);
+
+return true;
+}
+
+static bool trans_MULT_G(DisasContext *s, arg_muldiv *a)
+{
+return gen_lext_MULT_G(s, a->rt, a->rs, a->rd, false, false);
+}
+
+static bool trans_MULTU_G(DisasContext *s, arg_muldiv *a)
+{
+return gen_lext_MULT_G(s, a->rt, a->rs, a->rd, false, true);
+}
+
+static bool trans_DMULT_G(DisasContext *s, arg_muldiv *a)
+{
+return gen_lext_MULT_G(s, a->rt, a->rs, a->rd, true, false);
+}
+
+static bool trans_DMULTU_G(DisasContext *s, arg_muldiv *a)
+{
+return gen_lext_MULT_G(s, a->rt, a->rs, a->rd, true, true);
+}
+
 bool decode_loongson(DisasContext *ctx, uint32_t insn)
 {
 if ((ctx->insn_flags & INSN_LOONGSON2E)
diff --git a/target/mips/translate.c b/target/mips/translate.c
index 144e51b063a..5b5fe31c534 100644
--- a/target/mips/translate.c
+++ b/target/mips/translate.c
@@ -337,11 +337,6 @@ enum {
 OPC_MUL  = 0x02 | OPC_SPECIAL2,
 OPC_MSUB = 0x04 | OPC_SPECIAL2,
 OPC_MSUBU= 0x05 | OPC_SPECIAL2,
-/* Loongson 2F */
-OPC_MULT_G_2F   = 0x10 | OPC_SPECIAL2,
-OPC_DMULT_G_2F  = 0x11 | OPC_SPECIAL2,
-OPC_MULTU_G_2F  = 0x12 | OPC_SPECIAL2,
-OPC_DMULTU_G_2F = 0x13 | OPC_SPECIAL2,
 /* Misc */
 OPC_CLZ  = 0x20 | OPC_SPECIAL2,
 OPC_CLO  = 0x21 | OPC_SPECIAL2,
@@ -370,12 +365,6 @@ enum {
 OPC_RDHWR= 0x3B | OPC_SPECIAL3,
 OPC_GINV = 0x3D | OPC_SPECIAL3,
 
-/* Loongson 2E */
-OPC_MULT_G_2E   = 0x18 | OPC_SPECIAL3,
-OPC_MULTU_G_2E  = 0x19 | OPC_SPECIAL3,
-OPC_DMULT_G_2E  = 0x1C | OPC_SPECIAL3,
-OPC_DMULTU_G_2E = 0x1D | OPC_SPECIAL3,
-
 /* MIPS DSP Load */
 OPC_LX_DSP = 0x0A | OPC_SPECIAL3,
 /* MIPS DSP Arithmetic */
@@ -4962,65 +49