Re: [PATCH v4 06/12] target/hexagon: introduce new helper functions

2021-04-19 Thread Alessandro Di Federico via
On Mon, 19 Apr 2021 15:00:17 +
Taylor Simpson  wrote:

> Once this patch series is merged, many load/store instructions will
> have manual overrides.  I can easily provide overrides for the
> remainder.  Then, we could skip them in the idef-parser.  At a
> minimum, you should skip the ones that are part of that series
> - circular addressing
> https://lists.gnu.org/archive/html/qemu-devel/2021-04/msg01355.html
> - bit reverse addressing
> https://lists.gnu.org/archive/html/qemu-devel/2021-04/msg01354.html
> - load and unpack bytes
> https://lists.gnu.org/archive/html/qemu-devel/2021-04/msg01353.html
>   
> - load into shifted register
> https://lists.gnu.org/archive/html/qemu-devel/2021-04/msg01359.html
> 
> Alessandro, what do you think?

If, for an instruction, all idef-parser does is call a helper, yeah,
it makes sense to exclude them.

I'll look into this again once your patchset is in.

-- 
Alessandro Di Federico
rev.ng



RE: [PATCH v4 06/12] target/hexagon: introduce new helper functions

2021-04-19 Thread Taylor Simpson


> -Original Message-
> From: Richard Henderson 
> Sent: Sunday, April 18, 2021 4:31 PM
> To: Alessandro Di Federico ; qemu-devel@nongnu.org
> Cc: Taylor Simpson ; Brian Cain
> ; bab...@rev.ng; ni...@rev.ng; phi...@redhat.com;
> Alessandro Di Federico 
> Subject: Re: [PATCH v4 06/12] target/hexagon: introduce new helper
> functions
> 
> On 4/15/21 9:34 AM, Alessandro Di Federico wrote:
> > +void gen_store32(TCGv vaddr, TCGv src, tcg_target_long width, unsigned
> slot)
> > +{
> > +tcg_gen_mov_tl(hex_store_addr[slot], vaddr);
> > +tcg_gen_movi_tl(hex_store_width[slot], width);
> > +tcg_gen_mov_tl(hex_store_val32[slot], src);
> > +}
> > +
> > +void gen_store1(TCGv_env cpu_env, TCGv vaddr, TCGv src, DisasContext
> *ctx,
> > +unsigned slot)
> > +{
> > +gen_store32(vaddr, src, 1, slot);
> > +ctx->store_width[slot] = 1;
> > +}
> 
> Why is store_width here and not in gen_store32?
> Do you really need so many helpers here, as opposed to making use of
> MemOp?

These are included in this patch
https://lists.gnu.org/archive/html/qemu-devel/2021-04/msg01355.html
which hasn't been merged yet.

> 
> > +void gen_sat_i32_ext(TCGv ovfl, TCGv dest, TCGv source, int width)
> > +{
> > +gen_sat_i32(dest, source, width);
> > +TCGv zero = tcg_const_i32(0);
> > +TCGv one = tcg_const_i32(1);
> > +tcg_gen_movcond_i32(TCG_COND_NE, ovfl, source, dest, one, zero);
> 
> (source != dest ? 1 : 0) -> (source != dest).
> 
> Therefore, tcg_gen_setcond_i32.
> 
> Or did you intend
> 
> ovfl = (source != dest ? 1 : ovfl)?
> 
> which is probably still better as
> 
>tcg_gen_setcond_tl(TCG_COND_NE, tmp, source,dest);
>tcg_gen_or_tl(ovfl, ovfl, tmp);
> 
> > +void gen_fbrev(TCGv result, TCGv src)
> > +{
> > +TCGv lo = tcg_temp_new();
> > +TCGv tmp1 = tcg_temp_new();
> > +TCGv tmp2 = tcg_temp_new();
> > +
> > +/* Bit reversal of low 16 bits */
> > +tcg_gen_extract_tl(lo, src, 0, 16);
> > +tcg_gen_andi_tl(tmp1, lo, 0xaaaa);
> > +tcg_gen_shri_tl(tmp1, tmp1, 1);
> > +tcg_gen_andi_tl(tmp2, lo, 0x5555);
> > +tcg_gen_shli_tl(tmp2, tmp2, 1);
> > +tcg_gen_or_tl(lo, tmp1, tmp2);
> > +tcg_gen_andi_tl(tmp1, lo, 0xcccc);
> > +tcg_gen_shri_tl(tmp1, tmp1, 2);
> > +tcg_gen_andi_tl(tmp2, lo, 0x3333);
> > +tcg_gen_shli_tl(tmp2, tmp2, 2);
> > +tcg_gen_or_tl(lo, tmp1, tmp2);
> > +tcg_gen_andi_tl(tmp1, lo, 0xf0f0);
> > +tcg_gen_shri_tl(tmp1, tmp1, 4);
> > +tcg_gen_andi_tl(tmp2, lo, 0x0f0f);
> > +tcg_gen_shli_tl(tmp2, tmp2, 4);
> > +tcg_gen_or_tl(lo, tmp1, tmp2);
> > +tcg_gen_bswap16_tl(lo, lo);
> > +
> > +/* Final tweaks */
> > +tcg_gen_deposit_tl(result, src, lo, 0, 16);
> > +tcg_gen_or_tl(result, result, lo);
> > +
> > +tcg_temp_free(lo);
> > +tcg_temp_free(tmp1);
> > +tcg_temp_free(tmp2);
> > +}
> 
> Coordinate with Taylor.
> https://lists.gnu.org/archive/html/qemu-devel/2021-03/msg10007.html

Once this patch series is merged, many load/store instructions will have manual 
overrides.  I can easily provide overrides for the remainder.  Then, we could 
skip them in the idef-parser.  At a minimum, you should skip the ones that are 
part of that series
- circular addressing   
https://lists.gnu.org/archive/html/qemu-devel/2021-04/msg01355.html
- bit reverse addressing
https://lists.gnu.org/archive/html/qemu-devel/2021-04/msg01354.html
- load and unpack bytes 
https://lists.gnu.org/archive/html/qemu-devel/2021-04/msg01353.html 

- load into shifted register
https://lists.gnu.org/archive/html/qemu-devel/2021-04/msg01359.html

Alessandro, what do you think?

Thanks,
Taylor



Re: [PATCH v4 06/12] target/hexagon: introduce new helper functions

2021-04-18 Thread Richard Henderson

On 4/15/21 9:34 AM, Alessandro Di Federico wrote:

+void gen_store32(TCGv vaddr, TCGv src, tcg_target_long width, unsigned slot)
+{
+tcg_gen_mov_tl(hex_store_addr[slot], vaddr);
+tcg_gen_movi_tl(hex_store_width[slot], width);
+tcg_gen_mov_tl(hex_store_val32[slot], src);
+}
+
+void gen_store1(TCGv_env cpu_env, TCGv vaddr, TCGv src, DisasContext *ctx,
+unsigned slot)
+{
+gen_store32(vaddr, src, 1, slot);
+ctx->store_width[slot] = 1;
+}


Why is store_width here and not in gen_store32?
Do you really need so many helpers here, as opposed to making use of MemOp?


+void gen_sat_i32_ext(TCGv ovfl, TCGv dest, TCGv source, int width)
+{
+gen_sat_i32(dest, source, width);
+TCGv zero = tcg_const_i32(0);
+TCGv one = tcg_const_i32(1);
+tcg_gen_movcond_i32(TCG_COND_NE, ovfl, source, dest, one, zero);


(source != dest ? 1 : 0) -> (source != dest).

Therefore, tcg_gen_setcond_i32.

Or did you intend

ovfl = (source != dest ? 1 : ovfl)?

which is probably still better as

  tcg_gen_setcond_tl(TCG_COND_NE, tmp, source,dest);
  tcg_gen_or_tl(ovfl, ovfl, tmp);


+void gen_fbrev(TCGv result, TCGv src)
+{
+TCGv lo = tcg_temp_new();
+TCGv tmp1 = tcg_temp_new();
+TCGv tmp2 = tcg_temp_new();
+
+/* Bit reversal of low 16 bits */
+tcg_gen_extract_tl(lo, src, 0, 16);
+tcg_gen_andi_tl(tmp1, lo, 0xaaaa);
+tcg_gen_shri_tl(tmp1, tmp1, 1);
+tcg_gen_andi_tl(tmp2, lo, 0x5555);
+tcg_gen_shli_tl(tmp2, tmp2, 1);
+tcg_gen_or_tl(lo, tmp1, tmp2);
+tcg_gen_andi_tl(tmp1, lo, 0xcccc);
+tcg_gen_shri_tl(tmp1, tmp1, 2);
+tcg_gen_andi_tl(tmp2, lo, 0x3333);
+tcg_gen_shli_tl(tmp2, tmp2, 2);
+tcg_gen_or_tl(lo, tmp1, tmp2);
+tcg_gen_andi_tl(tmp1, lo, 0xf0f0);
+tcg_gen_shri_tl(tmp1, tmp1, 4);
+tcg_gen_andi_tl(tmp2, lo, 0x0f0f);
+tcg_gen_shli_tl(tmp2, tmp2, 4);
+tcg_gen_or_tl(lo, tmp1, tmp2);
+tcg_gen_bswap16_tl(lo, lo);
+
+/* Final tweaks */
+tcg_gen_deposit_tl(result, src, lo, 0, 16);
+tcg_gen_or_tl(result, result, lo);
+
+tcg_temp_free(lo);
+tcg_temp_free(tmp1);
+tcg_temp_free(tmp2);
+}


Coordinate with Taylor.
https://lists.gnu.org/archive/html/qemu-devel/2021-03/msg10007.html


r~



[PATCH v4 06/12] target/hexagon: introduce new helper functions

2021-04-15 Thread Alessandro Di Federico via
From: Niccolò Izzo 

These helpers will be employed by the idef-parser generated code.

Signed-off-by: Alessandro Di Federico 
Signed-off-by: Niccolò Izzo 
---
 target/hexagon/genptr.c | 188 
 target/hexagon/genptr.h |  22 +
 target/hexagon/macros.h |   9 ++
 3 files changed, 219 insertions(+)

diff --git a/target/hexagon/genptr.c b/target/hexagon/genptr.c
index 1ddb4360f1..024419bf39 100644
--- a/target/hexagon/genptr.c
+++ b/target/hexagon/genptr.c
@@ -28,6 +28,12 @@
 #include "gen_tcg.h"
 #include "genptr.h"
 
+TCGv gen_read_reg(TCGv result, int num)
+{
+tcg_gen_mov_tl(result, hex_gpr[num]);
+return result;
+}
+
 TCGv gen_read_preg(TCGv pred, uint8_t num)
 {
 tcg_gen_mov_tl(pred, hex_pred[num]);
@@ -330,5 +336,187 @@ static inline void gen_store_conditional8(CPUHexagonState 
*env,
 tcg_gen_movi_tl(hex_llsc_addr, ~0);
 }
 
+void gen_store32(TCGv vaddr, TCGv src, tcg_target_long width, unsigned slot)
+{
+tcg_gen_mov_tl(hex_store_addr[slot], vaddr);
+tcg_gen_movi_tl(hex_store_width[slot], width);
+tcg_gen_mov_tl(hex_store_val32[slot], src);
+}
+
+void gen_store1(TCGv_env cpu_env, TCGv vaddr, TCGv src, DisasContext *ctx,
+unsigned slot)
+{
+gen_store32(vaddr, src, 1, slot);
+ctx->store_width[slot] = 1;
+}
+
+void gen_store2(TCGv_env cpu_env, TCGv vaddr, TCGv src, DisasContext *ctx,
+unsigned slot)
+{
+gen_store32(vaddr, src, 2, slot);
+ctx->store_width[slot] = 2;
+}
+
+void gen_store4(TCGv_env cpu_env, TCGv vaddr, TCGv src, DisasContext *ctx,
+unsigned slot)
+{
+gen_store32(vaddr, src, 4, slot);
+ctx->store_width[slot] = 4;
+}
+
+
+void gen_store8(TCGv_env cpu_env, TCGv vaddr, TCGv_i64 src, DisasContext *ctx,
+unsigned slot)
+{
+tcg_gen_mov_tl(hex_store_addr[slot], vaddr);
+tcg_gen_movi_tl(hex_store_width[slot], 8);
+tcg_gen_mov_i64(hex_store_val64[slot], src);
+ctx->store_width[slot] = 8;
+}
+
+void gen_set_usr_field(int field, TCGv val)
+{
+tcg_gen_deposit_tl(hex_gpr[HEX_REG_USR], hex_gpr[HEX_REG_USR], val,
+   reg_field_info[field].offset,
+   reg_field_info[field].width);
+}
+
+void gen_set_usr_fieldi(int field, int x)
+{
+TCGv val = tcg_const_tl(x);
+gen_set_usr_field(field, val);
+tcg_temp_free(val);
+}
+
+void gen_write_new_pc(TCGv addr)
+{
+/* If there are multiple branches in a packet, ignore the second one */
+TCGv zero = tcg_const_tl(0);
+tcg_gen_movcond_tl(TCG_COND_NE, hex_next_PC, hex_branch_taken, zero,
+   hex_next_PC, addr);
+tcg_gen_movi_tl(hex_branch_taken, 1);
+tcg_temp_free(zero);
+}
+
+void gen_sat_i32(TCGv dest, TCGv source, int width)
+{
+TCGv max_val = tcg_const_i32((1 << (width - 1)) - 1);
+TCGv min_val = tcg_const_i32(-(1 << (width - 1)));
+tcg_gen_smin_tl(dest, source, max_val);
+tcg_gen_smax_tl(dest, dest, min_val);
+tcg_temp_free_i32(max_val);
+tcg_temp_free_i32(min_val);
+}
+
+void gen_sat_i32_ext(TCGv ovfl, TCGv dest, TCGv source, int width)
+{
+gen_sat_i32(dest, source, width);
+TCGv zero = tcg_const_i32(0);
+TCGv one = tcg_const_i32(1);
+tcg_gen_movcond_i32(TCG_COND_NE, ovfl, source, dest, one, zero);
+tcg_temp_free_i32(zero);
+tcg_temp_free_i32(one);
+}
+
+void gen_satu_i32(TCGv dest, TCGv source, int width)
+{
+TCGv max_val = tcg_const_i32((1 << width) - 1);
+tcg_gen_movcond_i32(TCG_COND_GTU, dest, source, max_val, max_val, source);
+TCGv_i32 zero = tcg_const_i32(0);
+tcg_gen_movcond_i32(TCG_COND_LT, dest, source, zero, zero, dest);
+tcg_temp_free_i32(max_val);
+tcg_temp_free_i32(zero);
+}
+
+void gen_satu_i32_ext(TCGv ovfl, TCGv dest, TCGv source, int width)
+{
+gen_satu_i32(dest, source, width);
+TCGv zero = tcg_const_i32(0);
+TCGv one = tcg_const_i32(1);
+tcg_gen_movcond_i32(TCG_COND_NE, ovfl, dest, source, one, zero);
+tcg_temp_free_i32(zero);
+tcg_temp_free_i32(one);
+}
+
+void gen_sat_i64(TCGv_i64 dest, TCGv_i64 source, int width)
+{
+TCGv_i64 max_val = tcg_const_i64((1 << (width - 1)) - 1);
+TCGv_i64 min_val = tcg_const_i64(-(1 << (width - 1)));
+tcg_gen_smin_i64(dest, source, max_val);
+tcg_gen_smax_i64(dest, dest, min_val);
+tcg_temp_free_i64(max_val);
+tcg_temp_free_i64(min_val);
+}
+
+void gen_sat_i64_ext(TCGv ovfl, TCGv_i64 dest, TCGv_i64 source, int width)
+{
+gen_sat_i64(dest, source, width);
+TCGv_i64 ovfl_64 = tcg_temp_new_i64();
+TCGv_i64 zero = tcg_const_i64(0);
+TCGv_i64 one = tcg_const_i64(1);
+tcg_gen_movcond_i64(TCG_COND_NE, ovfl_64, dest, source, one, zero);
+tcg_gen_trunc_i64_tl(ovfl, ovfl_64);
+tcg_temp_free_i64(ovfl_64);
+tcg_temp_free_i64(zero);
+tcg_temp_free_i64(one);
+}
+
+void gen_satu_i64(TCGv_i64 dest, TCGv_i64 source, int width)
+{
+TCGv_i64 max_val = tcg_const_i64((1 << width) - 1);
+tcg_gen_movcond_i64(TCG_CON