Re: [PATCH v2 13/15] tcg/arm: Implement TCG_TARGET_HAS_shv_vec

2021-02-08 Thread Richard Henderson
On 2/8/21 12:50 PM, Peter Maydell wrote:
> On Mon, 8 Feb 2021 at 03:28, Richard Henderson wrote:
>>
>> The three vector shift by vector operations are all implemented via
>> expansion.  Therefore do not actually set TCG_TARGET_HAS_shv_vec,
>> as none of shlv_vec, shrv_vec, sarv_vec may actually appear in the
>> instruction stream, and therefore also do not appear in tcg_target_op_def.
>>
>> Signed-off-by: Richard Henderson 
>> ---
>>  tcg/arm/tcg-target.opc.h |  3 ++
>>  tcg/arm/tcg-target.c.inc | 61 +++-
>>  2 files changed, 63 insertions(+), 1 deletion(-)
> 
>> +switch (opc) {
>> +case INDEX_op_shlv_vec:
>> +/*
>> + * Merely propagate shlv_vec to arm_ushl_vec.
>> + * In this way we don't set TCG_TARGET_HAS_shv_vec
>> + * because everything is done via expansion.
>> + */
>> +v2 = temp_tcgv_vec(arg_temp(a2));
>> +vec_gen_3(INDEX_op_arm_ushl_vec, type, vece, tcgv_vec_arg(v0),
>> +  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
>> +break;
> 
> tcg/aarch64 seems to set TCG_TARGET_HAS_shv_vec and
> only do the right-shifts via expand_op. Is there a difference
> between the two that means Neon has to do it this way, or is it
> just a "works either way" thing?

It's a works either way thing.


r~

> 
>> +
> 
> Reviewed-by: Peter Maydell 
> 
> thanks
> -- PMM
> 




Re: [PATCH v2 13/15] tcg/arm: Implement TCG_TARGET_HAS_shv_vec

2021-02-08 Thread Peter Maydell
On Mon, 8 Feb 2021 at 03:28, Richard Henderson wrote:
>
> The three vector shift by vector operations are all implemented via
> expansion.  Therefore do not actually set TCG_TARGET_HAS_shv_vec,
> as none of shlv_vec, shrv_vec, sarv_vec may actually appear in the
> instruction stream, and therefore also do not appear in tcg_target_op_def.
>
> Signed-off-by: Richard Henderson 
> ---
>  tcg/arm/tcg-target.opc.h |  3 ++
>  tcg/arm/tcg-target.c.inc | 61 +++-
>  2 files changed, 63 insertions(+), 1 deletion(-)

> +switch (opc) {
> +case INDEX_op_shlv_vec:
> +/*
> + * Merely propagate shlv_vec to arm_ushl_vec.
> + * In this way we don't set TCG_TARGET_HAS_shv_vec
> + * because everything is done via expansion.
> + */
> +v2 = temp_tcgv_vec(arg_temp(a2));
> +vec_gen_3(INDEX_op_arm_ushl_vec, type, vece, tcgv_vec_arg(v0),
> +  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
> +break;

tcg/aarch64 seems to set TCG_TARGET_HAS_shv_vec and
only do the right-shifts via expand_op. Is there a difference
between the two that means Neon has to do it this way, or is it
just a "works either way" thing?

> +

Reviewed-by: Peter Maydell 

thanks
-- PMM



[PATCH v2 13/15] tcg/arm: Implement TCG_TARGET_HAS_shv_vec

2021-02-07 Thread Richard Henderson
The three vector shift by vector operations are all implemented via
expansion.  Therefore do not actually set TCG_TARGET_HAS_shv_vec,
as none of shlv_vec, shrv_vec, sarv_vec may actually appear in the
instruction stream, and therefore also do not appear in tcg_target_op_def.

Signed-off-by: Richard Henderson 
---
 tcg/arm/tcg-target.opc.h |  3 ++
 tcg/arm/tcg-target.c.inc | 61 +++-
 2 files changed, 63 insertions(+), 1 deletion(-)

diff --git a/tcg/arm/tcg-target.opc.h b/tcg/arm/tcg-target.opc.h
index 7a4578e9b4..d19153dcb9 100644
--- a/tcg/arm/tcg-target.opc.h
+++ b/tcg/arm/tcg-target.opc.h
@@ -10,3 +10,6 @@
  * emitted by tcg_expand_vec_op.  For those familiar with GCC internals,
  * consider these to be UNSPEC with names.
  */
+
+DEF(arm_sshl_vec, 1, 2, 0, IMPLVEC)
+DEF(arm_ushl_vec, 1, 2, 0, IMPLVEC)
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
index 875d975d4b..b088f61a99 100644
--- a/tcg/arm/tcg-target.c.inc
+++ b/tcg/arm/tcg-target.c.inc
@@ -215,6 +215,8 @@ typedef enum {
 INSN_VSHLI = 0xf2800510,  /* VSHL (immediate) */
 INSN_VSARI = 0xf2800010,  /* VSHR.S */
 INSN_VSHRI = 0xf3800010,  /* VSHR.U */
+INSN_VSHL_S= 0xf2000400,  /* VSHL.S (register) */
+INSN_VSHL_U= 0xf3000400,  /* VSHL.U (register) */
 
 INSN_VBSL  = 0xf3100110,
 INSN_VBIT  = 0xf3200110,
@@ -2422,6 +2424,8 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
 case INDEX_op_usadd_vec:
 case INDEX_op_ussub_vec:
 case INDEX_op_xor_vec:
+case INDEX_op_arm_sshl_vec:
+case INDEX_op_arm_ushl_vec:
 return C_O1_I2(w, w, w);
 case INDEX_op_or_vec:
 case INDEX_op_andc_vec:
@@ -2818,6 +2822,17 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
 case INDEX_op_xor_vec:
 tcg_out_vreg3(s, INSN_VEOR, q, 0, a0, a1, a2);
 return;
+case INDEX_op_arm_sshl_vec:
+/*
+ * Note that Vm is the data and Vn is the shift count,
+ * therefore the arguments appear reversed.
+ */
+tcg_out_vreg3(s, INSN_VSHL_S, q, vece, a0, a2, a1);
+return;
+case INDEX_op_arm_ushl_vec:
+/* See above. */
+tcg_out_vreg3(s, INSN_VSHL_U, q, vece, a0, a2, a1);
+return;
 case INDEX_op_shli_vec:
 tcg_out_vshifti(s, INSN_VSHLI, q, a0, a1, a2 + (8 << vece));
 return;
@@ -2952,6 +2967,10 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
 case INDEX_op_umax_vec:
 case INDEX_op_umin_vec:
 return vece < MO_64;
+case INDEX_op_shlv_vec:
+case INDEX_op_shrv_vec:
+case INDEX_op_sarv_vec:
+return -1;
 default:
 return 0;
 }
@@ -2960,7 +2979,47 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
 void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
TCGArg a0, ...)
 {
-g_assert_not_reached();
+va_list va;
+TCGv_vec v0, v1, v2, t1;
+TCGArg a2;
+
+va_start(va, a0);
+v0 = temp_tcgv_vec(arg_temp(a0));
+v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
+a2 = va_arg(va, TCGArg);
+va_end(va);
+
+switch (opc) {
+case INDEX_op_shlv_vec:
+/*
+ * Merely propagate shlv_vec to arm_ushl_vec.
+ * In this way we don't set TCG_TARGET_HAS_shv_vec
+ * because everything is done via expansion.
+ */
+v2 = temp_tcgv_vec(arg_temp(a2));
+vec_gen_3(INDEX_op_arm_ushl_vec, type, vece, tcgv_vec_arg(v0),
+  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
+break;
+
+case INDEX_op_shrv_vec:
+case INDEX_op_sarv_vec:
+/* Right shifts are negative left shifts for NEON.  */
+v2 = temp_tcgv_vec(arg_temp(a2));
+t1 = tcg_temp_new_vec(type);
+tcg_gen_neg_vec(vece, t1, v2);
+if (opc == INDEX_op_shrv_vec) {
+opc = INDEX_op_arm_ushl_vec;
+} else {
+opc = INDEX_op_arm_sshl_vec;
+}
+vec_gen_3(opc, type, vece, tcgv_vec_arg(v0),
+  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
+tcg_temp_free_vec(t1);
+break;
+
+default:
+g_assert_not_reached();
+}
 }
 
 static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
-- 
2.25.1