Re: [PATCH v6 41/61] target/riscv: vector floating-point merge instructions

2020-03-28 Thread LIU Zhiwei




On 2020/3/28 11:23, Richard Henderson wrote:
> On 3/17/20 8:06 AM, LIU Zhiwei wrote:
>> +for (i = 0; i < vl; i++) {\
>> +if (!vm && !vext_elem_mask(v0, mlen, i)) {\
>> +ETYPE s2 = *((ETYPE *)vs2 + H(i));\
>> +*((ETYPE *)vd + H1(i)) = s2;  \
>
> H1 should be H.

Yes.

>> +} else {  \
>> +*((ETYPE *)vd + H(i)) = (ETYPE)s1;\
>> +} \
>
> You can also hoist the s2 dereference out of the IF, and let the assignment be
> unconditional.
>
>    *((ETYPE *)vd + H(i))
>  = (!vm && !vext_elem_mask(v0, mlen, i) ? s2 : s1);

Yes, it's much better.

Zhiwei

> r~





Re: [PATCH v6 41/61] target/riscv: vector floating-point merge instructions

2020-03-27 Thread Richard Henderson
On 3/17/20 8:06 AM, LIU Zhiwei wrote:
> +for (i = 0; i < vl; i++) {\
> +if (!vm && !vext_elem_mask(v0, mlen, i)) {\
> +ETYPE s2 = *((ETYPE *)vs2 + H(i));\
> +*((ETYPE *)vd + H1(i)) = s2;  \

H1 should be H.

> +} else {  \
> +*((ETYPE *)vd + H(i)) = (ETYPE)s1;\
> +} \

You can also hoist the s2 dereference out of the IF, and let the assignment be
unconditional.

  *((ETYPE *)vd + H(i))
= (!vm && !vext_elem_mask(v0, mlen, i) ? s2 : s1);


r~



[PATCH v6 41/61] target/riscv: vector floating-point merge instructions

2020-03-17 Thread LIU Zhiwei
Signed-off-by: LIU Zhiwei 
---
 target/riscv/helper.h   |  4 +++
 target/riscv/insn32.decode  |  2 ++
 target/riscv/insn_trans/trans_rvv.inc.c | 34 +
 target/riscv/vector_helper.c| 30 ++
 4 files changed, 70 insertions(+)

diff --git a/target/riscv/helper.h b/target/riscv/helper.h
index 93914fc7c4..3c813d23d1 100644
--- a/target/riscv/helper.h
+++ b/target/riscv/helper.h
@@ -990,3 +990,7 @@ DEF_HELPER_6(vmford_vf_d, void, ptr, ptr, i64, ptr, env, i32)
 DEF_HELPER_5(vfclass_v_h, void, ptr, ptr, ptr, env, i32)
 DEF_HELPER_5(vfclass_v_w, void, ptr, ptr, ptr, env, i32)
 DEF_HELPER_5(vfclass_v_d, void, ptr, ptr, ptr, env, i32)
+
+DEF_HELPER_6(vfmerge_vfm_h, void, ptr, ptr, i64, ptr, env, i32)
+DEF_HELPER_6(vfmerge_vfm_w, void, ptr, ptr, i64, ptr, env, i32)
+DEF_HELPER_6(vfmerge_vfm_d, void, ptr, ptr, i64, ptr, env, i32)
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
index 23e80fe954..14cb4e2e66 100644
--- a/target/riscv/insn32.decode
+++ b/target/riscv/insn32.decode
@@ -513,6 +513,8 @@ vmfge_vf        011111 . ..... ..... 101 ..... 1010111 @r_vm
 vmford_vv       011010 . ..... ..... 001 ..... 1010111 @r_vm
 vmford_vf       011010 . ..... ..... 101 ..... 1010111 @r_vm
 vfclass_v       100011 . ..... 10000 001 ..... 1010111 @r2_vm
+vfmerge_vfm     010111 0 ..... ..... 101 ..... 1010111 @r_vm_0
+vfmv_v_f        010111 1 00000 ..... 101 ..... 1010111 @r2
 
 vsetvli         0 ........... ..... 111 ..... 1010111  @r2_zimm
 vsetvl          1000000 ..... ..... 111 ..... 1010111  @r
diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c
index b7327a2972..7cdeec9cd0 100644
--- a/target/riscv/insn_trans/trans_rvv.inc.c
+++ b/target/riscv/insn_trans/trans_rvv.inc.c
@@ -2036,3 +2036,37 @@ GEN_OPFVF_TRANS(vmford_vf, opfvf_cmp_check)
 
 /* Vector Floating-Point Classify Instruction */
 GEN_OPFV_TRANS(vfclass_v, opfv_check)
+
+/* Vector Floating-Point Merge Instruction */
+GEN_OPFVF_TRANS(vfmerge_vfm,  opfvf_check)
+
+static bool trans_vfmv_v_f(DisasContext *s, arg_vfmv_v_f *a) /* translate vfmv_v_f; returns true when the checks below pass, false otherwise */
+{
+if (vext_check_isa_ill(s) &&
+vext_check_reg(s, a->rd, false) &&
+(s->sew != 0)) { /* all three conditions must hold; sew == 0 is rejected */
+
+if (s->vl_eq_vlmax) { /* fast path: inline gvec dup of cpu_fpr[rs1] over the destination register */
+tcg_gen_gvec_dup_i64(s->sew, vreg_ofs(s, a->rd),
+ MAXSZ(s), MAXSZ(s), cpu_fpr[a->rs1]);
+} else { /* otherwise call an out-of-line helper selected by sew */
+TCGv_ptr dest = tcg_temp_new_ptr();
+TCGv_i32 desc;
+uint32_t data = FIELD_DP32(0, VDATA, LMUL, s->lmul);
+static gen_helper_vmv_vx * const fns[3] = { /* reuses the vmv_v_x helpers; indexed by sew - 1 */
+gen_helper_vmv_v_x_h,
+gen_helper_vmv_v_x_w,
+gen_helper_vmv_v_x_d,
+};
+
+desc = tcg_const_i32(simd_desc(0, s->vlen / 8, data));
+tcg_gen_addi_ptr(dest, cpu_env, vreg_ofs(s, a->rd)); /* dest = cpu_env + offset of register rd */
+fns[s->sew - 1](dest, cpu_fpr[a->rs1], cpu_env, desc); /* NOTE(review): assumes 1 <= sew <= 3; only sew != 0 is checked here - confirm */
+
+tcg_temp_free_ptr(dest);
+tcg_temp_free_i32(desc);
+}
+return true;
+}
+return false;
+}
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index c1a0d14ea8..650a17cc1c 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -4146,3 +4146,33 @@ RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d)
 GEN_VEXT_V(vfclass_v_h, 2, 2, clearh)
 GEN_VEXT_V(vfclass_v_w, 4, 4, clearl)
 GEN_VEXT_V(vfclass_v_d, 8, 8, clearq)
+
+/* Vector Floating-Point Merge Instruction */
+#define GEN_VFMERGE_VF(NAME, ETYPE, H, CLEAR_FN) /* defines HELPER(NAME) */ \
+void HELPER(NAME)(void *vd, void *v0, uint64_t s1,                        \
+                  void *vs2, CPURISCVState *env, uint32_t desc)           \
+{                                                                         \
+    uint32_t mlen = vext_mlen(desc);                                      \
+    uint32_t vm = vext_vm(desc);                                          \
+    uint32_t vl = env->vl;                                                \
+    uint32_t esz = sizeof(ETYPE);                                         \
+    uint32_t vlmax = vext_maxsz(desc) / esz;                              \
+    uint32_t i;                                                           \
+    /* vd[i] = vs2[i] if !vm and the v0 mask test is 0, else (ETYPE)s1 */ \
+    if (vl == 0) {                                                        \
+        return; /* nothing is written or cleared when vl is 0 */          \
+    }                                                                     \
+    for (i = 0; i < vl; i++) {                                            \
+        if (!vm && !vext_elem_mask(v0, mlen, i)) {                        \
+            ETYPE s2 = *((ETYPE *)vs2 + H(i));                            \
+            *((ETYPE *)vd + H(i)) = s2; /* H (was H1): ETYPE-sized index */ \
+        } else {                                                          \
+            *((ETYPE *)vd + H(i)) = (ETYPE)s1; /* s1 truncated to ETYPE */ \
+        }                                                                 \
+    }                                                                     \
+    CLEAR_FN(vd, vl, vl * esz, vlmax * esz); /* tail handled by CLEAR_FN */ \
+}
+
+GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2, clearh) /* int16_t element variant: index macro H2, clear function clearh */