[PATCH qemu v12 09/15] target/riscv: rvv: Add tail agnostic for vector integer merge and move instructions

2022-04-27 Thread ~eopxd
From: eopXD 

Signed-off-by: eop Chen 
Reviewed-by: Frank Chang 
Reviewed-by: Weiwei Li 
---
 target/riscv/insn_trans/trans_rvv.c.inc | 44 +
 target/riscv/vector_helper.c| 20 +++
 2 files changed, 64 insertions(+)

diff --git a/target/riscv/insn_trans/trans_rvv.c.inc 
b/target/riscv/insn_trans/trans_rvv.c.inc
index 430847b0f9..46ee673040 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -2123,11 +2123,22 @@ static bool trans_vmv_v_v(DisasContext *s, arg_vmv_v_v 
*a)
 /* vmv.v.v has rs2 = 0 and vm = 1 */
 vext_check_sss(s, a->rd, a->rs1, 0, 1)) {
 if (s->vl_eq_vlmax) {
+if (s->vta && s->lmul < 0) {
+/*
+ * tail elements may pass vlmax when lmul < 0
+ * set tail elements to 1s
+ */
+uint32_t vlenb = s->cfg_ptr->vlen >> 3;
+tcg_gen_gvec_ori(s->sew, vreg_ofs(s, a->rd),
+ vreg_ofs(s, a->rd), -1,
+ vlenb, vlenb);
+}
 tcg_gen_gvec_mov(s->sew, vreg_ofs(s, a->rd),
  vreg_ofs(s, a->rs1),
  MAXSZ(s), MAXSZ(s));
 } else {
 uint32_t data = FIELD_DP32(0, VDATA, LMUL, s->lmul);
+data = FIELD_DP32(data, VDATA, VTA, s->vta);
 static gen_helper_gvec_2_ptr * const fns[4] = {
 gen_helper_vmv_v_v_b, gen_helper_vmv_v_v_h,
 gen_helper_vmv_v_v_w, gen_helper_vmv_v_v_d,
@@ -2163,6 +2174,16 @@ static bool trans_vmv_v_x(DisasContext *s, arg_vmv_v_x 
*a)
 s1 = get_gpr(s, a->rs1, EXT_SIGN);
 
 if (s->vl_eq_vlmax) {
+if (s->vta && s->lmul < 0) {
+/*
+ * tail elements may pass vlmax when lmul < 0
+ * set tail elements to 1s
+ */
+uint32_t vlenb = s->cfg_ptr->vlen >> 3;
+tcg_gen_gvec_ori(s->sew, vreg_ofs(s, a->rd),
+ vreg_ofs(s, a->rd), -1,
+ vlenb, vlenb);
+}
 tcg_gen_gvec_dup_tl(s->sew, vreg_ofs(s, a->rd),
 MAXSZ(s), MAXSZ(s), s1);
 } else {
@@ -2170,6 +2191,7 @@ static bool trans_vmv_v_x(DisasContext *s, arg_vmv_v_x *a)
 TCGv_i64 s1_i64 = tcg_temp_new_i64();
 TCGv_ptr dest = tcg_temp_new_ptr();
 uint32_t data = FIELD_DP32(0, VDATA, LMUL, s->lmul);
+data = FIELD_DP32(data, VDATA, VTA, s->vta);
 static gen_helper_vmv_vx * const fns[4] = {
 gen_helper_vmv_v_x_b, gen_helper_vmv_v_x_h,
 gen_helper_vmv_v_x_w, gen_helper_vmv_v_x_d,
@@ -2200,6 +,16 @@ static bool trans_vmv_v_i(DisasContext *s, arg_vmv_v_i 
*a)
 vext_check_ss(s, a->rd, 0, 1)) {
 int64_t simm = sextract64(a->rs1, 0, 5);
 if (s->vl_eq_vlmax) {
+if (s->vta && s->lmul < 0) {
+/*
+ * tail elements may pass vlmax when lmul < 0
+ * set tail elements to 1s
+ */
+uint32_t vlenb = s->cfg_ptr->vlen >> 3;
+tcg_gen_gvec_ori(s->sew, vreg_ofs(s, a->rd),
+ vreg_ofs(s, a->rd), -1,
+ vlenb, vlenb);
+}
 tcg_gen_gvec_dup_imm(s->sew, vreg_ofs(s, a->rd),
  MAXSZ(s), MAXSZ(s), simm);
 mark_vs_dirty(s);
@@ -2208,6 +2240,7 @@ static bool trans_vmv_v_i(DisasContext *s, arg_vmv_v_i *a)
 TCGv_i64 s1;
 TCGv_ptr dest;
 uint32_t data = FIELD_DP32(0, VDATA, LMUL, s->lmul);
+data = FIELD_DP32(data, VDATA, VTA, s->vta);
 static gen_helper_vmv_vx * const fns[4] = {
 gen_helper_vmv_v_x_b, gen_helper_vmv_v_x_h,
 gen_helper_vmv_v_x_w, gen_helper_vmv_v_x_d,
@@ -2780,6 +2813,16 @@ static bool trans_vfmv_v_f(DisasContext *s, arg_vfmv_v_f 
*a)
 TCGv_i64 t1;
 
 if (s->vl_eq_vlmax) {
+if (s->vta && s->lmul < 0) {
+/*
+ * tail elements may pass vlmax when lmul < 0
+ * set tail elements to 1s
+ */
+uint32_t vlenb = s->cfg_ptr->vlen >> 3;
+tcg_gen_gvec_ori(s->sew, vreg_ofs(s, a->rd),
+ vreg_ofs(s, a->rd), -1,
+ vlenb, vlenb);
+}
 t1 = tcg_temp_new_i64();
 /* NaN-box f[rs1] */
 do_nanbox(s, t1, cpu_fpr[a->rs1]);
@@ -2791,6 +2834,7 @@ static bool trans_vfmv_v_f(DisasContext *s, arg_vfmv_v_f 
*a)
 TCGv_ptr dest;
 TCGv_i32 desc;
 uint32_t data = FIELD_DP32(0, VDATA, LMUL, s->lmul);
+data = FIELD_DP32(data, VDATA, VTA, s

[PATCH qemu v12 11/15] target/riscv: rvv: Add tail agnostic for vector floating-point instructions

2022-04-27 Thread ~eopxd
From: eopXD 

Compare instructions write mask registers, and so always operate under a
tail-agnostic policy.

Signed-off-by: eop Chen 
Reviewed-by: Frank Chang 
Reviewed-by: Weiwei Li 
---
 target/riscv/insn_trans/trans_rvv.c.inc |  15 +
 target/riscv/vector_helper.c| 443 +---
 2 files changed, 262 insertions(+), 196 deletions(-)

diff --git a/target/riscv/insn_trans/trans_rvv.c.inc 
b/target/riscv/insn_trans/trans_rvv.c.inc
index 46ee673040..a267945267 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -2404,6 +2404,9 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a)
 \
\
 data = FIELD_DP32(data, VDATA, VM, a->vm); \
 data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
+data = FIELD_DP32(data, VDATA, VTA, s->vta);   \
+data = \
+FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s);\
 tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \
vreg_ofs(s, a->rs1),\
vreg_ofs(s, a->rs2), cpu_env,   \
@@ -2486,6 +2489,9 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a)
\
 gen_set_rm(s, RISCV_FRM_DYN); \
 data = FIELD_DP32(data, VDATA, VM, a->vm);\
 data = FIELD_DP32(data, VDATA, LMUL, s->lmul);\
+data = FIELD_DP32(data, VDATA, VTA, s->vta);  \
+data = FIELD_DP32(data, VDATA, VTA_ALL_1S,\
+  s->cfg_vta_all_1s); \
 return opfvf_trans(a->rd, a->rs1, a->rs2, data,   \
fns[s->sew - 1], s);   \
 } \
@@ -2524,6 +2530,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a)
   \
  \
 data = FIELD_DP32(data, VDATA, VM, a->vm);   \
 data = FIELD_DP32(data, VDATA, LMUL, s->lmul);   \
+data = FIELD_DP32(data, VDATA, VTA, s->vta); \
 tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),   \
vreg_ofs(s, a->rs1),  \
vreg_ofs(s, a->rs2), cpu_env, \
@@ -2563,6 +2570,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a)
   \
 gen_set_rm(s, RISCV_FRM_DYN);\
 data = FIELD_DP32(data, VDATA, VM, a->vm);   \
 data = FIELD_DP32(data, VDATA, LMUL, s->lmul);   \
+data = FIELD_DP32(data, VDATA, VTA, s->vta); \
 return opfvf_trans(a->rd, a->rs1, a->rs2, data,  \
fns[s->sew - 1], s);  \
 }\
@@ -2599,6 +2607,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a)
 \
\
 data = FIELD_DP32(data, VDATA, VM, a->vm); \
 data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
+data = FIELD_DP32(data, VDATA, VTA, s->vta);   \
 tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \
vreg_ofs(s, a->rs1),\
vreg_ofs(s, a->rs2), cpu_env,   \
@@ -2638,6 +2647,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a)
   \
 gen_set_rm(s, RISCV_FRM_DYN);\
 data = FIELD_DP32(data, VDATA, VM, a->vm);   \
 data = FIELD_DP32(data, VDATA, LMUL, s->lmul);   \
+data = FIELD_DP32(data, VDATA, VTA, s->vta); \
 return opfvf_trans(a->rd, a->rs1, a->rs2, data,  \
fns[s->sew - 1], s);  \
 }\
@@ -2721,6 +2731,7 @@ static bool do_opfv(DisasContext *s, arg_rmr *a,
 
 data = FIELD_DP32(data, VDATA, VM, a->vm);
 data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
+data = FIELD_DP32(data, VDATA, VTA, s->vta);
 tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
vreg_ofs(s, a->rs2), cpu_env,
s->cfg_ptr->vlen / 8,
@@ -2935,6 +2946,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a) 
 \
\
 data = FIELD_DP32(data, VDATA, VM, a->vm); \
 data = FIELD_DP32(data, VDATA, LMUL, s->lmul); 

[PATCH qemu v12 05/15] target/riscv: rvv: Add tail agnostic for vector load / store instructions

2022-04-27 Thread ~eopxd
From: eopXD 

The destination registers of unit-stride mask load and store instructions
are always written with a tail-agnostic policy.

Signed-off-by: eop Chen 
Reviewed-by: Frank Chang 
Reviewed-by: Weiwei Li 
---
 target/riscv/insn_trans/trans_rvv.c.inc | 11 ++
 target/riscv/translate.c|  2 ++
 target/riscv/vector_helper.c| 28 +
 3 files changed, 41 insertions(+)

diff --git a/target/riscv/insn_trans/trans_rvv.c.inc 
b/target/riscv/insn_trans/trans_rvv.c.inc
index cc80bf00ff..99691f1b9f 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -711,6 +711,7 @@ static bool ld_us_op(DisasContext *s, arg_r2nfvm *a, 
uint8_t eew)
 data = FIELD_DP32(data, VDATA, VM, a->vm);
 data = FIELD_DP32(data, VDATA, LMUL, emul);
 data = FIELD_DP32(data, VDATA, NF, a->nf);
+data = FIELD_DP32(data, VDATA, VTA, s->vta);
 return ldst_us_trans(a->rd, a->rs1, data, fn, s, false);
 }
 
@@ -748,6 +749,7 @@ static bool st_us_op(DisasContext *s, arg_r2nfvm *a, 
uint8_t eew)
 data = FIELD_DP32(data, VDATA, VM, a->vm);
 data = FIELD_DP32(data, VDATA, LMUL, emul);
 data = FIELD_DP32(data, VDATA, NF, a->nf);
+data = FIELD_DP32(data, VDATA, VTA, s->vta);
 return ldst_us_trans(a->rd, a->rs1, data, fn, s, true);
 }
 
@@ -774,6 +776,8 @@ static bool ld_us_mask_op(DisasContext *s, arg_vlm_v *a, 
uint8_t eew)
 /* EMUL = 1, NFIELDS = 1 */
 data = FIELD_DP32(data, VDATA, LMUL, 0);
 data = FIELD_DP32(data, VDATA, NF, 1);
+/* Mask destination register are always tail-agnostic */
+data = FIELD_DP32(data, VDATA, VTA, s->cfg_vta_all_1s);
 return ldst_us_trans(a->rd, a->rs1, data, fn, s, false);
 }
 
@@ -791,6 +795,8 @@ static bool st_us_mask_op(DisasContext *s, arg_vsm_v *a, 
uint8_t eew)
 /* EMUL = 1, NFIELDS = 1 */
 data = FIELD_DP32(data, VDATA, LMUL, 0);
 data = FIELD_DP32(data, VDATA, NF, 1);
+/* Mask destination register are always tail-agnostic */
+data = FIELD_DP32(data, VDATA, VTA, s->cfg_vta_all_1s);
 return ldst_us_trans(a->rd, a->rs1, data, fn, s, true);
 }
 
@@ -862,6 +868,7 @@ static bool ld_stride_op(DisasContext *s, arg_rnfvm *a, 
uint8_t eew)
 data = FIELD_DP32(data, VDATA, VM, a->vm);
 data = FIELD_DP32(data, VDATA, LMUL, emul);
 data = FIELD_DP32(data, VDATA, NF, a->nf);
+data = FIELD_DP32(data, VDATA, VTA, s->vta);
 return ldst_stride_trans(a->rd, a->rs1, a->rs2, data, fn, s, false);
 }
 
@@ -891,6 +898,7 @@ static bool st_stride_op(DisasContext *s, arg_rnfvm *a, 
uint8_t eew)
 data = FIELD_DP32(data, VDATA, VM, a->vm);
 data = FIELD_DP32(data, VDATA, LMUL, emul);
 data = FIELD_DP32(data, VDATA, NF, a->nf);
+data = FIELD_DP32(data, VDATA, VTA, s->vta);
 fn = fns[eew];
 if (fn == NULL) {
 return false;
@@ -991,6 +999,7 @@ static bool ld_index_op(DisasContext *s, arg_rnfvm *a, 
uint8_t eew)
 data = FIELD_DP32(data, VDATA, VM, a->vm);
 data = FIELD_DP32(data, VDATA, LMUL, emul);
 data = FIELD_DP32(data, VDATA, NF, a->nf);
+data = FIELD_DP32(data, VDATA, VTA, s->vta);
 return ldst_index_trans(a->rd, a->rs1, a->rs2, data, fn, s, false);
 }
 
@@ -1043,6 +1052,7 @@ static bool st_index_op(DisasContext *s, arg_rnfvm *a, 
uint8_t eew)
 data = FIELD_DP32(data, VDATA, VM, a->vm);
 data = FIELD_DP32(data, VDATA, LMUL, emul);
 data = FIELD_DP32(data, VDATA, NF, a->nf);
+data = FIELD_DP32(data, VDATA, VTA, s->vta);
 return ldst_index_trans(a->rd, a->rs1, a->rs2, data, fn, s, true);
 }
 
@@ -1108,6 +1118,7 @@ static bool ldff_op(DisasContext *s, arg_r2nfvm *a, 
uint8_t eew)
 data = FIELD_DP32(data, VDATA, VM, a->vm);
 data = FIELD_DP32(data, VDATA, LMUL, emul);
 data = FIELD_DP32(data, VDATA, NF, a->nf);
+data = FIELD_DP32(data, VDATA, VTA, s->vta);
 return ldff_trans(a->rd, a->rs1, data, fn, s);
 }
 
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index 7775dade26..58cbb6ded3 100644
--- a/target/riscv/translate.c
+++ b/target/riscv/translate.c
@@ -95,6 +95,7 @@ typedef struct DisasContext {
 int8_t lmul;
 uint8_t sew;
 uint8_t vta;
+bool cfg_vta_all_1s;
 target_ulong vstart;
 bool vl_eq_vlmax;
 uint8_t ntemp;
@@ -1085,6 +1086,7 @@ static void riscv_tr_init_disas_context(DisasContextBase 
*dcbase, CPUState *cs)
 ctx->sew = FIELD_EX32(tb_flags, TB_FLAGS, SEW);
 ctx->lmul = sextract32(FIELD_EX32(tb_flags, TB_FLAGS, LMUL), 0, 3);
 ctx->vta = FIELD_EX32(tb_flags, TB_FLAGS, VTA) && cpu->cfg.rvv_ta_all_1s;
+ctx->cfg_vta_all_1s = cpu->cfg.rvv_ta_all_1s;
 ctx->vstart = env->vstart;
 ctx->vl_eq_vlmax = FIELD_EX32(tb_flags, TB_FLAGS, VL_EQ_VLMAX);
 ctx->misa_mxl_max = env->misa_mxl_max;
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index 396e252179..d186bf9512 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -270,6 +270,8 @@ vex

[PATCH qemu v12 10/15] target/riscv: rvv: Add tail agnostic for vector fix-point arithmetic instructions

2022-04-27 Thread ~eopxd
From: eopXD 

Signed-off-by: eop Chen 
Reviewed-by: Frank Chang 
Reviewed-by: Weiwei Li 
---
 target/riscv/vector_helper.c | 220 ++-
 1 file changed, 114 insertions(+), 106 deletions(-)

diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index f7e36b0564..9d66cd1a83 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -2080,10 +2080,12 @@ static inline void
 vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2,
  CPURISCVState *env,
  uint32_t desc,
- opivv2_rm_fn *fn)
+ opivv2_rm_fn *fn, uint32_t esz)
 {
 uint32_t vm = vext_vm(desc);
 uint32_t vl = env->vl;
+uint32_t total_elems = vext_get_total_elems(env, desc, esz);
+uint32_t vta = vext_vta(desc);
 
 switch (env->vxrm) {
 case 0: /* rnu */
@@ -2103,15 +2105,17 @@ vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2,
  env, vl, vm, 3, fn);
 break;
 }
+/* set tail elements to 1s */
+vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
 }
 
 /* generate helpers for fixed point instructions with OPIVV format */
-#define GEN_VEXT_VV_RM(NAME)\
+#define GEN_VEXT_VV_RM(NAME, ESZ)   \
 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
   CPURISCVState *env, uint32_t desc)\
 {   \
 vext_vv_rm_2(vd, v0, vs1, vs2, env, desc,   \
- do_##NAME);\
+ do_##NAME, ESZ);   \
 }
 
 static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t 
b)
@@ -2161,10 +2165,10 @@ RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, 
saddu8)
 RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16)
 RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32)
 RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64)
-GEN_VEXT_VV_RM(vsaddu_vv_b)
-GEN_VEXT_VV_RM(vsaddu_vv_h)
-GEN_VEXT_VV_RM(vsaddu_vv_w)
-GEN_VEXT_VV_RM(vsaddu_vv_d)
+GEN_VEXT_VV_RM(vsaddu_vv_b, 1)
+GEN_VEXT_VV_RM(vsaddu_vv_h, 2)
+GEN_VEXT_VV_RM(vsaddu_vv_w, 4)
+GEN_VEXT_VV_RM(vsaddu_vv_d, 8)
 
 typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i,
   CPURISCVState *env, int vxrm);
@@ -2197,10 +2201,12 @@ static inline void
 vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2,
  CPURISCVState *env,
  uint32_t desc,
- opivx2_rm_fn *fn)
+ opivx2_rm_fn *fn, uint32_t esz)
 {
 uint32_t vm = vext_vm(desc);
 uint32_t vl = env->vl;
+uint32_t total_elems = vext_get_total_elems(env, desc, esz);
+uint32_t vta = vext_vta(desc);
 
 switch (env->vxrm) {
 case 0: /* rnu */
@@ -2220,25 +2226,27 @@ vext_vx_rm_2(void *vd, void *v0, target_long s1, void 
*vs2,
  env, vl, vm, 3, fn);
 break;
 }
+/* set tail elements to 1s */
+vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
 }
 
 /* generate helpers for fixed point instructions with OPIVX format */
-#define GEN_VEXT_VX_RM(NAME)  \
+#define GEN_VEXT_VX_RM(NAME, ESZ) \
 void HELPER(NAME)(void *vd, void *v0, target_ulong s1,\
 void *vs2, CPURISCVState *env, uint32_t desc) \
 { \
 vext_vx_rm_2(vd, v0, s1, vs2, env, desc,  \
- do_##NAME);  \
+ do_##NAME, ESZ); \
 }
 
 RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8)
 RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16)
 RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32)
 RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64)
-GEN_VEXT_VX_RM(vsaddu_vx_b)
-GEN_VEXT_VX_RM(vsaddu_vx_h)
-GEN_VEXT_VX_RM(vsaddu_vx_w)
-GEN_VEXT_VX_RM(vsaddu_vx_d)
+GEN_VEXT_VX_RM(vsaddu_vx_b, 1)
+GEN_VEXT_VX_RM(vsaddu_vx_h, 2)
+GEN_VEXT_VX_RM(vsaddu_vx_w, 4)
+GEN_VEXT_VX_RM(vsaddu_vx_d, 8)
 
 static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
 {
@@ -2284,19 +2292,19 @@ RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, 
sadd8)
 RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16)
 RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32)
 RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64)
-GEN_VEXT_VV_RM(vsadd_vv_b)
-GEN_VEXT_VV_RM(vsadd_vv_h)
-GEN_VEXT_VV_RM(vsadd_vv_w)
-GEN_VEXT_VV_RM(vsadd_vv_d)
+GEN_VEXT_VV_RM(vsadd_vv_b, 1)
+GEN_VEXT_VV_RM(vsadd_vv_h, 2)
+GEN_VEXT_VV_RM(vsadd_vv_w, 4)
+GEN_VEXT_VV_RM(vsadd_vv_d, 8)
 
 RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8)
 RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16)
 RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32)
 RVVCALL(OPIVX2_RM,

[PATCH qemu v12 04/15] target/riscv: rvv: Add tail agnostic for vv instructions

2022-04-27 Thread ~eopxd
From: eopXD 

According to the v-spec, tail-agnostic elements may either be left
undisturbed or have all of their bits set to 1s. To make the difference
between tail policies observable, QEMU should be able to simulate the
tail agnostic behavior as "set tail elements' bits to all 1s".

The v-spec allows multiple possible values for agnostic elements. The
main intent of this patch-set is to add an option that makes the tail
policies distinguishable. Setting agnostic elements to all 1s allows
QEMU to express this.

This is the first commit regarding the optional tail agnostic
behavior. Follow-up commits will add this optional behavior
for all rvv instructions.

Signed-off-by: eop Chen 
Reviewed-by: Frank Chang 
Reviewed-by: Weiwei Li 
---
 target/riscv/cpu.h  |   2 +
 target/riscv/cpu_helper.c   |   2 +
 target/riscv/insn_trans/trans_rvv.c.inc |  11 +
 target/riscv/internals.h|   5 +-
 target/riscv/translate.c|   2 +
 target/riscv/vector_helper.c| 296 +---
 6 files changed, 187 insertions(+), 131 deletions(-)

diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index c069fe85fa..8c4a79b5a0 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -369,6 +369,7 @@ struct RISCVCPUConfig {
 bool ext_zhinxmin;
 bool ext_zve32f;
 bool ext_zve64f;
+bool rvv_ta_all_1s;
 
 /* Vendor-specific custom extensions */
 bool ext_XVentanaCondOps;
@@ -516,6 +517,7 @@ FIELD(TB_FLAGS, XL, 20, 2)
 /* If PointerMasking should be applied */
 FIELD(TB_FLAGS, PM_MASK_ENABLED, 22, 1)
 FIELD(TB_FLAGS, PM_BASE_ENABLED, 23, 1)
+FIELD(TB_FLAGS, VTA, 24, 1)
 
 #ifdef TARGET_RISCV32
 #define riscv_cpu_mxl(env)  ((void)(env), MXL_RV32)
diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
index 1c60fb2e80..2941c88c31 100644
--- a/target/riscv/cpu_helper.c
+++ b/target/riscv/cpu_helper.c
@@ -65,6 +65,8 @@ void cpu_get_tb_cpu_state(CPURISCVState *env, target_ulong 
*pc,
 flags = FIELD_DP32(flags, TB_FLAGS, LMUL,
 FIELD_EX64(env->vtype, VTYPE, VLMUL));
 flags = FIELD_DP32(flags, TB_FLAGS, VL_EQ_VLMAX, vl_eq_vlmax);
+flags = FIELD_DP32(flags, TB_FLAGS, VTA,
+FIELD_EX64(env->vtype, VTYPE, VTA));
 } else {
 flags = FIELD_DP32(flags, TB_FLAGS, VILL, 1);
 }
diff --git a/target/riscv/insn_trans/trans_rvv.c.inc 
b/target/riscv/insn_trans/trans_rvv.c.inc
index 57953923d5..cc80bf00ff 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -1223,6 +1223,16 @@ do_opivv_gvec(DisasContext *s, arg_rmrr *a, GVecGen3Fn 
*gvec_fn,
 tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
 
 if (a->vm && s->vl_eq_vlmax) {
+if (s->vta && s->lmul < 0) {
+/*
+ * tail elements may pass vlmax when lmul < 0
+ * set tail elements to 1s
+ */
+uint32_t vlenb = s->cfg_ptr->vlen >> 3;
+tcg_gen_gvec_ori(s->sew, vreg_ofs(s, a->rd),
+ vreg_ofs(s, a->rd), -1,
+ vlenb, vlenb);
+}
 gvec_fn(s->sew, vreg_ofs(s, a->rd),
 vreg_ofs(s, a->rs2), vreg_ofs(s, a->rs1),
 MAXSZ(s), MAXSZ(s));
@@ -1231,6 +1241,7 @@ do_opivv_gvec(DisasContext *s, arg_rmrr *a, GVecGen3Fn 
*gvec_fn,
 
 data = FIELD_DP32(data, VDATA, VM, a->vm);
 data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
+data = FIELD_DP32(data, VDATA, VTA, s->vta);
 tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
vreg_ofs(s, a->rs1), vreg_ofs(s, a->rs2),
cpu_env, s->cfg_ptr->vlen / 8,
diff --git a/target/riscv/internals.h b/target/riscv/internals.h
index dbb322bfa7..512c6c30cf 100644
--- a/target/riscv/internals.h
+++ b/target/riscv/internals.h
@@ -24,8 +24,9 @@
 /* share data between vector helpers and decode code */
 FIELD(VDATA, VM, 0, 1)
 FIELD(VDATA, LMUL, 1, 3)
-FIELD(VDATA, NF, 4, 4)
-FIELD(VDATA, WD, 4, 1)
+FIELD(VDATA, VTA, 4, 1)
+FIELD(VDATA, NF, 5, 4)
+FIELD(VDATA, WD, 5, 1)
 
 /* float point classify helpers */
 target_ulong fclass_h(uint64_t frs1);
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index fac998a6b5..7775dade26 100644
--- a/target/riscv/translate.c
+++ b/target/riscv/translate.c
@@ -94,6 +94,7 @@ typedef struct DisasContext {
  */
 int8_t lmul;
 uint8_t sew;
+uint8_t vta;
 target_ulong vstart;
 bool vl_eq_vlmax;
 uint8_t ntemp;
@@ -1083,6 +1084,7 @@ static void riscv_tr_init_disas_context(DisasContextBase 
*dcbase, CPUState *cs)
 ctx->vill = FIELD_EX32(tb_flags, TB_FLAGS, VILL);
 ctx->sew = FIELD_EX32(tb_flags, TB_FLAGS, SEW);
 ctx->lmul = sextract32(FIELD_EX32(tb_flags, TB_FLAGS, LMUL), 0, 3);
+ctx->vta = FIELD_EX32(tb_flags, TB_FLAGS, VTA) && cpu->cfg.rvv_ta_all_1s;
 ctx->vstart = env->vstart;
 ctx->vl_eq_vlma

[PATCH qemu v12 15/15] target/riscv: rvv: Add option 'rvv_ta_all_1s' to enable optional tail agnostic behavior

2022-04-27 Thread ~eopxd
From: eopXD 

According to the v-spec, tail-agnostic elements may either be left
undisturbed or have all of their bits set to 1s. To make the difference
between tail policies observable, QEMU should be able to simulate the
tail agnostic behavior as "set tail elements' bits to all 1s".

The v-spec allows multiple possible values for agnostic elements. The
main intent of this patch-set is to add an option that makes the tail
policies distinguishable. Setting agnostic elements to all 1s allows
QEMU to express this.

This commit adds the option 'rvv_ta_all_1s' to enable the behavior; it
is disabled by default.

Signed-off-by: eop Chen 
Reviewed-by: Frank Chang 
Reviewed-by: Weiwei Li 
---
 target/riscv/cpu.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index ddda4906ff..cd4cf4b41e 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -810,6 +810,7 @@ static Property riscv_cpu_properties[] = {
 DEFINE_PROP_BOOL("x-aia", RISCVCPU, cfg.aia, false),
 
 DEFINE_PROP_UINT64("resetvec", RISCVCPU, cfg.resetvec, DEFAULT_RSTVEC),
+DEFINE_PROP_BOOL("rvv_ta_all_1s", RISCVCPU, cfg.rvv_ta_all_1s, false),
 DEFINE_PROP_END_OF_LIST(),
 };
 
-- 
2.34.2



[PATCH qemu v12 07/15] target/riscv: rvv: Add tail agnostic for vector integer shift instructions

2022-04-27 Thread ~eopxd
From: eopXD 

Signed-off-by: eop Chen 
Reviewed-by: Frank Chang 
Reviewed-by: Weiwei Li 
---
 target/riscv/insn_trans/trans_rvv.c.inc | 11 +++
 target/riscv/vector_helper.c| 11 +++
 2 files changed, 22 insertions(+)

diff --git a/target/riscv/insn_trans/trans_rvv.c.inc 
b/target/riscv/insn_trans/trans_rvv.c.inc
index d15858fc6f..430847b0f9 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -1858,6 +1858,16 @@ do_opivx_gvec_shift(DisasContext *s, arg_rmrr *a, 
GVecGen2sFn32 *gvec_fn,
 }
 
 if (a->vm && s->vl_eq_vlmax) {
+if (s->vta && s->lmul < 0) {
+/*
+ * tail elements may pass vlmax when lmul < 0
+ * set tail elements to 1s
+ */
+uint32_t vlenb = s->cfg_ptr->vlen >> 3;
+tcg_gen_gvec_ori(s->sew, vreg_ofs(s, a->rd),
+ vreg_ofs(s, a->rd), -1,
+ vlenb, vlenb);
+}
 TCGv_i32 src1 = tcg_temp_new_i32();
 
 tcg_gen_trunc_tl_i32(src1, get_gpr(s, a->rs1, EXT_NONE));
@@ -1916,6 +1926,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a)
 \
\
 data = FIELD_DP32(data, VDATA, VM, a->vm); \
 data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
+data = FIELD_DP32(data, VDATA, VTA, s->vta);   \
 tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \
vreg_ofs(s, a->rs1),\
vreg_ofs(s, a->rs2), cpu_env,   \
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index 4ecdf955f3..8755671449 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -1245,6 +1245,9 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1,  
\
 { \
 uint32_t vm = vext_vm(desc);  \
 uint32_t vl = env->vl;\
+uint32_t esz = sizeof(TS1);   \
+uint32_t total_elems = vext_get_total_elems(env, desc, esz);  \
+uint32_t vta = vext_vta(desc);\
 uint32_t i;   \
   \
 for (i = env->vstart; i < vl; i++) {  \
@@ -1256,6 +1259,8 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1,  
\
 *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK);\
 } \
 env->vstart = 0;  \
+/* set tail elements to 1s */ \
+vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);  \
 }
 
 GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t,  uint8_t, H1, H1, DO_SLL, 0x7)
@@ -1280,6 +1285,10 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1,   
   \
 {   \
 uint32_t vm = vext_vm(desc);\
 uint32_t vl = env->vl;  \
+uint32_t esz = sizeof(TD);  \
+uint32_t total_elems =  \
+vext_get_total_elems(env, desc, esz);   \
+uint32_t vta = vext_vta(desc);  \
 uint32_t i; \
 \
 for (i = env->vstart; i < vl; i++) {\
@@ -1290,6 +1299,8 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1,
  \
 *((TD *)vd + HD(i)) = OP(s2, s1 & MASK);\
 }   \
 env->vstart = 0;\
+/* set tail elements to 1s */   \
+vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);\
 }
 
 GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7)
-- 
2.34.2




[PATCH qemu v12 00/15] Add tail agnostic behavior for rvv instructions

2022-04-27 Thread ~eopxd
According to the v-spec, tail-agnostic elements may either be left
undisturbed or have all of their bits set to 1s. To make the difference
between tail policies observable, QEMU should be able to simulate the
tail agnostic behavior as "set tail elements' bits to all 1s". An option
'rvv_ta_all_1s' is added to enable the behavior; it is disabled by
default.

The v-spec allows multiple possible values for agnostic elements. The
main intent of this patch-set is to add an option that makes the tail
policies distinguishable. Setting agnostic elements to all 1s keeps
things simple and allows QEMU to express this.

We may explore other possible agnostic behaviors by adding further
options in the future. Please understand that this patch-set is
limited in scope.

v2 updates:
- Addressed comments from Weiwei Li
- Added commit tail agnostic on load / store instructions (which
  I forgot to include into the patch-set)

v3 updates:
- Missed the very 1st commit, adding it back

v4 updates:
- Renamed vlmax to total_elems
- Deal with tail element when vl_eq_vlmax == true

v5 updates:
- Let `vext_get_total_elems` take `desc` and `esz`
- Utilize `simd_maxsz(desc)` to get `vlenb`
- Fix alignments to code

v6 updates:
- Fix `vext_get_total_elems`

v7 updates:
- Reuse `max_elems` for vector load / store helper functions. The
  translation sets desc's `lmul` to `min(1, lmul)`, making
  `vext_max_elems` equivalent to `vext_get_total_elems`.

v8 updates:
- Simplify `vext_set_elems_1s`, don't need `vext_set_elems_1s_fns`
- Fix `vext_get_total_elems`, it should derive upon EMUL instead
  of LMUL

v9 updates:
- Let instructions that are tail agnostic regardless of vta respect the
  option and not the vta.

v10 updates:
- Correct range to set element to 1s for load instructions

v11 updates:
- Separate addition of option 'rvv_ta_all_1s' as a new (last) commit
- Add description to show intent of the option in first commit for the
  optional tail agnostic behavior
- Tag WeiWei as Reviewed-by for all commits
- Tag Alistair as Reviewed-by for commit 01, 02
- Tag Alistair as Acked-by for commit 03

v12 updates:
- Add missing space in WeiWei's "Reviewed-by" tag

eopXD (15):
  target/riscv: rvv: Prune redundant ESZ, DSZ parameter passed
  target/riscv: rvv: Rename ambiguous esz
  target/riscv: rvv: Early exit when vstart >= vl
  target/riscv: rvv: Add tail agnostic for vv instructions
  target/riscv: rvv: Add tail agnostic for vector load / store
instructions
  target/riscv: rvv: Add tail agnostic for vx, vvm, vxm instructions
  target/riscv: rvv: Add tail agnostic for vector integer shift
instructions
  target/riscv: rvv: Add tail agnostic for vector integer comparison
instructions
  target/riscv: rvv: Add tail agnostic for vector integer merge and move
instructions
  target/riscv: rvv: Add tail agnostic for vector fix-point arithmetic
instructions
  target/riscv: rvv: Add tail agnostic for vector floating-point
instructions
  target/riscv: rvv: Add tail agnostic for vector reduction instructions
  target/riscv: rvv: Add tail agnostic for vector mask instructions
  target/riscv: rvv: Add tail agnostic for vector permutation
instructions
  target/riscv: rvv: Add option 'rvv_ta_all_1s' to enable optional tail
agnostic behavior

 target/riscv/cpu.c  |1 +
 target/riscv/cpu.h  |2 +
 target/riscv/cpu_helper.c   |2 +
 target/riscv/insn_trans/trans_rvv.c.inc |  176 +++
 target/riscv/internals.h|6 +-
 target/riscv/translate.c|4 +
 target/riscv/vector_helper.c| 1536 ++-
 7 files changed, 1110 insertions(+), 617 deletions(-)

-- 
2.34.2



[PATCH qemu v12 08/15] target/riscv: rvv: Add tail agnostic for vector integer comparison instructions

2022-04-27 Thread ~eopxd
From: eopXD 

Compare instructions write mask registers, and so always operate under a
tail-agnostic policy.

Signed-off-by: eop Chen 
Reviewed-by: Frank Chang 
Reviewed-by: Weiwei Li 
---
 target/riscv/vector_helper.c | 21 +
 1 file changed, 21 insertions(+)

diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index 8755671449..6356b6b0ef 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -1345,6 +1345,10 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void 
*vs2,   \
 { \
 uint32_t vm = vext_vm(desc);  \
 uint32_t vl = env->vl;\
+uint32_t esz = sizeof(ETYPE); \
+uint32_t total_elems =\
+vext_get_total_elems(env, desc, esz); \
+uint32_t vta_all_1s = vext_vta_all_1s(desc);  \
 uint32_t i;   \
   \
 for (i = env->vstart; i < vl; i++) {  \
@@ -1356,6 +1360,13 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void 
*vs2,   \
 vext_set_elem_mask(vd, i, DO_OP(s2, s1)); \
 } \
 env->vstart = 0;  \
+/* mask destination register are always tail-agnostic */  \
+/* set tail elements to 1s */ \
+if (vta_all_1s) { \
+for (; i < total_elems; i++) {\
+vext_set_elem_mask(vd, i, 1); \
+} \
+} \
 }
 
 GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t,  H1, DO_MSEQ)
@@ -1394,6 +1405,9 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, 
void *vs2,   \
 {   \
 uint32_t vm = vext_vm(desc);\
 uint32_t vl = env->vl;  \
+uint32_t esz = sizeof(ETYPE);   \
+uint32_t total_elems = vext_get_total_elems(env, desc, esz);\
+uint32_t vta_all_1s = vext_vta_all_1s(desc);\
 uint32_t i; \
 \
 for (i = env->vstart; i < vl; i++) {\
@@ -1405,6 +1419,13 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, 
void *vs2,   \
 DO_OP(s2, (ETYPE)(target_long)s1)); \
 }   \
 env->vstart = 0;\
+/* mask destination register are always tail-agnostic */\
+/* set tail elements to 1s */   \
+if (vta_all_1s) {   \
+for (; i < total_elems; i++) {  \
+vext_set_elem_mask(vd, i, 1);   \
+}   \
+}   \
 }
 
 GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t,  H1, DO_MSEQ)
-- 
2.34.2




[PATCH qemu v12 03/15] target/riscv: rvv: Early exit when vstart >= vl

2022-04-27 Thread ~eopxd
From: eopXD 

According to v-spec (section 5.4):
When vstart ≥ vl, there are no body elements, and no elements are
updated in any destination vector register group, including that
no tail elements are updated with agnostic values.

The vmsbf.m, vmsif.m, vmsof.m, viota.m and vcompress instructions
themselves require vstart to be zero, so they don't need the early exit.

Signed-off-by: eop Chen 
Reviewed-by: Frank Chang 
Reviewed-by: Weiwei Li 
Acked-by: Alistair Francis 
---
 target/riscv/insn_trans/trans_rvv.c.inc | 27 +
 1 file changed, 27 insertions(+)

diff --git a/target/riscv/insn_trans/trans_rvv.c.inc 
b/target/riscv/insn_trans/trans_rvv.c.inc
index 275fded6e4..57953923d5 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -652,6 +652,7 @@ static bool ldst_us_trans(uint32_t vd, uint32_t rs1, 
uint32_t data,
 
 TCGLabel *over = gen_new_label();
 tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
+tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
 
 dest = tcg_temp_new_ptr();
 mask = tcg_temp_new_ptr();
@@ -818,6 +819,7 @@ static bool ldst_stride_trans(uint32_t vd, uint32_t rs1, 
uint32_t rs2,
 
 TCGLabel *over = gen_new_label();
 tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
+tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
 
 dest = tcg_temp_new_ptr();
 mask = tcg_temp_new_ptr();
@@ -925,6 +927,7 @@ static bool ldst_index_trans(uint32_t vd, uint32_t rs1, 
uint32_t vs2,
 
 TCGLabel *over = gen_new_label();
 tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
+tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
 
 dest = tcg_temp_new_ptr();
 mask = tcg_temp_new_ptr();
@@ -1067,6 +1070,7 @@ static bool ldff_trans(uint32_t vd, uint32_t rs1, 
uint32_t data,
 
 TCGLabel *over = gen_new_label();
 tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
+tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
 
 dest = tcg_temp_new_ptr();
 mask = tcg_temp_new_ptr();
@@ -1216,6 +1220,7 @@ do_opivv_gvec(DisasContext *s, arg_rmrr *a, GVecGen3Fn 
*gvec_fn,
 }
 
 tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
+tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
 
 if (a->vm && s->vl_eq_vlmax) {
 gvec_fn(s->sew, vreg_ofs(s, a->rd),
@@ -1263,6 +1268,7 @@ static bool opivx_trans(uint32_t vd, uint32_t rs1, 
uint32_t vs2, uint32_t vm,
 
 TCGLabel *over = gen_new_label();
 tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
+tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
 
 dest = tcg_temp_new_ptr();
 mask = tcg_temp_new_ptr();
@@ -1427,6 +1433,7 @@ static bool opivi_trans(uint32_t vd, uint32_t imm, 
uint32_t vs2, uint32_t vm,
 
 TCGLabel *over = gen_new_label();
 tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
+tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
 
 dest = tcg_temp_new_ptr();
 mask = tcg_temp_new_ptr();
@@ -1513,6 +1520,7 @@ static bool do_opivv_widen(DisasContext *s, arg_rmrr *a,
 uint32_t data = 0;
 TCGLabel *over = gen_new_label();
 tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
+tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
 
 data = FIELD_DP32(data, VDATA, VM, a->vm);
 data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
@@ -1593,6 +1601,7 @@ static bool do_opiwv_widen(DisasContext *s, arg_rmrr *a,
 uint32_t data = 0;
 TCGLabel *over = gen_new_label();
 tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
+tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
 
 data = FIELD_DP32(data, VDATA, VM, a->vm);
 data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
@@ -1670,6 +1679,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a)
 \
 }; \
 TCGLabel *over = gen_new_label();  \
 tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);  \
+tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \
\
 data = FIELD_DP32(data, VDATA, VM, a->vm); \
 data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
@@ -1851,6 +1861,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a)
 \
 }; \
 TCGLabel *over = gen_new_label();  \
 tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);  \
+tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \
\
 data = FIELD_DP32(data, VDATA, VM, a->vm); \
 data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
@@ -2061,6 +2072,7 @@ static bool trans_v

[PATCH qemu v12 02/15] target/riscv: rvv: Rename ambiguous esz

2022-04-27 Thread ~eopxd
From: eopXD 

No functional change intended in this commit.

Signed-off-by: eop Chen 
Reviewed-by: Frank Chang 
Reviewed-by: Weiwei Li 
Reviewed-by: Alistair Francis 
---
 target/riscv/vector_helper.c | 76 ++--
 1 file changed, 38 insertions(+), 38 deletions(-)

diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index e94caf1a3c..d0452a7756 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -125,9 +125,9 @@ static inline int32_t vext_lmul(uint32_t desc)
 /*
  * Get the maximum number of elements can be operated.
  *
- * esz: log2 of element size in bytes.
+ * log2_esz: log2 of element size in bytes.
  */
-static inline uint32_t vext_max_elems(uint32_t desc, uint32_t esz)
+static inline uint32_t vext_max_elems(uint32_t desc, uint32_t log2_esz)
 {
 /*
  * As simd_desc support at most 2048 bytes, the max vlen is 1024 bits.
@@ -136,7 +136,7 @@ static inline uint32_t vext_max_elems(uint32_t desc, 
uint32_t esz)
 uint32_t vlenb = simd_maxsz(desc);
 
 /* Return VLMAX */
-int scale = vext_lmul(desc) - esz;
+int scale = vext_lmul(desc) - log2_esz;
 return scale < 0 ? vlenb >> -scale : vlenb << scale;
 }
 
@@ -231,11 +231,11 @@ vext_ldst_stride(void *vd, void *v0, target_ulong base,
  target_ulong stride, CPURISCVState *env,
  uint32_t desc, uint32_t vm,
  vext_ldst_elem_fn *ldst_elem,
- uint32_t esz, uintptr_t ra, MMUAccessType access_type)
+ uint32_t log2_esz, uintptr_t ra, MMUAccessType access_type)
 {
 uint32_t i, k;
 uint32_t nf = vext_nf(desc);
-uint32_t max_elems = vext_max_elems(desc, esz);
+uint32_t max_elems = vext_max_elems(desc, log2_esz);
 
 for (i = env->vstart; i < env->vl; i++, env->vstart++) {
 if (!vm && !vext_elem_mask(v0, i)) {
@@ -244,7 +244,7 @@ vext_ldst_stride(void *vd, void *v0, target_ulong base,
 
 k = 0;
 while (k < nf) {
-target_ulong addr = base + stride * i + (k << esz);
+target_ulong addr = base + stride * i + (k << log2_esz);
 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
 k++;
 }
@@ -289,18 +289,18 @@ GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d)
 /* unmasked unit-stride load and store operation*/
 static void
 vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
- vext_ldst_elem_fn *ldst_elem, uint32_t esz, uint32_t evl,
+ vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uint32_t evl,
  uintptr_t ra, MMUAccessType access_type)
 {
 uint32_t i, k;
 uint32_t nf = vext_nf(desc);
-uint32_t max_elems = vext_max_elems(desc, esz);
+uint32_t max_elems = vext_max_elems(desc, log2_esz);
 
 /* load bytes from guest memory */
 for (i = env->vstart; i < evl; i++, env->vstart++) {
 k = 0;
 while (k < nf) {
-target_ulong addr = base + ((i * nf + k) << esz);
+target_ulong addr = base + ((i * nf + k) << log2_esz);
 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
 k++;
 }
@@ -399,12 +399,12 @@ vext_ldst_index(void *vd, void *v0, target_ulong base,
 void *vs2, CPURISCVState *env, uint32_t desc,
 vext_get_index_addr get_index_addr,
 vext_ldst_elem_fn *ldst_elem,
-uint32_t esz, uintptr_t ra, MMUAccessType access_type)
+uint32_t log2_esz, uintptr_t ra, MMUAccessType access_type)
 {
 uint32_t i, k;
 uint32_t nf = vext_nf(desc);
 uint32_t vm = vext_vm(desc);
-uint32_t max_elems = vext_max_elems(desc, esz);
+uint32_t max_elems = vext_max_elems(desc, log2_esz);
 
 /* load bytes from guest memory */
 for (i = env->vstart; i < env->vl; i++, env->vstart++) {
@@ -414,7 +414,7 @@ vext_ldst_index(void *vd, void *v0, target_ulong base,
 
 k = 0;
 while (k < nf) {
-abi_ptr addr = get_index_addr(base, i, vs2) + (k << esz);
+abi_ptr addr = get_index_addr(base, i, vs2) + (k << log2_esz);
 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
 k++;
 }
@@ -480,13 +480,13 @@ static inline void
 vext_ldff(void *vd, void *v0, target_ulong base,
   CPURISCVState *env, uint32_t desc,
   vext_ldst_elem_fn *ldst_elem,
-  uint32_t esz, uintptr_t ra)
+  uint32_t log2_esz, uintptr_t ra)
 {
 void *host;
 uint32_t i, k, vl = 0;
 uint32_t nf = vext_nf(desc);
 uint32_t vm = vext_vm(desc);
-uint32_t max_elems = vext_max_elems(desc, esz);
+uint32_t max_elems = vext_max_elems(desc, log2_esz);
 target_ulong addr, offset, remain;
 
 /* probe every access*/
@@ -494,12 +494,12 @@ vext_ldff(void *vd, void *v0, target_ulong base,
 if (!vm && !vext_elem_mask(v0, i)) {
 continue;
 }
- 

Re: [PATCH v2] WHPX: support for xcr0

2022-04-27 Thread Paolo Bonzini
Queued, thanks.  (It only took 30 months; thanks to Ivan Shcherbakov
for bringing it to my attention).

Paolo





Re: [PATCH qemu] spapr_pci: Disable IRQFD resampling on XIVE

2022-04-27 Thread Cédric Le Goater

On 4/28/22 07:32, Alexey Kardashevskiy wrote:



On 4/27/22 17:36, Cédric Le Goater wrote:

Hello Alexey,

On 4/27/22 06:36, Alexey Kardashevskiy wrote:

VFIO-PCI has a "KVM_IRQFD_FLAG_RESAMPLE" optimization for INTx EOI
handling when KVM can unmask PCI INTx (level triggered interrupt) without
switching to the userspace (==QEMU).

Unfortunately XIVE does not support level interrupts, 


That's not correctly phrased I think.



My bad, I meant "XIVE hardware".


ok. It makes more sense.

PSIHB and PHBs have internal latches to maintain the assertion level.
XIVE has none.






The QEMU XIVE device supports LSIs but the POWER9 kernel-irqchips,
KVM XICS-on-XIVE and XIVE native devices, are broken with respect
to passthrough adapters using INTx.



QEMU emulates them
and therefore there is no existing code path to kick the resamplefd.
The problem appears when passing through a PCI adapter with
the "pci=nomsi" kernel parameter - the adapter's interrupt
count in /proc/interrupts will be stuck at "1".

This disables resampler when the XIVE interrupt controller is configured.
This should not be very visible though KVM already exits to QEMU for INTx
and XIVE-capable boxes (POWER9 and newer) do not seem to have
performance-critical INTx-only capable devices.

Signed-off-by: Alexey Kardashevskiy 
---


Cédric, this is what I meant when I said that spapr_pci.c was unaware of
the interrupt controller type, neither xics nor xive was mentioned
in the file before.


---
  hw/ppc/spapr_pci.c | 14 +++---
  1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index 5bfd4aa9e5aa..2675052601db 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -729,11 +729,19 @@ static void pci_spapr_set_irq(void *opaque, int irq_num, 
int level)
  static PCIINTxRoute spapr_route_intx_pin_to_irq(void *opaque, int pin)
  {
+    SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
  SpaprPhbState *sphb = SPAPR_PCI_HOST_BRIDGE(opaque);
-    PCIINTxRoute route;
+    PCIINTxRoute route = { .mode = PCI_INTX_DISABLED };
-    route.mode = PCI_INTX_ENABLED;
-    route.irq = sphb->lsi_table[pin].irq;
+    /*
+ * Disable IRQFD resampler on XIVE as it does not support LSI and QEMU
+ * emulates those so the KVM kernel resamplefd kick is skipped and EOI
+ * is not delivered to VFIO-PCI.
+ */
+    if (!spapr->xive) {


This is testing the availability of the XIVE interrupt mode, but not
the active controller. See spapr_irq_init() which is called very
early in the machine initialization.

Is that what we want ? Is everything fine if we start the machine with
ic-mode=xics ? On a POWER9 host, this would use the KVM XICS-on-XIVE
device which is broken also AFAICT.


I should probably fix that in KVM, just not quite sure yet how for the realmode 
handlers, or just drop those on P9 and then the fix is trivial.



You should extend the SpaprInterruptControllerClass (for a routine) or
simply SpaprIrq (for a bool) if you need to handle IRQ matters from a
device model.


It is a property of KVM rather than the interrupt controller so it probably 
makes more sense to just stop advertising KVM_CAP_IRQFD_RESAMPLE. Hmmm...


I would fix the realmode handlers of the KVM XICS-on-XIVE device
first. The problem has been there for a while.

Then, for the XIVE native mode, I would simply handle it at the QEMU
level with a 'resample' bool in SpaprIrq. It  would be tested in spapr
pci when configuring the INTx routing.


Thanks,

C.




Re: serial hang in qemu-system-ppc64 -M pseries

2022-04-27 Thread Rob Landley



On 4/28/22 00:41, Rob Landley wrote:
> On 4/27/22 10:27, Thomas Huth wrote:
>> On 26/04/2022 12.26, Rob Landley wrote:
>>> When I cut and paste 80-ish characters of text into the Linux serial 
>>> console, it
>>> reads 16 characters and stops. When I hit space, it reads another 16 
>>> characters,
>>> and if I keep at it will eventually catch up without losing data. If I type,
>>> every character shows up immediately.
>> 
>> That "16" certainly comes from VTERM_BUFSIZE in hw/char/spapr_vty.c in the 
>> QEMU sources, I think.
>> 
>>> (On other qemu targets and kernels I can cut and paste an entire uuencoded
>>> binary and it goes through just fine in one go, but this target hangs with 
>>> big
>>> pastes until I hit keys.)
>>> 
>>> Is this a qemu-side bug, or a kernel-side bug?
>>> 
>>> Kernel config attached (linux 5.18-rc3 or thereabouts), qemu invocation is:
>>> 
>>> qemu-system-ppc64 -M pseries -vga none -nographic -no-reboot -m 256 -kernel
>>> vmlinux -initrd powerpc64leroot.cpio.gz -append "panic=1 HOST=powerpc64le
>>> console=hvc0"
>> 
>> Which version of QEMU are you using?
> 
> $ qemu-system-ppc64 --version
> QEMU emulator version 6.2.92 (v6.2.0-rc2)
> Copyright (c) 2003-2021 Fabrice Bellard and the QEMU Project developers

Just confirmed it behaves the same with current git (commit cf6f26d6f9b2).

Rob



Re: [PATCH v5 1/1] virtio: fix the condition for iommu_platform not supported

2022-04-27 Thread Michael S. Tsirkin
On Thu, Apr 28, 2022 at 01:52:46PM +0800, Jason Wang wrote:
> On Thu, Apr 28, 2022 at 12:57 PM Michael S. Tsirkin  wrote:
> >
> > On Thu, Apr 28, 2022 at 11:01:10AM +0800, Jason Wang wrote:
> > > On Wed, Apr 27, 2022 at 8:25 PM Chenyi Qiang  
> > > wrote:
> > > >
> > > >
> > > >
> > > > On 4/22/2022 3:11 PM, Chenyi Qiang wrote:
> > > > >
> > > > >
> > > > > On 2/7/2022 7:28 PM, Halil Pasic wrote:
> > > > >> The commit 04ceb61a40 ("virtio: Fail if iommu_platform is requested, 
> > > > >> but
> > > > >> unsupported") claims to fail the device hotplug when iommu_platform
> > > > >> is requested, but not supported by the (vhost) device. On the first
> > > > >> glance the condition for detecting that situation looks perfect, but
> > > > >> because a certain peculiarity of virtio_platform it ain't.
> > > > >>
> > > > >> In fact the aforementioned commit introduces a regression. It breaks
> > > > >> virtio-fs support for Secure Execution, and most likely also for AMD 
> > > > >> SEV
> > > > > >> or any other confidential guest scenario that relies on encrypted guest
> > > > >> memory.  The same also applies to any other vhost device that does 
> > > > >> not
> > > > >> support _F_ACCESS_PLATFORM.
> > > > >>
> > > > >> The peculiarity is that iommu_platform and _F_ACCESS_PLATFORM 
> > > > >> collates
> > > > >> "device can not access all of the guest RAM" and "iova != gpa, thus
> > > > >> device needs to translate iova".
> > > > >>
> > > > >> Confidential guest technologies currently rely on the 
> > > > >> device/hypervisor
> > > > >> offering _F_ACCESS_PLATFORM, so that, after the feature has been
> > > > >> negotiated, the guest  grants access to the portions of memory the
> > > > > >> device needs to see. So for confidential guests, generally,
> > > > >> _F_ACCESS_PLATFORM is about the restricted access to memory, but not
> > > > >> about the addresses used being something else than guest physical
> > > > >> addresses.
> > > > >>
> > > > >> This is the very reason for which commit f7ef7e6e3b ("vhost: 
> > > > >> correctly
> > > > >> turn on VIRTIO_F_IOMMU_PLATFORM") fences _F_ACCESS_PLATFORM from the
> > > > >> vhost device that does not need it, because on the vhost interface it
> > > > >> only means "I/O address translation is needed".
> > > > >>
> > > > >> This patch takes inspiration from f7ef7e6e3b ("vhost: correctly turn 
> > > > >> on
> > > > >> VIRTIO_F_IOMMU_PLATFORM"), and uses the same condition for detecting 
> > > > >> the
> > > > >> situation when _F_ACCESS_PLATFORM is requested, but no I/O 
> > > > >> translation
> > > > >> by the device, and thus no device capability is needed. In this
> > > > >> situation claiming that the device does not support 
> > > > > >> iommu_platform=on
> > > > >> is counter-productive. So let us stop doing that!
> > > > >>
> > > > >> Signed-off-by: Halil Pasic 
> > > > >> Reported-by: Jakob Naucke 
> > > > >> Fixes: 04ceb61a40 ("virtio: Fail if iommu_platform is requested, but
> > > > >> unsupported")
> > > > >> Acked-by: Cornelia Huck 
> > > > >> Reviewed-by: Daniel Henrique Barboza 
> > > > >> Tested-by: Daniel Henrique Barboza 
> > > > >> Cc: Kevin Wolf 
> > > > >> Cc: qemu-sta...@nongnu.org
> > > > >>
> > > > >> ---
> > > > >>
> > > > >> v4->v5:
> > > > >> * added back the return; so if somebody were to add code to the end 
> > > > >> of
> > > > >>the function we are still good
> > > > >> v3->v4:
> > > > >> * Fixed commit message (thanks Connie)
> > > > >> * Removed counter-productive initialization (thanks Connie)
> > > > >> * Added tags
> > > > >> v2->v3:
> > > > > >> * Caught a bug: I tried to check if vdev has the feature
> > > > >> ACCESS_PLATFORM after we have forced it. Moved the check
> > > > >> to a better place
> > > > >> v1->v2:
> > > > >> * Commit message tweaks. Most notably fixed commit SHA (Michael)
> > > > >>
> > > > >> ---
> > > > >> ---
> > > > >>   hw/virtio/virtio-bus.c | 12 +++-
> > > > >>   1 file changed, 7 insertions(+), 5 deletions(-)
> > > > >>
> > > > >> diff --git a/hw/virtio/virtio-bus.c b/hw/virtio/virtio-bus.c
> > > > >> index d23db98c56..0f69d1c742 100644
> > > > >> --- a/hw/virtio/virtio-bus.c
> > > > >> +++ b/hw/virtio/virtio-bus.c
> > > > >> @@ -48,6 +48,7 @@ void virtio_bus_device_plugged(VirtIODevice *vdev,
> > > > >> Error **errp)
> > > > >>   VirtioBusClass *klass = VIRTIO_BUS_GET_CLASS(bus);
> > > > >>   VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
> > > > >>   bool has_iommu = virtio_host_has_feature(vdev,
> > > > >> VIRTIO_F_IOMMU_PLATFORM);
> > > > >> +bool vdev_has_iommu;
> > > > >>   Error *local_err = NULL;
> > > > >>   DPRINTF("%s: plug device.\n", qbus->name);
> > > > >> @@ -69,11 +70,6 @@ void virtio_bus_device_plugged(VirtIODevice *vdev,
> > > > >> Error **errp)
> > > > >>   return;
> > > > >>   }
> > > > >> -if (has_iommu && !virtio_host_has_feature(vdev,
> > > > >> VIRTIO_F_IOMMU_PLATFORM)) {
> > > > >> -error_setg(errp, "iommu_platform=true 

Re: [PATCH v5 1/1] virtio: fix the condition for iommu_platform not supported

2022-04-27 Thread Jason Wang
On Thu, Apr 28, 2022 at 12:57 PM Michael S. Tsirkin  wrote:
>
> On Thu, Apr 28, 2022 at 11:01:10AM +0800, Jason Wang wrote:
> > On Wed, Apr 27, 2022 at 8:25 PM Chenyi Qiang  wrote:
> > >
> > >
> > >
> > > On 4/22/2022 3:11 PM, Chenyi Qiang wrote:
> > > >
> > > >
> > > > On 2/7/2022 7:28 PM, Halil Pasic wrote:
> > > >> The commit 04ceb61a40 ("virtio: Fail if iommu_platform is requested, 
> > > >> but
> > > >> unsupported") claims to fail the device hotplug when iommu_platform
> > > >> is requested, but not supported by the (vhost) device. On the first
> > > >> glance the condition for detecting that situation looks perfect, but
> > > >> because a certain peculiarity of virtio_platform it ain't.
> > > >>
> > > >> In fact the aforementioned commit introduces a regression. It breaks
> > > >> virtio-fs support for Secure Execution, and most likely also for AMD 
> > > >> SEV
> > > > >> or any other confidential guest scenario that relies on encrypted guest
> > > >> memory.  The same also applies to any other vhost device that does not
> > > >> support _F_ACCESS_PLATFORM.
> > > >>
> > > >> The peculiarity is that iommu_platform and _F_ACCESS_PLATFORM collates
> > > >> "device can not access all of the guest RAM" and "iova != gpa, thus
> > > >> device needs to translate iova".
> > > >>
> > > >> Confidential guest technologies currently rely on the device/hypervisor
> > > >> offering _F_ACCESS_PLATFORM, so that, after the feature has been
> > > >> negotiated, the guest  grants access to the portions of memory the
> > > > >> device needs to see. So for confidential guests, generally,
> > > >> _F_ACCESS_PLATFORM is about the restricted access to memory, but not
> > > >> about the addresses used being something else than guest physical
> > > >> addresses.
> > > >>
> > > >> This is the very reason for which commit f7ef7e6e3b ("vhost: correctly
> > > >> turn on VIRTIO_F_IOMMU_PLATFORM") fences _F_ACCESS_PLATFORM from the
> > > >> vhost device that does not need it, because on the vhost interface it
> > > >> only means "I/O address translation is needed".
> > > >>
> > > >> This patch takes inspiration from f7ef7e6e3b ("vhost: correctly turn on
> > > >> VIRTIO_F_IOMMU_PLATFORM"), and uses the same condition for detecting 
> > > >> the
> > > >> situation when _F_ACCESS_PLATFORM is requested, but no I/O translation
> > > >> by the device, and thus no device capability is needed. In this
> > > > >> situation claiming that the device does not support iommu_platform=on
> > > >> is counter-productive. So let us stop doing that!
> > > >>
> > > >> Signed-off-by: Halil Pasic 
> > > >> Reported-by: Jakob Naucke 
> > > >> Fixes: 04ceb61a40 ("virtio: Fail if iommu_platform is requested, but
> > > >> unsupported")
> > > >> Acked-by: Cornelia Huck 
> > > >> Reviewed-by: Daniel Henrique Barboza 
> > > >> Tested-by: Daniel Henrique Barboza 
> > > >> Cc: Kevin Wolf 
> > > >> Cc: qemu-sta...@nongnu.org
> > > >>
> > > >> ---
> > > >>
> > > >> v4->v5:
> > > >> * added back the return; so if somebody were to add code to the end of
> > > >>the function we are still good
> > > >> v3->v4:
> > > >> * Fixed commit message (thanks Connie)
> > > >> * Removed counter-productive initialization (thanks Connie)
> > > >> * Added tags
> > > >> v2->v3:
> > > > >> * Caught a bug: I tried to check if vdev has the feature
> > > >> ACCESS_PLATFORM after we have forced it. Moved the check
> > > >> to a better place
> > > >> v1->v2:
> > > >> * Commit message tweaks. Most notably fixed commit SHA (Michael)
> > > >>
> > > >> ---
> > > >> ---
> > > >>   hw/virtio/virtio-bus.c | 12 +++-
> > > >>   1 file changed, 7 insertions(+), 5 deletions(-)
> > > >>
> > > >> diff --git a/hw/virtio/virtio-bus.c b/hw/virtio/virtio-bus.c
> > > >> index d23db98c56..0f69d1c742 100644
> > > >> --- a/hw/virtio/virtio-bus.c
> > > >> +++ b/hw/virtio/virtio-bus.c
> > > >> @@ -48,6 +48,7 @@ void virtio_bus_device_plugged(VirtIODevice *vdev,
> > > >> Error **errp)
> > > >>   VirtioBusClass *klass = VIRTIO_BUS_GET_CLASS(bus);
> > > >>   VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
> > > >>   bool has_iommu = virtio_host_has_feature(vdev,
> > > >> VIRTIO_F_IOMMU_PLATFORM);
> > > >> +bool vdev_has_iommu;
> > > >>   Error *local_err = NULL;
> > > >>   DPRINTF("%s: plug device.\n", qbus->name);
> > > >> @@ -69,11 +70,6 @@ void virtio_bus_device_plugged(VirtIODevice *vdev,
> > > >> Error **errp)
> > > >>   return;
> > > >>   }
> > > >> -if (has_iommu && !virtio_host_has_feature(vdev,
> > > >> VIRTIO_F_IOMMU_PLATFORM)) {
> > > >> -error_setg(errp, "iommu_platform=true is not supported by the
> > > >> device");
> > > >> -return;
> > > >> -}
> > > >> -
> > > >>   if (klass->device_plugged != NULL) {
> > > >>   klass->device_plugged(qbus->parent, &local_err);
> > > >>   }
> > > >> @@ -82,9 +78,15 @@ void virtio_bus_device_plugged(VirtIODevice *vdev,
> > > >> Error **errp)
> > > >>   return;
> >

Re: serial hang in qemu-system-ppc64 -M pseries

2022-04-27 Thread Rob Landley
On 4/27/22 10:27, Thomas Huth wrote:
> On 26/04/2022 12.26, Rob Landley wrote:
>> When I cut and paste 80-ish characters of text into the Linux serial 
>> console, it
>> reads 16 characters and stops. When I hit space, it reads another 16 
>> characters,
>> and if I keep at it will eventually catch up without losing data. If I type,
>> every character shows up immediately.
> 
> That "16" certainly comes from VTERM_BUFSIZE in hw/char/spapr_vty.c in the 
> QEMU sources, I think.
> 
>> (On other qemu targets and kernels I can cut and paste an entire uuencoded
>> binary and it goes through just fine in one go, but this target hangs with 
>> big
>> pastes until I hit keys.)
>> 
>> Is this a qemu-side bug, or a kernel-side bug?
>> 
>> Kernel config attached (linux 5.18-rc3 or thereabouts), qemu invocation is:
>> 
>> qemu-system-ppc64 -M pseries -vga none -nographic -no-reboot -m 256 -kernel
>> vmlinux -initrd powerpc64leroot.cpio.gz -append "panic=1 HOST=powerpc64le
>> console=hvc0"
> 
> Which version of QEMU are you using?

$ qemu-system-ppc64 --version
QEMU emulator version 6.2.92 (v6.2.0-rc2)
Copyright (c) 2003-2021 Fabrice Bellard and the QEMU Project developers

From November. I can pull and rebuild but it'll take a bit. (Hopefully
rebuilding would fix the need to echo -e '\e[?7h' afterwards to undo the "bash
command line history marches up the screen because qemu's x86 bios disabled line
wrap and then left it that way" issue...)

> Which frontend (GTK or terminal?) ... 

The above command line has -nographic, forcing terminal. Running ldd on the
binary doesn't pull up anything gtk. (It pulls up libncursesw though.)

If you want to reproduce my test locally:

wget https://landley.net/toybox/downloads/binaries/mkroot/0.8.5/powerpc64le.tgz
tar xvzf powerpc64le.tgz
cd powerpc64le
./qemu-*.sh

Then paste something longer than 16 characters at the eventual command prompt
once the kernel finishes booting.

If you want to reproduce it all from source:

git clone https://github.com/landley/toybox
cd toybox && mkdir ccc && cd ccc
wget
https://landley.net/toybox/downloads/binaries/toolchains/latest/powerpc64le-linux-musl-cross.tar.xz
-O - | tar xv
cd ..
CROSS=powerpc64le LINUX=~/linux scripts/mkroot.sh
cd root/powerpc64le
./qemu-*.sh

This assumes your linux kernel source directory is in ~/linux of course, and
that qemu-system-ppc64 is in the $PATH...

> this rings a distant bell, but I thought we had fixed these issues long ago 
> in the past... e.g.:
>
> https://yhbt.net/lore/all/1380113886-16845-10-git-send-email-mdr...@linux.vnet.ibm.com/
> 
> https://git.qemu.org/?p=qemu.git;a=commitdiff;h=8a273cbe53221d28

The qemu I'm running is newer than 2016. :)

Most targets are fine with this: I cut and paste entire uuencoded binaries into
the serial console as an easy way to insert a file into an initramfs. It can
usually take multiple megabytes without dropping a character, so you just
"uudecode" enter, and then paste.

Even my 32 bit powerpc target works fine with this. (Although -M g3beige is a
very different machine from -M pseries...)

Alas this target (and sh4 -m r2d) stop at 16 chars. (On sh4 the extra is
discarded, not delivered progressively as more interrupts are generated.)

> ... but maybe my memory also just fails and this has never been properly 
> fixed.
> 
>   Thomas

Rob



Re: [PATCH qemu] spapr_pci: Disable IRQFD resampling on XIVE

2022-04-27 Thread Alexey Kardashevskiy




On 4/27/22 17:36, Cédric Le Goater wrote:

Hello Alexey,

On 4/27/22 06:36, Alexey Kardashevskiy wrote:

VFIO-PCI has a "KVM_IRQFD_FLAG_RESAMPLE" optimization for INTx EOI
handling when KVM can unmask PCI INTx (level triggered interrupt) without
switching to the userspace (==QEMU).

Unfortunately XIVE does not support level interrupts, 


That's not correctly phrased I think.



My bad, I meant "XIVE hardware".



The QEMU XIVE device supports LSIs but the POWER9 kernel-irqchips,
KVM XICS-on-XIVE and XIVE native devices, are broken with respect
to passthrough adapters using INTx.



QEMU emulates them
and therefore there is no existing code path to kick the resamplefd.
The problem appears when passing through a PCI adapter with
the "pci=nomsi" kernel parameter - the adapter's interrupt
count in /proc/interrupts will be stuck at "1".

This disables resampler when the XIVE interrupt controller is configured.
This should not be very visible though KVM already exits to QEMU for INTx
and XIVE-capable boxes (POWER9 and newer) do not seem to have
performance-critical INTx-only capable devices.

Signed-off-by: Alexey Kardashevskiy 
---


Cédric, this is what I meant when I said that spapr_pci.c was unaware of
the interrupt controller type, neither xics nor xive was mentioned
in the file before.


---
  hw/ppc/spapr_pci.c | 14 +++---
  1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index 5bfd4aa9e5aa..2675052601db 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -729,11 +729,19 @@ static void pci_spapr_set_irq(void *opaque, int 
irq_num, int level)

  static PCIINTxRoute spapr_route_intx_pin_to_irq(void *opaque, int pin)
  {
+    SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
  SpaprPhbState *sphb = SPAPR_PCI_HOST_BRIDGE(opaque);
-    PCIINTxRoute route;
+    PCIINTxRoute route = { .mode = PCI_INTX_DISABLED };
-    route.mode = PCI_INTX_ENABLED;
-    route.irq = sphb->lsi_table[pin].irq;
+    /*
+ * Disable IRQFD resampler on XIVE as it does not support LSI and 
QEMU
+ * emulates those so the KVM kernel resamplefd kick is skipped 
and EOI

+ * is not delivered to VFIO-PCI.
+ */
+    if (!spapr->xive) {


This is testing the availability of the XIVE interrupt mode, but not
the active controller. See spapr_irq_init() which is called very
early in the machine initialization.

Is that what we want ? Is everything fine if we start the machine with
ic-mode=xics ? On a POWER9 host, this would use the KVM XICS-on-XIVE
device which is broken also AFAICT.


I should probably fix that in KVM, just not quite sure yet how for the 
realmode handlers, or just drop those on P9 and then the fix is trivial.




You should extend the SpaprInterruptControllerClass (for a routine) or
simply SpaprIrq (for a bool) if you need to handle IRQ matters from a
device model.


It is a property of KVM rather than the interrupt controller so it 
probably makes more sense to just stop advertising 
KVM_CAP_IRQFD_RESAMPLE. Hmmm...





Thanks,

C.



+    route.mode = PCI_INTX_ENABLED;
+    route.irq = sphb->lsi_table[pin].irq;
+    }
  return route;
  }




Re: [PATCH v5 1/1] virtio: fix the condition for iommu_platform not supported

2022-04-27 Thread Michael S. Tsirkin
On Thu, Apr 28, 2022 at 11:01:10AM +0800, Jason Wang wrote:
> On Wed, Apr 27, 2022 at 8:25 PM Chenyi Qiang  wrote:
> >
> >
> >
> > On 4/22/2022 3:11 PM, Chenyi Qiang wrote:
> > >
> > >
> > > On 2/7/2022 7:28 PM, Halil Pasic wrote:
> > >> The commit 04ceb61a40 ("virtio: Fail if iommu_platform is requested, but
> > >> unsupported") claims to fail the device hotplug when iommu_platform
> > >> is requested, but not supported by the (vhost) device. On the first
> > >> glance the condition for detecting that situation looks perfect, but
> > >> because a certain peculiarity of virtio_platform it ain't.
> > >>
> > >> In fact the aforementioned commit introduces a regression. It breaks
> > >> virtio-fs support for Secure Execution, and most likely also for AMD SEV
> > > >> or any other confidential guest scenario that relies on encrypted guest
> > >> memory.  The same also applies to any other vhost device that does not
> > >> support _F_ACCESS_PLATFORM.
> > >>
> > >> The peculiarity is that iommu_platform and _F_ACCESS_PLATFORM collates
> > >> "device can not access all of the guest RAM" and "iova != gpa, thus
> > >> device needs to translate iova".
> > >>
> > >> Confidential guest technologies currently rely on the device/hypervisor
> > >> offering _F_ACCESS_PLATFORM, so that, after the feature has been
> > >> negotiated, the guest  grants access to the portions of memory the
> > > >> device needs to see. So for confidential guests, generally,
> > >> _F_ACCESS_PLATFORM is about the restricted access to memory, but not
> > >> about the addresses used being something else than guest physical
> > >> addresses.
> > >>
> > >> This is the very reason for which commit f7ef7e6e3b ("vhost: correctly
> > >> turn on VIRTIO_F_IOMMU_PLATFORM") fences _F_ACCESS_PLATFORM from the
> > >> vhost device that does not need it, because on the vhost interface it
> > >> only means "I/O address translation is needed".
> > >>
> > >> This patch takes inspiration from f7ef7e6e3b ("vhost: correctly turn on
> > >> VIRTIO_F_IOMMU_PLATFORM"), and uses the same condition for detecting the
> > >> situation when _F_ACCESS_PLATFORM is requested, but no I/O translation
> > >> by the device, and thus no device capability is needed. In this
> > > >> situation claiming that the device does not support iommu_platform=on
> > >> is counter-productive. So let us stop doing that!
> > >>
> > >> Signed-off-by: Halil Pasic 
> > >> Reported-by: Jakob Naucke 
> > >> Fixes: 04ceb61a40 ("virtio: Fail if iommu_platform is requested, but
> > >> unsupported")
> > >> Acked-by: Cornelia Huck 
> > >> Reviewed-by: Daniel Henrique Barboza 
> > >> Tested-by: Daniel Henrique Barboza 
> > >> Cc: Kevin Wolf 
> > >> Cc: qemu-sta...@nongnu.org
> > >>
> > >> ---
> > >>
> > >> v4->v5:
> > >> * added back the return; so if somebody were to add code to the end of
> > >>the function we are still good
> > >> v3->v4:
> > >> * Fixed commit message (thanks Connie)
> > >> * Removed counter-productive initialization (thanks Connie)
> > >> * Added tags
> > >> v2->v3:
> > > >> * Caught a bug: I tried to check if vdev has the feature
> > >> ACCESS_PLATFORM after we have forced it. Moved the check
> > >> to a better place
> > >> v1->v2:
> > >> * Commit message tweaks. Most notably fixed commit SHA (Michael)
> > >>
> > >> ---
> > >> ---
> > >>   hw/virtio/virtio-bus.c | 12 +++-
> > >>   1 file changed, 7 insertions(+), 5 deletions(-)
> > >>
> > >> diff --git a/hw/virtio/virtio-bus.c b/hw/virtio/virtio-bus.c
> > >> index d23db98c56..0f69d1c742 100644
> > >> --- a/hw/virtio/virtio-bus.c
> > >> +++ b/hw/virtio/virtio-bus.c
> > >> @@ -48,6 +48,7 @@ void virtio_bus_device_plugged(VirtIODevice *vdev,
> > >> Error **errp)
> > >>   VirtioBusClass *klass = VIRTIO_BUS_GET_CLASS(bus);
> > >>   VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
> > >>   bool has_iommu = virtio_host_has_feature(vdev,
> > >> VIRTIO_F_IOMMU_PLATFORM);
> > >> +bool vdev_has_iommu;
> > >>   Error *local_err = NULL;
> > >>   DPRINTF("%s: plug device.\n", qbus->name);
> > >> @@ -69,11 +70,6 @@ void virtio_bus_device_plugged(VirtIODevice *vdev,
> > >> Error **errp)
> > >>   return;
> > >>   }
> > >> -if (has_iommu && !virtio_host_has_feature(vdev,
> > >> VIRTIO_F_IOMMU_PLATFORM)) {
> > >> -error_setg(errp, "iommu_platform=true is not supported by the
> > >> device");
> > >> -return;
> > >> -}
> > >> -
> > >>   if (klass->device_plugged != NULL) {
> > >>   klass->device_plugged(qbus->parent, &local_err);
> > >>   }
> > >> @@ -82,9 +78,15 @@ void virtio_bus_device_plugged(VirtIODevice *vdev,
> > >> Error **errp)
> > >>   return;
> > >>   }
> > >> +vdev_has_iommu = virtio_host_has_feature(vdev,
> > >> VIRTIO_F_IOMMU_PLATFORM);
> > >>   if (klass->get_dma_as != NULL && has_iommu) {
> > >>   virtio_add_feature(&vdev->host_features,
> > >> VIRTIO_F_IOMMU_PLATFORM);
> > >>   vdev->dma_as = klass->get_dma_as(

Re: [PATCH qemu v11 01/15] target/riscv: rvv: Prune redundant ESZ, DSZ parameter passed

2022-04-27 Thread Weiwei Li



在 2022/3/14 下午3:38, ~eopxd 写道:

From: eopXD 

No functional change intended in this commit.

Signed-off-by: eop Chen 
Reviewed-by: Frank Chang 
Reviewed-by: Weiwei Li
Sorry. My fault. I missed a space when I sent the Reviewed-by. Maybe you can
update this in the next version along with the other changes.


Reviewed-by: Weiwei Li 

Regards,

Weiwei Li


Reviewed-by: Alistair Francis 
---
  target/riscv/vector_helper.c | 1132 +-
  1 file changed, 565 insertions(+), 567 deletions(-)

diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index 3bd4aac9c9..e94caf1a3c 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -710,7 +710,6 @@ RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB)
  
  static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2,

 CPURISCVState *env, uint32_t desc,
-   uint32_t esz, uint32_t dsz,
 opivv2_fn *fn)
  {
  uint32_t vm = vext_vm(desc);
@@ -727,23 +726,23 @@ static void do_vext_vv(void *vd, void *v0, void *vs1, 
void *vs2,
  }
  
  /* generate the helpers for OPIVV */

-#define GEN_VEXT_VV(NAME, ESZ, DSZ)   \
+#define GEN_VEXT_VV(NAME) \
  void HELPER(NAME)(void *vd, void *v0, void *vs1,  \
void *vs2, CPURISCVState *env,  \
uint32_t desc)  \
  { \
-do_vext_vv(vd, v0, vs1, vs2, env, desc, ESZ, DSZ, \
+do_vext_vv(vd, v0, vs1, vs2, env, desc,   \
 do_##NAME);\
  }
  
-GEN_VEXT_VV(vadd_vv_b, 1, 1)

-GEN_VEXT_VV(vadd_vv_h, 2, 2)
-GEN_VEXT_VV(vadd_vv_w, 4, 4)
-GEN_VEXT_VV(vadd_vv_d, 8, 8)
-GEN_VEXT_VV(vsub_vv_b, 1, 1)
-GEN_VEXT_VV(vsub_vv_h, 2, 2)
-GEN_VEXT_VV(vsub_vv_w, 4, 4)
-GEN_VEXT_VV(vsub_vv_d, 8, 8)
+GEN_VEXT_VV(vadd_vv_b)
+GEN_VEXT_VV(vadd_vv_h)
+GEN_VEXT_VV(vadd_vv_w)
+GEN_VEXT_VV(vadd_vv_d)
+GEN_VEXT_VV(vsub_vv_b)
+GEN_VEXT_VV(vsub_vv_h)
+GEN_VEXT_VV(vsub_vv_w)
+GEN_VEXT_VV(vsub_vv_d)
  
  typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i);
  
@@ -773,7 +772,6 @@ RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB)
  
  static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2,

 CPURISCVState *env, uint32_t desc,
-   uint32_t esz, uint32_t dsz,
 opivx2_fn fn)
  {
  uint32_t vm = vext_vm(desc);
@@ -790,27 +788,27 @@ static void do_vext_vx(void *vd, void *v0, target_long 
s1, void *vs2,
  }
  
  /* generate the helpers for OPIVX */

-#define GEN_VEXT_VX(NAME, ESZ, DSZ)   \
+#define GEN_VEXT_VX(NAME) \
  void HELPER(NAME)(void *vd, void *v0, target_ulong s1,\
void *vs2, CPURISCVState *env,  \
uint32_t desc)  \
  { \
-do_vext_vx(vd, v0, s1, vs2, env, desc, ESZ, DSZ,  \
+do_vext_vx(vd, v0, s1, vs2, env, desc,\
 do_##NAME);\
  }
  
-GEN_VEXT_VX(vadd_vx_b, 1, 1)

-GEN_VEXT_VX(vadd_vx_h, 2, 2)
-GEN_VEXT_VX(vadd_vx_w, 4, 4)
-GEN_VEXT_VX(vadd_vx_d, 8, 8)
-GEN_VEXT_VX(vsub_vx_b, 1, 1)
-GEN_VEXT_VX(vsub_vx_h, 2, 2)
-GEN_VEXT_VX(vsub_vx_w, 4, 4)
-GEN_VEXT_VX(vsub_vx_d, 8, 8)
-GEN_VEXT_VX(vrsub_vx_b, 1, 1)
-GEN_VEXT_VX(vrsub_vx_h, 2, 2)
-GEN_VEXT_VX(vrsub_vx_w, 4, 4)
-GEN_VEXT_VX(vrsub_vx_d, 8, 8)
+GEN_VEXT_VX(vadd_vx_b)
+GEN_VEXT_VX(vadd_vx_h)
+GEN_VEXT_VX(vadd_vx_w)
+GEN_VEXT_VX(vadd_vx_d)
+GEN_VEXT_VX(vsub_vx_b)
+GEN_VEXT_VX(vsub_vx_h)
+GEN_VEXT_VX(vsub_vx_w)
+GEN_VEXT_VX(vsub_vx_d)
+GEN_VEXT_VX(vrsub_vx_b)
+GEN_VEXT_VX(vrsub_vx_h)
+GEN_VEXT_VX(vrsub_vx_w)
+GEN_VEXT_VX(vrsub_vx_d)
  
  void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc)

  {
@@ -889,30 +887,30 @@ RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, 
DO_ADD)
  RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB)
  RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB)
  RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB)
-GEN_VEXT_VV(vwaddu_vv_b, 1, 2)
-GEN_VEXT_VV(vwaddu_vv_h, 2, 4)
-GEN_VEXT_VV(vwaddu_vv_w, 4, 8)
-GEN_VEXT_VV(vwsubu_vv_b, 1, 2)
-GEN_VEXT_VV(vwsubu_vv_h, 2, 4)
-GEN_VEXT_VV(vwsubu_vv_w, 4, 8)
-GEN_VEXT_VV(vwadd_vv_b, 1, 2)
-GEN_VEXT_VV(vwadd_vv_h, 2, 4)
-GEN_VEXT_VV(vwadd_vv_w, 4, 8)
-GEN_VEXT_VV(vwsub_vv_b, 1, 2)
-GEN_VEXT_VV(vwsub_vv_h, 2, 4)
-GEN_VEXT_VV(vwsub_vv_w, 4, 8)
-GEN_VEXT_VV(vwaddu_wv_b, 1, 2)
-GEN_VEXT_VV(vwaddu_wv_h, 2, 4)
-GEN_VEXT_VV(vwaddu_wv_w, 4, 8)
-GEN_VEXT_VV(vwsubu_wv_b, 1, 2)
-GEN_VEXT_VV(vwsubu_wv_h, 2, 4)
-GEN_VEXT_VV(vwsubu_wv_w, 4, 8)
-GEN_VEXT_VV(vwadd_wv_b, 1, 2)
-GEN_VEXT_VV(vwadd_wv_h, 2, 4)
-GEN_VEXT_VV(vwadd_wv_w, 4, 8)
-GEN_VEXT_VV(vwsub_wv_b, 1, 2)
-GEN_VEXT_VV(vwsub_wv_h, 2, 4

Re: [RFC PATCH v3 1/5] ppc64: Add semihosting support

2022-04-27 Thread Nicholas Piggin
Excerpts from Nicholas Piggin's message of April 21, 2022 12:04 pm:
> Excerpts from Leandro Lupori's message of April 21, 2022 4:09 am:
>> On 4/18/22 17:22, Cédric Le Goater wrote:
>>> On 4/18/22 21:10, Leandro Lupori wrote:
 Add semihosting support for PPC64. This implementation is
 based on the standard for ARM semihosting version 2.0, as
 implemented by QEMU and documented in

  https://github.com/ARM-software/abi-aa/releases

 The PPC64 specific differences are the following:

 Semihosting Trap Instruction: sc 7
 Operation Number Register: r3
 Parameter Register: r4
 Return Register: r3
 Data block field size: 64 bits
>>> 
>>> 'sc' is a good way to implement semi hosting but we should make sure
>>> that it is not colliding with future extensions, at least with the
>>> next POWERPC processor. Is that the case ? if not, then the lev could
>>> be reserved.
>>> 
>> 
>> Power ISA 3.1B says that LEV values greater than 2 are reserved.
>> Level 2 is the ultravisor, so I assumed that level 7 was far enough from 
>> current max level. I don't know if POWER11 will introduce new privilege 
>> levels. Is this info publicly available somewhere? Or do you have a 
>> better level in mind to use instead?
> 
> It's not available but there are no plans to use LEV=7.
> 
> It would be fine in practice I think, but it's kind of ugly and not 
> great precedent -- how would we find out all the projects which use 
> reserved instructions or values for something? Nominally the onus is on 
> the software to accept breakage, but in reality important software that
> breaks causes a headache for the ISA.
> 
> IBM's systemsim emulator actually has an instruction to call out to the 
> emulator to do various things like IO. It uses the opcode
> 
>   .long 0x000eaeb0
> 
> That is the primary op 0 reserved space, and there is actually another 
> op 'attn' or 'sp_attn' there which IBM CPUs implement, it is similar in 
> spirit (it calls out to the service processor and/or chip error handling 
> system to deal with a condition out-of-band). You don't want to use attn 
> here because the core under emulation might implement it, I'm just 
> noting the precedent with similar functionality under this primary 
> opcode.
> 
> So I think the systemsim emulator instruction should be a good choice. 
> But it should really be documented. I will bring this up at the Open 
> Power ISA working group meeting next week and see what the options are 
> with getting it formally allocated for semihosting emulators (or what 
> the alternatives are).

Update on the ISA TWG meeting

Semihosting was well received, the idea is not so new so I think it was
easily understood by attendees.

There were no objections to allocating a new opcode for this purpose.
The preference was a new opcode rather than using a reserved sc LEV
value.

The primary opcode 0 space is possibly unsuitable because it is said
to be "allocated to specific purposes that are outside the scope of the
Power ISA." whereas I think we want a first class instruction for this,
it may have implementation-dependent behaviour but on processors that
do not implement it, we would like it to have well-defined behaviour.

So we can probably just pick an opcode and submit a patch RFC to the
ISA (I can try help with that). First, there are a few questions to
resolve:

- What behaviour do we want for CPUs which do not implement it or
  disable it? E.g., no-op or illegal instruction interrupt. Ideally
  we would choose an opcode such that the architecture is compatible
  with existing CPUs.

- Would it be useful for KVM to implement semihosting support for
  guests on hard processors?

- Is there value in an endian-agnostic instruction? (Assuming we can
  find one). This question only comes to me because our BMC gdbserver
  for debugging the host CPUs implements breakpoints by inserting an
  'attn' instruction in the host code, and that does not work if the
  host switches endian. Any possibility the semihosting instruction
  would ever be injected out-of-band? Seems not so likely.

There were also some thoughts about bringing the semihosting spec
under the Open Power group but that's outside the scope of the ISA
group. This may be a possibility we could consider but I think for
now it should be enough to document it like riscv and put it
somewhere (even in the QEMU tree should be okay for now IMO).

Thanks,
Nick



[PATCH qemu v11 15/15] target/riscv: rvv: Add option 'rvv_ta_all_1s' to enable optional tail agnostic behavior

2022-04-27 Thread ~eopxd
From: eopXD 

According to v-spec, tail agnostic behavior can be either kept as
undisturbed or set elements' bits to all 1s. To distinguish the
difference of tail policies, QEMU should be able to simulate the tail
agnostic behavior as "set tail elements' bits to all 1s".

There are multiple possibilities for agnostic elements according to
v-spec. The main intent of this patch-set is to add an option that
can distinguish between tail policies. Setting agnostic elements to
all 1s allows QEMU to express this.

This commit adds the option 'rvv_ta_all_1s' to enable the
behavior; it is disabled by default.

Signed-off-by: eop Chen 
Reviewed-by: Frank Chang 
Reviewed-by: Weiwei Li
---
 target/riscv/cpu.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index ddda4906ff..cd4cf4b41e 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -810,6 +810,7 @@ static Property riscv_cpu_properties[] = {
 DEFINE_PROP_BOOL("x-aia", RISCVCPU, cfg.aia, false),
 
 DEFINE_PROP_UINT64("resetvec", RISCVCPU, cfg.resetvec, DEFAULT_RSTVEC),
+DEFINE_PROP_BOOL("rvv_ta_all_1s", RISCVCPU, cfg.rvv_ta_all_1s, false),
 DEFINE_PROP_END_OF_LIST(),
 };
 
-- 
2.34.2



[PATCH qemu v11 13/15] target/riscv: rvv: Add tail agnostic for vector mask instructions

2022-04-27 Thread ~eopxd
From: eopXD 

The tail elements in the destination mask register are updated under
a tail-agnostic policy.

Signed-off-by: eop Chen 
Reviewed-by: Frank Chang 
Reviewed-by: Weiwei Li
---
 target/riscv/insn_trans/trans_rvv.c.inc |  6 +
 target/riscv/vector_helper.c| 30 +
 2 files changed, 36 insertions(+)

diff --git a/target/riscv/insn_trans/trans_rvv.c.inc 
b/target/riscv/insn_trans/trans_rvv.c.inc
index a267945267..ab9f876c28 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -3209,6 +3209,8 @@ static bool trans_##NAME(DisasContext *s, arg_r *a)   
 \
 tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \
\
 data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
+data = \
+FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s);\
 tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \
vreg_ofs(s, a->rs1),\
vreg_ofs(s, a->rs2), cpu_env,   \
@@ -3313,6 +3315,8 @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a) 
 \
\
 data = FIELD_DP32(data, VDATA, VM, a->vm); \
 data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
+data = \
+FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s);\
 tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), \
vreg_ofs(s, 0), vreg_ofs(s, a->rs2),\
cpu_env, s->cfg_ptr->vlen / 8,  \
@@ -3350,6 +3354,7 @@ static bool trans_viota_m(DisasContext *s, arg_viota_m *a)
 
 data = FIELD_DP32(data, VDATA, VM, a->vm);
 data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
+data = FIELD_DP32(data, VDATA, VTA, s->vta);
 static gen_helper_gvec_3_ptr * const fns[4] = {
 gen_helper_viota_m_b, gen_helper_viota_m_h,
 gen_helper_viota_m_w, gen_helper_viota_m_d,
@@ -3379,6 +3384,7 @@ static bool trans_vid_v(DisasContext *s, arg_vid_v *a)
 
 data = FIELD_DP32(data, VDATA, VM, a->vm);
 data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
+data = FIELD_DP32(data, VDATA, VTA, s->vta);
 static gen_helper_gvec_2_ptr * const fns[4] = {
 gen_helper_vid_v_b, gen_helper_vid_v_h,
 gen_helper_vid_v_w, gen_helper_vid_v_d,
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index e0fd0e62b3..9a663a406d 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -4697,6 +4697,8 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1,  
\
   uint32_t desc)  \
 { \
 uint32_t vl = env->vl;\
+uint32_t total_elems = env_archcpu(env)->cfg.vlen;\
+uint32_t vta_all_1s = vext_vta_all_1s(desc);  \
 uint32_t i;   \
 int a, b; \
   \
@@ -4706,6 +4708,15 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, 
 \
 vext_set_elem_mask(vd, i, OP(b, a));  \
 } \
 env->vstart = 0;  \
+/* mask destination register are always tail- \
+ * agnostic   \
+ */   \
+/* set tail elements to 1s */ \
+if (vta_all_1s) { \
+for (; i < total_elems; i++) {\
+vext_set_elem_mask(vd, i, 1); \
+} \
+} \
 }
 
 #define DO_NAND(N, M)  (!(N & M))
@@ -4773,6 +4784,8 @@ static void vmsetm(void *vd, void *v0, void *vs2, 
CPURISCVState *env,
 {
 uint32_t vm = vext_vm(desc);
 uint32_t vl = env->vl;
+uint32_t total_elems = env_archcpu(env)->cfg.vlen;
+uint32_t vta_all_1s = vext_vta_all_1s(desc);
 int i;
 bool first_mask_bit = false;
 
@@ -4801,6 +4814,13 @@ static void vmsetm(void *vd, void *v0, void *vs2, 
CPURISCVState *env,
 }
 }
 env->vstart = 0;
+/* mask destination register are always tail-agnostic */
+/* set tail elements to 1s */
+if (vta_all_1s) {
+for (; i < total_elems; i++) {
+vext_set_elem_mask(vd, i, 1);
+}
+}
 }
 
 void HELPER(vmsbf_m)(void 

[PATCH qemu v11 11/15] target/riscv: rvv: Add tail agnostic for vector floating-point instructions

2022-04-27 Thread ~eopxd
From: eopXD 

Compares write mask registers, and so always operate under a tail-
agnostic policy.

Signed-off-by: eop Chen 
Reviewed-by: Frank Chang 
Reviewed-by: Weiwei Li
---
 target/riscv/insn_trans/trans_rvv.c.inc |  15 +
 target/riscv/vector_helper.c| 443 +---
 2 files changed, 262 insertions(+), 196 deletions(-)

diff --git a/target/riscv/insn_trans/trans_rvv.c.inc 
b/target/riscv/insn_trans/trans_rvv.c.inc
index 46ee673040..a267945267 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -2404,6 +2404,9 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a)
 \
\
 data = FIELD_DP32(data, VDATA, VM, a->vm); \
 data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
+data = FIELD_DP32(data, VDATA, VTA, s->vta);   \
+data = \
+FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s);\
 tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \
vreg_ofs(s, a->rs1),\
vreg_ofs(s, a->rs2), cpu_env,   \
@@ -2486,6 +2489,9 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a)
\
 gen_set_rm(s, RISCV_FRM_DYN); \
 data = FIELD_DP32(data, VDATA, VM, a->vm);\
 data = FIELD_DP32(data, VDATA, LMUL, s->lmul);\
+data = FIELD_DP32(data, VDATA, VTA, s->vta);  \
+data = FIELD_DP32(data, VDATA, VTA_ALL_1S,\
+  s->cfg_vta_all_1s); \
 return opfvf_trans(a->rd, a->rs1, a->rs2, data,   \
fns[s->sew - 1], s);   \
 } \
@@ -2524,6 +2530,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a)
   \
  \
 data = FIELD_DP32(data, VDATA, VM, a->vm);   \
 data = FIELD_DP32(data, VDATA, LMUL, s->lmul);   \
+data = FIELD_DP32(data, VDATA, VTA, s->vta); \
 tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),   \
vreg_ofs(s, a->rs1),  \
vreg_ofs(s, a->rs2), cpu_env, \
@@ -2563,6 +2570,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a)
   \
 gen_set_rm(s, RISCV_FRM_DYN);\
 data = FIELD_DP32(data, VDATA, VM, a->vm);   \
 data = FIELD_DP32(data, VDATA, LMUL, s->lmul);   \
+data = FIELD_DP32(data, VDATA, VTA, s->vta); \
 return opfvf_trans(a->rd, a->rs1, a->rs2, data,  \
fns[s->sew - 1], s);  \
 }\
@@ -2599,6 +2607,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a)
 \
\
 data = FIELD_DP32(data, VDATA, VM, a->vm); \
 data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
+data = FIELD_DP32(data, VDATA, VTA, s->vta);   \
 tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \
vreg_ofs(s, a->rs1),\
vreg_ofs(s, a->rs2), cpu_env,   \
@@ -2638,6 +2647,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a)
   \
 gen_set_rm(s, RISCV_FRM_DYN);\
 data = FIELD_DP32(data, VDATA, VM, a->vm);   \
 data = FIELD_DP32(data, VDATA, LMUL, s->lmul);   \
+data = FIELD_DP32(data, VDATA, VTA, s->vta); \
 return opfvf_trans(a->rd, a->rs1, a->rs2, data,  \
fns[s->sew - 1], s);  \
 }\
@@ -2721,6 +2731,7 @@ static bool do_opfv(DisasContext *s, arg_rmr *a,
 
 data = FIELD_DP32(data, VDATA, VM, a->vm);
 data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
+data = FIELD_DP32(data, VDATA, VTA, s->vta);
 tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
vreg_ofs(s, a->rs2), cpu_env,
s->cfg_ptr->vlen / 8,
@@ -2935,6 +2946,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a) 
 \
\
 data = FIELD_DP32(data, VDATA, VM, a->vm); \
 data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \

[PATCH qemu v11 06/15] target/riscv: rvv: Add tail agnostic for vx, vvm, vxm instructions

2022-04-27 Thread ~eopxd
From: eopXD 

`vmadc` and `vmsbc` produce a mask value, so they always operate with
a tail-agnostic policy.

Signed-off-by: eop Chen 
Reviewed-by: Frank Chang 
Reviewed-by: Weiwei Li
---
 target/riscv/insn_trans/trans_rvv.c.inc |  29 +++
 target/riscv/internals.h|   5 +-
 target/riscv/vector_helper.c| 317 +---
 3 files changed, 211 insertions(+), 140 deletions(-)

diff --git a/target/riscv/insn_trans/trans_rvv.c.inc 
b/target/riscv/insn_trans/trans_rvv.c.inc
index 99691f1b9f..d15858fc6f 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -1299,6 +1299,8 @@ static bool opivx_trans(uint32_t vd, uint32_t rs1, 
uint32_t vs2, uint32_t vm,
 
 data = FIELD_DP32(data, VDATA, VM, vm);
 data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
+data = FIELD_DP32(data, VDATA, VTA, s->vta);
+data = FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s);
 desc = tcg_constant_i32(simd_desc(s->cfg_ptr->vlen / 8,
   s->cfg_ptr->vlen / 8, data));
 
@@ -1335,6 +1337,16 @@ do_opivx_gvec(DisasContext *s, arg_rmrr *a, GVecGen2sFn 
*gvec_fn,
 }
 
 if (a->vm && s->vl_eq_vlmax) {
+if (s->vta && s->lmul < 0) {
+/*
+ * tail elements may pass vlmax when lmul < 0
+ * set tail elements to 1s
+ */
+uint32_t vlenb = s->cfg_ptr->vlen >> 3;
+tcg_gen_gvec_ori(s->sew, vreg_ofs(s, a->rd),
+ vreg_ofs(s, a->rd), -1,
+ vlenb, vlenb);
+}
 TCGv_i64 src1 = tcg_temp_new_i64();
 
 tcg_gen_ext_tl_i64(src1, get_gpr(s, a->rs1, EXT_SIGN));
@@ -1464,6 +1476,8 @@ static bool opivi_trans(uint32_t vd, uint32_t imm, 
uint32_t vs2, uint32_t vm,
 
 data = FIELD_DP32(data, VDATA, VM, vm);
 data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
+data = FIELD_DP32(data, VDATA, VTA, s->vta);
+data = FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s);
 desc = tcg_constant_i32(simd_desc(s->cfg_ptr->vlen / 8,
   s->cfg_ptr->vlen / 8, data));
 
@@ -1493,6 +1507,16 @@ do_opivi_gvec(DisasContext *s, arg_rmrr *a, GVecGen2iFn 
*gvec_fn,
 }
 
 if (a->vm && s->vl_eq_vlmax) {
+if (s->vta && s->lmul < 0) {
+/*
+ * tail elements may pass vlmax when lmul < 0
+ * set tail elements to 1s
+ */
+uint32_t vlenb = s->cfg_ptr->vlen >> 3;
+tcg_gen_gvec_ori(s->sew, vreg_ofs(s, a->rd),
+ vreg_ofs(s, a->rd), -1,
+ vlenb, vlenb);
+}
 gvec_fn(s->sew, vreg_ofs(s, a->rd), vreg_ofs(s, a->rs2),
 extract_imm(s, a->rs1, imm_mode), MAXSZ(s), MAXSZ(s));
 mark_vs_dirty(s);
@@ -1546,6 +1570,7 @@ static bool do_opivv_widen(DisasContext *s, arg_rmrr *a,
 
 data = FIELD_DP32(data, VDATA, VM, a->vm);
 data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
+data = FIELD_DP32(data, VDATA, VTA, s->vta);
 tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
vreg_ofs(s, a->rs1),
vreg_ofs(s, a->rs2),
@@ -1627,6 +1652,7 @@ static bool do_opiwv_widen(DisasContext *s, arg_rmrr *a,
 
 data = FIELD_DP32(data, VDATA, VM, a->vm);
 data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
+data = FIELD_DP32(data, VDATA, VTA, s->vta);
 tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
vreg_ofs(s, a->rs1),
vreg_ofs(s, a->rs2),
@@ -1705,6 +1731,9 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a)
 \
\
 data = FIELD_DP32(data, VDATA, VM, a->vm); \
 data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
+data = FIELD_DP32(data, VDATA, VTA, s->vta);   \
+data = \
+FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s);\
 tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \
vreg_ofs(s, a->rs1),\
vreg_ofs(s, a->rs2), cpu_env,   \
diff --git a/target/riscv/internals.h b/target/riscv/internals.h
index 512c6c30cf..193ce57a6d 100644
--- a/target/riscv/internals.h
+++ b/target/riscv/internals.h
@@ -25,8 +25,9 @@
 FIELD(VDATA, VM, 0, 1)
 FIELD(VDATA, LMUL, 1, 3)
 FIELD(VDATA, VTA, 4, 1)
-FIELD(VDATA, NF, 5, 4)
-FIELD(VDATA, WD, 5, 1)
+FIELD(VDATA, VTA_ALL_1S, 5, 1)
+FIELD(VDATA, NF, 6, 4)
+FIELD(VDATA, WD, 6, 1)
 
 /* float point classify helpers */
 target_ulong fclass_h(uint64_t frs1);
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index d186bf9512..4ecdf955f3 100644
--- a/target/riscv/vector_helpe

[PATCH qemu v11 12/15] target/riscv: rvv: Add tail agnostic for vector reduction instructions

2022-04-27 Thread ~eopxd
From: eopXD 

Signed-off-by: eop Chen 
Reviewed-by: Frank Chang 
Reviewed-by: Weiwei Li
---
 target/riscv/vector_helper.c | 20 
 1 file changed, 20 insertions(+)

diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index 21e20d47e5..e0fd0e62b3 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -4515,6 +4515,9 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1,  
\
 { \
 uint32_t vm = vext_vm(desc);  \
 uint32_t vl = env->vl;\
+uint32_t esz = sizeof(TD);\
+uint32_t vlenb = simd_maxsz(desc);\
+uint32_t vta = vext_vta(desc);\
 uint32_t i;   \
 TD s1 =  *((TD *)vs1 + HD(0));\
   \
@@ -4527,6 +4530,8 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1,  
\
 } \
 *((TD *)vd + HD(0)) = s1; \
 env->vstart = 0;  \
+/* set tail elements to 1s */ \
+vext_set_elems_1s(vd, vta, esz, vlenb);   \
 }
 
 /* vd[0] = sum(vs1[0], vs2[*]) */
@@ -4596,6 +4601,9 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1,  
 \
 {  \
 uint32_t vm = vext_vm(desc);   \
 uint32_t vl = env->vl; \
+uint32_t esz = sizeof(TD); \
+uint32_t vlenb = simd_maxsz(desc); \
+uint32_t vta = vext_vta(desc); \
 uint32_t i;\
 TD s1 =  *((TD *)vs1 + HD(0)); \
\
@@ -4608,6 +4616,8 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1,  
 \
 }  \
 *((TD *)vd + HD(0)) = s1;  \
 env->vstart = 0;   \
+/* set tail elements to 1s */  \
+vext_set_elems_1s(vd, vta, esz, vlenb);\
 }
 
 /* Unordered sum */
@@ -4632,6 +4642,9 @@ void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void *vs1,
 {
 uint32_t vm = vext_vm(desc);
 uint32_t vl = env->vl;
+uint32_t esz = sizeof(uint32_t);
+uint32_t vlenb = simd_maxsz(desc);
+uint32_t vta = vext_vta(desc);
 uint32_t i;
 uint32_t s1 =  *((uint32_t *)vs1 + H4(0));
 
@@ -4645,6 +4658,8 @@ void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void *vs1,
 }
 *((uint32_t *)vd + H4(0)) = s1;
 env->vstart = 0;
+/* set tail elements to 1s */
+vext_set_elems_1s(vd, vta, esz, vlenb);
 }
 
 void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1,
@@ -4652,6 +4667,9 @@ void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1,
 {
 uint32_t vm = vext_vm(desc);
 uint32_t vl = env->vl;
+uint32_t esz = sizeof(uint64_t);
+uint32_t vlenb = simd_maxsz(desc);
+uint32_t vta = vext_vta(desc);
 uint32_t i;
 uint64_t s1 =  *((uint64_t *)vs1);
 
@@ -4665,6 +4683,8 @@ void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1,
 }
 *((uint64_t *)vd) = s1;
 env->vstart = 0;
+/* set tail elements to 1s */
+vext_set_elems_1s(vd, vta, esz, vlenb);
 }
 
 /*
-- 
2.34.2




[PATCH qemu v11 09/15] target/riscv: rvv: Add tail agnostic for vector integer merge and move instructions

2022-04-27 Thread ~eopxd
From: eopXD 

Signed-off-by: eop Chen 
Reviewed-by: Frank Chang 
Reviewed-by: Weiwei Li
---
 target/riscv/insn_trans/trans_rvv.c.inc | 44 +
 target/riscv/vector_helper.c| 20 +++
 2 files changed, 64 insertions(+)

diff --git a/target/riscv/insn_trans/trans_rvv.c.inc 
b/target/riscv/insn_trans/trans_rvv.c.inc
index 430847b0f9..46ee673040 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -2123,11 +2123,22 @@ static bool trans_vmv_v_v(DisasContext *s, arg_vmv_v_v 
*a)
 /* vmv.v.v has rs2 = 0 and vm = 1 */
 vext_check_sss(s, a->rd, a->rs1, 0, 1)) {
 if (s->vl_eq_vlmax) {
+if (s->vta && s->lmul < 0) {
+/*
+ * tail elements may pass vlmax when lmul < 0
+ * set tail elements to 1s
+ */
+uint32_t vlenb = s->cfg_ptr->vlen >> 3;
+tcg_gen_gvec_ori(s->sew, vreg_ofs(s, a->rd),
+ vreg_ofs(s, a->rd), -1,
+ vlenb, vlenb);
+}
 tcg_gen_gvec_mov(s->sew, vreg_ofs(s, a->rd),
  vreg_ofs(s, a->rs1),
  MAXSZ(s), MAXSZ(s));
 } else {
 uint32_t data = FIELD_DP32(0, VDATA, LMUL, s->lmul);
+data = FIELD_DP32(data, VDATA, VTA, s->vta);
 static gen_helper_gvec_2_ptr * const fns[4] = {
 gen_helper_vmv_v_v_b, gen_helper_vmv_v_v_h,
 gen_helper_vmv_v_v_w, gen_helper_vmv_v_v_d,
@@ -2163,6 +2174,16 @@ static bool trans_vmv_v_x(DisasContext *s, arg_vmv_v_x 
*a)
 s1 = get_gpr(s, a->rs1, EXT_SIGN);
 
 if (s->vl_eq_vlmax) {
+if (s->vta && s->lmul < 0) {
+/*
+ * tail elements may pass vlmax when lmul < 0
+ * set tail elements to 1s
+ */
+uint32_t vlenb = s->cfg_ptr->vlen >> 3;
+tcg_gen_gvec_ori(s->sew, vreg_ofs(s, a->rd),
+ vreg_ofs(s, a->rd), -1,
+ vlenb, vlenb);
+}
 tcg_gen_gvec_dup_tl(s->sew, vreg_ofs(s, a->rd),
 MAXSZ(s), MAXSZ(s), s1);
 } else {
@@ -2170,6 +2191,7 @@ static bool trans_vmv_v_x(DisasContext *s, arg_vmv_v_x *a)
 TCGv_i64 s1_i64 = tcg_temp_new_i64();
 TCGv_ptr dest = tcg_temp_new_ptr();
 uint32_t data = FIELD_DP32(0, VDATA, LMUL, s->lmul);
+data = FIELD_DP32(data, VDATA, VTA, s->vta);
 static gen_helper_vmv_vx * const fns[4] = {
 gen_helper_vmv_v_x_b, gen_helper_vmv_v_x_h,
 gen_helper_vmv_v_x_w, gen_helper_vmv_v_x_d,
@@ -2200,6 +2222,16 @@ static bool trans_vmv_v_i(DisasContext *s, arg_vmv_v_i 
*a)
 vext_check_ss(s, a->rd, 0, 1)) {
 int64_t simm = sextract64(a->rs1, 0, 5);
 if (s->vl_eq_vlmax) {
+if (s->vta && s->lmul < 0) {
+/*
+ * tail elements may pass vlmax when lmul < 0
+ * set tail elements to 1s
+ */
+uint32_t vlenb = s->cfg_ptr->vlen >> 3;
+tcg_gen_gvec_ori(s->sew, vreg_ofs(s, a->rd),
+ vreg_ofs(s, a->rd), -1,
+ vlenb, vlenb);
+}
 tcg_gen_gvec_dup_imm(s->sew, vreg_ofs(s, a->rd),
  MAXSZ(s), MAXSZ(s), simm);
 mark_vs_dirty(s);
@@ -2208,6 +2240,7 @@ static bool trans_vmv_v_i(DisasContext *s, arg_vmv_v_i *a)
 TCGv_i64 s1;
 TCGv_ptr dest;
 uint32_t data = FIELD_DP32(0, VDATA, LMUL, s->lmul);
+data = FIELD_DP32(data, VDATA, VTA, s->vta);
 static gen_helper_vmv_vx * const fns[4] = {
 gen_helper_vmv_v_x_b, gen_helper_vmv_v_x_h,
 gen_helper_vmv_v_x_w, gen_helper_vmv_v_x_d,
@@ -2780,6 +2813,16 @@ static bool trans_vfmv_v_f(DisasContext *s, arg_vfmv_v_f 
*a)
 TCGv_i64 t1;
 
 if (s->vl_eq_vlmax) {
+if (s->vta && s->lmul < 0) {
+/*
+ * tail elements may pass vlmax when lmul < 0
+ * set tail elements to 1s
+ */
+uint32_t vlenb = s->cfg_ptr->vlen >> 3;
+tcg_gen_gvec_ori(s->sew, vreg_ofs(s, a->rd),
+ vreg_ofs(s, a->rd), -1,
+ vlenb, vlenb);
+}
 t1 = tcg_temp_new_i64();
 /* NaN-box f[rs1] */
 do_nanbox(s, t1, cpu_fpr[a->rs1]);
@@ -2791,6 +2834,7 @@ static bool trans_vfmv_v_f(DisasContext *s, arg_vfmv_v_f 
*a)
 TCGv_ptr dest;
 TCGv_i32 desc;
 uint32_t data = FIELD_DP32(0, VDATA, LMUL, s->lmul);
+data = FIELD_DP32(data, VDATA, VTA, s-

[PATCH qemu v11 08/15] target/riscv: rvv: Add tail agnostic for vector integer comparison instructions

2022-04-27 Thread ~eopxd
From: eopXD 

Compares write mask registers, and so always operate under a tail-
agnostic policy.

Signed-off-by: eop Chen 
Reviewed-by: Frank Chang 
Reviewed-by: Weiwei Li
---
 target/riscv/vector_helper.c | 21 +
 1 file changed, 21 insertions(+)

diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index 8755671449..6356b6b0ef 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -1345,6 +1345,10 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void 
*vs2,   \
 { \
 uint32_t vm = vext_vm(desc);  \
 uint32_t vl = env->vl;\
+uint32_t esz = sizeof(ETYPE); \
+uint32_t total_elems =\
+vext_get_total_elems(env, desc, esz); \
+uint32_t vta_all_1s = vext_vta_all_1s(desc);  \
 uint32_t i;   \
   \
 for (i = env->vstart; i < vl; i++) {  \
@@ -1356,6 +1360,13 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void 
*vs2,   \
 vext_set_elem_mask(vd, i, DO_OP(s2, s1)); \
 } \
 env->vstart = 0;  \
+/* mask destination register are always tail-agnostic */  \
+/* set tail elements to 1s */ \
+if (vta_all_1s) { \
+for (; i < total_elems; i++) {\
+vext_set_elem_mask(vd, i, 1); \
+} \
+} \
 }
 
 GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t,  H1, DO_MSEQ)
@@ -1394,6 +1405,9 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, 
void *vs2,   \
 {   \
 uint32_t vm = vext_vm(desc);\
 uint32_t vl = env->vl;  \
+uint32_t esz = sizeof(ETYPE);   \
+uint32_t total_elems = vext_get_total_elems(env, desc, esz);\
+uint32_t vta_all_1s = vext_vta_all_1s(desc);\
 uint32_t i; \
 \
 for (i = env->vstart; i < vl; i++) {\
@@ -1405,6 +1419,13 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, 
void *vs2,   \
 DO_OP(s2, (ETYPE)(target_long)s1)); \
 }   \
 env->vstart = 0;\
+/* mask destination register are always tail-agnostic */\
+/* set tail elements to 1s */   \
+if (vta_all_1s) {   \
+for (; i < total_elems; i++) {  \
+vext_set_elem_mask(vd, i, 1);   \
+}   \
+}   \
 }
 
 GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t,  H1, DO_MSEQ)
-- 
2.34.2




[PATCH qemu v11 07/15] target/riscv: rvv: Add tail agnostic for vector integer shift instructions

2022-04-27 Thread ~eopxd
From: eopXD 

Signed-off-by: eop Chen 
Reviewed-by: Frank Chang 
Reviewed-by: Weiwei Li
---
 target/riscv/insn_trans/trans_rvv.c.inc | 11 +++
 target/riscv/vector_helper.c| 11 +++
 2 files changed, 22 insertions(+)

diff --git a/target/riscv/insn_trans/trans_rvv.c.inc 
b/target/riscv/insn_trans/trans_rvv.c.inc
index d15858fc6f..430847b0f9 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -1858,6 +1858,16 @@ do_opivx_gvec_shift(DisasContext *s, arg_rmrr *a, 
GVecGen2sFn32 *gvec_fn,
 }
 
 if (a->vm && s->vl_eq_vlmax) {
+if (s->vta && s->lmul < 0) {
+/*
+ * tail elements may pass vlmax when lmul < 0
+ * set tail elements to 1s
+ */
+uint32_t vlenb = s->cfg_ptr->vlen >> 3;
+tcg_gen_gvec_ori(s->sew, vreg_ofs(s, a->rd),
+ vreg_ofs(s, a->rd), -1,
+ vlenb, vlenb);
+}
 TCGv_i32 src1 = tcg_temp_new_i32();
 
 tcg_gen_trunc_tl_i32(src1, get_gpr(s, a->rs1, EXT_NONE));
@@ -1916,6 +1926,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a)
 \
\
 data = FIELD_DP32(data, VDATA, VM, a->vm); \
 data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
+data = FIELD_DP32(data, VDATA, VTA, s->vta);   \
 tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \
vreg_ofs(s, a->rs1),\
vreg_ofs(s, a->rs2), cpu_env,   \
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index 4ecdf955f3..8755671449 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -1245,6 +1245,9 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1,  
\
 { \
 uint32_t vm = vext_vm(desc);  \
 uint32_t vl = env->vl;\
+uint32_t esz = sizeof(TS1);   \
+uint32_t total_elems = vext_get_total_elems(env, desc, esz);  \
+uint32_t vta = vext_vta(desc);\
 uint32_t i;   \
   \
 for (i = env->vstart; i < vl; i++) {  \
@@ -1256,6 +1259,8 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1,  
\
 *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK);\
 } \
 env->vstart = 0;  \
+/* set tail elements to 1s */ \
+vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);  \
 }
 
 GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t,  uint8_t, H1, H1, DO_SLL, 0x7)
@@ -1280,6 +1285,10 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1,   
   \
 {   \
 uint32_t vm = vext_vm(desc);\
 uint32_t vl = env->vl;  \
+uint32_t esz = sizeof(TD);  \
+uint32_t total_elems =  \
+vext_get_total_elems(env, desc, esz);   \
+uint32_t vta = vext_vta(desc);  \
 uint32_t i; \
 \
 for (i = env->vstart; i < vl; i++) {\
@@ -1290,6 +1299,8 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1,
  \
 *((TD *)vd + HD(i)) = OP(s2, s1 & MASK);\
 }   \
 env->vstart = 0;\
+/* set tail elements to 1s */   \
+vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);\
 }
 
 GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7)
-- 
2.34.2




[PATCH qemu v11 10/15] target/riscv: rvv: Add tail agnostic for vector fix-point arithmetic instructions

2022-04-27 Thread ~eopxd
From: eopXD 

Signed-off-by: eop Chen 
Reviewed-by: Frank Chang 
Reviewed-by: Weiwei Li
---
 target/riscv/vector_helper.c | 220 ++-
 1 file changed, 114 insertions(+), 106 deletions(-)

diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index f7e36b0564..9d66cd1a83 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -2080,10 +2080,12 @@ static inline void
 vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2,
  CPURISCVState *env,
  uint32_t desc,
- opivv2_rm_fn *fn)
+ opivv2_rm_fn *fn, uint32_t esz)
 {
 uint32_t vm = vext_vm(desc);
 uint32_t vl = env->vl;
+uint32_t total_elems = vext_get_total_elems(env, desc, esz);
+uint32_t vta = vext_vta(desc);
 
 switch (env->vxrm) {
 case 0: /* rnu */
@@ -2103,15 +2105,17 @@ vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2,
  env, vl, vm, 3, fn);
 break;
 }
+/* set tail elements to 1s */
+vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
 }
 
 /* generate helpers for fixed point instructions with OPIVV format */
-#define GEN_VEXT_VV_RM(NAME)\
+#define GEN_VEXT_VV_RM(NAME, ESZ)   \
 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
   CPURISCVState *env, uint32_t desc)\
 {   \
 vext_vv_rm_2(vd, v0, vs1, vs2, env, desc,   \
- do_##NAME);\
+ do_##NAME, ESZ);   \
 }
 
 static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t 
b)
@@ -2161,10 +2165,10 @@ RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, 
saddu8)
 RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16)
 RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32)
 RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64)
-GEN_VEXT_VV_RM(vsaddu_vv_b)
-GEN_VEXT_VV_RM(vsaddu_vv_h)
-GEN_VEXT_VV_RM(vsaddu_vv_w)
-GEN_VEXT_VV_RM(vsaddu_vv_d)
+GEN_VEXT_VV_RM(vsaddu_vv_b, 1)
+GEN_VEXT_VV_RM(vsaddu_vv_h, 2)
+GEN_VEXT_VV_RM(vsaddu_vv_w, 4)
+GEN_VEXT_VV_RM(vsaddu_vv_d, 8)
 
 typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i,
   CPURISCVState *env, int vxrm);
@@ -2197,10 +2201,12 @@ static inline void
 vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2,
  CPURISCVState *env,
  uint32_t desc,
- opivx2_rm_fn *fn)
+ opivx2_rm_fn *fn, uint32_t esz)
 {
 uint32_t vm = vext_vm(desc);
 uint32_t vl = env->vl;
+uint32_t total_elems = vext_get_total_elems(env, desc, esz);
+uint32_t vta = vext_vta(desc);
 
 switch (env->vxrm) {
 case 0: /* rnu */
@@ -2220,25 +2226,27 @@ vext_vx_rm_2(void *vd, void *v0, target_long s1, void 
*vs2,
  env, vl, vm, 3, fn);
 break;
 }
+/* set tail elements to 1s */
+vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
 }
 
 /* generate helpers for fixed point instructions with OPIVX format */
-#define GEN_VEXT_VX_RM(NAME)  \
+#define GEN_VEXT_VX_RM(NAME, ESZ) \
 void HELPER(NAME)(void *vd, void *v0, target_ulong s1,\
 void *vs2, CPURISCVState *env, uint32_t desc) \
 { \
 vext_vx_rm_2(vd, v0, s1, vs2, env, desc,  \
- do_##NAME);  \
+ do_##NAME, ESZ); \
 }
 
 RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8)
 RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16)
 RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32)
 RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64)
-GEN_VEXT_VX_RM(vsaddu_vx_b)
-GEN_VEXT_VX_RM(vsaddu_vx_h)
-GEN_VEXT_VX_RM(vsaddu_vx_w)
-GEN_VEXT_VX_RM(vsaddu_vx_d)
+GEN_VEXT_VX_RM(vsaddu_vx_b, 1)
+GEN_VEXT_VX_RM(vsaddu_vx_h, 2)
+GEN_VEXT_VX_RM(vsaddu_vx_w, 4)
+GEN_VEXT_VX_RM(vsaddu_vx_d, 8)
 
 static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
 {
@@ -2284,19 +2292,19 @@ RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, 
sadd8)
 RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16)
 RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32)
 RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64)
-GEN_VEXT_VV_RM(vsadd_vv_b)
-GEN_VEXT_VV_RM(vsadd_vv_h)
-GEN_VEXT_VV_RM(vsadd_vv_w)
-GEN_VEXT_VV_RM(vsadd_vv_d)
+GEN_VEXT_VV_RM(vsadd_vv_b, 1)
+GEN_VEXT_VV_RM(vsadd_vv_h, 2)
+GEN_VEXT_VV_RM(vsadd_vv_w, 4)
+GEN_VEXT_VV_RM(vsadd_vv_d, 8)
 
 RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8)
 RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16)
 RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32)
 RVVCALL(OPIVX2_RM, 

[PATCH qemu v11 05/15] target/riscv: rvv: Add tail agnostic for vector load / store instructions

2022-04-27 Thread ~eopxd
From: eopXD 

The destination registers of unit-stride mask load and store
instructions are always written with a tail-agnostic policy.

Signed-off-by: eop Chen 
Reviewed-by: Frank Chang 
Reviewed-by: Weiwei Li
---
 target/riscv/insn_trans/trans_rvv.c.inc | 11 ++
 target/riscv/translate.c|  2 ++
 target/riscv/vector_helper.c| 28 +
 3 files changed, 41 insertions(+)

diff --git a/target/riscv/insn_trans/trans_rvv.c.inc 
b/target/riscv/insn_trans/trans_rvv.c.inc
index cc80bf00ff..99691f1b9f 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -711,6 +711,7 @@ static bool ld_us_op(DisasContext *s, arg_r2nfvm *a, 
uint8_t eew)
 data = FIELD_DP32(data, VDATA, VM, a->vm);
 data = FIELD_DP32(data, VDATA, LMUL, emul);
 data = FIELD_DP32(data, VDATA, NF, a->nf);
+data = FIELD_DP32(data, VDATA, VTA, s->vta);
 return ldst_us_trans(a->rd, a->rs1, data, fn, s, false);
 }
 
@@ -748,6 +749,7 @@ static bool st_us_op(DisasContext *s, arg_r2nfvm *a, 
uint8_t eew)
 data = FIELD_DP32(data, VDATA, VM, a->vm);
 data = FIELD_DP32(data, VDATA, LMUL, emul);
 data = FIELD_DP32(data, VDATA, NF, a->nf);
+data = FIELD_DP32(data, VDATA, VTA, s->vta);
 return ldst_us_trans(a->rd, a->rs1, data, fn, s, true);
 }
 
@@ -774,6 +776,8 @@ static bool ld_us_mask_op(DisasContext *s, arg_vlm_v *a, 
uint8_t eew)
 /* EMUL = 1, NFIELDS = 1 */
 data = FIELD_DP32(data, VDATA, LMUL, 0);
 data = FIELD_DP32(data, VDATA, NF, 1);
+/* Mask destination register are always tail-agnostic */
+data = FIELD_DP32(data, VDATA, VTA, s->cfg_vta_all_1s);
 return ldst_us_trans(a->rd, a->rs1, data, fn, s, false);
 }
 
@@ -791,6 +795,8 @@ static bool st_us_mask_op(DisasContext *s, arg_vsm_v *a, 
uint8_t eew)
 /* EMUL = 1, NFIELDS = 1 */
 data = FIELD_DP32(data, VDATA, LMUL, 0);
 data = FIELD_DP32(data, VDATA, NF, 1);
+/* Mask destination register are always tail-agnostic */
+data = FIELD_DP32(data, VDATA, VTA, s->cfg_vta_all_1s);
 return ldst_us_trans(a->rd, a->rs1, data, fn, s, true);
 }
 
@@ -862,6 +868,7 @@ static bool ld_stride_op(DisasContext *s, arg_rnfvm *a, 
uint8_t eew)
 data = FIELD_DP32(data, VDATA, VM, a->vm);
 data = FIELD_DP32(data, VDATA, LMUL, emul);
 data = FIELD_DP32(data, VDATA, NF, a->nf);
+data = FIELD_DP32(data, VDATA, VTA, s->vta);
 return ldst_stride_trans(a->rd, a->rs1, a->rs2, data, fn, s, false);
 }
 
@@ -891,6 +898,7 @@ static bool st_stride_op(DisasContext *s, arg_rnfvm *a, 
uint8_t eew)
 data = FIELD_DP32(data, VDATA, VM, a->vm);
 data = FIELD_DP32(data, VDATA, LMUL, emul);
 data = FIELD_DP32(data, VDATA, NF, a->nf);
+data = FIELD_DP32(data, VDATA, VTA, s->vta);
 fn = fns[eew];
 if (fn == NULL) {
 return false;
@@ -991,6 +999,7 @@ static bool ld_index_op(DisasContext *s, arg_rnfvm *a, 
uint8_t eew)
 data = FIELD_DP32(data, VDATA, VM, a->vm);
 data = FIELD_DP32(data, VDATA, LMUL, emul);
 data = FIELD_DP32(data, VDATA, NF, a->nf);
+data = FIELD_DP32(data, VDATA, VTA, s->vta);
 return ldst_index_trans(a->rd, a->rs1, a->rs2, data, fn, s, false);
 }
 
@@ -1043,6 +1052,7 @@ static bool st_index_op(DisasContext *s, arg_rnfvm *a, 
uint8_t eew)
 data = FIELD_DP32(data, VDATA, VM, a->vm);
 data = FIELD_DP32(data, VDATA, LMUL, emul);
 data = FIELD_DP32(data, VDATA, NF, a->nf);
+data = FIELD_DP32(data, VDATA, VTA, s->vta);
 return ldst_index_trans(a->rd, a->rs1, a->rs2, data, fn, s, true);
 }
 
@@ -1108,6 +1118,7 @@ static bool ldff_op(DisasContext *s, arg_r2nfvm *a, 
uint8_t eew)
 data = FIELD_DP32(data, VDATA, VM, a->vm);
 data = FIELD_DP32(data, VDATA, LMUL, emul);
 data = FIELD_DP32(data, VDATA, NF, a->nf);
+data = FIELD_DP32(data, VDATA, VTA, s->vta);
 return ldff_trans(a->rd, a->rs1, data, fn, s);
 }
 
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index 7775dade26..58cbb6ded3 100644
--- a/target/riscv/translate.c
+++ b/target/riscv/translate.c
@@ -95,6 +95,7 @@ typedef struct DisasContext {
 int8_t lmul;
 uint8_t sew;
 uint8_t vta;
+bool cfg_vta_all_1s;
 target_ulong vstart;
 bool vl_eq_vlmax;
 uint8_t ntemp;
@@ -1085,6 +1086,7 @@ static void riscv_tr_init_disas_context(DisasContextBase 
*dcbase, CPUState *cs)
 ctx->sew = FIELD_EX32(tb_flags, TB_FLAGS, SEW);
 ctx->lmul = sextract32(FIELD_EX32(tb_flags, TB_FLAGS, LMUL), 0, 3);
 ctx->vta = FIELD_EX32(tb_flags, TB_FLAGS, VTA) && cpu->cfg.rvv_ta_all_1s;
+ctx->cfg_vta_all_1s = cpu->cfg.rvv_ta_all_1s;
 ctx->vstart = env->vstart;
 ctx->vl_eq_vlmax = FIELD_EX32(tb_flags, TB_FLAGS, VL_EQ_VLMAX);
 ctx->misa_mxl_max = env->misa_mxl_max;
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index 396e252179..d186bf9512 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -270,6 +270,8 @@ vext

[PATCH qemu v11 14/15] target/riscv: rvv: Add tail agnostic for vector permutation instructions

2022-04-27 Thread ~eopxd
From: eopXD 

Signed-off-by: eop Chen 
Reviewed-by: Frank Chang 
Reviewed-by: Weiwei Li
---
 target/riscv/insn_trans/trans_rvv.c.inc | 22 ++
 target/riscv/vector_helper.c| 40 +
 2 files changed, 62 insertions(+)

diff --git a/target/riscv/insn_trans/trans_rvv.c.inc 
b/target/riscv/insn_trans/trans_rvv.c.inc
index ab9f876c28..2c9993844a 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -3744,6 +3744,16 @@ static bool trans_vrgather_vx(DisasContext *s, arg_rmrr 
*a)
 }
 
 if (a->vm && s->vl_eq_vlmax) {
+if (s->vta && s->lmul < 0) {
+/*
+ * tail elements may pass vlmax when lmul < 0
+ * set tail elements to 1s
+ */
+uint32_t vlenb = s->cfg_ptr->vlen >> 3;
+tcg_gen_gvec_ori(s->sew, vreg_ofs(s, a->rd),
+ vreg_ofs(s, a->rd), -1,
+ vlenb, vlenb);
+}
 int scale = s->lmul - (s->sew + 3);
 int vlmax = scale < 0 ?
s->cfg_ptr->vlen >> -scale : s->cfg_ptr->vlen << scale;
@@ -3777,6 +3787,16 @@ static bool trans_vrgather_vi(DisasContext *s, arg_rmrr 
*a)
 }
 
 if (a->vm && s->vl_eq_vlmax) {
+if (s->vta && s->lmul < 0) {
+/*
+ * tail elements may pass vlmax when lmul < 0
+ * set tail elements to 1s
+ */
+uint32_t vlenb = s->cfg_ptr->vlen >> 3;
+tcg_gen_gvec_ori(s->sew, vreg_ofs(s, a->rd),
+ vreg_ofs(s, a->rd), -1,
+ vlenb, vlenb);
+}
 int scale = s->lmul - (s->sew + 3);
 int vlmax = scale < 0 ?
s->cfg_ptr->vlen >> -scale : s->cfg_ptr->vlen << scale;
@@ -3829,6 +3849,7 @@ static bool trans_vcompress_vm(DisasContext *s, arg_r *a)
 tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
 
 data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
+data = FIELD_DP32(data, VDATA, VTA, s->vta);
 tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
vreg_ofs(s, a->rs1), vreg_ofs(s, a->rs2),
cpu_env, s->cfg_ptr->vlen / 8,
@@ -3934,6 +3955,7 @@ static bool int_ext_op(DisasContext *s, arg_rmr *a, 
uint8_t seq)
 }
 
 data = FIELD_DP32(data, VDATA, VM, a->vm);
+data = FIELD_DP32(data, VDATA, VTA, s->vta);
 
 tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
vreg_ofs(s, a->rs2), cpu_env,
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index 9a663a406d..b6be95a7e4 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -4911,6 +4911,9 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, 
void *vs2, \
 { \
 uint32_t vm = vext_vm(desc);  \
 uint32_t vl = env->vl;\
+uint32_t esz = sizeof(ETYPE); \
+uint32_t total_elems = vext_get_total_elems(env, desc, esz);  \
+uint32_t vta = vext_vta(desc);\
 target_ulong offset = s1, i_min, i;   \
   \
 i_min = MAX(env->vstart, offset); \
@@ -4920,6 +4923,8 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, 
void *vs2, \
 } \
 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset));  \
 } \
+/* set tail elements to 1s */ \
+vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);  \
 }
 
 /* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */
@@ -4935,6 +4940,9 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, 
void *vs2, \
 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE)));   \
 uint32_t vm = vext_vm(desc);  \
 uint32_t vl = env->vl;\
+uint32_t esz = sizeof(ETYPE); \
+uint32_t total_elems = vext_get_total_elems(env, desc, esz);  \
+uint32_t vta = vext_vta(desc);\
 target_ulong i_max, i;\
   \
 i_max = MAX(MIN(s1 < vlmax ? vlmax - s1 : 0, vl), env->vstart);   \
@@ -4951,6 +4959,8 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, 
void *vs2,  

[PATCH qemu v11 01/15] target/riscv: rvv: Prune redundant ESZ, DSZ parameter passed

2022-04-27 Thread ~eopxd
From: eopXD 

No functional change intended in this commit.

Signed-off-by: eop Chen 
Reviewed-by: Frank Chang 
Reviewed-by: Weiwei Li
Reviewed-by: Alistair Francis 
---
 target/riscv/vector_helper.c | 1132 +-
 1 file changed, 565 insertions(+), 567 deletions(-)

diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index 3bd4aac9c9..e94caf1a3c 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -710,7 +710,6 @@ RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB)
 
 static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2,
CPURISCVState *env, uint32_t desc,
-   uint32_t esz, uint32_t dsz,
opivv2_fn *fn)
 {
 uint32_t vm = vext_vm(desc);
@@ -727,23 +726,23 @@ static void do_vext_vv(void *vd, void *v0, void *vs1, 
void *vs2,
 }
 
 /* generate the helpers for OPIVV */
-#define GEN_VEXT_VV(NAME, ESZ, DSZ)   \
+#define GEN_VEXT_VV(NAME) \
 void HELPER(NAME)(void *vd, void *v0, void *vs1,  \
   void *vs2, CPURISCVState *env,  \
   uint32_t desc)  \
 { \
-do_vext_vv(vd, v0, vs1, vs2, env, desc, ESZ, DSZ, \
+do_vext_vv(vd, v0, vs1, vs2, env, desc,   \
do_##NAME);\
 }
 
-GEN_VEXT_VV(vadd_vv_b, 1, 1)
-GEN_VEXT_VV(vadd_vv_h, 2, 2)
-GEN_VEXT_VV(vadd_vv_w, 4, 4)
-GEN_VEXT_VV(vadd_vv_d, 8, 8)
-GEN_VEXT_VV(vsub_vv_b, 1, 1)
-GEN_VEXT_VV(vsub_vv_h, 2, 2)
-GEN_VEXT_VV(vsub_vv_w, 4, 4)
-GEN_VEXT_VV(vsub_vv_d, 8, 8)
+GEN_VEXT_VV(vadd_vv_b)
+GEN_VEXT_VV(vadd_vv_h)
+GEN_VEXT_VV(vadd_vv_w)
+GEN_VEXT_VV(vadd_vv_d)
+GEN_VEXT_VV(vsub_vv_b)
+GEN_VEXT_VV(vsub_vv_h)
+GEN_VEXT_VV(vsub_vv_w)
+GEN_VEXT_VV(vsub_vv_d)
 
 typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i);
 
@@ -773,7 +772,6 @@ RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB)
 
 static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2,
CPURISCVState *env, uint32_t desc,
-   uint32_t esz, uint32_t dsz,
opivx2_fn fn)
 {
 uint32_t vm = vext_vm(desc);
@@ -790,27 +788,27 @@ static void do_vext_vx(void *vd, void *v0, target_long 
s1, void *vs2,
 }
 
 /* generate the helpers for OPIVX */
-#define GEN_VEXT_VX(NAME, ESZ, DSZ)   \
+#define GEN_VEXT_VX(NAME) \
 void HELPER(NAME)(void *vd, void *v0, target_ulong s1,\
   void *vs2, CPURISCVState *env,  \
   uint32_t desc)  \
 { \
-do_vext_vx(vd, v0, s1, vs2, env, desc, ESZ, DSZ,  \
+do_vext_vx(vd, v0, s1, vs2, env, desc,\
do_##NAME);\
 }
 
-GEN_VEXT_VX(vadd_vx_b, 1, 1)
-GEN_VEXT_VX(vadd_vx_h, 2, 2)
-GEN_VEXT_VX(vadd_vx_w, 4, 4)
-GEN_VEXT_VX(vadd_vx_d, 8, 8)
-GEN_VEXT_VX(vsub_vx_b, 1, 1)
-GEN_VEXT_VX(vsub_vx_h, 2, 2)
-GEN_VEXT_VX(vsub_vx_w, 4, 4)
-GEN_VEXT_VX(vsub_vx_d, 8, 8)
-GEN_VEXT_VX(vrsub_vx_b, 1, 1)
-GEN_VEXT_VX(vrsub_vx_h, 2, 2)
-GEN_VEXT_VX(vrsub_vx_w, 4, 4)
-GEN_VEXT_VX(vrsub_vx_d, 8, 8)
+GEN_VEXT_VX(vadd_vx_b)
+GEN_VEXT_VX(vadd_vx_h)
+GEN_VEXT_VX(vadd_vx_w)
+GEN_VEXT_VX(vadd_vx_d)
+GEN_VEXT_VX(vsub_vx_b)
+GEN_VEXT_VX(vsub_vx_h)
+GEN_VEXT_VX(vsub_vx_w)
+GEN_VEXT_VX(vsub_vx_d)
+GEN_VEXT_VX(vrsub_vx_b)
+GEN_VEXT_VX(vrsub_vx_h)
+GEN_VEXT_VX(vrsub_vx_w)
+GEN_VEXT_VX(vrsub_vx_d)
 
 void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc)
 {
@@ -889,30 +887,30 @@ RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, 
DO_ADD)
 RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB)
 RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB)
 RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB)
-GEN_VEXT_VV(vwaddu_vv_b, 1, 2)
-GEN_VEXT_VV(vwaddu_vv_h, 2, 4)
-GEN_VEXT_VV(vwaddu_vv_w, 4, 8)
-GEN_VEXT_VV(vwsubu_vv_b, 1, 2)
-GEN_VEXT_VV(vwsubu_vv_h, 2, 4)
-GEN_VEXT_VV(vwsubu_vv_w, 4, 8)
-GEN_VEXT_VV(vwadd_vv_b, 1, 2)
-GEN_VEXT_VV(vwadd_vv_h, 2, 4)
-GEN_VEXT_VV(vwadd_vv_w, 4, 8)
-GEN_VEXT_VV(vwsub_vv_b, 1, 2)
-GEN_VEXT_VV(vwsub_vv_h, 2, 4)
-GEN_VEXT_VV(vwsub_vv_w, 4, 8)
-GEN_VEXT_VV(vwaddu_wv_b, 1, 2)
-GEN_VEXT_VV(vwaddu_wv_h, 2, 4)
-GEN_VEXT_VV(vwaddu_wv_w, 4, 8)
-GEN_VEXT_VV(vwsubu_wv_b, 1, 2)
-GEN_VEXT_VV(vwsubu_wv_h, 2, 4)
-GEN_VEXT_VV(vwsubu_wv_w, 4, 8)
-GEN_VEXT_VV(vwadd_wv_b, 1, 2)
-GEN_VEXT_VV(vwadd_wv_h, 2, 4)
-GEN_VEXT_VV(vwadd_wv_w, 4, 8)
-GEN_VEXT_VV(vwsub_wv_b, 1, 2)
-GEN_VEXT_VV(vwsub_wv_h, 2, 4)
-GEN_VEXT_VV(vwsub_wv_w, 4, 8)
+GEN_VEXT_VV(vwaddu_vv_b)
+GEN_VEXT_VV(vwaddu_vv_h)
+GEN_VEXT_VV(vwaddu_vv_w)
+GEN_VEXT_VV(vwsubu_vv_b)
+GEN_VEXT_VV(vwsubu_vv_h)
+GEN_VEXT_VV(vwsubu_vv_w)
+GEN_VEXT_VV(vwadd_vv_b)
+GEN_VEXT_VV(vwadd_vv_h)
+GEN_VEXT_VV(vwadd_

[PATCH qemu v11 04/15] target/riscv: rvv: Add tail agnostic for vv instructions

2022-04-27 Thread ~eopxd
From: eopXD 

According to the v-spec, tail-agnostic elements can either be left
undisturbed or have all of their bits set to 1s. To make the
difference between tail policies observable, QEMU should be able to
simulate the tail-agnostic behavior as "set tail elements' bits to
all 1s".

The v-spec allows multiple possible behaviors for agnostic elements.
The main intent of this patch-set is to add an option that can
distinguish between tail policies. Setting agnostic elements to
all 1s allows QEMU to express this.

This is the first commit regarding the optional tail agnostic
behavior. Follow-up commits will add this optional behavior
for all rvv instructions.

Signed-off-by: eop Chen 
Reviewed-by: Frank Chang 
Reviewed-by: Weiwei Li
---
 target/riscv/cpu.h  |   2 +
 target/riscv/cpu_helper.c   |   2 +
 target/riscv/insn_trans/trans_rvv.c.inc |  11 +
 target/riscv/internals.h|   5 +-
 target/riscv/translate.c|   2 +
 target/riscv/vector_helper.c| 296 +---
 6 files changed, 187 insertions(+), 131 deletions(-)

diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index c069fe85fa..8c4a79b5a0 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -369,6 +369,7 @@ struct RISCVCPUConfig {
 bool ext_zhinxmin;
 bool ext_zve32f;
 bool ext_zve64f;
+bool rvv_ta_all_1s;
 
 /* Vendor-specific custom extensions */
 bool ext_XVentanaCondOps;
@@ -516,6 +517,7 @@ FIELD(TB_FLAGS, XL, 20, 2)
 /* If PointerMasking should be applied */
 FIELD(TB_FLAGS, PM_MASK_ENABLED, 22, 1)
 FIELD(TB_FLAGS, PM_BASE_ENABLED, 23, 1)
+FIELD(TB_FLAGS, VTA, 24, 1)
 
 #ifdef TARGET_RISCV32
 #define riscv_cpu_mxl(env)  ((void)(env), MXL_RV32)
diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
index 1c60fb2e80..2941c88c31 100644
--- a/target/riscv/cpu_helper.c
+++ b/target/riscv/cpu_helper.c
@@ -65,6 +65,8 @@ void cpu_get_tb_cpu_state(CPURISCVState *env, target_ulong 
*pc,
 flags = FIELD_DP32(flags, TB_FLAGS, LMUL,
 FIELD_EX64(env->vtype, VTYPE, VLMUL));
 flags = FIELD_DP32(flags, TB_FLAGS, VL_EQ_VLMAX, vl_eq_vlmax);
+flags = FIELD_DP32(flags, TB_FLAGS, VTA,
+FIELD_EX64(env->vtype, VTYPE, VTA));
 } else {
 flags = FIELD_DP32(flags, TB_FLAGS, VILL, 1);
 }
diff --git a/target/riscv/insn_trans/trans_rvv.c.inc 
b/target/riscv/insn_trans/trans_rvv.c.inc
index 57953923d5..cc80bf00ff 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -1223,6 +1223,16 @@ do_opivv_gvec(DisasContext *s, arg_rmrr *a, GVecGen3Fn 
*gvec_fn,
 tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
 
 if (a->vm && s->vl_eq_vlmax) {
+if (s->vta && s->lmul < 0) {
+/*
+ * tail elements may pass vlmax when lmul < 0
+ * set tail elements to 1s
+ */
+uint32_t vlenb = s->cfg_ptr->vlen >> 3;
+tcg_gen_gvec_ori(s->sew, vreg_ofs(s, a->rd),
+ vreg_ofs(s, a->rd), -1,
+ vlenb, vlenb);
+}
 gvec_fn(s->sew, vreg_ofs(s, a->rd),
 vreg_ofs(s, a->rs2), vreg_ofs(s, a->rs1),
 MAXSZ(s), MAXSZ(s));
@@ -1231,6 +1241,7 @@ do_opivv_gvec(DisasContext *s, arg_rmrr *a, GVecGen3Fn 
*gvec_fn,
 
 data = FIELD_DP32(data, VDATA, VM, a->vm);
 data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
+data = FIELD_DP32(data, VDATA, VTA, s->vta);
 tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
vreg_ofs(s, a->rs1), vreg_ofs(s, a->rs2),
cpu_env, s->cfg_ptr->vlen / 8,
diff --git a/target/riscv/internals.h b/target/riscv/internals.h
index dbb322bfa7..512c6c30cf 100644
--- a/target/riscv/internals.h
+++ b/target/riscv/internals.h
@@ -24,8 +24,9 @@
 /* share data between vector helpers and decode code */
 FIELD(VDATA, VM, 0, 1)
 FIELD(VDATA, LMUL, 1, 3)
-FIELD(VDATA, NF, 4, 4)
-FIELD(VDATA, WD, 4, 1)
+FIELD(VDATA, VTA, 4, 1)
+FIELD(VDATA, NF, 5, 4)
+FIELD(VDATA, WD, 5, 1)
 
 /* float point classify helpers */
 target_ulong fclass_h(uint64_t frs1);
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index fac998a6b5..7775dade26 100644
--- a/target/riscv/translate.c
+++ b/target/riscv/translate.c
@@ -94,6 +94,7 @@ typedef struct DisasContext {
  */
 int8_t lmul;
 uint8_t sew;
+uint8_t vta;
 target_ulong vstart;
 bool vl_eq_vlmax;
 uint8_t ntemp;
@@ -1083,6 +1084,7 @@ static void riscv_tr_init_disas_context(DisasContextBase 
*dcbase, CPUState *cs)
 ctx->vill = FIELD_EX32(tb_flags, TB_FLAGS, VILL);
 ctx->sew = FIELD_EX32(tb_flags, TB_FLAGS, SEW);
 ctx->lmul = sextract32(FIELD_EX32(tb_flags, TB_FLAGS, LMUL), 0, 3);
+ctx->vta = FIELD_EX32(tb_flags, TB_FLAGS, VTA) && cpu->cfg.rvv_ta_all_1s;
 ctx->vstart = env->vstart;
 ctx->vl_eq_vlmax

[PATCH qemu v11 02/15] target/riscv: rvv: Rename ambiguous esz

2022-04-27 Thread ~eopxd
From: eopXD 

No functional change intended in this commit.

Signed-off-by: eop Chen 
Reviewed-by: Frank Chang 
Reviewed-by: Weiwei Li
Reviewed-by: Alistair Francis 
---
 target/riscv/vector_helper.c | 76 ++--
 1 file changed, 38 insertions(+), 38 deletions(-)

diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index e94caf1a3c..d0452a7756 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -125,9 +125,9 @@ static inline int32_t vext_lmul(uint32_t desc)
 /*
  * Get the maximum number of elements can be operated.
  *
- * esz: log2 of element size in bytes.
+ * log2_esz: log2 of element size in bytes.
  */
-static inline uint32_t vext_max_elems(uint32_t desc, uint32_t esz)
+static inline uint32_t vext_max_elems(uint32_t desc, uint32_t log2_esz)
 {
 /*
  * As simd_desc support at most 2048 bytes, the max vlen is 1024 bits.
@@ -136,7 +136,7 @@ static inline uint32_t vext_max_elems(uint32_t desc, 
uint32_t esz)
 uint32_t vlenb = simd_maxsz(desc);
 
 /* Return VLMAX */
-int scale = vext_lmul(desc) - esz;
+int scale = vext_lmul(desc) - log2_esz;
 return scale < 0 ? vlenb >> -scale : vlenb << scale;
 }
 
@@ -231,11 +231,11 @@ vext_ldst_stride(void *vd, void *v0, target_ulong base,
  target_ulong stride, CPURISCVState *env,
  uint32_t desc, uint32_t vm,
  vext_ldst_elem_fn *ldst_elem,
- uint32_t esz, uintptr_t ra, MMUAccessType access_type)
+ uint32_t log2_esz, uintptr_t ra, MMUAccessType access_type)
 {
 uint32_t i, k;
 uint32_t nf = vext_nf(desc);
-uint32_t max_elems = vext_max_elems(desc, esz);
+uint32_t max_elems = vext_max_elems(desc, log2_esz);
 
 for (i = env->vstart; i < env->vl; i++, env->vstart++) {
 if (!vm && !vext_elem_mask(v0, i)) {
@@ -244,7 +244,7 @@ vext_ldst_stride(void *vd, void *v0, target_ulong base,
 
 k = 0;
 while (k < nf) {
-target_ulong addr = base + stride * i + (k << esz);
+target_ulong addr = base + stride * i + (k << log2_esz);
 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
 k++;
 }
@@ -289,18 +289,18 @@ GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d)
 /* unmasked unit-stride load and store operation*/
 static void
 vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
- vext_ldst_elem_fn *ldst_elem, uint32_t esz, uint32_t evl,
+ vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uint32_t evl,
  uintptr_t ra, MMUAccessType access_type)
 {
 uint32_t i, k;
 uint32_t nf = vext_nf(desc);
-uint32_t max_elems = vext_max_elems(desc, esz);
+uint32_t max_elems = vext_max_elems(desc, log2_esz);
 
 /* load bytes from guest memory */
 for (i = env->vstart; i < evl; i++, env->vstart++) {
 k = 0;
 while (k < nf) {
-target_ulong addr = base + ((i * nf + k) << esz);
+target_ulong addr = base + ((i * nf + k) << log2_esz);
 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
 k++;
 }
@@ -399,12 +399,12 @@ vext_ldst_index(void *vd, void *v0, target_ulong base,
 void *vs2, CPURISCVState *env, uint32_t desc,
 vext_get_index_addr get_index_addr,
 vext_ldst_elem_fn *ldst_elem,
-uint32_t esz, uintptr_t ra, MMUAccessType access_type)
+uint32_t log2_esz, uintptr_t ra, MMUAccessType access_type)
 {
 uint32_t i, k;
 uint32_t nf = vext_nf(desc);
 uint32_t vm = vext_vm(desc);
-uint32_t max_elems = vext_max_elems(desc, esz);
+uint32_t max_elems = vext_max_elems(desc, log2_esz);
 
 /* load bytes from guest memory */
 for (i = env->vstart; i < env->vl; i++, env->vstart++) {
@@ -414,7 +414,7 @@ vext_ldst_index(void *vd, void *v0, target_ulong base,
 
 k = 0;
 while (k < nf) {
-abi_ptr addr = get_index_addr(base, i, vs2) + (k << esz);
+abi_ptr addr = get_index_addr(base, i, vs2) + (k << log2_esz);
 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
 k++;
 }
@@ -480,13 +480,13 @@ static inline void
 vext_ldff(void *vd, void *v0, target_ulong base,
   CPURISCVState *env, uint32_t desc,
   vext_ldst_elem_fn *ldst_elem,
-  uint32_t esz, uintptr_t ra)
+  uint32_t log2_esz, uintptr_t ra)
 {
 void *host;
 uint32_t i, k, vl = 0;
 uint32_t nf = vext_nf(desc);
 uint32_t vm = vext_vm(desc);
-uint32_t max_elems = vext_max_elems(desc, esz);
+uint32_t max_elems = vext_max_elems(desc, log2_esz);
 target_ulong addr, offset, remain;
 
 /* probe every access*/
@@ -494,12 +494,12 @@ vext_ldff(void *vd, void *v0, target_ulong base,
 if (!vm && !vext_elem_mask(v0, i)) {
 continue;
 }
-  

[PATCH qemu v11 00/15] Add tail agnostic behavior for rvv instructions

2022-04-27 Thread ~eopxd
According to the v-spec, tail-agnostic elements can either be left
undisturbed or have all of their bits set to 1s. To make the
difference between tail policies observable, QEMU should be able to
simulate the tail-agnostic behavior as "set tail elements' bits to
all 1s". An option 'rvv_ta_all_1s' is added to enable this behavior;
it is disabled by default.

The v-spec allows multiple possible behaviors for agnostic elements.
The main intent of this patch-set is to add an option that can
distinguish between tail policies. Setting agnostic elements to
all 1s keeps things simple and allows QEMU to express this.

We may explore other possible agnostic behaviors by adding
other options in the future. Please understand that this patch-set
is limited in scope.

v2 updates:
- Addressed comments from Weiwei Li
- Added commit tail agnostic on load / store instructions (which
  I forgot to include into the patch-set)

v3 updates:
- Missed the very 1st commit, adding it back

v4 updates:
- Renamed vlmax to total_elems
- Deal with tail element when vl_eq_vlmax == true

v5 updates:
- Let `vext_get_total_elems` take `desc` and `esz`
- Utilize `simd_maxsz(desc)` to get `vlenb`
- Fix alignments to code

v6 updates:
- Fix `vext_get_total_elems`

v7 updates:
- Reuse `max_elems` for vector load / store helper functions. The
  translation sets desc's `lmul` to `min(1, lmul)`, making
  `vext_max_elems` equivalent to `vext_get_total_elems`.

v8 updates:
- Simplify `vext_set_elems_1s`, don't need `vext_set_elems_1s_fns`
- Fix `vext_get_total_elems`, it should derive upon EMUL instead
  of LMUL

v9 updates:
- Let instructions that are tail agnostic regardless of vta respect the
  option rather than the vta.

v10 updates:
- Correct range to set element to 1s for load instructions

v11 updates:
- Separate addition of option 'rvv_ta_all_1s' as a new (last) commit
- Add description to show intent of the option in first commit for the
  optional tail agnostic behavior
- Tag WeiWei as Reviewed-by for all commits
- Tag Alistair as Reviewed-by for commit 01, 02
- Tag Alistair as Acked-by for commit 03

eopXD (15):
  target/riscv: rvv: Prune redundant ESZ, DSZ parameter passed
  target/riscv: rvv: Rename ambiguous esz
  target/riscv: rvv: Early exit when vstart >= vl
  target/riscv: rvv: Add tail agnostic for vv instructions
  target/riscv: rvv: Add tail agnostic for vector load / store
instructions
  target/riscv: rvv: Add tail agnostic for vx, vvm, vxm instructions
  target/riscv: rvv: Add tail agnostic for vector integer shift
instructions
  target/riscv: rvv: Add tail agnostic for vector integer comparison
instructions
  target/riscv: rvv: Add tail agnostic for vector integer merge and move
instructions
  target/riscv: rvv: Add tail agnostic for vector fix-point arithmetic
instructions
  target/riscv: rvv: Add tail agnostic for vector floating-point
instructions
  target/riscv: rvv: Add tail agnostic for vector reduction instructions
  target/riscv: rvv: Add tail agnostic for vector mask instructions
  target/riscv: rvv: Add tail agnostic for vector permutation
instructions
  target/riscv: rvv: Add option 'rvv_ta_all_1s' to enable optional tail
agnostic behavior

 target/riscv/cpu.c  |1 +
 target/riscv/cpu.h  |2 +
 target/riscv/cpu_helper.c   |2 +
 target/riscv/insn_trans/trans_rvv.c.inc |  176 +++
 target/riscv/internals.h|6 +-
 target/riscv/translate.c|4 +
 target/riscv/vector_helper.c| 1536 ++-
 7 files changed, 1110 insertions(+), 617 deletions(-)

-- 
2.34.2



[PATCH qemu v11 03/15] target/riscv: rvv: Early exit when vstart >= vl

2022-04-27 Thread ~eopxd
From: eopXD 

According to v-spec (section 5.4):
When vstart ≥ vl, there are no body elements, and no elements are
updated in any destination vector register group, including that
no tail elements are updated with agnostic values.

The vmsbf.m, vmsif.m, vmsof.m, viota.m and vcompress instructions
themselves require vstart to be zero, so they don't need the early exit.

Signed-off-by: eop Chen 
Reviewed-by: Frank Chang 
Reviewed-by: Weiwei Li
Acked-by: Alistair Francis 
---
 target/riscv/insn_trans/trans_rvv.c.inc | 27 +
 1 file changed, 27 insertions(+)

diff --git a/target/riscv/insn_trans/trans_rvv.c.inc 
b/target/riscv/insn_trans/trans_rvv.c.inc
index 275fded6e4..57953923d5 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -652,6 +652,7 @@ static bool ldst_us_trans(uint32_t vd, uint32_t rs1, 
uint32_t data,
 
 TCGLabel *over = gen_new_label();
 tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
+tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
 
 dest = tcg_temp_new_ptr();
 mask = tcg_temp_new_ptr();
@@ -818,6 +819,7 @@ static bool ldst_stride_trans(uint32_t vd, uint32_t rs1, 
uint32_t rs2,
 
 TCGLabel *over = gen_new_label();
 tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
+tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
 
 dest = tcg_temp_new_ptr();
 mask = tcg_temp_new_ptr();
@@ -925,6 +927,7 @@ static bool ldst_index_trans(uint32_t vd, uint32_t rs1, 
uint32_t vs2,
 
 TCGLabel *over = gen_new_label();
 tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
+tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
 
 dest = tcg_temp_new_ptr();
 mask = tcg_temp_new_ptr();
@@ -1067,6 +1070,7 @@ static bool ldff_trans(uint32_t vd, uint32_t rs1, 
uint32_t data,
 
 TCGLabel *over = gen_new_label();
 tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
+tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
 
 dest = tcg_temp_new_ptr();
 mask = tcg_temp_new_ptr();
@@ -1216,6 +1220,7 @@ do_opivv_gvec(DisasContext *s, arg_rmrr *a, GVecGen3Fn 
*gvec_fn,
 }
 
 tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
+tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
 
 if (a->vm && s->vl_eq_vlmax) {
 gvec_fn(s->sew, vreg_ofs(s, a->rd),
@@ -1263,6 +1268,7 @@ static bool opivx_trans(uint32_t vd, uint32_t rs1, 
uint32_t vs2, uint32_t vm,
 
 TCGLabel *over = gen_new_label();
 tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
+tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
 
 dest = tcg_temp_new_ptr();
 mask = tcg_temp_new_ptr();
@@ -1427,6 +1433,7 @@ static bool opivi_trans(uint32_t vd, uint32_t imm, 
uint32_t vs2, uint32_t vm,
 
 TCGLabel *over = gen_new_label();
 tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
+tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
 
 dest = tcg_temp_new_ptr();
 mask = tcg_temp_new_ptr();
@@ -1513,6 +1520,7 @@ static bool do_opivv_widen(DisasContext *s, arg_rmrr *a,
 uint32_t data = 0;
 TCGLabel *over = gen_new_label();
 tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
+tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
 
 data = FIELD_DP32(data, VDATA, VM, a->vm);
 data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
@@ -1593,6 +1601,7 @@ static bool do_opiwv_widen(DisasContext *s, arg_rmrr *a,
 uint32_t data = 0;
 TCGLabel *over = gen_new_label();
 tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
+tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
 
 data = FIELD_DP32(data, VDATA, VM, a->vm);
 data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
@@ -1670,6 +1679,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a)
 \
 }; \
 TCGLabel *over = gen_new_label();  \
 tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);  \
+tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \
\
 data = FIELD_DP32(data, VDATA, VM, a->vm); \
 data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
@@ -1851,6 +1861,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a)
 \
 }; \
 TCGLabel *over = gen_new_label();  \
 tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);  \
+tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \
\
 data = FIELD_DP32(data, VDATA, VM, a->vm); \
 data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
@@ -2061,6 +2072,7 @@ static bool trans_vm

RE: [RFC 00/18] vfio: Adopt iommufd

2022-04-27 Thread Tian, Kevin
> From: Alex Williamson 
> Sent: Wednesday, April 27, 2022 12:22 AM
> > >
> > > My expectation would be that libvirt uses:
> > >
> > >  -object iommufd,id=iommufd0,fd=NNN
> > >  -device vfio-pci,fd=MMM,iommufd=iommufd0
> > >
> > > Whereas simple QEMU command line would be:
> > >
> > >  -object iommufd,id=iommufd0
> > >  -device vfio-pci,iommufd=iommufd0,host=0000:02:00.0
> > >
> > > The iommufd object would open /dev/iommufd itself.  Creating an
> > > implicit iommufd object is somewhat problematic because one of the
> > > things I forgot to highlight in my previous description is that the
> > > iommufd object is meant to be shared across not only various vfio
> > > devices (platform, ccw, ap, nvme, etc), but also across subsystems, ex.
> > > vdpa.
> >
> > Out of curiosity - in concept one iommufd is sufficient to support all
> > ioas requirements across subsystems, while having multiple iommufd's
> > instead loses the benefit of centralized accounting. The latter will also
> > cause some trouble when we start virtualizing ENQCMD which requires
> > VM-wide PASID virtualization thus further needs to share that
> > information across iommufd's. Not unsolvable but really no gain by
> > adding such complexity. So I'm curious whether Qemu provides
> > a way to restrict a certain object type to a single instance,
> > to discourage such multi-iommufd attempts?
> 
> I don't see any reason for QEMU to restrict iommufd objects.  The QEMU
> philosophy seems to be to let users create whatever configuration they
> want.  For libvirt though, the assumption would be that a single
> iommufd object can be used across subsystems, so libvirt would never
> automatically create multiple objects.

I like the flexibility that the object approach gives in your proposal.
But with the said complexity in mind (with no foreseen benefit), I wonder
whether an alternative approach which treats iommufd as a global
property instead of an object is acceptable in Qemu, i.e.:

-iommufd on/off
-device vfio-pci,iommufd,[fd=MMM/host=0000:02:00.0]

All devices with iommufd specified then implicitly share a single iommufd
object within Qemu.

This still allows vfio devices to be specified via fd but just requires Libvirt
to grant file permission on /dev/iommu. Is it a worthwhile tradeoff to be
considered or just not a typical way in Qemu philosophy e.g. any object
associated with a device must be explicitly specified?

Thanks
Kevin


Re: [PATCH v5 1/1] virtio: fix the condition for iommu_platform not supported

2022-04-27 Thread Jason Wang
On Wed, Apr 27, 2022 at 8:25 PM Chenyi Qiang  wrote:
>
>
>
> On 4/22/2022 3:11 PM, Chenyi Qiang wrote:
> >
> >
> > On 2/7/2022 7:28 PM, Halil Pasic wrote:
> >> The commit 04ceb61a40 ("virtio: Fail if iommu_platform is requested, but
> >> unsupported") claims to fail the device hotplug when iommu_platform
> >> is requested, but not supported by the (vhost) device. On the first
> >> glance the condition for detecting that situation looks perfect, but
> >> because a certain peculiarity of virtio_platform it ain't.
> >>
> >> In fact the aforementioned commit introduces a regression. It breaks
> >> virtio-fs support for Secure Execution, and most likely also for AMD SEV
> >> or any other confidential guest scenario that relies on encrypted guest
> >> memory.  The same also applies to any other vhost device that does not
> >> support _F_ACCESS_PLATFORM.
> >>
> >> The peculiarity is that iommu_platform and _F_ACCESS_PLATFORM collates
> >> "device can not access all of the guest RAM" and "iova != gpa, thus
> >> device needs to translate iova".
> >>
> >> Confidential guest technologies currently rely on the device/hypervisor
> >> offering _F_ACCESS_PLATFORM, so that, after the feature has been
> >> negotiated, the guest grants access to the portions of memory the
> >> device needs to see. So for confidential guests, generally,
> >> _F_ACCESS_PLATFORM is about the restricted access to memory, but not
> >> about the addresses used being something else than guest physical
> >> addresses.
> >>
> >> This is the very reason for which commit f7ef7e6e3b ("vhost: correctly
> >> turn on VIRTIO_F_IOMMU_PLATFORM") fences _F_ACCESS_PLATFORM from the
> >> vhost device that does not need it, because on the vhost interface it
> >> only means "I/O address translation is needed".
> >>
> >> This patch takes inspiration from f7ef7e6e3b ("vhost: correctly turn on
> >> VIRTIO_F_IOMMU_PLATFORM"), and uses the same condition for detecting the
> >> situation when _F_ACCESS_PLATFORM is requested, but no I/O translation
> >> by the device, and thus no device capability is needed. In this
> >> situation claiming that the device does not support iommu_platform=on
> >> is counter-productive. So let us stop doing that!
> >>
> >> Signed-off-by: Halil Pasic 
> >> Reported-by: Jakob Naucke 
> >> Fixes: 04ceb61a40 ("virtio: Fail if iommu_platform is requested, but
> >> unsupported")
> >> Acked-by: Cornelia Huck 
> >> Reviewed-by: Daniel Henrique Barboza 
> >> Tested-by: Daniel Henrique Barboza 
> >> Cc: Kevin Wolf 
> >> Cc: qemu-sta...@nongnu.org
> >>
> >> ---
> >>
> >> v4->v5:
> >> * added back the return; so if somebody were to add code to the end of
> >>the function we are still good
> >> v3->v4:
> >> * Fixed commit message (thanks Connie)
> >> * Removed counter-productive initialization (thanks Connie)
> >> * Added tags
> >> v2->v3:
> >> * Caught a bug: I tried to check if vdev has the feature
> >> ACCESS_PLATFORM after we have forced it. Moved the check
> >> to a better place
> >> v1->v2:
> >> * Commit message tweaks. Most notably fixed commit SHA (Michael)
> >>
> >> ---
> >> ---
> >>   hw/virtio/virtio-bus.c | 12 +++-
> >>   1 file changed, 7 insertions(+), 5 deletions(-)
> >>
> >> diff --git a/hw/virtio/virtio-bus.c b/hw/virtio/virtio-bus.c
> >> index d23db98c56..0f69d1c742 100644
> >> --- a/hw/virtio/virtio-bus.c
> >> +++ b/hw/virtio/virtio-bus.c
> >> @@ -48,6 +48,7 @@ void virtio_bus_device_plugged(VirtIODevice *vdev,
> >> Error **errp)
> >>   VirtioBusClass *klass = VIRTIO_BUS_GET_CLASS(bus);
> >>   VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
> >>   bool has_iommu = virtio_host_has_feature(vdev,
> >> VIRTIO_F_IOMMU_PLATFORM);
> >> +bool vdev_has_iommu;
> >>   Error *local_err = NULL;
> >>   DPRINTF("%s: plug device.\n", qbus->name);
> >> @@ -69,11 +70,6 @@ void virtio_bus_device_plugged(VirtIODevice *vdev,
> >> Error **errp)
> >>   return;
> >>   }
> >> -if (has_iommu && !virtio_host_has_feature(vdev,
> >> VIRTIO_F_IOMMU_PLATFORM)) {
> >> -error_setg(errp, "iommu_platform=true is not supported by the
> >> device");
> >> -return;
> >> -}
> >> -
> >>   if (klass->device_plugged != NULL) {
> >>   klass->device_plugged(qbus->parent, &local_err);
> >>   }
> >> @@ -82,9 +78,15 @@ void virtio_bus_device_plugged(VirtIODevice *vdev,
> >> Error **errp)
> >>   return;
> >>   }
> >> +vdev_has_iommu = virtio_host_has_feature(vdev,
> >> VIRTIO_F_IOMMU_PLATFORM);
> >>   if (klass->get_dma_as != NULL && has_iommu) {
> >>   virtio_add_feature(&vdev->host_features,
> >> VIRTIO_F_IOMMU_PLATFORM);
> >>   vdev->dma_as = klass->get_dma_as(qbus->parent);
> >> +if (!vdev_has_iommu && vdev->dma_as != &address_space_memory) {
> >
> > Hi Pasic,
> >
> > When testing the virtio-fs in Intel TDX, I met the error report in this
> > check. Is it appropriate to compare the dma_as against the
> > address_space_memory to de

Re: [PATCH 1/2] target/riscv: Tentatively remove Zhinx* from ISA extension string

2022-04-27 Thread Weiwei Li



在 2022/4/28 上午7:58, Alistair Francis 写道:

On Sun, Apr 24, 2022 at 3:22 PM Tsukasa OI  wrote:

This commit disables ISA string conversion for Zhinx and Zhinxmin
extensions for now.  Because extension category ordering of "H" is not
ratified, their ordering is likely invalid.

Once "H"-extension ordering is determined, we can add Zhinx* again.

Signed-off-by: Tsukasa OI 

Weiwei Li does this sound alright to you?

Alistair


Even though the rule says: "The first letter following the 'Z' 
conventionally indicates the most closely


related alphabetical extension category, IMAFDQLCBKJTPVH", zhinx* is not 
related to 'H' extension actually.


I think the most closely related alphabetical extension is 'F' extension.

Regards,

Weiwei Li


---
  target/riscv/cpu.c | 2 --
  1 file changed, 2 deletions(-)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index 0c774056c5..c765f7ff00 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -954,8 +954,6 @@ static void riscv_isa_string_ext(RISCVCPU *cpu, char 
**isa_str, int max_str_len)
  ISA_EDATA_ENTRY(zfh, ext_zfh),
  ISA_EDATA_ENTRY(zfhmin, ext_zfhmin),
  ISA_EDATA_ENTRY(zfinx, ext_zfinx),
-ISA_EDATA_ENTRY(zhinx, ext_zhinx),
-ISA_EDATA_ENTRY(zhinxmin, ext_zhinxmin),
  ISA_EDATA_ENTRY(zdinx, ext_zdinx),
  ISA_EDATA_ENTRY(zba, ext_zba),
  ISA_EDATA_ENTRY(zbb, ext_zbb),
--
2.32.0






Re: [PATCH] target/riscv: add scalar crypto related extension strings to isa_string

2022-04-27 Thread Alistair Francis
On Tue, Apr 26, 2022 at 8:14 PM Weiwei Li  wrote:
>
>  - add zbk* and zk* strings to isa_edata_arr
>
> Signed-off-by: Weiwei Li 
> Signed-off-by: Junqiang Wang 

Reviewed-by: Alistair Francis 

Alistair

> ---
>  target/riscv/cpu.c | 13 +
>  1 file changed, 13 insertions(+)
>
> diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
> index dff4606585..ccacdee215 100644
> --- a/target/riscv/cpu.c
> +++ b/target/riscv/cpu.c
> @@ -1005,7 +1005,20 @@ static void riscv_isa_string_ext(RISCVCPU *cpu, char 
> **isa_str, int max_str_len)
>  ISA_EDATA_ENTRY(zba, ext_zba),
>  ISA_EDATA_ENTRY(zbb, ext_zbb),
>  ISA_EDATA_ENTRY(zbc, ext_zbc),
> +ISA_EDATA_ENTRY(zbkb, ext_zbkb),
> +ISA_EDATA_ENTRY(zbkc, ext_zbkc),
> +ISA_EDATA_ENTRY(zbkx, ext_zbkx),
>  ISA_EDATA_ENTRY(zbs, ext_zbs),
> +ISA_EDATA_ENTRY(zk, ext_zk),
> +ISA_EDATA_ENTRY(zkn, ext_zkn),
> +ISA_EDATA_ENTRY(zknd, ext_zknd),
> +ISA_EDATA_ENTRY(zkne, ext_zkne),
> +ISA_EDATA_ENTRY(zknh, ext_zknh),
> +ISA_EDATA_ENTRY(zkr, ext_zkr),
> +ISA_EDATA_ENTRY(zks, ext_zks),
> +ISA_EDATA_ENTRY(zksed, ext_zksed),
> +ISA_EDATA_ENTRY(zksh, ext_zksh),
> +ISA_EDATA_ENTRY(zkt, ext_zkt),
>  ISA_EDATA_ENTRY(zve32f, ext_zve32f),
>  ISA_EDATA_ENTRY(zve64f, ext_zve64f),
>  ISA_EDATA_ENTRY(svinval, ext_svinval),
> --
> 2.17.1
>
>



Re: [PATCH v5 00/11] s390x/tcg: Implement Vector-Enhancements Facility 2

2022-04-27 Thread David Miller
I'm playing catch up a bit here,  as I was out sick for a few days.
It would be very much appreciated if you could do so,  as I'm not
familiar with what is required.

Thanks
- David Miller

On Mon, Apr 25, 2022 at 3:51 AM David Hildenbrand  wrote:
>
> On 25.04.22 09:43, Christian Borntraeger wrote:
> > Am 23.03.22 um 14:57 schrieb David Miller:
> >> Implement Vector-Enhancements Facility 2 for s390x
> >>
> >> resolves: https://gitlab.com/qemu-project/qemu/-/issues/738
> >>
> >> implements:
> >>  VECTOR LOAD ELEMENTS REVERSED   (VLER)
> >>  VECTOR LOAD BYTE REVERSED ELEMENTS  (VLBR)
> >>  VECTOR LOAD BYTE REVERSED ELEMENT   (VLEBRH, VLEBRF, VLEBRG)
> >>  VECTOR LOAD BYTE REVERSED ELEMENT AND ZERO  (VLLEBRZ)
> >>  VECTOR LOAD BYTE REVERSED ELEMENT AND REPLICATE (VLBRREP)
> >>  VECTOR STORE ELEMENTS REVERSED  (VSTER)
> >>  VECTOR STORE BYTE REVERSED ELEMENTS (VSTBR)
> >>  VECTOR STORE BYTE REVERSED ELEMENTS (VSTEBRH, VSTEBRF, 
> >> VSTEBRG)
> >>  VECTOR SHIFT LEFT DOUBLE BY BIT (VSLD)
> >>  VECTOR SHIFT RIGHT DOUBLE BY BIT(VSRD)
> >>  VECTOR STRING SEARCH(VSTRS)
> >>
> >>  modifies:
> >>  VECTOR FP CONVERT FROM FIXED(VCFPS)
> >>  VECTOR FP CONVERT FROM LOGICAL  (VCFPL)
> >>  VECTOR FP CONVERT TO FIXED  (VCSFP)
> >>  VECTOR FP CONVERT TO LOGICAL(VCLFP)
> >>  VECTOR SHIFT LEFT   (VSL)
> >>  VECTOR SHIFT RIGHT ARITHMETIC   (VSRA)
> >>  VECTOR SHIFT RIGHT LOGICAL  (VSRL)
> >>
> >>
> >> David Miller (9):
> >>tcg: Implement tcg_gen_{h,w}swap_{i32,i64}
> >>target/s390x: vxeh2: vector convert short/32b
> >>target/s390x: vxeh2: vector string search
> >>target/s390x: vxeh2: Update for changes to vector shifts
> >>target/s390x: vxeh2: vector shift double by bit
> >>target/s390x: vxeh2: vector {load, store} elements reversed
> >>target/s390x: vxeh2: vector {load, store} byte reversed elements
> >>target/s390x: vxeh2: vector {load, store} byte reversed element
> >>target/s390x: add S390_FEAT_VECTOR_ENH2 to qemu CPU model
> >>tests/tcg/s390x: Tests for Vector Enhancements Facility 2
> >>target/s390x: Fix writeback to v1 in helper_vstl
> >>
> >> Richard Henderson (2):
> >>tcg: Implement tcg_gen_{h,w}swap_{i32,i64}
> >>target/s390x: Fix writeback to v1 in helper_vstl
> >
> >
> > I guess we can now re-do this series against 7.1-devel (qemu/master) which 
> > does
> > have the machine compat changes. Apart from that this should be ready now?
> >
>
> Yes, I think so. I can respin with the proper compat changes if requested.
>
> --
> Thanks,
>
> David / dhildenb
>



[PATCH] error-report: fix crash when computing iso8601 time

2022-04-27 Thread Lei He
g_get_real_time() returns the number of MICROSECONDS since
January 1, 1970 UTC, but g_date_time_new_from_unix_utc() expects
a timestamp in SECONDS.

Directly calling g_date_time_new_from_unix_utc(g_get_real_time()) causes
an overflow and a NULL pointer is returned, then qemu crashes.

Use g_date_time_new_now_utc() instead, and add a check for NULL result.

Signed-off-by: Lei He 
---
 util/error-report.c | 11 ---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/util/error-report.c b/util/error-report.c
index dbadaf206d..d3c150661d 100644
--- a/util/error-report.c
+++ b/util/error-report.c
@@ -173,10 +173,13 @@ static char *
 real_time_iso8601(void)
 {
 #if GLIB_CHECK_VERSION(2,62,0)
-g_autoptr(GDateTime) dt = g_date_time_new_from_unix_utc(g_get_real_time());
+g_autoptr(GDateTime) dt = g_date_time_new_now_utc();
 /* ignore deprecation warning, since GLIB_VERSION_MAX_ALLOWED is 2.56 */
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+if (!dt) {
+return NULL;
+}
 return g_date_time_format_iso8601(dt);
 #pragma GCC diagnostic pop
 #else
@@ -199,8 +202,10 @@ static void vreport(report_type type, const char *fmt, 
va_list ap)
 
 if (message_with_timestamp && !monitor_cur()) {
 timestr = real_time_iso8601();
-error_printf("%s ", timestr);
-g_free(timestr);
+if (timestr) {
+error_printf("%s ", timestr);
+g_free(timestr);
+}
 }
 
 /* Only prepend guest name if -msg guest-name and -name guest=... are set 
*/
-- 
2.11.0




Re: [PATCH v3] target/riscv: Fix incorrect PTE merge in walk_pte

2022-04-27 Thread Alistair Francis
On Sun, Apr 24, 2022 at 7:59 AM Ralf Ramsauer
 wrote:
>
> Two non-subsequent PTEs can be mapped to subsequent paddrs. In this
> case, walk_pte will erroneously merge them.
>
> Enforce the split up, by tracking the virtual base address.
>
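> Let's say we have the mapping:
> 0x81200000 -> 0x89623000 (4K)
> 0x8120f000 -> 0x89624000 (4K)
>
> Before, walk_pte would have shown:
>
> vaddr            paddr            size             attr
> ---------------- ---------------- ---------------- -------
> 0000000081200000 0000000089623000 0000000000002000 rwxu-ad
>
> as it only checks for subsequent paddrs. With this patch, it becomes:
>
> vaddr            paddr            size             attr
> ---------------- ---------------- ---------------- -------
> 0000000081200000 0000000089623000 0000000000001000 rwxu-ad
> 000000008120f000 0000000089624000 0000000000001000 rwxu-ad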
>
> Signed-off-by: Ralf Ramsauer 

Thanks!

Applied to riscv-to-apply.next

Alistair

> ---
> [since v2: Adjust comment, rebased to latest master]
>
>  target/riscv/monitor.c | 11 +++
>  1 file changed, 7 insertions(+), 4 deletions(-)
>
> diff --git a/target/riscv/monitor.c b/target/riscv/monitor.c
> index 7efb4b62c1..17e63fab00 100644
> --- a/target/riscv/monitor.c
> +++ b/target/riscv/monitor.c
> @@ -84,6 +84,7 @@ static void walk_pte(Monitor *mon, hwaddr base, 
> target_ulong start,
>  {
>  hwaddr pte_addr;
>  hwaddr paddr;
> +target_ulong last_start = -1;
>  target_ulong pgsize;
>  target_ulong pte;
>  int ptshift;
> @@ -111,12 +112,13 @@ static void walk_pte(Monitor *mon, hwaddr base, 
> target_ulong start,
>   * A leaf PTE has been found
>   *
>   * If current PTE's permission bits differ from the last one,
> - * or current PTE's ppn does not make a contiguous physical
> - * address block together with the last one, print out the 
> last
> - * contiguous mapped block details.
> + * or the current PTE breaks up a contiguous virtual or
> + * physical mapping, address block together with the last 
> one,
> + * print out the last contiguous mapped block details.
>   */
>  if ((*last_attr != attr) ||
> -(*last_paddr + *last_size != paddr)) {
> +(*last_paddr + *last_size != paddr) ||
> +(last_start + *last_size != start)) {
>  print_pte(mon, va_bits, *vbase, *pbase,
>*last_paddr + *last_size - *pbase, *last_attr);
>
> @@ -125,6 +127,7 @@ static void walk_pte(Monitor *mon, hwaddr base, 
> target_ulong start,
>  *last_attr = attr;
>  }
>
> +last_start = start;
>  *last_paddr = paddr;
>  *last_size = pgsize;
>  } else {
> --
> 2.36.0
>



Re: [Qemu-devel] [PATCH 2/7] target/openrisc: add shutdown logic

2022-04-27 Thread Jason A. Donenfeld
Hi Stafford,

On Thu, Apr 28, 2022 at 06:48:27AM +0900, Stafford Horne wrote:
> On Wed, Apr 27, 2022 at 07:47:33PM +0100, Peter Maydell wrote:
> > On Wed, 27 Apr 2022 at 18:46, Jason A. Donenfeld  wrote:
> > >
> > > Hey Stafford,
> > >
> > > On Mon, Apr 17, 2017 at 08:23:51AM +0900, Stafford Horne wrote:
> > > > In openrisc simulators we use hooks like 'l.nop 1' to cause the
> > > > simulator to exit.  Implement that for qemu too.
> > > >
> > > > Reported-by: Waldemar Brodkorb 
> > > > Signed-off-by: Stafford Horne 
> > >
> > > I'm curious as to why this never got merged. I noticed I'm entirely unable
> > > to shut down or to reboot (which is mostly what I care about) Linux from
> > > OpenRISC. It just hangs.
> > 
> > This kind of thing needs to be either:
> >  (1) we're modelling real hardware and that real hardware has a
> > device or other mechanism guest code can prod to cause a power-off
> > or reboot. Then we model that device, and guest code triggers a
> > shutdown or reboot exactly as it would on the real hardware.
> >  (2) there is an architecturally defined ABI for simulators, debug
> > stubs, etc, that includes various operations typically including
> > an "exit the simulator" function. (Arm semihosting is an example
> > of this.) In that case we can implement that functionality,
> > guarded by and controlled by the appropriate command line options.
> > (This is generally not as nice as option 1, because the guest code
> > has to be compiled to have support for semihosting and also because
> > turning it on is usually also giving implicit permission for the
> > guest code to read and write arbitrary host files, etc.)
> > 
> > Either way, undocumented random hacks aren't a good idea, which
> > is why this wasn't merged.
> 
> Yes, this is what was brought up before.  At that time semihosting was 
> mentioned
> and I tried to understand what it was but didn't really understand it as a 
> general
> concept.  Is this something arm specific?
> 
> Since the qemu or1k-sim defines our "simulator", I suspect I could add a
> definition of our simulator ABI to the OpenRISC architecture specification.  
> The
> simulation uses of l.nop N as ABI hooks is a de-facto standard for OpenRISC.
> From the way you describe this now, I take it that if we document this as an
> architecture simulation ABI, the patch would be accepted.

If that's what it takes, then that'd make sense.

By the way, would this also help the reboot case? That's
`reboot(RB_AUTOBOOT);`, which does:

machine_restart() ->
  do_kernel_restart() ->
atomic_notifier_chain_register(&restart_handler_list, nb) ->
  ???

As far as I can tell, nothing is wired into the reboot case for
OpenRISC. Is this something that could be fixed in the kernel without
having to patch QEMU? If so, then I could effectively get shutdown for
my CI with the -no-reboot option, which is what I'm already doing for a
few platforms.

Jason



Re: [PATCH 1/2] target/riscv: Tentatively remove Zhinx* from ISA extension string

2022-04-27 Thread Alistair Francis
On Sun, Apr 24, 2022 at 3:22 PM Tsukasa OI  wrote:
>
> This commit disables ISA string conversion for Zhinx and Zhinxmin
> extensions for now.  Because extension category ordering of "H" is not
> ratified, their ordering is likely invalid.
>
> Once "H"-extension ordering is determined, we can add Zhinx* again.
>
> Signed-off-by: Tsukasa OI 

Weiwei Li does this sound alright to you?

Alistair

> ---
>  target/riscv/cpu.c | 2 --
>  1 file changed, 2 deletions(-)
>
> diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
> index 0c774056c5..c765f7ff00 100644
> --- a/target/riscv/cpu.c
> +++ b/target/riscv/cpu.c
> @@ -954,8 +954,6 @@ static void riscv_isa_string_ext(RISCVCPU *cpu, char 
> **isa_str, int max_str_len)
>  ISA_EDATA_ENTRY(zfh, ext_zfh),
>  ISA_EDATA_ENTRY(zfhmin, ext_zfhmin),
>  ISA_EDATA_ENTRY(zfinx, ext_zfinx),
> -ISA_EDATA_ENTRY(zhinx, ext_zhinx),
> -ISA_EDATA_ENTRY(zhinxmin, ext_zhinxmin),
>  ISA_EDATA_ENTRY(zdinx, ext_zdinx),
>  ISA_EDATA_ENTRY(zba, ext_zba),
>  ISA_EDATA_ENTRY(zbb, ext_zbb),
> --
> 2.32.0
>



Re: [PATCH v3] target/riscv: Fix incorrect PTE merge in walk_pte

2022-04-27 Thread Alistair Francis
On Sun, Apr 24, 2022 at 7:59 AM Ralf Ramsauer
 wrote:
>
> Two non-subsequent PTEs can be mapped to subsequent paddrs. In this
> case, walk_pte will erroneously merge them.
>
> Enforce the split up, by tracking the virtual base address.
>
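> Let's say we have the mapping:
> 0x81200000 -> 0x89623000 (4K)
> 0x8120f000 -> 0x89624000 (4K)
>
> Before, walk_pte would have shown:
>
> vaddr            paddr            size             attr
> ---------------- ---------------- ---------------- -------
> 0000000081200000 0000000089623000 0000000000002000 rwxu-ad
>
> as it only checks for subsequent paddrs. With this patch, it becomes:
>
> vaddr            paddr            size             attr
> ---------------- ---------------- ---------------- -------
> 0000000081200000 0000000089623000 0000000000001000 rwxu-ad
> 000000008120f000 0000000089624000 0000000000001000 rwxu-ad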
>
> Signed-off-by: Ralf Ramsauer 

Thanks for the patch. It doesn't seem to have made it to the QEMU
mailing list though. Do you mind re-sending it and checking to make
sure it is sent to the mailing list?

Alistair

> ---
> [since v2: Adjust comment, rebased to latest master]
>
>  target/riscv/monitor.c | 11 +++
>  1 file changed, 7 insertions(+), 4 deletions(-)
>
> diff --git a/target/riscv/monitor.c b/target/riscv/monitor.c
> index 7efb4b62c1..17e63fab00 100644
> --- a/target/riscv/monitor.c
> +++ b/target/riscv/monitor.c
> @@ -84,6 +84,7 @@ static void walk_pte(Monitor *mon, hwaddr base, 
> target_ulong start,
>  {
>  hwaddr pte_addr;
>  hwaddr paddr;
> +target_ulong last_start = -1;
>  target_ulong pgsize;
>  target_ulong pte;
>  int ptshift;
> @@ -111,12 +112,13 @@ static void walk_pte(Monitor *mon, hwaddr base, 
> target_ulong start,
>   * A leaf PTE has been found
>   *
>   * If current PTE's permission bits differ from the last one,
> - * or current PTE's ppn does not make a contiguous physical
> - * address block together with the last one, print out the 
> last
> - * contiguous mapped block details.
> + * or the current PTE breaks up a contiguous virtual or
> + * physical mapping, address block together with the last 
> one,
> + * print out the last contiguous mapped block details.
>   */
>  if ((*last_attr != attr) ||
> -(*last_paddr + *last_size != paddr)) {
> +(*last_paddr + *last_size != paddr) ||
> +(last_start + *last_size != start)) {
>  print_pte(mon, va_bits, *vbase, *pbase,
>*last_paddr + *last_size - *pbase, *last_attr);
>
> @@ -125,6 +127,7 @@ static void walk_pte(Monitor *mon, hwaddr base, 
> target_ulong start,
>  *last_attr = attr;
>  }
>
> +last_start = start;
>  *last_paddr = paddr;
>  *last_size = pgsize;
>  } else {
> --
> 2.36.0
>



[PATCH v5 4/6] hw/riscv: virt: Add support for generating platform FDT entries

2022-04-27 Thread Alistair Francis
From: Alistair Francis 

Similar to the ARM virt machine add support for adding device tree
entries for dynamically created devices.

Signed-off-by: Alistair Francis 
Reviewed-by: Edgar E. Iglesias 
---
 hw/riscv/virt.c | 19 +++
 1 file changed, 19 insertions(+)

diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
index 312138e7bf..a2ffd04a78 100644
--- a/hw/riscv/virt.c
+++ b/hw/riscv/virt.c
@@ -28,6 +28,7 @@
 #include "hw/qdev-properties.h"
 #include "hw/char/serial.h"
 #include "target/riscv/cpu.h"
+#include "hw/core/sysbus-fdt.h"
 #include "hw/riscv/riscv_hart.h"
 #include "hw/riscv/virt.h"
 #include "hw/riscv/boot.h"
@@ -475,6 +476,12 @@ static void create_fdt_socket_plic(RISCVVirtState *s,
 riscv_socket_fdt_write_id(mc, mc->fdt, plic_name, socket);
 qemu_fdt_setprop_cell(mc->fdt, plic_name, "phandle",
 plic_phandles[socket]);
+
+platform_bus_add_all_fdt_nodes(mc->fdt, plic_name,
+   memmap[VIRT_PLATFORM_BUS].base,
+   memmap[VIRT_PLATFORM_BUS].size,
+   VIRT_PLATFORM_BUS_IRQ);
+
 g_free(plic_name);
 
 g_free(plic_cells);
@@ -552,6 +559,12 @@ static void create_fdt_imsic(RISCVVirtState *s, const 
MemMapEntry *memmap,
 IMSIC_MMIO_GROUP_MIN_SHIFT);
 }
 qemu_fdt_setprop_cell(mc->fdt, imsic_name, "phandle", *msi_m_phandle);
+
+platform_bus_add_all_fdt_nodes(mc->fdt, imsic_name,
+   memmap[VIRT_PLATFORM_BUS].base,
+   memmap[VIRT_PLATFORM_BUS].size,
+   VIRT_PLATFORM_BUS_IRQ);
+
 g_free(imsic_name);
 
 /* S-level IMSIC node */
@@ -689,6 +702,12 @@ static void create_fdt_socket_aplic(RISCVVirtState *s,
 VIRT_IRQCHIP_NUM_SOURCES);
 riscv_socket_fdt_write_id(mc, mc->fdt, aplic_name, socket);
 qemu_fdt_setprop_cell(mc->fdt, aplic_name, "phandle", aplic_s_phandle);
+
+platform_bus_add_all_fdt_nodes(mc->fdt, aplic_name,
+   memmap[VIRT_PLATFORM_BUS].base,
+   memmap[VIRT_PLATFORM_BUS].size,
+   VIRT_PLATFORM_BUS_IRQ);
+
 g_free(aplic_name);
 
 g_free(aplic_cells);
-- 
2.35.1




[PATCH v5 5/6] hw/riscv: virt: Add device plug support

2022-04-27 Thread Alistair Francis
From: Alistair Francis 

Add support for plugging in devices, this was tested with the TPM
device.

Signed-off-by: Alistair Francis 
Reviewed-by: Edgar E. Iglesias 
Reviewed-by: Bin Meng 
---
 hw/riscv/virt.c | 35 +++
 1 file changed, 35 insertions(+)

diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
index a2ffd04a78..da098917dd 100644
--- a/hw/riscv/virt.c
+++ b/hw/riscv/virt.c
@@ -1569,10 +1569,37 @@ static void virt_set_aclint(Object *obj, bool value, 
Error **errp)
 s->have_aclint = value;
 }
 
+static HotplugHandler *virt_machine_get_hotplug_handler(MachineState *machine,
+DeviceState *dev)
+{
+MachineClass *mc = MACHINE_GET_CLASS(machine);
+
+if (device_is_dynamic_sysbus(mc, dev)) {
+return HOTPLUG_HANDLER(machine);
+}
+return NULL;
+}
+
+static void virt_machine_device_plug_cb(HotplugHandler *hotplug_dev,
+DeviceState *dev, Error **errp)
+{
+RISCVVirtState *s = RISCV_VIRT_MACHINE(hotplug_dev);
+
+if (s->platform_bus_dev) {
+MachineClass *mc = MACHINE_GET_CLASS(s);
+
+if (device_is_dynamic_sysbus(mc, dev)) {
+platform_bus_link_device(PLATFORM_BUS_DEVICE(s->platform_bus_dev),
+ SYS_BUS_DEVICE(dev));
+}
+}
+}
+
 static void virt_machine_class_init(ObjectClass *oc, void *data)
 {
 char str[128];
 MachineClass *mc = MACHINE_CLASS(oc);
+HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc);
 
 mc->desc = "RISC-V VirtIO board";
 mc->init = virt_machine_init;
@@ -1584,6 +1611,10 @@ static void virt_machine_class_init(ObjectClass *oc, 
void *data)
 mc->get_default_cpu_node_id = riscv_numa_get_default_cpu_node_id;
 mc->numa_mem_supported = true;
 mc->default_ram_id = "riscv_virt_board.ram";
+assert(!mc->get_hotplug_handler);
+mc->get_hotplug_handler = virt_machine_get_hotplug_handler;
+
+hc->plug = virt_machine_device_plug_cb;
 
 machine_class_allow_dynamic_sysbus_dev(mc, TYPE_RAMFB_DEVICE);
 
@@ -1614,6 +1645,10 @@ static const TypeInfo virt_machine_typeinfo = {
 .class_init = virt_machine_class_init,
 .instance_init = virt_machine_instance_init,
 .instance_size = sizeof(RISCVVirtState),
+.interfaces = (InterfaceInfo[]) {
+ { TYPE_HOTPLUG_HANDLER },
+ { }
+},
 };
 
 static void virt_machine_init_register_types(void)
-- 
2.35.1




[PATCH v5 2/6] hw/core: Move the ARM sysbus-fdt to core

2022-04-27 Thread Alistair Francis
From: Alistair Francis 

The ARM virt machine currently uses sysbus-fdt to create device tree
entries for dynamically created MMIO devices.

The RISC-V virt machine can also benefit from this, so move the code to
the core directory.

Signed-off-by: Alistair Francis 
Reviewed-by: Edgar E. Iglesias 
Reviewed-by: Bin Meng 
---
 include/hw/{arm => core}/sysbus-fdt.h | 0
 hw/arm/virt.c | 2 +-
 hw/arm/xlnx-versal-virt.c | 1 -
 hw/{arm => core}/sysbus-fdt.c | 2 +-
 hw/arm/meson.build| 1 -
 hw/core/meson.build   | 1 +
 6 files changed, 3 insertions(+), 4 deletions(-)
 rename include/hw/{arm => core}/sysbus-fdt.h (100%)
 rename hw/{arm => core}/sysbus-fdt.c (99%)

diff --git a/include/hw/arm/sysbus-fdt.h b/include/hw/core/sysbus-fdt.h
similarity index 100%
rename from include/hw/arm/sysbus-fdt.h
rename to include/hw/core/sysbus-fdt.h
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 5bdd98e4a1..f94278935f 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -56,7 +56,7 @@
 #include "qemu/module.h"
 #include "hw/pci-host/gpex.h"
 #include "hw/virtio/virtio-pci.h"
-#include "hw/arm/sysbus-fdt.h"
+#include "hw/core/sysbus-fdt.h"
 #include "hw/platform-bus.h"
 #include "hw/qdev-properties.h"
 #include "hw/arm/fdt.h"
diff --git a/hw/arm/xlnx-versal-virt.c b/hw/arm/xlnx-versal-virt.c
index 66a2de7e13..37fc9b919c 100644
--- a/hw/arm/xlnx-versal-virt.c
+++ b/hw/arm/xlnx-versal-virt.c
@@ -15,7 +15,6 @@
 #include "sysemu/device_tree.h"
 #include "hw/boards.h"
 #include "hw/sysbus.h"
-#include "hw/arm/sysbus-fdt.h"
 #include "hw/arm/fdt.h"
 #include "cpu.h"
 #include "hw/qdev-properties.h"
diff --git a/hw/arm/sysbus-fdt.c b/hw/core/sysbus-fdt.c
similarity index 99%
rename from hw/arm/sysbus-fdt.c
rename to hw/core/sysbus-fdt.c
index 48c5fe9bf1..19d22cbe73 100644
--- a/hw/arm/sysbus-fdt.c
+++ b/hw/core/sysbus-fdt.c
@@ -27,7 +27,7 @@
 #ifdef CONFIG_LINUX
 #include 
 #endif
-#include "hw/arm/sysbus-fdt.h"
+#include "hw/core/sysbus-fdt.h"
 #include "qemu/error-report.h"
 #include "sysemu/device_tree.h"
 #include "sysemu/tpm.h"
diff --git a/hw/arm/meson.build b/hw/arm/meson.build
index 721a8eb8be..122e5dd992 100644
--- a/hw/arm/meson.build
+++ b/hw/arm/meson.build
@@ -1,6 +1,5 @@
 arm_ss = ss.source_set()
 arm_ss.add(files('boot.c'), fdt)
-arm_ss.add(when: 'CONFIG_PLATFORM_BUS', if_true: files('sysbus-fdt.c'))
 arm_ss.add(when: 'CONFIG_ARM_VIRT', if_true: files('virt.c'))
 arm_ss.add(when: 'CONFIG_ACPI', if_true: files('virt-acpi-build.c'))
 arm_ss.add(when: 'CONFIG_DIGIC', if_true: files('digic_boards.c'))
diff --git a/hw/core/meson.build b/hw/core/meson.build
index 0f884d6fd4..7a4d02b6c0 100644
--- a/hw/core/meson.build
+++ b/hw/core/meson.build
@@ -33,6 +33,7 @@ softmmu_ss.add(when: 'CONFIG_PTIMER', if_true: 
files('ptimer.c'))
 softmmu_ss.add(when: 'CONFIG_REGISTER', if_true: files('register.c'))
 softmmu_ss.add(when: 'CONFIG_SPLIT_IRQ', if_true: files('split-irq.c'))
 softmmu_ss.add(when: 'CONFIG_XILINX_AXI', if_true: files('stream.c'))
+softmmu_ss.add(when: 'CONFIG_PLATFORM_BUS', if_true: files('sysbus-fdt.c'))
 
 softmmu_ss.add(files(
   'cpu-sysemu.c',
-- 
2.35.1




[PATCH v5 3/6] hw/riscv: virt: Create a platform bus

2022-04-27 Thread Alistair Francis
From: Alistair Francis 

Create a platform bus to allow dynamic devices to be connected. This is
based on the ARM implementation.

Signed-off-by: Alistair Francis 
Reviewed-by: Edgar E. Iglesias 
Reviewed-by: Bin Meng 
---
 include/hw/riscv/virt.h |  7 -
 hw/riscv/virt.c | 68 +
 hw/riscv/Kconfig|  1 +
 3 files changed, 56 insertions(+), 20 deletions(-)

diff --git a/include/hw/riscv/virt.h b/include/hw/riscv/virt.h
index 8b8db3fb7c..984e55c77f 100644
--- a/include/hw/riscv/virt.h
+++ b/include/hw/riscv/virt.h
@@ -46,6 +46,7 @@ struct RISCVVirtState {
 
 /*< public >*/
 Notifier machine_done;
+DeviceState *platform_bus_dev;
 RISCVHartArrayState soc[VIRT_SOCKETS_MAX];
 DeviceState *irqchip[VIRT_SOCKETS_MAX];
 PFlashCFI01 *flash[2];
@@ -76,6 +77,7 @@ enum {
 VIRT_DRAM,
 VIRT_PCIE_MMIO,
 VIRT_PCIE_PIO,
+VIRT_PLATFORM_BUS,
 VIRT_PCIE_ECAM
 };
 
@@ -85,9 +87,12 @@ enum {
 VIRTIO_IRQ = 1, /* 1 to 8 */
 VIRTIO_COUNT = 8,
 PCIE_IRQ = 0x20, /* 32 to 35 */
-VIRTIO_NDEV = 0x35 /* Arbitrary maximum number of interrupts */
+VIRT_PLATFORM_BUS_IRQ = 64, /* 64 to 96 */
+VIRTIO_NDEV = 96 /* Arbitrary maximum number of interrupts */
 };
 
+#define VIRT_PLATFORM_BUS_NUM_IRQS 32
+
 #define VIRT_IRQCHIP_IPI_MSI 1
 #define VIRT_IRQCHIP_NUM_MSIS 255
 #define VIRT_IRQCHIP_NUM_SOURCES VIRTIO_NDEV
diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
index 99ab3d4bca..312138e7bf 100644
--- a/hw/riscv/virt.c
+++ b/hw/riscv/virt.c
@@ -37,6 +37,7 @@
 #include "hw/intc/riscv_imsic.h"
 #include "hw/intc/sifive_plic.h"
 #include "hw/misc/sifive_test.h"
+#include "hw/platform-bus.h"
 #include "chardev/char.h"
 #include "sysemu/device_tree.h"
 #include "sysemu/sysemu.h"
@@ -68,25 +69,26 @@
 #endif
 
 static const MemMapEntry virt_memmap[] = {
-[VIRT_DEBUG] =   {0x0, 0x100 },
-[VIRT_MROM] ={ 0x1000,0xf000 },
-[VIRT_TEST] ={   0x10,0x1000 },
-[VIRT_RTC] = {   0x101000,0x1000 },
-[VIRT_CLINT] =   {  0x200,   0x1 },
-[VIRT_ACLINT_SSWI] = {  0x2F0,0x4000 },
-[VIRT_PCIE_PIO] ={  0x300,   0x1 },
-[VIRT_PLIC] ={  0xc00, VIRT_PLIC_SIZE(VIRT_CPUS_MAX * 2) },
-[VIRT_APLIC_M] = {  0xc00, APLIC_SIZE(VIRT_CPUS_MAX) },
-[VIRT_APLIC_S] = {  0xd00, APLIC_SIZE(VIRT_CPUS_MAX) },
-[VIRT_UART0] =   { 0x1000, 0x100 },
-[VIRT_VIRTIO] =  { 0x10001000,0x1000 },
-[VIRT_FW_CFG] =  { 0x1010,  0x18 },
-[VIRT_FLASH] =   { 0x2000, 0x400 },
-[VIRT_IMSIC_M] = { 0x2400, VIRT_IMSIC_MAX_SIZE },
-[VIRT_IMSIC_S] = { 0x2800, VIRT_IMSIC_MAX_SIZE },
-[VIRT_PCIE_ECAM] =   { 0x3000,0x1000 },
-[VIRT_PCIE_MMIO] =   { 0x4000,0x4000 },
-[VIRT_DRAM] ={ 0x8000,   0x0 },
+[VIRT_DEBUG] ={0x0, 0x100 },
+[VIRT_MROM] = { 0x1000,0xf000 },
+[VIRT_TEST] = {   0x10,0x1000 },
+[VIRT_RTC] =  {   0x101000,0x1000 },
+[VIRT_CLINT] ={  0x200,   0x1 },
+[VIRT_ACLINT_SSWI] =  {  0x2F0,0x4000 },
+[VIRT_PCIE_PIO] = {  0x300,   0x1 },
+[VIRT_PLATFORM_BUS] = {  0x400, 0x200 },
+[VIRT_PLIC] = {  0xc00, VIRT_PLIC_SIZE(VIRT_CPUS_MAX * 2) },
+[VIRT_APLIC_M] =  {  0xc00, APLIC_SIZE(VIRT_CPUS_MAX) },
+[VIRT_APLIC_S] =  {  0xd00, APLIC_SIZE(VIRT_CPUS_MAX) },
+[VIRT_UART0] ={ 0x1000, 0x100 },
+[VIRT_VIRTIO] =   { 0x10001000,0x1000 },
+[VIRT_FW_CFG] =   { 0x1010,  0x18 },
+[VIRT_FLASH] ={ 0x2000, 0x400 },
+[VIRT_IMSIC_M] =  { 0x2400, VIRT_IMSIC_MAX_SIZE },
+[VIRT_IMSIC_S] =  { 0x2800, VIRT_IMSIC_MAX_SIZE },
+[VIRT_PCIE_ECAM] ={ 0x3000,0x1000 },
+[VIRT_PCIE_MMIO] ={ 0x4000,0x4000 },
+[VIRT_DRAM] = { 0x8000,   0x0 },
 };
 
 /* PCIe high mmio is fixed for RV32 */
@@ -1162,6 +1164,32 @@ static DeviceState *virt_create_aia(RISCVVirtAIAType 
aia_type, int aia_guests,
 return aplic_m;
 }
 
+static void create_platform_bus(RISCVVirtState *s, DeviceState *irqchip)
+{
+DeviceState *dev;
+SysBusDevice *sysbus;
+const MemMapEntry *memmap = virt_memmap;
+int i;
+MemoryRegion *sysmem = get_system_memory();
+
+dev = qdev_new(TYPE_PLATFORM_BUS_DEVICE);
+dev->id = g_strdup(TYPE_PLATFORM_BUS_DEVICE);
+qdev_prop_set_uint32(dev, "num_irqs", VIRT_PLATFORM_BUS_NUM_IRQS);
+qdev_prop_set_uint32(dev, "mmio_size", memmap[VIRT_PLATFORM_BUS].size);
+sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
+s->platform_bus_dev = dev;
+
+sysbus = SYS_BUS_DEVIC

[PATCH v5 6/6] hw/riscv: Enable TPM backends

2022-04-27 Thread Alistair Francis
From: Alistair Francis 

Imply the TPM sysbus devices. This allows users to add TPM devices to
the RISC-V virt board.

This was tested by first creating an emulated TPM device:

swtpm socket --tpm2 -t -d --tpmstate dir=/tmp/tpm \
--ctrl type=unixio,path=swtpm-sock

Then launching QEMU with:

-chardev socket,id=chrtpm,path=swtpm-sock \
-tpmdev emulator,id=tpm0,chardev=chrtpm \
-device tpm-tis-device,tpmdev=tpm0

The TPM device can be seen in the memory tree and the generated device
tree.

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/942
Signed-off-by: Alistair Francis 
Reviewed-by: Edgar E. Iglesias 
Reviewed-by: Bin Meng 
---
 docs/system/riscv/virt.rst | 20 
 hw/riscv/virt.c|  4 
 hw/riscv/Kconfig   |  1 +
 3 files changed, 25 insertions(+)

diff --git a/docs/system/riscv/virt.rst b/docs/system/riscv/virt.rst
index 1272b6659e..f8ecec95f3 100644
--- a/docs/system/riscv/virt.rst
+++ b/docs/system/riscv/virt.rst
@@ -162,3 +162,23 @@ The minimal QEMU commands to run U-Boot SPL are:
 To test 32-bit U-Boot images, switch to use qemu-riscv32_smode_defconfig and
 riscv32_spl_defconfig builds, and replace ``qemu-system-riscv64`` with
 ``qemu-system-riscv32`` in the command lines above to boot the 32-bit U-Boot.
+
+Enabling TPM
+------------
+
+A TPM device can be connected to the virt board by following the steps below.
+
+First launch the TPM emulator
+
+swtpm socket --tpm2 -t -d --tpmstate dir=/tmp/tpm \
+--ctrl type=unixio,path=swtpm-sock
+
+Then launch QEMU with:
+
+...
+-chardev socket,id=chrtpm,path=swtpm-sock \
+-tpmdev emulator,id=tpm0,chardev=chrtpm \
+-device tpm-tis-device,tpmdev=tpm0
+
+The TPM device can be seen in the memory tree and the generated device
+tree and should be accessible from the guest software.
diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
index da098917dd..fb99ff7708 100644
--- a/hw/riscv/virt.c
+++ b/hw/riscv/virt.c
@@ -43,6 +43,7 @@
 #include "sysemu/device_tree.h"
 #include "sysemu/sysemu.h"
 #include "sysemu/kvm.h"
+#include "sysemu/tpm.h"
 #include "hw/pci/pci.h"
 #include "hw/pci-host/gpex.h"
 #include "hw/display/ramfb.h"
@@ -1617,6 +1618,9 @@ static void virt_machine_class_init(ObjectClass *oc, void 
*data)
 hc->plug = virt_machine_device_plug_cb;
 
 machine_class_allow_dynamic_sysbus_dev(mc, TYPE_RAMFB_DEVICE);
+#ifdef CONFIG_TPM
+machine_class_allow_dynamic_sysbus_dev(mc, TYPE_TPM_TIS_SYSBUS);
+#endif
 
 object_class_property_add_bool(oc, "aclint", virt_get_aclint,
virt_set_aclint);
diff --git a/hw/riscv/Kconfig b/hw/riscv/Kconfig
index da790f5936..79ff61c464 100644
--- a/hw/riscv/Kconfig
+++ b/hw/riscv/Kconfig
@@ -34,6 +34,7 @@ config RISCV_VIRT
 imply PCI_DEVICES
 imply VIRTIO_VGA
 imply TEST_DEVICES
+imply TPM_TIS_SYSBUS
 select RISCV_NUMA
 select GOLDFISH_RTC
 select MSI_NONBROKEN
-- 
2.35.1




[PATCH v5 0/6] hw/riscv: Add TPM support to the virt board

2022-04-27 Thread Alistair Francis
From: Alistair Francis 

This series adds support for connecting TPM devices to the RISC-V virt
board. This is similar to how it works for the ARM virt board.

This was tested by first creating an emulated TPM device:

swtpm socket --tpm2 -t -d --tpmstate dir=/tmp/tpm \
--ctrl type=unixio,path=swtpm-sock

Then launching QEMU with:

-chardev socket,id=chrtpm,path=swtpm-sock \
-tpmdev emulator,id=tpm0,chardev=chrtpm \
-device tpm-tis-device,tpmdev=tpm0

The TPM device can be seen in the memory tree and the generated device
tree.

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/942

Alistair Francis (6):
  hw/riscv: virt: Add a machine done notifier
  hw/core: Move the ARM sysbus-fdt to core
  hw/riscv: virt: Create a platform bus
  hw/riscv: virt: Add support for generating platform FDT entries
  hw/riscv: virt: Add device plug support
  hw/riscv: Enable TPM backends

 docs/system/riscv/virt.rst|  20 ++
 include/hw/{arm => core}/sysbus-fdt.h |   0
 include/hw/riscv/virt.h   |   8 +-
 hw/arm/virt.c |   2 +-
 hw/arm/xlnx-versal-virt.c |   1 -
 hw/{arm => core}/sysbus-fdt.c |   2 +-
 hw/riscv/virt.c   | 317 +-
 hw/arm/meson.build|   1 -
 hw/core/meson.build   |   1 +
 hw/riscv/Kconfig  |   2 +
 10 files changed, 240 insertions(+), 114 deletions(-)
 rename include/hw/{arm => core}/sysbus-fdt.h (100%)
 rename hw/{arm => core}/sysbus-fdt.c (99%)

-- 
2.35.1




[PATCH v5 1/6] hw/riscv: virt: Add a machine done notifier

2022-04-27 Thread Alistair Francis
From: Alistair Francis 

Move the binary and device tree loading code to the machine done
notifier. This allows us to prepare for editing the device tree as part
of the notifier.

This is based on similar code in the ARM virt machine.

Signed-off-by: Alistair Francis 
Reviewed-by: Edgar E. Iglesias 
Reviewed-by: Bin Meng 
---
 include/hw/riscv/virt.h |   1 +
 hw/riscv/virt.c | 191 +---
 2 files changed, 102 insertions(+), 90 deletions(-)

diff --git a/include/hw/riscv/virt.h b/include/hw/riscv/virt.h
index 78b058ec86..8b8db3fb7c 100644
--- a/include/hw/riscv/virt.h
+++ b/include/hw/riscv/virt.h
@@ -45,6 +45,7 @@ struct RISCVVirtState {
 MachineState parent;
 
 /*< public >*/
+Notifier machine_done;
 RISCVHartArrayState soc[VIRT_SOCKETS_MAX];
 DeviceState *irqchip[VIRT_SOCKETS_MAX];
 PFlashCFI01 *flash[2];
diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
index b49c5361bd..99ab3d4bca 100644
--- a/hw/riscv/virt.c
+++ b/hw/riscv/virt.c
@@ -1162,6 +1162,100 @@ static DeviceState *virt_create_aia(RISCVVirtAIAType 
aia_type, int aia_guests,
 return aplic_m;
 }
 
+static void virt_machine_done(Notifier *notifier, void *data)
+{
+RISCVVirtState *s = container_of(notifier, RISCVVirtState,
+ machine_done);
+const MemMapEntry *memmap = virt_memmap;
+MachineState *machine = MACHINE(s);
+target_ulong start_addr = memmap[VIRT_DRAM].base;
+target_ulong firmware_end_addr, kernel_start_addr;
+uint32_t fdt_load_addr;
+uint64_t kernel_entry;
+
+/*
+ * Only direct boot kernel is currently supported for KVM VM,
+ * so the "-bios" parameter is not supported when KVM is enabled.
+ */
+if (kvm_enabled()) {
+if (machine->firmware) {
+if (strcmp(machine->firmware, "none")) {
+error_report("Machine mode firmware is not supported in "
+ "combination with KVM.");
+exit(1);
+}
+} else {
+machine->firmware = g_strdup("none");
+}
+}
+
+if (riscv_is_32bit(&s->soc[0])) {
+firmware_end_addr = riscv_find_and_load_firmware(machine,
+RISCV32_BIOS_BIN, start_addr, NULL);
+} else {
+firmware_end_addr = riscv_find_and_load_firmware(machine,
+RISCV64_BIOS_BIN, start_addr, NULL);
+}
+
+if (machine->kernel_filename) {
+kernel_start_addr = riscv_calc_kernel_start_addr(&s->soc[0],
+ firmware_end_addr);
+
+kernel_entry = riscv_load_kernel(machine->kernel_filename,
+ kernel_start_addr, NULL);
+
+if (machine->initrd_filename) {
+hwaddr start;
+hwaddr end = riscv_load_initrd(machine->initrd_filename,
+   machine->ram_size, kernel_entry,
+   &start);
+qemu_fdt_setprop_cell(machine->fdt, "/chosen",
+  "linux,initrd-start", start);
+qemu_fdt_setprop_cell(machine->fdt, "/chosen", "linux,initrd-end",
+  end);
+}
+} else {
+   /*
+* If dynamic firmware is used, it doesn't know where is the next mode
+* if kernel argument is not set.
+*/
+kernel_entry = 0;
+}
+
+if (drive_get(IF_PFLASH, 0, 0)) {
+/*
+ * Pflash was supplied, let's overwrite the address we jump to after
+ * reset to the base of the flash.
+ */
+start_addr = virt_memmap[VIRT_FLASH].base;
+}
+
+/*
+ * Init fw_cfg.  Must be done before riscv_load_fdt, otherwise the device
+ * tree cannot be altered and we get FDT_ERR_NOSPACE.
+ */
+s->fw_cfg = create_fw_cfg(machine);
+rom_set_fw(s->fw_cfg);
+
+/* Compute the fdt load address in dram */
+fdt_load_addr = riscv_load_fdt(memmap[VIRT_DRAM].base,
+   machine->ram_size, machine->fdt);
+/* load the reset vector */
+riscv_setup_rom_reset_vec(machine, &s->soc[0], start_addr,
+  virt_memmap[VIRT_MROM].base,
+  virt_memmap[VIRT_MROM].size, kernel_entry,
+  fdt_load_addr, machine->fdt);
+
+/*
+ * Only direct boot kernel is currently supported for KVM VM,
+ * So here setup kernel start address and fdt address.
+ * TODO:Support firmware loading and integrate to TCG start
+ */
+if (kvm_enabled()) {
+riscv_setup_direct_kernel(kernel_entry, fdt_load_addr);
+}
+}
+
 static void virt_machine_init(MachineState *machine)
 {
 const MemMapEntry *memmap = virt_memmap;
@@ -1169,10 +1263,6 @@ static void virt_machine_init(MachineState *machine)
 MemoryRegion *system_memory = get_system_memory();
 Me

Re: [PATCH] target/riscv: add scalar crypto related extension strings to isa_string

2022-04-27 Thread Alistair Francis
On Tue, Apr 26, 2022 at 8:14 PM Weiwei Li  wrote:
>
>  - add zbk* and zk* strings to isa_edata_arr
>
> Signed-off-by: Weiwei Li 
> Signed-off-by: Junqiang Wang 

Reviewed-by: Alistair Francis 

Alistair

> ---
>  target/riscv/cpu.c | 13 +
>  1 file changed, 13 insertions(+)
>
> diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
> index dff4606585..ccacdee215 100644
> --- a/target/riscv/cpu.c
> +++ b/target/riscv/cpu.c
> @@ -1005,7 +1005,20 @@ static void riscv_isa_string_ext(RISCVCPU *cpu, char 
> **isa_str, int max_str_len)
>  ISA_EDATA_ENTRY(zba, ext_zba),
>  ISA_EDATA_ENTRY(zbb, ext_zbb),
>  ISA_EDATA_ENTRY(zbc, ext_zbc),
> +ISA_EDATA_ENTRY(zbkb, ext_zbkb),
> +ISA_EDATA_ENTRY(zbkc, ext_zbkc),
> +ISA_EDATA_ENTRY(zbkx, ext_zbkx),
>  ISA_EDATA_ENTRY(zbs, ext_zbs),
> +ISA_EDATA_ENTRY(zk, ext_zk),
> +ISA_EDATA_ENTRY(zkn, ext_zkn),
> +ISA_EDATA_ENTRY(zknd, ext_zknd),
> +ISA_EDATA_ENTRY(zkne, ext_zkne),
> +ISA_EDATA_ENTRY(zknh, ext_zknh),
> +ISA_EDATA_ENTRY(zkr, ext_zkr),
> +ISA_EDATA_ENTRY(zks, ext_zks),
> +ISA_EDATA_ENTRY(zksed, ext_zksed),
> +ISA_EDATA_ENTRY(zksh, ext_zksh),
> +ISA_EDATA_ENTRY(zkt, ext_zkt),
>  ISA_EDATA_ENTRY(zve32f, ext_zve32f),
>  ISA_EDATA_ENTRY(zve64f, ext_zve64f),
>  ISA_EDATA_ENTRY(svinval, ext_svinval),
> --
> 2.17.1
>
>



Re: [PATCH qemu v10 04/14] target/riscv: rvv: Add tail agnostic for vv instructions

2022-04-27 Thread Alistair Francis
On Thu, Apr 28, 2022 at 1:09 AM ~eopxd  wrote:
>
> From: eopXD 
>
> This is the first commit regarding the tail agnostic behavior.
> Added option 'rvv_ta_all_1s' to enable the behavior; the option
> defaults to false.
>
> Signed-off-by: eop Chen 
> Reviewed-by: Frank Chang 
> ---
>  target/riscv/cpu.c  |   1 +
>  target/riscv/cpu.h  |   2 +
>  target/riscv/cpu_helper.c   |   2 +
>  target/riscv/insn_trans/trans_rvv.c.inc |  11 +
>  target/riscv/internals.h|   5 +-
>  target/riscv/translate.c|   2 +
>  target/riscv/vector_helper.c| 296 +---
>  7 files changed, 188 insertions(+), 131 deletions(-)
>
> diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
> index ddda4906ff..cd4cf4b41e 100644
> --- a/target/riscv/cpu.c
> +++ b/target/riscv/cpu.c
> @@ -810,6 +810,7 @@ static Property riscv_cpu_properties[] = {
>  DEFINE_PROP_BOOL("x-aia", RISCVCPU, cfg.aia, false),
>
>  DEFINE_PROP_UINT64("resetvec", RISCVCPU, cfg.resetvec, DEFAULT_RSTVEC),
> +DEFINE_PROP_BOOL("rvv_ta_all_1s", RISCVCPU, cfg.rvv_ta_all_1s, false),

This should be a separate patch at the end of the series. The idea is
that you add the feature first (without allowing users to use it) then
expose the feature. Otherwise it's possible someone will enable the
feature when it's only half implemented.

Alistair

>  DEFINE_PROP_END_OF_LIST(),
>  };
>
> diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
> index c069fe85fa..8c4a79b5a0 100644
> --- a/target/riscv/cpu.h
> +++ b/target/riscv/cpu.h
> @@ -369,6 +369,7 @@ struct RISCVCPUConfig {
>  bool ext_zhinxmin;
>  bool ext_zve32f;
>  bool ext_zve64f;
> +bool rvv_ta_all_1s;
>
>  /* Vendor-specific custom extensions */
>  bool ext_XVentanaCondOps;
> @@ -516,6 +517,7 @@ FIELD(TB_FLAGS, XL, 20, 2)
>  /* If PointerMasking should be applied */
>  FIELD(TB_FLAGS, PM_MASK_ENABLED, 22, 1)
>  FIELD(TB_FLAGS, PM_BASE_ENABLED, 23, 1)
> +FIELD(TB_FLAGS, VTA, 24, 1)
>
>  #ifdef TARGET_RISCV32
>  #define riscv_cpu_mxl(env)  ((void)(env), MXL_RV32)
> diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
> index 1c60fb2e80..2941c88c31 100644
> --- a/target/riscv/cpu_helper.c
> +++ b/target/riscv/cpu_helper.c
> @@ -65,6 +65,8 @@ void cpu_get_tb_cpu_state(CPURISCVState *env, target_ulong 
> *pc,
>  flags = FIELD_DP32(flags, TB_FLAGS, LMUL,
>  FIELD_EX64(env->vtype, VTYPE, VLMUL));
>  flags = FIELD_DP32(flags, TB_FLAGS, VL_EQ_VLMAX, vl_eq_vlmax);
> +flags = FIELD_DP32(flags, TB_FLAGS, VTA,
> +FIELD_EX64(env->vtype, VTYPE, VTA));
>  } else {
>  flags = FIELD_DP32(flags, TB_FLAGS, VILL, 1);
>  }
> diff --git a/target/riscv/insn_trans/trans_rvv.c.inc 
> b/target/riscv/insn_trans/trans_rvv.c.inc
> index 57953923d5..cc80bf00ff 100644
> --- a/target/riscv/insn_trans/trans_rvv.c.inc
> +++ b/target/riscv/insn_trans/trans_rvv.c.inc
> @@ -1223,6 +1223,16 @@ do_opivv_gvec(DisasContext *s, arg_rmrr *a, GVecGen3Fn 
> *gvec_fn,
>  tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
>
>  if (a->vm && s->vl_eq_vlmax) {
> +if (s->vta && s->lmul < 0) {
> +/*
> + * tail elements may pass vlmax when lmul < 0
> + * set tail elements to 1s
> + */
> +uint32_t vlenb = s->cfg_ptr->vlen >> 3;
> +tcg_gen_gvec_ori(s->sew, vreg_ofs(s, a->rd),
> + vreg_ofs(s, a->rd), -1,
> + vlenb, vlenb);
> +}
>  gvec_fn(s->sew, vreg_ofs(s, a->rd),
>  vreg_ofs(s, a->rs2), vreg_ofs(s, a->rs1),
>  MAXSZ(s), MAXSZ(s));
> @@ -1231,6 +1241,7 @@ do_opivv_gvec(DisasContext *s, arg_rmrr *a, GVecGen3Fn 
> *gvec_fn,
>
>  data = FIELD_DP32(data, VDATA, VM, a->vm);
>  data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
> +data = FIELD_DP32(data, VDATA, VTA, s->vta);
>  tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
> vreg_ofs(s, a->rs1), vreg_ofs(s, a->rs2),
> cpu_env, s->cfg_ptr->vlen / 8,
> diff --git a/target/riscv/internals.h b/target/riscv/internals.h
> index dbb322bfa7..512c6c30cf 100644
> --- a/target/riscv/internals.h
> +++ b/target/riscv/internals.h
> @@ -24,8 +24,9 @@
>  /* share data between vector helpers and decode code */
>  FIELD(VDATA, VM, 0, 1)
>  FIELD(VDATA, LMUL, 1, 3)
> -FIELD(VDATA, NF, 4, 4)
> -FIELD(VDATA, WD, 4, 1)
> +FIELD(VDATA, VTA, 4, 1)
> +FIELD(VDATA, NF, 5, 4)
> +FIELD(VDATA, WD, 5, 1)
>
>  /* float point classify helpers */
>  target_ulong fclass_h(uint64_t frs1);
> diff --git a/target/riscv/translate.c b/target/riscv/translate.c
> index fac998a6b5..7775dade26 100644
> --- a/target/riscv/translate.c
> +++ b/target/riscv/translate.c
> @@ -94,6 +94,7 @@ typedef struct DisasContext {
>   */
>  int8_t lmul;
>  

Re: [PATCH qemu v10 04/14] target/riscv: rvv: Add tail agnostic for vv instructions

2022-04-27 Thread Alistair Francis
On Thu, Apr 28, 2022 at 9:11 AM Alistair Francis  wrote:
>
> On Thu, Apr 28, 2022 at 1:09 AM ~eopxd  wrote:
> >
> > From: eopXD 
> >
> > This is the first commit regarding the tail agnostic behavior.
> > > Added option 'rvv_ta_all_1s' to enable the behavior; the option
> > > defaults to false.
>
> I'm not sure I follow.
>
> The spec says that:
>
> "When a set is marked agnostic, the corresponding set of destination
> elements in any vector destination operand can either retain the value
> they previously held, or are overwritten with 1s."
>
> So this is allowing us to configure QEMU to be one or the other? Why
> do we need to support both options?

Sorry, I just read your cover letter which adds some details, do you
mind adding an explanation to this commit (so then it's in the git
history), including why this is useful?

Alistair

>
> Alistair
>
> >
> > Signed-off-by: eop Chen 
> > Reviewed-by: Frank Chang 
> > ---
> >  target/riscv/cpu.c  |   1 +
> >  target/riscv/cpu.h  |   2 +
> >  target/riscv/cpu_helper.c   |   2 +
> >  target/riscv/insn_trans/trans_rvv.c.inc |  11 +
> >  target/riscv/internals.h|   5 +-
> >  target/riscv/translate.c|   2 +
> >  target/riscv/vector_helper.c| 296 +---
> >  7 files changed, 188 insertions(+), 131 deletions(-)
> >
> > diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
> > index ddda4906ff..cd4cf4b41e 100644
> > --- a/target/riscv/cpu.c
> > +++ b/target/riscv/cpu.c
> > @@ -810,6 +810,7 @@ static Property riscv_cpu_properties[] = {
> >  DEFINE_PROP_BOOL("x-aia", RISCVCPU, cfg.aia, false),
> >
> >  DEFINE_PROP_UINT64("resetvec", RISCVCPU, cfg.resetvec, DEFAULT_RSTVEC),
> > +DEFINE_PROP_BOOL("rvv_ta_all_1s", RISCVCPU, cfg.rvv_ta_all_1s, false),
> >  DEFINE_PROP_END_OF_LIST(),
> >  };
> >
> > diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
> > index c069fe85fa..8c4a79b5a0 100644
> > --- a/target/riscv/cpu.h
> > +++ b/target/riscv/cpu.h
> > @@ -369,6 +369,7 @@ struct RISCVCPUConfig {
> >  bool ext_zhinxmin;
> >  bool ext_zve32f;
> >  bool ext_zve64f;
> > +bool rvv_ta_all_1s;
> >
> >  /* Vendor-specific custom extensions */
> >  bool ext_XVentanaCondOps;
> > @@ -516,6 +517,7 @@ FIELD(TB_FLAGS, XL, 20, 2)
> >  /* If PointerMasking should be applied */
> >  FIELD(TB_FLAGS, PM_MASK_ENABLED, 22, 1)
> >  FIELD(TB_FLAGS, PM_BASE_ENABLED, 23, 1)
> > +FIELD(TB_FLAGS, VTA, 24, 1)
> >
> >  #ifdef TARGET_RISCV32
> >  #define riscv_cpu_mxl(env)  ((void)(env), MXL_RV32)
> > diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
> > index 1c60fb2e80..2941c88c31 100644
> > --- a/target/riscv/cpu_helper.c
> > +++ b/target/riscv/cpu_helper.c
> > @@ -65,6 +65,8 @@ void cpu_get_tb_cpu_state(CPURISCVState *env, 
> > target_ulong *pc,
> >  flags = FIELD_DP32(flags, TB_FLAGS, LMUL,
> >  FIELD_EX64(env->vtype, VTYPE, VLMUL));
> >  flags = FIELD_DP32(flags, TB_FLAGS, VL_EQ_VLMAX, vl_eq_vlmax);
> > +flags = FIELD_DP32(flags, TB_FLAGS, VTA,
> > +FIELD_EX64(env->vtype, VTYPE, VTA));
> >  } else {
> >  flags = FIELD_DP32(flags, TB_FLAGS, VILL, 1);
> >  }
> > diff --git a/target/riscv/insn_trans/trans_rvv.c.inc 
> > b/target/riscv/insn_trans/trans_rvv.c.inc
> > index 57953923d5..cc80bf00ff 100644
> > --- a/target/riscv/insn_trans/trans_rvv.c.inc
> > +++ b/target/riscv/insn_trans/trans_rvv.c.inc
> > @@ -1223,6 +1223,16 @@ do_opivv_gvec(DisasContext *s, arg_rmrr *a, 
> > GVecGen3Fn *gvec_fn,
> >  tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
> >
> >  if (a->vm && s->vl_eq_vlmax) {
> > +if (s->vta && s->lmul < 0) {
> > +/*
> > + * tail elements may pass vlmax when lmul < 0
> > + * set tail elements to 1s
> > + */
> > +uint32_t vlenb = s->cfg_ptr->vlen >> 3;
> > +tcg_gen_gvec_ori(s->sew, vreg_ofs(s, a->rd),
> > + vreg_ofs(s, a->rd), -1,
> > + vlenb, vlenb);
> > +}
> >  gvec_fn(s->sew, vreg_ofs(s, a->rd),
> >  vreg_ofs(s, a->rs2), vreg_ofs(s, a->rs1),
> >  MAXSZ(s), MAXSZ(s));
> > @@ -1231,6 +1241,7 @@ do_opivv_gvec(DisasContext *s, arg_rmrr *a, 
> > GVecGen3Fn *gvec_fn,
> >
> >  data = FIELD_DP32(data, VDATA, VM, a->vm);
> >  data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
> > +data = FIELD_DP32(data, VDATA, VTA, s->vta);
> >  tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
> > vreg_ofs(s, a->rs1), vreg_ofs(s, a->rs2),
> > cpu_env, s->cfg_ptr->vlen / 8,
> > diff --git a/target/riscv/internals.h b/target/riscv/internals.h
> > index dbb322bfa7..512c6c30cf 100644
> > --- a/target/riscv/internals.h
> > +++ b/target/riscv/internals.h
> > @@ -24,8 +24,9 @@
> >  /* share dat

Re: [PATCH qemu v10 04/14] target/riscv: rvv: Add tail agnostic for vv instructions

2022-04-27 Thread Alistair Francis
On Thu, Apr 28, 2022 at 1:09 AM ~eopxd  wrote:
>
> From: eopXD 
>
> This is the first commit regarding the tail agnostic behavior.
> Added option 'rvv_ta_all_1s' to enable the behavior; the option
> defaults to false.

I'm not sure I follow.

The spec says that:

"When a set is marked agnostic, the corresponding set of destination
elements in any vector destination operand can either retain the value
they previously held, or are overwritten with 1s."

So this is allowing us to configure QEMU to be one or the other? Why
do we need to support both options?

Alistair

>
> Signed-off-by: eop Chen 
> Reviewed-by: Frank Chang 
> ---
>  target/riscv/cpu.c  |   1 +
>  target/riscv/cpu.h  |   2 +
>  target/riscv/cpu_helper.c   |   2 +
>  target/riscv/insn_trans/trans_rvv.c.inc |  11 +
>  target/riscv/internals.h|   5 +-
>  target/riscv/translate.c|   2 +
>  target/riscv/vector_helper.c| 296 +---
>  7 files changed, 188 insertions(+), 131 deletions(-)
>
> diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
> index ddda4906ff..cd4cf4b41e 100644
> --- a/target/riscv/cpu.c
> +++ b/target/riscv/cpu.c
> @@ -810,6 +810,7 @@ static Property riscv_cpu_properties[] = {
>  DEFINE_PROP_BOOL("x-aia", RISCVCPU, cfg.aia, false),
>
>  DEFINE_PROP_UINT64("resetvec", RISCVCPU, cfg.resetvec, DEFAULT_RSTVEC),
> +DEFINE_PROP_BOOL("rvv_ta_all_1s", RISCVCPU, cfg.rvv_ta_all_1s, false),
>  DEFINE_PROP_END_OF_LIST(),
>  };
>
> diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
> index c069fe85fa..8c4a79b5a0 100644
> --- a/target/riscv/cpu.h
> +++ b/target/riscv/cpu.h
> @@ -369,6 +369,7 @@ struct RISCVCPUConfig {
>  bool ext_zhinxmin;
>  bool ext_zve32f;
>  bool ext_zve64f;
> +bool rvv_ta_all_1s;
>
>  /* Vendor-specific custom extensions */
>  bool ext_XVentanaCondOps;
> @@ -516,6 +517,7 @@ FIELD(TB_FLAGS, XL, 20, 2)
>  /* If PointerMasking should be applied */
>  FIELD(TB_FLAGS, PM_MASK_ENABLED, 22, 1)
>  FIELD(TB_FLAGS, PM_BASE_ENABLED, 23, 1)
> +FIELD(TB_FLAGS, VTA, 24, 1)
>
>  #ifdef TARGET_RISCV32
>  #define riscv_cpu_mxl(env)  ((void)(env), MXL_RV32)
> diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
> index 1c60fb2e80..2941c88c31 100644
> --- a/target/riscv/cpu_helper.c
> +++ b/target/riscv/cpu_helper.c
> @@ -65,6 +65,8 @@ void cpu_get_tb_cpu_state(CPURISCVState *env, target_ulong 
> *pc,
>  flags = FIELD_DP32(flags, TB_FLAGS, LMUL,
>  FIELD_EX64(env->vtype, VTYPE, VLMUL));
>  flags = FIELD_DP32(flags, TB_FLAGS, VL_EQ_VLMAX, vl_eq_vlmax);
> +flags = FIELD_DP32(flags, TB_FLAGS, VTA,
> +FIELD_EX64(env->vtype, VTYPE, VTA));
>  } else {
>  flags = FIELD_DP32(flags, TB_FLAGS, VILL, 1);
>  }
> diff --git a/target/riscv/insn_trans/trans_rvv.c.inc 
> b/target/riscv/insn_trans/trans_rvv.c.inc
> index 57953923d5..cc80bf00ff 100644
> --- a/target/riscv/insn_trans/trans_rvv.c.inc
> +++ b/target/riscv/insn_trans/trans_rvv.c.inc
> @@ -1223,6 +1223,16 @@ do_opivv_gvec(DisasContext *s, arg_rmrr *a, GVecGen3Fn 
> *gvec_fn,
>  tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
>
>  if (a->vm && s->vl_eq_vlmax) {
> +if (s->vta && s->lmul < 0) {
> +/*
> + * tail elements may pass vlmax when lmul < 0
> + * set tail elements to 1s
> + */
> +uint32_t vlenb = s->cfg_ptr->vlen >> 3;
> +tcg_gen_gvec_ori(s->sew, vreg_ofs(s, a->rd),
> + vreg_ofs(s, a->rd), -1,
> + vlenb, vlenb);
> +}
>  gvec_fn(s->sew, vreg_ofs(s, a->rd),
>  vreg_ofs(s, a->rs2), vreg_ofs(s, a->rs1),
>  MAXSZ(s), MAXSZ(s));
> @@ -1231,6 +1241,7 @@ do_opivv_gvec(DisasContext *s, arg_rmrr *a, GVecGen3Fn 
> *gvec_fn,
>
>  data = FIELD_DP32(data, VDATA, VM, a->vm);
>  data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
> +data = FIELD_DP32(data, VDATA, VTA, s->vta);
>  tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
> vreg_ofs(s, a->rs1), vreg_ofs(s, a->rs2),
> cpu_env, s->cfg_ptr->vlen / 8,
> diff --git a/target/riscv/internals.h b/target/riscv/internals.h
> index dbb322bfa7..512c6c30cf 100644
> --- a/target/riscv/internals.h
> +++ b/target/riscv/internals.h
> @@ -24,8 +24,9 @@
>  /* share data between vector helpers and decode code */
>  FIELD(VDATA, VM, 0, 1)
>  FIELD(VDATA, LMUL, 1, 3)
> -FIELD(VDATA, NF, 4, 4)
> -FIELD(VDATA, WD, 4, 1)
> +FIELD(VDATA, VTA, 4, 1)
> +FIELD(VDATA, NF, 5, 4)
> +FIELD(VDATA, WD, 5, 1)
>
>  /* float point classify helpers */
>  target_ulong fclass_h(uint64_t frs1);
> diff --git a/target/riscv/translate.c b/target/riscv/translate.c
> index fac998a6b5..7775dade26 100644
> --- a/target/riscv/translate.c
> +++ b/target/riscv/translat

[PATCH] loader: support loading large files (>=2GB)

2022-04-27 Thread Peter Collingbourne
Currently the loader uses int as the return type for various APIs
that deal with file sizes, which leads to an error if the file
size is >=2GB, as it ends up being interpreted as a negative error
code. Furthermore, we do not tolerate short reads, which are possible
at least on Linux when attempting to read such large files in one
syscall.

Fix the first problem by switching to 64-bit types for file sizes,
and fix the second by introducing a loop around the read syscall.

Signed-off-by: Peter Collingbourne 
---
 hw/core/generic-loader.c |  2 +-
 hw/core/loader.c | 44 
 include/hw/loader.h  | 13 ++--
 3 files changed, 34 insertions(+), 25 deletions(-)

diff --git a/hw/core/generic-loader.c b/hw/core/generic-loader.c
index c666545aa0..0891fa73c3 100644
--- a/hw/core/generic-loader.c
+++ b/hw/core/generic-loader.c
@@ -67,7 +67,7 @@ static void generic_loader_realize(DeviceState *dev, Error 
**errp)
 GenericLoaderState *s = GENERIC_LOADER(dev);
 hwaddr entry;
 int big_endian;
-int size = 0;
+int64_t size = 0;
 
 s->set_pc = false;
 
diff --git a/hw/core/loader.c b/hw/core/loader.c
index ca2f2431fb..d07c79c400 100644
--- a/hw/core/loader.c
+++ b/hw/core/loader.c
@@ -115,17 +115,17 @@ ssize_t read_targphys(const char *name,
 return did;
 }
 
-int load_image_targphys(const char *filename,
-hwaddr addr, uint64_t max_sz)
+int64_t load_image_targphys(const char *filename,
+hwaddr addr, uint64_t max_sz)
 {
 return load_image_targphys_as(filename, addr, max_sz, NULL);
 }
 
 /* return the size or -1 if error */
-int load_image_targphys_as(const char *filename,
-   hwaddr addr, uint64_t max_sz, AddressSpace *as)
+int64_t load_image_targphys_as(const char *filename,
+   hwaddr addr, uint64_t max_sz, AddressSpace *as)
 {
-int size;
+int64_t size;
 
 size = get_image_size(filename);
 if (size < 0 || size > max_sz) {
@@ -139,9 +139,9 @@ int load_image_targphys_as(const char *filename,
 return size;
 }
 
-int load_image_mr(const char *filename, MemoryRegion *mr)
+int64_t load_image_mr(const char *filename, MemoryRegion *mr)
 {
-int size;
+int64_t size;
 
 if (!memory_access_is_direct(mr, false)) {
 /* Can only load an image into RAM or ROM */
@@ -963,7 +963,8 @@ int rom_add_file(const char *file, const char *fw_dir,
 {
 MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine());
 Rom *rom;
-int rc, fd = -1;
+int fd = -1;
+size_t bytes_read = 0;
 char devpath[100];
 
 if (as && mr) {
@@ -1003,11 +1004,17 @@ int rom_add_file(const char *file, const char *fw_dir,
 rom->datasize = rom->romsize;
 rom->data = g_malloc0(rom->datasize);
 lseek(fd, 0, SEEK_SET);
-rc = read(fd, rom->data, rom->datasize);
-if (rc != rom->datasize) {
-fprintf(stderr, "rom: file %-20s: read error: rc=%d (expected %zd)\n",
-rom->name, rc, rom->datasize);
-goto err;
+while (bytes_read < rom->datasize) {
+ssize_t rc =
+read(fd, rom->data + bytes_read, rom->datasize - bytes_read);
+if (rc <= 0) {
+fprintf(stderr,
+"rom: file %-20s: read error: rc=%zd at position %zd "
+"(expected size %zd)\n",
+rom->name, rc, bytes_read, rom->datasize);
+goto err;
+}
+bytes_read += rc;
 }
 close(fd);
 rom_insert(rom);
@@ -1671,7 +1678,7 @@ typedef struct {
 HexLine line;
 uint8_t *bin_buf;
 hwaddr *start_addr;
-int total_size;
+int64_t total_size;
 uint32_t next_address_to_write;
 uint32_t current_address;
 uint32_t current_rom_index;
@@ -1767,8 +1774,8 @@ static int handle_record_type(HexParser *parser)
 }
 
 /* return size or -1 if error */
-static int parse_hex_blob(const char *filename, hwaddr *addr, uint8_t 
*hex_blob,
-  size_t hex_blob_size, AddressSpace *as)
+static int64_t parse_hex_blob(const char *filename, hwaddr *addr, uint8_t 
*hex_blob,
+  size_t hex_blob_size, AddressSpace *as)
 {
 bool in_process = false; /* avoid re-enter and
   * check whether record begin with ':' */
@@ -1832,11 +1839,12 @@ out:
 }
 
 /* return size or -1 if error */
-int load_targphys_hex_as(const char *filename, hwaddr *entry, AddressSpace *as)
+int64_t load_targphys_hex_as(const char *filename, hwaddr *entry,
+ AddressSpace *as)
 {
 gsize hex_blob_size;
 gchar *hex_blob;
-int total_size = 0;
+int64_t total_size = 0;
 
 if (!g_file_get_contents(filename, &hex_blob, &hex_blob_size, NULL)) {
 return -1;
diff --git a/include/hw/loader.h b/include/hw/loader.h
index 5572108ba5..7b09705940 100644
--- a/include/hw/loader.h
+++ b/include/hw/loader.h
@@ -40,8 +40,8 @@ ssize_

Re: [PATCH qemu v10 03/14] target/riscv: rvv: Early exit when vstart >= vl

2022-04-27 Thread Alistair Francis
On Thu, Apr 28, 2022 at 1:06 AM ~eopxd  wrote:
>
> From: eopXD 
>
> According to v-spec (section 5.4):
> When vstart ≥ vl, there are no body elements, and no elements are
> updated in any destination vector register group, including that
> no tail elements are updated with agnostic values.
>
> vmsbf.m, vmsif.m, vmsof.m, viota.m, vcompress instructions themselves
> require vstart to be zero. So they don't need the early exit.
>
> Signed-off-by: eop Chen 
> Reviewed-by: Frank Chang 

Acked-by: Alistair Francis 

Alistair

> ---
>  target/riscv/insn_trans/trans_rvv.c.inc | 27 +
>  1 file changed, 27 insertions(+)
>
> diff --git a/target/riscv/insn_trans/trans_rvv.c.inc 
> b/target/riscv/insn_trans/trans_rvv.c.inc
> index 275fded6e4..57953923d5 100644
> --- a/target/riscv/insn_trans/trans_rvv.c.inc
> +++ b/target/riscv/insn_trans/trans_rvv.c.inc
> @@ -652,6 +652,7 @@ static bool ldst_us_trans(uint32_t vd, uint32_t rs1, 
> uint32_t data,
>
>  TCGLabel *over = gen_new_label();
>  tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
> +tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
>
>  dest = tcg_temp_new_ptr();
>  mask = tcg_temp_new_ptr();
> @@ -818,6 +819,7 @@ static bool ldst_stride_trans(uint32_t vd, uint32_t rs1, 
> uint32_t rs2,
>
>  TCGLabel *over = gen_new_label();
>  tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
> +tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
>
>  dest = tcg_temp_new_ptr();
>  mask = tcg_temp_new_ptr();
> @@ -925,6 +927,7 @@ static bool ldst_index_trans(uint32_t vd, uint32_t rs1, 
> uint32_t vs2,
>
>  TCGLabel *over = gen_new_label();
>  tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
> +tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
>
>  dest = tcg_temp_new_ptr();
>  mask = tcg_temp_new_ptr();
> @@ -1067,6 +1070,7 @@ static bool ldff_trans(uint32_t vd, uint32_t rs1, 
> uint32_t data,
>
>  TCGLabel *over = gen_new_label();
>  tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
> +tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
>
>  dest = tcg_temp_new_ptr();
>  mask = tcg_temp_new_ptr();
> @@ -1216,6 +1220,7 @@ do_opivv_gvec(DisasContext *s, arg_rmrr *a, GVecGen3Fn 
> *gvec_fn,
>  }
>
>  tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
> +tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
>
>  if (a->vm && s->vl_eq_vlmax) {
>  gvec_fn(s->sew, vreg_ofs(s, a->rd),
> @@ -1263,6 +1268,7 @@ static bool opivx_trans(uint32_t vd, uint32_t rs1, 
> uint32_t vs2, uint32_t vm,
>
>  TCGLabel *over = gen_new_label();
>  tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
> +tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
>
>  dest = tcg_temp_new_ptr();
>  mask = tcg_temp_new_ptr();
> @@ -1427,6 +1433,7 @@ static bool opivi_trans(uint32_t vd, uint32_t imm, 
> uint32_t vs2, uint32_t vm,
>
>  TCGLabel *over = gen_new_label();
>  tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
> +tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
>
>  dest = tcg_temp_new_ptr();
>  mask = tcg_temp_new_ptr();
> @@ -1513,6 +1520,7 @@ static bool do_opivv_widen(DisasContext *s, arg_rmrr *a,
>  uint32_t data = 0;
>  TCGLabel *over = gen_new_label();
>  tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
> +tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
>
>  data = FIELD_DP32(data, VDATA, VM, a->vm);
>  data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
> @@ -1593,6 +1601,7 @@ static bool do_opiwv_widen(DisasContext *s, arg_rmrr *a,
>  uint32_t data = 0;
>  TCGLabel *over = gen_new_label();
>  tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
> +tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
>
>  data = FIELD_DP32(data, VDATA, VM, a->vm);
>  data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
> @@ -1670,6 +1679,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a)  
>\
>  }; \
>  TCGLabel *over = gen_new_label();  \
>  tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);  \
> +tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \
> \
>  data = FIELD_DP32(data, VDATA, VM, a->vm); \
>  data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
> @@ -1851,6 +1861,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a)  
>\
>  }; \
>  TCGLabel *over = gen_new_label();  \
>  tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);  \
> +tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \
>   

Re: [PATCH qemu v10 02/14] target/riscv: rvv: Rename ambiguous esz

2022-04-27 Thread Alistair Francis
On Thu, Apr 28, 2022 at 1:09 AM ~eopxd  wrote:
>
> From: eopXD 
>
> No functional change intended in this commit.
>
> Signed-off-by: eop Chen 
> Reviewed-by: Frank Chang 

Reviewed-by: Alistair Francis 

Alistair

> ---
>  target/riscv/vector_helper.c | 76 ++--
>  1 file changed, 38 insertions(+), 38 deletions(-)
>
> diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
> index e94caf1a3c..d0452a7756 100644
> --- a/target/riscv/vector_helper.c
> +++ b/target/riscv/vector_helper.c
> @@ -125,9 +125,9 @@ static inline int32_t vext_lmul(uint32_t desc)
>  /*
>   * Get the maximum number of elements can be operated.
>   *
> - * esz: log2 of element size in bytes.
> + * log2_esz: log2 of element size in bytes.
>   */
> -static inline uint32_t vext_max_elems(uint32_t desc, uint32_t esz)
> +static inline uint32_t vext_max_elems(uint32_t desc, uint32_t log2_esz)
>  {
>  /*
>   * As simd_desc support at most 2048 bytes, the max vlen is 1024 bits.
> @@ -136,7 +136,7 @@ static inline uint32_t vext_max_elems(uint32_t desc, 
> uint32_t esz)
>  uint32_t vlenb = simd_maxsz(desc);
>
>  /* Return VLMAX */
> -int scale = vext_lmul(desc) - esz;
> +int scale = vext_lmul(desc) - log2_esz;
>  return scale < 0 ? vlenb >> -scale : vlenb << scale;
>  }
>
> @@ -231,11 +231,11 @@ vext_ldst_stride(void *vd, void *v0, target_ulong base,
>   target_ulong stride, CPURISCVState *env,
>   uint32_t desc, uint32_t vm,
>   vext_ldst_elem_fn *ldst_elem,
> - uint32_t esz, uintptr_t ra, MMUAccessType access_type)
> + uint32_t log2_esz, uintptr_t ra, MMUAccessType access_type)
>  {
>  uint32_t i, k;
>  uint32_t nf = vext_nf(desc);
> -uint32_t max_elems = vext_max_elems(desc, esz);
> +uint32_t max_elems = vext_max_elems(desc, log2_esz);
>
>  for (i = env->vstart; i < env->vl; i++, env->vstart++) {
>  if (!vm && !vext_elem_mask(v0, i)) {
> @@ -244,7 +244,7 @@ vext_ldst_stride(void *vd, void *v0, target_ulong base,
>
>  k = 0;
>  while (k < nf) {
> -target_ulong addr = base + stride * i + (k << esz);
> +target_ulong addr = base + stride * i + (k << log2_esz);
>  ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, 
> ra);
>  k++;
>  }
> @@ -289,18 +289,18 @@ GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d)
>  /* unmasked unit-stride load and store operation*/
>  static void
>  vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
> - vext_ldst_elem_fn *ldst_elem, uint32_t esz, uint32_t evl,
> + vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uint32_t evl,
>   uintptr_t ra, MMUAccessType access_type)
>  {
>  uint32_t i, k;
>  uint32_t nf = vext_nf(desc);
> -uint32_t max_elems = vext_max_elems(desc, esz);
> +uint32_t max_elems = vext_max_elems(desc, log2_esz);
>
>  /* load bytes from guest memory */
>  for (i = env->vstart; i < evl; i++, env->vstart++) {
>  k = 0;
>  while (k < nf) {
> -target_ulong addr = base + ((i * nf + k) << esz);
> +target_ulong addr = base + ((i * nf + k) << log2_esz);
>  ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, 
> ra);
>  k++;
>  }
> @@ -399,12 +399,12 @@ vext_ldst_index(void *vd, void *v0, target_ulong base,
>  void *vs2, CPURISCVState *env, uint32_t desc,
>  vext_get_index_addr get_index_addr,
>  vext_ldst_elem_fn *ldst_elem,
> -uint32_t esz, uintptr_t ra, MMUAccessType access_type)
> +uint32_t log2_esz, uintptr_t ra, MMUAccessType access_type)
>  {
>  uint32_t i, k;
>  uint32_t nf = vext_nf(desc);
>  uint32_t vm = vext_vm(desc);
> -uint32_t max_elems = vext_max_elems(desc, esz);
> +uint32_t max_elems = vext_max_elems(desc, log2_esz);
>
>  /* load bytes from guest memory */
>  for (i = env->vstart; i < env->vl; i++, env->vstart++) {
> @@ -414,7 +414,7 @@ vext_ldst_index(void *vd, void *v0, target_ulong base,
>
>  k = 0;
>  while (k < nf) {
> -abi_ptr addr = get_index_addr(base, i, vs2) + (k << esz);
> +abi_ptr addr = get_index_addr(base, i, vs2) + (k << log2_esz);
>  ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, 
> ra);
>  k++;
>  }
> @@ -480,13 +480,13 @@ static inline void
>  vext_ldff(void *vd, void *v0, target_ulong base,
>CPURISCVState *env, uint32_t desc,
>vext_ldst_elem_fn *ldst_elem,
> -  uint32_t esz, uintptr_t ra)
> +  uint32_t log2_esz, uintptr_t ra)
>  {
>  void *host;
>  uint32_t i, k, vl = 0;
>  uint32_t nf = vext_nf(desc);
>  uint32_t vm = vext_vm(desc);
> -uint32_t max_elems = vext_max_elems(desc, esz);
> +uint32_t max_ele

Re: [PATCH qemu v10 01/14] target/riscv: rvv: Prune redundant ESZ, DSZ parameter passed

2022-04-27 Thread Alistair Francis
On Thu, Apr 28, 2022 at 1:14 AM ~eopxd  wrote:
>
> From: eopXD 
>
> No functional change intended in this commit.
>
> Signed-off-by: eop Chen 
> Reviewed-by: Frank Chang 

Can you please keep all previous tags when sending a new version?

Reviewed-by: Alistair Francis 

Alistair

> ---
>  target/riscv/vector_helper.c | 1132 +-
>  1 file changed, 565 insertions(+), 567 deletions(-)
>
> diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
> index 3bd4aac9c9..e94caf1a3c 100644
> --- a/target/riscv/vector_helper.c
> +++ b/target/riscv/vector_helper.c
> @@ -710,7 +710,6 @@ RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB)
>
>  static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2,
> CPURISCVState *env, uint32_t desc,
> -   uint32_t esz, uint32_t dsz,
> opivv2_fn *fn)
>  {
>  uint32_t vm = vext_vm(desc);
> @@ -727,23 +726,23 @@ static void do_vext_vv(void *vd, void *v0, void *vs1, 
> void *vs2,
>  }
>
>  /* generate the helpers for OPIVV */
> -#define GEN_VEXT_VV(NAME, ESZ, DSZ)   \
> +#define GEN_VEXT_VV(NAME) \
>  void HELPER(NAME)(void *vd, void *v0, void *vs1,  \
>void *vs2, CPURISCVState *env,  \
>uint32_t desc)  \
>  { \
> -do_vext_vv(vd, v0, vs1, vs2, env, desc, ESZ, DSZ, \
> +do_vext_vv(vd, v0, vs1, vs2, env, desc,   \
> do_##NAME);\
>  }
>
> -GEN_VEXT_VV(vadd_vv_b, 1, 1)
> -GEN_VEXT_VV(vadd_vv_h, 2, 2)
> -GEN_VEXT_VV(vadd_vv_w, 4, 4)
> -GEN_VEXT_VV(vadd_vv_d, 8, 8)
> -GEN_VEXT_VV(vsub_vv_b, 1, 1)
> -GEN_VEXT_VV(vsub_vv_h, 2, 2)
> -GEN_VEXT_VV(vsub_vv_w, 4, 4)
> -GEN_VEXT_VV(vsub_vv_d, 8, 8)
> +GEN_VEXT_VV(vadd_vv_b)
> +GEN_VEXT_VV(vadd_vv_h)
> +GEN_VEXT_VV(vadd_vv_w)
> +GEN_VEXT_VV(vadd_vv_d)
> +GEN_VEXT_VV(vsub_vv_b)
> +GEN_VEXT_VV(vsub_vv_h)
> +GEN_VEXT_VV(vsub_vv_w)
> +GEN_VEXT_VV(vsub_vv_d)
>
>  typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i);
>
> @@ -773,7 +772,6 @@ RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB)
>
>  static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2,
> CPURISCVState *env, uint32_t desc,
> -   uint32_t esz, uint32_t dsz,
> opivx2_fn fn)
>  {
>  uint32_t vm = vext_vm(desc);
> @@ -790,27 +788,27 @@ static void do_vext_vx(void *vd, void *v0, target_long 
> s1, void *vs2,
>  }
>
>  /* generate the helpers for OPIVX */
> -#define GEN_VEXT_VX(NAME, ESZ, DSZ)   \
> +#define GEN_VEXT_VX(NAME) \
>  void HELPER(NAME)(void *vd, void *v0, target_ulong s1,\
>void *vs2, CPURISCVState *env,  \
>uint32_t desc)  \
>  { \
> -do_vext_vx(vd, v0, s1, vs2, env, desc, ESZ, DSZ,  \
> +do_vext_vx(vd, v0, s1, vs2, env, desc,\
> do_##NAME);\
>  }
>
> -GEN_VEXT_VX(vadd_vx_b, 1, 1)
> -GEN_VEXT_VX(vadd_vx_h, 2, 2)
> -GEN_VEXT_VX(vadd_vx_w, 4, 4)
> -GEN_VEXT_VX(vadd_vx_d, 8, 8)
> -GEN_VEXT_VX(vsub_vx_b, 1, 1)
> -GEN_VEXT_VX(vsub_vx_h, 2, 2)
> -GEN_VEXT_VX(vsub_vx_w, 4, 4)
> -GEN_VEXT_VX(vsub_vx_d, 8, 8)
> -GEN_VEXT_VX(vrsub_vx_b, 1, 1)
> -GEN_VEXT_VX(vrsub_vx_h, 2, 2)
> -GEN_VEXT_VX(vrsub_vx_w, 4, 4)
> -GEN_VEXT_VX(vrsub_vx_d, 8, 8)
> +GEN_VEXT_VX(vadd_vx_b)
> +GEN_VEXT_VX(vadd_vx_h)
> +GEN_VEXT_VX(vadd_vx_w)
> +GEN_VEXT_VX(vadd_vx_d)
> +GEN_VEXT_VX(vsub_vx_b)
> +GEN_VEXT_VX(vsub_vx_h)
> +GEN_VEXT_VX(vsub_vx_w)
> +GEN_VEXT_VX(vsub_vx_d)
> +GEN_VEXT_VX(vrsub_vx_b)
> +GEN_VEXT_VX(vrsub_vx_h)
> +GEN_VEXT_VX(vrsub_vx_w)
> +GEN_VEXT_VX(vrsub_vx_d)
>
>  void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc)
>  {
> @@ -889,30 +887,30 @@ RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, 
> DO_ADD)
>  RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB)
>  RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB)
>  RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB)
> -GEN_VEXT_VV(vwaddu_vv_b, 1, 2)
> -GEN_VEXT_VV(vwaddu_vv_h, 2, 4)
> -GEN_VEXT_VV(vwaddu_vv_w, 4, 8)
> -GEN_VEXT_VV(vwsubu_vv_b, 1, 2)
> -GEN_VEXT_VV(vwsubu_vv_h, 2, 4)
> -GEN_VEXT_VV(vwsubu_vv_w, 4, 8)
> -GEN_VEXT_VV(vwadd_vv_b, 1, 2)
> -GEN_VEXT_VV(vwadd_vv_h, 2, 4)
> -GEN_VEXT_VV(vwadd_vv_w, 4, 8)
> -GEN_VEXT_VV(vwsub_vv_b, 1, 2)
> -GEN_VEXT_VV(vwsub_vv_h, 2, 4)
> -GEN_VEXT_VV(vwsub_vv_w, 4, 8)
> -GEN_VEXT_VV(vwaddu_wv_b, 1, 2)
> -GEN_VEXT_VV(vwaddu_wv_h, 2, 4)
> -GEN_VEXT_VV(vwaddu_wv_w, 4, 8)
> -GEN_VEXT_VV(vwsubu_wv_b, 1, 2)
> -GEN_VEXT_VV(vwsubu_wv_h, 2, 4)
> -GEN_VEXT_VV(vwsubu_wv_w, 4, 8)
> -GEN_VEXT_VV(vwadd_wv_b, 1, 2)
> -GEN_VEXT_VV(vwadd_wv_h, 2, 4)

Re: [RFC PATCH 5/7] target/ppc: Implemented xvf16ger*

2022-04-27 Thread Richard Henderson

On 4/27/22 14:11, Lucas Mateus Martins Araujo e Castro wrote:

Please do convert this from a macro.  Given that float16 and bfloat16 are
addressed the same, I think the only callback you need is the conversion from
float16_to_float64.  Drop the bf16 accessor to ppc_vsr_t.

Will do, although I'm considering instead of the callback being the conversion, maybe have 
it be a 4 float multiplication

     typedef float32 mul_4float(float16, float16, float16, float16);
Since float16 and bfloat16 are addressed the same, any thoughts?


The multiplication would be identical for the two types -- only the conversion 
is different.


r~



Re: [RFC PATCH 2/7] target/ppc: Implemented xvi*ger* instructions

2022-04-27 Thread Richard Henderson

On 4/27/22 13:24, Lucas Mateus Martins Araujo e Castro wrote:


On 26/04/2022 20:40, Richard Henderson wrote:


On 4/26/22 05:50, Lucas Mateus Castro(alqotel) wrote:

+%xx_at  23:3 !function=times_4
+@XX3_at .. ... .. . .  ... &XX3 xt=%xx_at xb=%xx_xb


Hmm.  Depends, I suppose on whether you want acc[0-7] or vsr[0-28]
I mostly used VSR function here, but since I'll change the patch 1 to your suggestion 
(which will require creating acc_full_offset) I'll make a few changes to create some 
functions for the accumulator



+/*
+ * Packed VSX Integer GER Flags
+ * 00 - no accumulation no saturation
+ * 01 - accumulate but no saturation
+ * 10 - no accumulation but with saturation
+ * 11 - accumulate with saturation
+ */
+static inline bool get_sat(uint32_t flags)
+{
+    return flags & 0x2;
+}
+
+static inline bool get_acc(uint32_t flags)
+{
+    return flags & 0x1;
+}


Better to have separate helpers for these?  They'd be immediate operands to the 
function
replacing XVIGER (see below) and thus optimize well.
Do you mean different functions or a function that receives packed_flags along with the 
callback functions?


I mean separate helper entry points, which use a common function that receives these as 
separate boolean arguments, along with the callbacks.  Use QEMU_FLATTEN on the helper 
entry points to ensure that everything is inlined and the constant args are optimized.


In this case it'd be necessary to receive 2 xviger_extract functions since XVI8GER4* 
multiply one value as signed and the other as unsigned (and other integer GER treat both 
as signed).


Certainly.



An alternative would be to isolate the innermost loop into a different 
function, like:

     typedef int64_t do_ger(int32_t a, int32_t b, int32_t at, int32_t pmsk);

     static int64_t ger_rank4(int32_t a, int32_t b, int32_t at, int32_t mask)
     {
         int64_t psum = 0, i;
         for (i = 0; i < 4; i++, mask >>= 1) {
         if (mask & 1) {
             psum += (sextract32(a, i * 8, 8)) * (extract32(b, i * 8, 8));
        }
         }
         return psum;
     }

That way we could avoid having 'rank' as a parameter, what do you think?


Reasonable.  I certainly like extracting uint32_t from the vector generically and not 
having to pass that on further.



Why are you passing register numbers instead of pointers, like everywhere else?
Because here we are not working only with 1 register per register number, the ACC uses 4 
and the XVF64GER* needs to use XA and XA+1, and while VSR is an array so I could do 
ppc_vsr_ptr+1 I thought it was better not to access memory I was not given a pointer to, 
so I passed XA so I can request cpu_vsr_ptr(env, xa) and cpu_vsr_ptr(env, xa + 1)


I think using cpu_vsr_ptr is the mistake.

It might be clarifying to define a ppc_acc_t, if only as a typedef of ppc_vsr_t.  The 
acc_full_offset function will compute the offset for this pointer and, importantly, will 
be the place to modify if and when the architecture changes to allow or require separate 
storage for the ACC registers.



r~



Re: [Qemu-devel] [PATCH 2/7] target/openrisc: add shutdown logic

2022-04-27 Thread Stafford Horne
On Wed, Apr 27, 2022 at 07:47:33PM +0100, Peter Maydell wrote:
> On Wed, 27 Apr 2022 at 18:46, Jason A. Donenfeld  wrote:
> >
> > Hey Stafford,
> >
> > On Mon, Apr 17, 2017 at 08:23:51AM +0900, Stafford Horne wrote:
> > > In openrisc simulators we use hooks like 'l.nop 1' to cause the
> > > simulator to exit.  Implement that for qemu too.
> > >
> > > Reported-by: Waldemar Brodkorb 
> > > Signed-off-by: Stafford Horne 
> >
> > I'm curious as to why this never got merged. I noticed I'm entirely unable
> > to shut down or to reboot (which is mostly what I care about) Linux from
> > OpenRISC. It just hangs.
> 
> This kind of thing needs to be either:
>  (1) we're modelling real hardware and that real hardware has a
> device or other mechanism guest code can prod to cause a power-off
> or reboot. Then we model that device, and guest code triggers a
> shutdown or reboot exactly as it would on the real hardware.
>  (2) there is an architecturally defined ABI for simulators, debug
> stubs, etc, that includes various operations typically including
> an "exit the simulator" function. (Arm semihosting is an example
> of this.) In that case we can implement that functionality,
> guarded by and controlled by the appropriate command line options.
> (This is generally not as nice as option 1, because the guest code
> has to be compiled to have support for semihosting and also because
> turning it on is usually also giving implicit permission for the
> guest code to read and write arbitrary host files, etc.)
> 
> Either way, undocumented random hacks aren't a good idea, which
> is why this wasn't merged.

Yes, this is what was brought up before.  At that time semihosting was mentioned
and I tried to understand what it was but didn't really understand it as a
general concept.  Is this something Arm-specific?

Since the qemu or1k-sim defines our "simulator", I suspect I could add a
definition of our simulator ABI to the OpenRISC architecture specification.  The
simulator's use of l.nop N as ABI hooks is a de-facto standard for OpenRISC.
From the way you describe this now, I take it that if we document this as an
architecture simulation ABI the patch would be accepted.

-Stafford



Re: [PATCH v3 3/3] nbd/server: Allow MULTI_CONN for shared writable exports

2022-04-27 Thread Eric Blake
On Wed, Apr 27, 2022 at 05:52:09PM +0200, Kevin Wolf wrote:
> Am 14.03.2022 um 21:38 hat Eric Blake geschrieben:
> > According to the NBD spec, a server that advertises
> > NBD_FLAG_CAN_MULTI_CONN promises that multiple client connections will
> > not see any cache inconsistencies: when properly separated by a single
> > flush, actions performed by one client will be visible to another
> > client, regardless of which client did the flush.  We satisfy these
> > conditions in qemu when our block layer is backed by the local
> > filesystem (by virtue of the semantics of fdatasync(), and the fact
> > that qemu itself is not buffering writes beyond flushes).  It is
> > harder to state whether we satisfy these conditions for network-based
> > protocols, so the safest course of action is to allow users to opt-in
> > to advertising multi-conn.
> 
> Do you have an example of how this could be unsafe?

Nothing direct.  I tried to turn this on unconditionally in an earlier
version, and we waffled about whether we could prove that network
block backends (such as gluster) provide us the safety that the NBD
spec demands:

https://lists.gnu.org/archive/html/qemu-devel/2021-09/msg00038.html
https://lists.gnu.org/archive/html/qemu-devel/2021-10/msg06744.html

> 
> As I understand it, the NBD server has a single BlockBackend and
> therefore is a single client for the backend, be it file-posix or any
> network-based protocol. It doesn't really make a difference for the
> storage from how many different NBD clients the requests are coming.
> 
> I would have expected that cache coherency of the protocol level driver
> would only matter if you had two QEMU processes accessing the same file
> concurrently.

Or a multi-pathed connection to network storage, where one QEMU
process accesses the network device, but those accesses may
round-robin which server they reach, and where any caching at an
individual server may be inconsistent with what is seen on another
server unless flushing is used to force the round-robin access to
synchronize between the multi-path views.

> 
> In fact, I don't think we even need the flush restriction from the NBD
> spec. All clients see the same state (that of the NBD server
> BlockBackend) even without anyone issuing any flush. The flush is only
> needed to make sure that cached data is written to the backing storage
> when writeback caches are involved.
> 
> Please correct me if I'm misunderstanding something here.

Likewise me, if I'm being overly cautious.

I can certainly write a simpler v4 that just always advertises
MULTI_CONN if we allow more than one client, without any knob to
override it; it's just that it is harder to write a commit message
justifying why I think it is safe to do so.

> 
> > We may later tweak defaults to advertise
> > by default when the block layer can confirm that the underlying
> > protocol driver is cache consistent between multiple writers, but for
> > now, this at least allows savvy users (such as virt-v2v or nbdcopy) to
> > explicitly start qemu-nbd (new -m command-line option) or
> > qemu-storage-daemon (new qapi field 'multi-conn') with multi-conn
> > advertisement in a known-safe setup where the client end can then
> > benefit from parallel clients.
> > 
> > Note, however, that we don't want to advertise MULTI_CONN when we know
> > that a second client cannot connect (for historical reasons, qemu-nbd
> > defaults to a single connection while nbd-server-add and QMP commands
> > default to unlimited connections; but we already have existing means
> > to let either style of NBD server creation alter those defaults).  The
> > harder part of this patch is setting up an iotest to demonstrate
> > behavior of multiple NBD clients to a single server.  It might be
> > possible with parallel qemu-io processes, but I found it easier to do
> > in python with the help of libnbd, and help from Nir and Vladimir in
> > writing the test.
> > 
> > Signed-off-by: Eric Blake 
> > Suggested-by: Nir Soffer 
> > Suggested-by: Vladimir Sementsov-Ogievskiy 
> 
> > @@ -709,6 +714,17 @@ int main(int argc, char **argv)
> >  exit(EXIT_FAILURE);
> >  }
> >  break;
> > +case 'm':
> > +{
> > +Error *err = NULL;
> > +multi_conn = qapi_enum_parse(&OnOffAuto_lookup, optarg,
> > + ON_OFF_AUTO_AUTO, &err);
> > +if (err) {
> > +error_report_err(err);
> > +exit(EXIT_FAILURE);
> > +}
> 
> I think this is the same as passing &error_fatal.

Yes, sounds right.

> 
> > +break;
> > +}
> >  case 'f':
> >  fmt = optarg;
> >  break;
> > diff --git a/tests/qemu-iotests/tests/nbd-multiconn 
> > b/tests/qemu-iotests/tests/nbd-multiconn
> > new file mode 100755
> > index ..7d1179b33b05
> > --- /dev/null
> > +++ b/tests/qemu-iotests/tests/nbd-multiconn
> > @@ -0,0 +1,157 @@
> > +#

Re: [RFC PATCH 5/7] target/ppc: Implemented xvf16ger*

2022-04-27 Thread Lucas Mateus Martins Araujo e Castro


On 26/04/2022 21:26, Richard Henderson wrote:

On 4/26/22 05:50, Lucas Mateus Castro(alqotel) wrote:
+#define VSXGER16(NAME, ORIG_T, 
OR_EL)   \
+    void NAME(CPUPPCState *env, uint32_t a_r, uint32_t 
b_r, \
+  uint32_t  at_r, uint32_t mask, uint32_t 
packed_flags) \

+ { \
+    ppc_vsr_t 
*at;  \
+    float32 psum, aux_acc, va, vb, vc, 
vd;  \
+    int i, j, xmsk_bit, 
ymsk_bit;   \
+    uint8_t xmsk = mask & 
0x0F; \
+    uint8_t ymsk = (mask >> 4) & 
0x0F;  \
+    uint8_t pmsk = (mask >> 8) & 
0x3;   \
+    ppc_vsr_t *b = cpu_vsr_ptr(env, 
b_r);   \
+    ppc_vsr_t *a = cpu_vsr_ptr(env, 
a_r);   \
+    float_status *excp_ptr = 
&env->fp_status;   \
+    bool acc = 
ger_acc_flag(packed_flags);  \
+    bool neg_acc = 
ger_neg_acc_flag(packed_flags);  \
+    bool neg_mul = 
ger_neg_mul_flag(packed_flags);  \
+    for (i = 0, xmsk_bit = 1 << 3; i < 4; i++, xmsk_bit >>= 1) 
{    \
+    at = cpu_vsr_ptr(env, at_r + 
i);    \
+    for (j = 0, ymsk_bit = 1 << 3; j < 4; j++, ymsk_bit >>= 
1) {\
+    if ((xmsk_bit & xmsk) && (ymsk_bit & ymsk)) 
{   \
+    va = !(pmsk & 2) ? float32_zero 
:   \
+   GET_VSR(Vsr##OR_EL, 
a,   \
+   2 * i, ORIG_T, 
float32); \
+    vb = !(pmsk & 2) ? float32_zero 
:   \
+   GET_VSR(Vsr##OR_EL, 
b,   \
+   2 * j, ORIG_T, 
float32); \
+    vc = !(pmsk & 1) ? float32_zero 
:   \
+   GET_VSR(Vsr##OR_EL, 
a,   \
+    2 * i + 1, ORIG_T, 
float32);\
+    vd = !(pmsk & 1) ? float32_zero 
:   \
+   GET_VSR(Vsr##OR_EL, 
b,   \
+    2 * j + 1, ORIG_T, 
float32);\
+    psum = float32_mul(va, vb, 
excp_ptr);   \
+    psum = float32_muladd(vc, vd, psum, 0, 
excp_ptr);   \


This isn't correct -- the intermediate 'prod' (the first multiply) is not
rounded.  I think the correct way to implement this (barring new softfloat
functions) is to compute the intermediate product as float64 with
float_round_to_odd, then float64r32_muladd into the correct rounding mode
to finish.

While not mentioned in the pseudocode the instruction description says:

- Let prod be the single-precision product of src10 and src20

Which I understand as the result of the first multiplication being 
stored in a float32


But in xvbf16ger2* it's different (and I think this is the reason the
last patch is producing the wrong sign in some 0 and inf results);
the description says:


- Let prod be the product of src10 and src20, having infinite precision
and unbounded exponent range.
- Let psum be the sum of the product, src11 multiplied by src21, and
prod, having infinite precision and unbounded exponent range.
- Let r1 be the value psum with its significand rounded to 24-bit 
precision using the rounding mode specified by RN, but retaining 
unbounded exponent range (i.e., cannot overflow or underflow).




+    if (acc) 
{  \
+    if (neg_mul) 
{  \
+    psum = 
float32_neg(psum);   \

+ }   \
+    if (neg_acc) 
{  \
+    aux_acc = 
float32_neg(at->VsrSF(j));    \
+    } else 
{    \
+    aux_acc = 
at->VsrSF(j); \

+ }   \
+    at->VsrSF(j) = float32_add(psum, 
aux_acc,   \

+ excp_ptr);   \


This one, thankfully, uses the rounded intermediate result 'msum', so 
is ok.
Yes, this one is the easier one to deal with: the description for
xvf16ger2* specifies that msum and the result are rounded to
single-precision, and the description for xvbf16ger2 specifies
that r1 is 'rounded to a 24-bit significand precision and 8-bit exponent
range (i.e., single-precision)'


Please do convert this from a macro.  Given that float16 and bfloat16 
are addressed the
same, I think the

Re: [PATCH] linux-user: Add PowerPC ISA 3.1 and MMA to hwcap

2022-04-27 Thread Daniel Henrique Barboza




On 4/27/22 03:42, Joel Stanley wrote:

These are new hwcap bits added for power10.

Signed-off-by: Joel Stanley 
---
MMA support for TCG is on the list so I think it makes sense for this to
land after those are merged.


I believe you mean this series:


[RFC PATCH 0/7] VSX MMA Implementation


In that case I'll queue this patch together with it.



Thanks,


Daniel




I tested my patch with this program:

  https://github.com/shenki/p10_tests

$ qemu-ppc64le -cpu power10  -L ~/ppc64le/ ./test -c
HWCAP: 0x58000580 HWCAP2: 0x8ee6
ISAv3.1: Yes
MMA: Yes

$ qemu-ppc64le -cpu power9  -L ~/ppc64le/ ./test -c
HWCAP: 0x58000580 HWCAP2: 0x8ee0
ISAv3.1: No
MMA: No

  linux-user/elfload.c | 4 
  1 file changed, 4 insertions(+)

diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index 61063fd974e5..0908692e62b3 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -779,6 +779,8 @@ enum {
  QEMU_PPC_FEATURE2_DARN = 0x0020, /* darn random number insn */
  QEMU_PPC_FEATURE2_SCV = 0x0010, /* scv syscall */
  QEMU_PPC_FEATURE2_HTM_NO_SUSPEND = 0x0008, /* TM w/o suspended state 
*/
+QEMU_PPC_FEATURE2_ARCH_3_1 = 0x0004, /* ISA 3.1 */
+QEMU_PPC_FEATURE2_MMA = 0x0002, /* Matrix-Multiply Assist */
  };
  
  #define ELF_HWCAP get_elf_hwcap()

@@ -836,6 +838,8 @@ static uint32_t get_elf_hwcap2(void)
QEMU_PPC_FEATURE2_VEC_CRYPTO);
  GET_FEATURE2(PPC2_ISA300, QEMU_PPC_FEATURE2_ARCH_3_00 |
   QEMU_PPC_FEATURE2_DARN | QEMU_PPC_FEATURE2_HAS_IEEE128);
+GET_FEATURE2(PPC2_ISA310, QEMU_PPC_FEATURE2_ARCH_3_1 |
+ QEMU_PPC_FEATURE2_MMA);
  
  #undef GET_FEATURE

  #undef GET_FEATURE2




[RESEND PATCH] hw/dma: fix crash caused by race condition

2022-04-27 Thread Tong Zhang
assert(dbs->acb) is meant to check the return value of io_func, as
documented in commit 6bee44ea34 ("dma: the passed io_func does not
return NULL"). However, there is a chance that after calling
aio_context_release(dbs->ctx), the dma_blk_cb function is called before
the assertion and dbs->acb is set to NULL again at line 121. Thus, when
we run the assert at line 181, it will fail.

  softmmu/dma-helpers.c:181: dma_blk_cb: Assertion `dbs->acb' failed.

Reported-by: Francisco Londono 
Signed-off-by: Tong Zhang 
---
 softmmu/dma-helpers.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/softmmu/dma-helpers.c b/softmmu/dma-helpers.c
index 7820fec54c..cb81017928 100644
--- a/softmmu/dma-helpers.c
+++ b/softmmu/dma-helpers.c
@@ -177,8 +177,8 @@ static void dma_blk_cb(void *opaque, int ret)
 aio_context_acquire(dbs->ctx);
 dbs->acb = dbs->io_func(dbs->offset, &dbs->iov,
 dma_blk_cb, dbs, dbs->io_func_opaque);
-aio_context_release(dbs->ctx);
 assert(dbs->acb);
+aio_context_release(dbs->ctx);
 }
 
 static void dma_aio_cancel(BlockAIOCB *acb)
-- 
2.25.1



Re: [PATCH v4 2/6] 9pfs: fix qemu_mknodat(S_IFSOCK) on macOS

2022-04-27 Thread Greg Kurz
On Wed, 27 Apr 2022 20:54:17 +0200
Christian Schoenebeck  wrote:

> mknod() on macOS does not support creating sockets, so divert to
> call sequence socket(), bind() and fchmodat() respectively if S_IFSOCK
> was passed with mode argument.
> 
> Link: https://lore.kernel.org/qemu-devel/17933734.zYzKuhC07K@silver/
> Signed-off-by: Christian Schoenebeck 
> ---
>  hw/9pfs/9p-util-darwin.c | 45 +++-
>  1 file changed, 44 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/9pfs/9p-util-darwin.c b/hw/9pfs/9p-util-darwin.c
> index e24d09763a..7d00db47a9 100644
> --- a/hw/9pfs/9p-util-darwin.c
> +++ b/hw/9pfs/9p-util-darwin.c
> @@ -74,6 +74,45 @@ int fsetxattrat_nofollow(int dirfd, const char *filename, 
> const char *name,
>   */
>  #if defined CONFIG_PTHREAD_FCHDIR_NP
>  
> +static int create_socket_file_at_cwd(const char *filename, mode_t mode) {
> +int fd, err;
> +struct sockaddr_un addr = {
> +.sun_family = AF_UNIX
> +};
> +
> +/*
> + * sun_path is only 104 bytes, explicit filename length check required
> + */
> +if (sizeof(addr.sun_path) - 1 < strlen(filename) + 2) {

True but I was a bit puzzled by the math until I realized the '+ 2' was
for the prepended "./" ;-)

> +errno = ENAMETOOLONG;
> +return -1;
> +}
> +fd = socket(PF_UNIX, SOCK_DGRAM, 0);
> +if (fd == -1) {
> +return fd;
> +}
> +snprintf(addr.sun_path, sizeof(addr.sun_path), "./%s", filename);

What about the more generic approach of checking snprintf()'s return
value ? If it is >= sizeof(addr.sun_path) then truncation occurred.

> +err = bind(fd, (struct sockaddr *) &addr, sizeof(addr));
> +if (err == -1) {
> +goto out;
> +}
> +/*
> + * FIXME: Should rather be using descriptor-based fchmod() on the
> + * socket file descriptor above (preferably before bind() call),
> + * instead of path-based fchmodat(), to prevent concurrent transient
> + * state issues between creating the named FIFO file at bind() and
> + * delayed adjustment of permissions at fchmodat(). However currently
> + * macOS (12.x) does not support such operations on socket file
> + * descriptors yet.
> + *
> + * Filed report with Apple: FB9997731
> + */
> +err = fchmodat(AT_FDCWD, filename, mode, AT_SYMLINK_NOFOLLOW_ANY);
> +out:
> +close_preserve_errno(fd);

You could close(fd) earlier now, but you might want to keep the code
as is in case FB9997731 gets proper attention.

Anyway, this should do the job so:

Reviewed-by: Greg Kurz 

> +return err;
> +}
> +
>  int qemu_mknodat(int dirfd, const char *filename, mode_t mode, dev_t dev)
>  {
>  int preserved_errno, err;
> @@ -93,7 +132,11 @@ int qemu_mknodat(int dirfd, const char *filename, mode_t 
> mode, dev_t dev)
>  if (pthread_fchdir_np(dirfd) < 0) {
>  return -1;
>  }
> -err = mknod(filename, mode, dev);
> +if (S_ISSOCK(mode)) {
> +err = create_socket_file_at_cwd(filename, mode);
> +} else {
> +err = mknod(filename, mode, dev);
> +}
>  preserved_errno = errno;
>  /* Stop using the thread-local cwd */
>  pthread_fchdir_np(-1);




Re: [RFC PATCH 2/7] target/ppc: Implemented xvi*ger* instructions

2022-04-27 Thread Lucas Mateus Martins Araujo e Castro


On 26/04/2022 20:40, Richard Henderson wrote:


On 4/26/22 05:50, Lucas Mateus Castro(alqotel) wrote:

+%xx_at  23:3 !function=times_4
+@XX3_at .. ... .. . .  ... &XX3 
xt=%xx_at xb=%xx_xb


Hmm.  Depends, I suppose on whether you want acc[0-7] or vsr[0-28]
I mostly used VSR function here, but since I'll change the patch 1 to 
your suggestion (which will require creating acc_full_offset) I'll make 
a few changes to create some functions for the accumulator



+/*
+ * Packed VSX Integer GER Flags
+ * 00 - no accumulation no saturation
+ * 01 - accumulate but no saturation
+ * 10 - no accumulation but with saturation
+ * 11 - accumulate with saturation
+ */
+static inline bool get_sat(uint32_t flags)
+{
+    return flags & 0x2;
+}
+
+static inline bool get_acc(uint32_t flags)
+{
+    return flags & 0x1;
+}


Better to have separate helpers for these?  They'd be immediate 
operands to the function

replacing XVIGER (see below) and thus optimize well.
Do you mean different functions or a function that receives packed_flags 
along with the callback functions?


+#define GET_VsrN(a, i) (extract32(a->VsrB((i) / 2), (i) % 2 ? 4 : 0, 
4))

+#define GET_VsrB(a, i) a->VsrB(i)
+#define GET_VsrH(a, i) a->VsrH(i)
+
+#define GET_VsrSN(a, i) (sextract32(a->VsrSB((i) / 2), (i) % 2 ? 4 : 
0, 4))

+#define GET_VsrSB(a, i) a->VsrSB(i)
+#define GET_VsrSH(a, i) a->VsrSH(i)


These can be made into functions of the form

    typedef int32_t xviger_extract(ppc_vsr_t *a, int i);

In this case it'd be necessary to receive 2 xviger_extract functions 
since XVI8GER4* multiply one value as signed and the other as unsigned 
(and other integer GER treat both as signed).


An alternative would be to isolate the innermost loop into a different 
function, like:


    typedef int64_t do_ger(int32_t a, int32_t b, int32_t at, int32_t pmsk);

    static int64_t ger_rank4(int32_t a, int32_t b, int32_t at, int32_t 
mask)

    {
        int64_t psum = 0, i;
        for (i = 0; i < 4; i++, mask >>= 1) {
        if (mask & 1) {
            psum += (sextract32(a, i * 8, 8)) * (extract32(b, i * 
8, 8));

       }
        }
        return psum;
    }

That way we could avoid having 'rank' as a parameter, what do you think?





diff --git a/target/ppc/internal.h b/target/ppc/internal.h
index 8094e0b033..a994d98238 100644
--- a/target/ppc/internal.h
+++ b/target/ppc/internal.h
@@ -291,4 +291,32 @@ G_NORETURN void 
ppc_cpu_do_unaligned_access(CPUState *cs, vaddr addr,

  uintptr_t retaddr);
  #endif

+/*
+ * Auxiliary functions to pack/unpack masks for GER instructions.
+ *
+ * Packed format:
+ *  Bits 0-3: xmsk
+ *  Bits 4-7: ymsk
+ *  Bits 8-15: pmsk
+ */
+static inline uint8_t ger_get_xmsk(uint32_t packed_masks)
+{
+    return packed_masks & 0xF;
+}
+
+static inline uint8_t ger_get_ymsk(uint32_t packed_masks)
+{
+    return (packed_masks >> 4) & 0xF;
+}
+
+static inline uint8_t ger_get_pmsk(uint32_t packed_masks)
+{
+    return (packed_masks >> 8) & 0xFF;
+}
+
+static inline int ger_pack_masks(int pmsk, int ymsk, int xmsk)
+{
+    return (pmsk & 0xFF) << 8 | (ymsk & 0xF) << 4 | (xmsk & 0xF);
+}


Use hw/registerfields.h.  C.f. PREDDESC in target/arm/internals.h.

Ok, will do



+static bool do_ger_XX3(DisasContext *ctx, arg_XX3 *a, uint32_t op,
+ void (*helper)(TCGv_env, TCGv_i32, 
TCGv_i32,
+    TCGv_i32, TCGv_i32, 
TCGv_i32))

+{
+    uint32_t mask;
+    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
+    REQUIRE_VSX(ctx);
+    if (unlikely((a->xa / 4 == a->xt / 4) || (a->xb / 4 == a->xt / 
4))) {

+    gen_invalid(ctx);
+    return true;
+    }
+
+    mask = 0x;
+    helper(cpu_env, tcg_constant_i32(a->xa), tcg_constant_i32(a->xb),
+   tcg_constant_i32(a->xt), tcg_constant_i32(mask),
+   tcg_constant_i32(op));
+    return true;
+}


Why are you passing register numbers instead of pointers, like 
everywhere else?
Because here we are not working with only 1 register per register
number: the ACC uses 4 and the XVF64GER* needs to use XA and XA+1. And
while VSR is an array, so I could do ppc_vsr_ptr+1, I thought it was
better not to access memory I was not given a pointer to, so I passed XA
so that I can request cpu_vsr_ptr(env, xa) and cpu_vsr_ptr(env, xa + 1).



r~

--
Lucas Mateus M. Araujo e Castro
Instituto de Pesquisas ELDORADO 


Departamento Computação Embarcada
Analista de Software Trainee
Aviso Legal - Disclaimer 

Re: [PATCH v4 1/6] 9pfs: fix qemu_mknodat(S_IFREG) on macOS

2022-04-27 Thread Greg Kurz
On Wed, 27 Apr 2022 20:54:04 +0200
Christian Schoenebeck  wrote:

> mknod() on macOS does not support creating regular files, so
> divert to openat_file() if S_IFREG is passed with mode argument.
> 
> Furthermore, 'man 2 mknodat' on Linux says: "Zero file type is
> equivalent to type S_IFREG".
> 

Thinking again I have mixed feelings about this... qemu_mknodat()
should certainly match POSIX semantics, even non-portable, as
described in [1] but I'm not sure it should mimic linux-specific
behaviors.

[1] https://pubs.opengroup.org/onlinepubs/9699919799/functions/mknod.html

> Link: https://lore.kernel.org/qemu-devel/17933734.zYzKuhC07K@silver/
> Signed-off-by: Christian Schoenebeck 
> Reviewed-by: Will Cohen 
> Reviewed-by: Greg Kurz 
> ---
>  hw/9pfs/9p-util-darwin.c | 9 +
>  1 file changed, 9 insertions(+)
> 
> diff --git a/hw/9pfs/9p-util-darwin.c b/hw/9pfs/9p-util-darwin.c
> index bec0253474..e24d09763a 100644
> --- a/hw/9pfs/9p-util-darwin.c
> +++ b/hw/9pfs/9p-util-darwin.c
> @@ -77,6 +77,15 @@ int fsetxattrat_nofollow(int dirfd, const char *filename, 
> const char *name,
>  int qemu_mknodat(int dirfd, const char *filename, mode_t mode, dev_t dev)
>  {
>  int preserved_errno, err;
> +
> +if (S_ISREG(mode) || !(mode & S_IFMT)) {

... so maybe I'd just check S_ISREG() here. Not a request, just food
for thought : sticking to POSIX semantics might help to make the code
more portable across all the new host supports that are showing up
these days.

> +int fd = openat_file(dirfd, filename, O_CREAT, mode);
> +if (fd == -1) {
> +return fd;
> +}
> +close(fd);
> +return 0;
> +}
>  if (!pthread_fchdir_np) {
>  error_report_once("pthread_fchdir_np() not available on this version 
> of macOS");
>  return -ENOTSUP;




Re: [PATCH 2/6] virtio-scsi: don't waste CPU polling the event virtqueue

2022-04-27 Thread Nir Soffer
On Wed, Apr 27, 2022 at 5:35 PM Stefan Hajnoczi  wrote:
>
> The virtio-scsi event virtqueue is not emptied by its handler function.
> This is typical for rx virtqueues where the device uses buffers when
> some event occurs (e.g. a packet is received, an error condition
> happens, etc).
>
> Polling non-empty virtqueues wastes CPU cycles. We are not waiting for
> new buffers to become available, we are waiting for an event to occur,
> so it's a misuse of CPU resources to poll for buffers.
>
> Introduce the new virtio_queue_aio_attach_host_notifier_no_poll() API,
> which is identical to virtio_queue_aio_attach_host_notifier() except
> that it does not poll the virtqueue.
>
> Before this patch the following command-line consumed 100% CPU in the
> IOThread polling and calling virtio_scsi_handle_event():
>
>   $ qemu-system-x86_64 -M accel=kvm -m 1G -cpu host \
>   --object iothread,id=iothread0 \
>   --device virtio-scsi-pci,iothread=iothread0 \
>   --blockdev 
> file,filename=test.img,aio=native,cache.direct=on,node-name=drive0 \
>   --device scsi-hd,drive=drive0
>
> After this patch CPU is no longer wasted.
>
> Reported-by: Nir Soffer 
> Signed-off-by: Stefan Hajnoczi 
> ---
>  include/hw/virtio/virtio.h  |  1 +
>  hw/scsi/virtio-scsi-dataplane.c |  2 +-
>  hw/virtio/virtio.c  | 13 +
>  3 files changed, 15 insertions(+), 1 deletion(-)
>
> diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
> index b31c4507f5..b62a35fdca 100644
> --- a/include/hw/virtio/virtio.h
> +++ b/include/hw/virtio/virtio.h
> @@ -317,6 +317,7 @@ EventNotifier *virtio_queue_get_host_notifier(VirtQueue 
> *vq);
>  void virtio_queue_set_host_notifier_enabled(VirtQueue *vq, bool enabled);
>  void virtio_queue_host_notifier_read(EventNotifier *n);
>  void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx);
> +void virtio_queue_aio_attach_host_notifier_no_poll(VirtQueue *vq, AioContext 
> *ctx);
>  void virtio_queue_aio_detach_host_notifier(VirtQueue *vq, AioContext *ctx);
>  VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector);
>  VirtQueue *virtio_vector_next_queue(VirtQueue *vq);
> diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c
> index 29575cbaf6..8bb6e6acfc 100644
> --- a/hw/scsi/virtio-scsi-dataplane.c
> +++ b/hw/scsi/virtio-scsi-dataplane.c
> @@ -138,7 +138,7 @@ int virtio_scsi_dataplane_start(VirtIODevice *vdev)
>
>  aio_context_acquire(s->ctx);
>  virtio_queue_aio_attach_host_notifier(vs->ctrl_vq, s->ctx);
> -virtio_queue_aio_attach_host_notifier(vs->event_vq, s->ctx);
> +virtio_queue_aio_attach_host_notifier_no_poll(vs->event_vq, s->ctx);
>
>  for (i = 0; i < vs->conf.num_queues; i++) {
>  virtio_queue_aio_attach_host_notifier(vs->cmd_vqs[i], s->ctx);
> diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
> index 9d637e043e..67a873f54a 100644
> --- a/hw/virtio/virtio.c
> +++ b/hw/virtio/virtio.c
> @@ -3534,6 +3534,19 @@ void virtio_queue_aio_attach_host_notifier(VirtQueue 
> *vq, AioContext *ctx)
>  virtio_queue_host_notifier_aio_poll_end);
>  }
>
> +/*
> + * Same as virtio_queue_aio_attach_host_notifier() but without polling. Use
> + * this for rx virtqueues and similar cases where the virtqueue handler
> + * function does not pop all elements. When the virtqueue is left non-empty
> + * polling consumes CPU cycles and should not be used.
> + */
> +void virtio_queue_aio_attach_host_notifier_no_poll(VirtQueue *vq, AioContext 
> *ctx)
> +{
> +aio_set_event_notifier(ctx, &vq->host_notifier, true,
> +   virtio_queue_host_notifier_read,
> +   NULL, NULL);
> +}
> +
>  void virtio_queue_aio_detach_host_notifier(VirtQueue *vq, AioContext *ctx)
>  {
>  aio_set_event_notifier(ctx, &vq->host_notifier, true, NULL, NULL, NULL);
> --
> 2.35.1
>

I tested patches 1 and 2 on top of 34723f59371f3fd02ea59b94674314b875504426
and it solved the issue.

Tested-by: Nir Soffer 

Nir




Re: [PATCH 1/6] virtio-scsi: fix ctrl and event handler functions in dataplane mode

2022-04-27 Thread Michael Tokarev

27.04.2022 17:35, Stefan Hajnoczi wrote:

Commit f34e8d8b8d48d73f36a67b6d5e492ef9784b5012 ("virtio-scsi: prepare
virtio_scsi_handle_cmd for dataplane") prepared the virtio-scsi cmd
virtqueue handler function to by used in both the dataplane and


Nitpick: "to BE used".

/mjt



Re: [PATCH] target/riscv: add scalar crypto related extenstion strings to isa_string

2022-04-27 Thread Jiatai He
This patch works as expected in QEMU; "zk" can be found in the Linux device
tree.

Tested-by: Jiatai He 




Re: [RFC 0/2] introduce QEMUMachind.cmd()

2022-04-27 Thread John Snow
On Fri, Apr 8, 2022 at 1:02 PM Vladimir Sementsov-Ogievskiy
 wrote:
>
> Hi all!
>
> I always dreamed about getting rid of pattern
>
> result = self.vm.qmp(...)
> self.assert_qmp(result, 'return', {})
>
> Here is a suggestion to switch to
>
> self.vm.cmd(...)
>
> pattern instead.

Yeah, I am absolutely on board for this!

>
> I'm not sure we really want to update so many tests. Maybe just commit
> patch 01, and use the new interface for new code. On the other hand, old
> code is always used as an example to write the new one.

I think it's worth updating all the old tests ... especially if you've
already done it here. We could even do something like what I did with
qemu_img() and qemu_io() and have the uncaught exception print a bunch
of information to the screen to help make it extremely obvious as to
what failed and why.

If you can rebase this, I'd love to review it more carefully - it
aligns with my own selfish goals and interests :) The Python branch
was merged recently and so we should be all set.

>
> The series is based on John's python branch.
>
> Vladimir Sementsov-Ogievskiy (2):
>   python/machine.py: upgrade vm.command() method
>   iotests: use vm.cmd() instead of vm.qmp() where appropriate
>
>  python/qemu/machine/machine.py|  16 +-
>  tests/qemu-iotests/030| 168 +++
>  tests/qemu-iotests/040| 167 +++---
>  tests/qemu-iotests/041| 474 --
>  tests/qemu-iotests/045|  15 +-
>  tests/qemu-iotests/055|  61 +--
>  tests/qemu-iotests/056|  23 +-
>  tests/qemu-iotests/093|  41 +-
>  tests/qemu-iotests/118| 221 
>  tests/qemu-iotests/124|  69 ++-
>  tests/qemu-iotests/129|  13 +-
>  tests/qemu-iotests/132|   5 +-
>  tests/qemu-iotests/139|  43 +-
>  tests/qemu-iotests/147|  30 +-
>  tests/qemu-iotests/151|  40 +-
>  tests/qemu-iotests/155|  53 +-
>  tests/qemu-iotests/165|   7 +-
>  tests/qemu-iotests/196|   3 +-
>  tests/qemu-iotests/205|   6 +-
>  tests/qemu-iotests/245| 245 -
>  tests/qemu-iotests/256|  34 +-
>  tests/qemu-iotests/257|  36 +-
>  tests/qemu-iotests/264|  31 +-
>  tests/qemu-iotests/281|  21 +-
>  tests/qemu-iotests/295|  27 +-
>  tests/qemu-iotests/296|  14 +-
>  tests/qemu-iotests/298|  13 +-
>  tests/qemu-iotests/300|  50 +-
>  tests/qemu-iotests/iotests.py |   6 +-
>  .../tests/migrate-bitmaps-postcopy-test   |  31 +-
>  tests/qemu-iotests/tests/migrate-bitmaps-test |  37 +-
>  .../qemu-iotests/tests/migrate-during-backup  |  40 +-
>  .../qemu-iotests/tests/migration-permissions  |   9 +-
>  tests/qemu-iotests/tests/mirror-top-perms |  15 +-
>  34 files changed, 821 insertions(+), 1243 deletions(-)

Is there anything missing, to your knowledge?

--js




[PATCH v4 6/6] 9pfs: fix qemu_mknodat() to always return -1 on error on macOS host

2022-04-27 Thread Christian Schoenebeck
qemu_mknodat() is expected to behave according to its POSIX API, and
therefore should always return exactly -1 on any error, and errno
should be set for the actual error code.

Signed-off-by: Christian Schoenebeck 
Reviewed-by: Greg Kurz 
---
 hw/9pfs/9p-util-darwin.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/hw/9pfs/9p-util-darwin.c b/hw/9pfs/9p-util-darwin.c
index 7d00db47a9..649a3ec61c 100644
--- a/hw/9pfs/9p-util-darwin.c
+++ b/hw/9pfs/9p-util-darwin.c
@@ -127,7 +127,8 @@ int qemu_mknodat(int dirfd, const char *filename, mode_t 
mode, dev_t dev)
 }
 if (!pthread_fchdir_np) {
 error_report_once("pthread_fchdir_np() not available on this version 
of macOS");
-return -ENOTSUP;
+errno = ENOTSUP;
+return -1;
 }
 if (pthread_fchdir_np(dirfd) < 0) {
 return -1;
-- 
2.32.0 (Apple Git-132)




[PATCH v4 4/6] 9pfs: fix wrong errno being sent to Linux client on macOS host

2022-04-27 Thread Christian Schoenebeck
Linux and macOS only share some errno definitions with equal macro
name and value. In fact most mappings for errno are completely
different on the two systems.

This patch converts some important errno values from macOS host to
corresponding Linux errno values before eventually sending such error
codes along with 'Rlerror' replies (if 9p2000.L is used that is). Not
having translated errnos before violated the 9p2000.L protocol spec,
which says:

  "
  size[4] Rlerror tag[2] ecode[4]

  ... ecode is a numerical Linux errno.
  "

  https://github.com/chaos/diod/wiki/protocol#lerrorreturn-error-code

This patch fixes a bunch of misbehaviours when running a Linux client
on macOS host. For instance this patch fixes:

  mount -t 9p -o posixacl ...

on Linux guest if security_mode=mapped was used for 9p server, which
refused to mount successfully, because macOS returned ENOATTR==93
when client tried to retrieve POSIX ACL xattrs, because errno 93
is defined as EPROTONOSUPPORT==93 on Linux, so Linux client believed
that xattrs were not supported by filesystem on host in general.

Signed-off-by: Christian Schoenebeck 
Link: https://lore.kernel.org/qemu-devel/20220421124835.3e664669@bahia/
Reviewed-by: Greg Kurz 
---
 hw/9pfs/9p-util.h | 30 ++
 hw/9pfs/9p.c  |  2 ++
 2 files changed, 32 insertions(+)

diff --git a/hw/9pfs/9p-util.h b/hw/9pfs/9p-util.h
index 2cc9a5dbfb..c3526144c9 100644
--- a/hw/9pfs/9p-util.h
+++ b/hw/9pfs/9p-util.h
@@ -58,6 +58,36 @@ static inline uint64_t host_dev_to_dotl_dev(dev_t dev)
 #endif
 }
 
+/* Translates errno from host -> Linux if needed */
+static inline int errno_to_dotl(int err) {
+#if defined(CONFIG_LINUX)
+/* nothing to translate (Linux -> Linux) */
+#elif defined(CONFIG_DARWIN)
+/*
+ * translation mandatory for macOS hosts
+ *
+ * FIXME: Only most important errnos translated here yet, this should be
+ * extended to as many errnos being translated as possible in future.
+ */
+if (err == ENAMETOOLONG) {
+err = 36; /* ==ENAMETOOLONG on Linux */
+} else if (err == ENOTEMPTY) {
+err = 39; /* ==ENOTEMPTY on Linux */
+} else if (err == ELOOP) {
+err = 40; /* ==ELOOP on Linux */
+} else if (err == ENOATTR) {
+err = 61; /* ==ENODATA on Linux */
+} else if (err == ENOTSUP) {
+err = 95; /* ==EOPNOTSUPP on Linux */
+} else if (err == EOPNOTSUPP) {
+err = 95; /* ==EOPNOTSUPP on Linux */
+}
+#else
+#error Missing errno translation to Linux for this host system
+#endif
+return err;
+}
+
 #ifdef CONFIG_DARWIN
 #define qemu_fgetxattr(...) fgetxattr(__VA_ARGS__, 0, 0)
 #define qemu_lgetxattr(...) getxattr(__VA_ARGS__, 0, XATTR_NOFOLLOW)
diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c
index 4a296a0b94..0cd0c14c2a 100644
--- a/hw/9pfs/9p.c
+++ b/hw/9pfs/9p.c
@@ -1054,6 +1054,8 @@ static void coroutine_fn pdu_complete(V9fsPDU *pdu, 
ssize_t len)
 }
 len += ret;
 id = P9_RERROR;
+} else {
+err = errno_to_dotl(err);
 }
 
 ret = pdu_marshal(pdu, len, "d", err);
-- 
2.32.0 (Apple Git-132)




[PATCH v4 1/6] 9pfs: fix qemu_mknodat(S_IFREG) on macOS

2022-04-27 Thread Christian Schoenebeck
mknod() on macOS does not support creating regular files, so
divert to openat_file() if S_IFREG is passed with mode argument.

Furthermore, 'man 2 mknodat' on Linux says: "Zero file type is
equivalent to type S_IFREG".

Link: https://lore.kernel.org/qemu-devel/17933734.zYzKuhC07K@silver/
Signed-off-by: Christian Schoenebeck 
Reviewed-by: Will Cohen 
Reviewed-by: Greg Kurz 
---
 hw/9pfs/9p-util-darwin.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/hw/9pfs/9p-util-darwin.c b/hw/9pfs/9p-util-darwin.c
index bec0253474..e24d09763a 100644
--- a/hw/9pfs/9p-util-darwin.c
+++ b/hw/9pfs/9p-util-darwin.c
@@ -77,6 +77,15 @@ int fsetxattrat_nofollow(int dirfd, const char *filename, 
const char *name,
 int qemu_mknodat(int dirfd, const char *filename, mode_t mode, dev_t dev)
 {
 int preserved_errno, err;
+
+if (S_ISREG(mode) || !(mode & S_IFMT)) {
+int fd = openat_file(dirfd, filename, O_CREAT, mode);
+if (fd == -1) {
+return fd;
+}
+close(fd);
+return 0;
+}
 if (!pthread_fchdir_np) {
 error_report_once("pthread_fchdir_np() not available on this version 
of macOS");
 return -ENOTSUP;
-- 
2.32.0 (Apple Git-132)




[PATCH v4 5/6] 9pfs: fix removing non-existent POSIX ACL xattr on macOS host

2022-04-27 Thread Christian Schoenebeck
When mapped POSIX ACL is used, we are ignoring errors when trying
to remove a POSIX ACL xattr that does not exist. On Linux hosts we
would get ENODATA in such cases, on macOS hosts however we get
ENOATTR instead.

As we can be sure that ENOATTR is defined as being identical on Linux
hosts (at least by qemu/xattr.h), it is safe to fix this issue by
simply comparing against ENOATTR instead of ENODATA.

This patch fixes e.g. a command on Linux guest like:

  cp --preserve=mode old new

Signed-off-by: Christian Schoenebeck 
Link: https://lore.kernel.org/qemu-devel/2866993.yOYK24bMf6@silver/
Reviewed-by: Greg Kurz 
---
 hw/9pfs/9p-posix-acl.c | 12 ++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/hw/9pfs/9p-posix-acl.c b/hw/9pfs/9p-posix-acl.c
index eadae270dd..4b2cb3c66c 100644
--- a/hw/9pfs/9p-posix-acl.c
+++ b/hw/9pfs/9p-posix-acl.c
@@ -65,7 +65,11 @@ static int mp_pacl_removexattr(FsContext *ctx,
 int ret;
 
 ret = local_removexattr_nofollow(ctx, path, MAP_ACL_ACCESS);
-if (ret == -1 && errno == ENODATA) {
+/*
+ * macOS returns ENOATTR (!=ENODATA on macOS), whereas Linux returns
+ * ENODATA (==ENOATTR on Linux), so checking for ENOATTR is fine
+ */
+if (ret == -1 && errno == ENOATTR) {
 /*
  * We don't get ENODATA error when trying to remove a
  * posix acl that is not present. So don't throw the error
@@ -115,7 +119,11 @@ static int mp_dacl_removexattr(FsContext *ctx,
 int ret;
 
 ret = local_removexattr_nofollow(ctx, path, MAP_ACL_DEFAULT);
-if (ret == -1 && errno == ENODATA) {
+/*
+ * macOS returns ENOATTR (!=ENODATA on macOS), whereas Linux returns
+ * ENODATA (==ENOATTR on Linux), so checking for ENOATTR is fine
+ */
+if (ret == -1 && errno == ENOATTR) {
 /*
  * We don't get ENODATA error when trying to remove a
  * posix acl that is not present. So don't throw the error
-- 
2.32.0 (Apple Git-132)




[PATCH v4 3/6] 9pfs: fix wrong encoding of rdev field in Rgetattr on macOS

2022-04-27 Thread Christian Schoenebeck
The 'rdev' field in 9p response 'Rgetattr' is of type dev_t,
which is actually a system dependent type and therefore both the
size and encoding of dev_t differ between macOS and Linux.

So far we have sent 'rdev' to guest in host's dev_t format as-is,
which caused devices to appear with wrong device numbers on
guests running on macOS hosts, eventually leading to various
misbehaviours on guest in conjunction with device files.

This patch fixes this issue by converting the device number from
host's dev_t format to Linux dev_t format. As 9p request
'Tgetattr' is exclusive to protocol version 9p2000.L, it should
be fair to assume that 'rdev' field is assumed to be in Linux dev_t
format by client as well.

Signed-off-by: Christian Schoenebeck 
Link: https://lore.kernel.org/qemu-devel/20220421093056.5ab1e7ed@bahia/
Reviewed-by: Greg Kurz 
---
 hw/9pfs/9p-util.h | 39 +++
 hw/9pfs/9p.c  |  2 +-
 2 files changed, 40 insertions(+), 1 deletion(-)

diff --git a/hw/9pfs/9p-util.h b/hw/9pfs/9p-util.h
index 97e681e167..2cc9a5dbfb 100644
--- a/hw/9pfs/9p-util.h
+++ b/hw/9pfs/9p-util.h
@@ -19,6 +19,45 @@
 #define O_PATH_9P_UTIL 0
 #endif
 
+#if !defined(CONFIG_LINUX)
+
+/*
+ * Generates a Linux device number (a.k.a. dev_t) for given device major
+ * and minor numbers.
+ *
+ * To be more precise: it generates a device number in glibc's format
+ * (MMMM_Mmmm_mmmM_MMmm, 64 bits) actually, which is compatible with
+ * Linux's format (mmmM_MMmm, 32 bits), as described in <bits/sysmacros.h>.
+ */
+static inline uint64_t makedev_dotl(uint32_t dev_major, uint32_t dev_minor)
+{
+uint64_t dev;
+
+// from glibc sysmacros.h:
+dev  = (((uint64_t) (dev_major & 0x0fffu)) <<  8);
+dev |= (((uint64_t) (dev_major & 0xf000u)) << 32);
+dev |= (((uint64_t) (dev_minor & 0x00ffu)) <<  0);
+dev |= (((uint64_t) (dev_minor & 0xff00u)) << 12);
+return dev;
+}
+
+#endif
+
+/*
+ * Converts given device number from host's device number format to Linux
+ * device number format. As both the size of type dev_t and encoding of
+ * dev_t is system dependent, we have to convert them for Linux guests if
+ * host is not running Linux.
+ */
+static inline uint64_t host_dev_to_dotl_dev(dev_t dev)
+{
+#ifdef CONFIG_LINUX
+return dev;
+#else
+return makedev_dotl(major(dev), minor(dev));
+#endif
+}
+
 #ifdef CONFIG_DARWIN
 #define qemu_fgetxattr(...) fgetxattr(__VA_ARGS__, 0, 0)
 #define qemu_lgetxattr(...) getxattr(__VA_ARGS__, 0, XATTR_NOFOLLOW)
diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c
index 225f31fc31..4a296a0b94 100644
--- a/hw/9pfs/9p.c
+++ b/hw/9pfs/9p.c
@@ -1327,7 +1327,7 @@ static int stat_to_v9stat_dotl(V9fsPDU *pdu, const struct 
stat *stbuf,
 v9lstat->st_nlink = stbuf->st_nlink;
 v9lstat->st_uid = stbuf->st_uid;
 v9lstat->st_gid = stbuf->st_gid;
-v9lstat->st_rdev = stbuf->st_rdev;
+v9lstat->st_rdev = host_dev_to_dotl_dev(stbuf->st_rdev);
 v9lstat->st_size = stbuf->st_size;
 v9lstat->st_blksize = stat_to_iounit(pdu, stbuf);
 v9lstat->st_blocks = stbuf->st_blocks;
-- 
2.32.0 (Apple Git-132)




[PATCH v4 2/6] 9pfs: fix qemu_mknodat(S_IFSOCK) on macOS

2022-04-27 Thread Christian Schoenebeck
mknod() on macOS does not support creating sockets, so divert to
call sequence socket(), bind() and fchmodat() respectively if S_IFSOCK
was passed with mode argument.

Link: https://lore.kernel.org/qemu-devel/17933734.zYzKuhC07K@silver/
Signed-off-by: Christian Schoenebeck 
---
 hw/9pfs/9p-util-darwin.c | 45 +++-
 1 file changed, 44 insertions(+), 1 deletion(-)

diff --git a/hw/9pfs/9p-util-darwin.c b/hw/9pfs/9p-util-darwin.c
index e24d09763a..7d00db47a9 100644
--- a/hw/9pfs/9p-util-darwin.c
+++ b/hw/9pfs/9p-util-darwin.c
@@ -74,6 +74,45 @@ int fsetxattrat_nofollow(int dirfd, const char *filename, 
const char *name,
  */
 #if defined CONFIG_PTHREAD_FCHDIR_NP
 
+static int create_socket_file_at_cwd(const char *filename, mode_t mode) {
+int fd, err;
+struct sockaddr_un addr = {
+.sun_family = AF_UNIX
+};
+
+/*
+ * sun_path is only 104 bytes, explicit filename length check required
+ */
+if (sizeof(addr.sun_path) - 1 < strlen(filename) + 2) {
+errno = ENAMETOOLONG;
+return -1;
+}
+fd = socket(PF_UNIX, SOCK_DGRAM, 0);
+if (fd == -1) {
+return fd;
+}
+snprintf(addr.sun_path, sizeof(addr.sun_path), "./%s", filename);
+err = bind(fd, (struct sockaddr *) &addr, sizeof(addr));
+if (err == -1) {
+goto out;
+}
+/*
+ * FIXME: Should rather be using descriptor-based fchmod() on the
+ * socket file descriptor above (preferably before bind() call),
+ * instead of path-based fchmodat(), to prevent concurrent transient
+ * state issues between creating the named FIFO file at bind() and
+ * delayed adjustment of permissions at fchmodat(). However currently
+ * macOS (12.x) does not support such operations on socket file
+ * descriptors yet.
+ *
+ * Filed report with Apple: FB9997731
+ */
+err = fchmodat(AT_FDCWD, filename, mode, AT_SYMLINK_NOFOLLOW_ANY);
+out:
+close_preserve_errno(fd);
+return err;
+}
+
 int qemu_mknodat(int dirfd, const char *filename, mode_t mode, dev_t dev)
 {
 int preserved_errno, err;
@@ -93,7 +132,11 @@ int qemu_mknodat(int dirfd, const char *filename, mode_t 
mode, dev_t dev)
 if (pthread_fchdir_np(dirfd) < 0) {
 return -1;
 }
-err = mknod(filename, mode, dev);
+if (S_ISSOCK(mode)) {
+err = create_socket_file_at_cwd(filename, mode);
+} else {
+err = mknod(filename, mode, dev);
+}
 preserved_errno = errno;
 /* Stop using the thread-local cwd */
 pthread_fchdir_np(-1);
-- 
2.32.0 (Apple Git-132)




[PATCH v4 0/6] 9pfs: macOS host fixes

2022-04-27 Thread Christian Schoenebeck
A bunch of fixes for recently (in QEMU 7.0) added 9p support on macOS hosts.

Note: there are still issues to address with case-insensitive file systems
on macOS hosts. I sent a separate RFC on that icase issue:
https://lore.kernel.org/qemu-devel/1757498.AyhHxzoH2B@silver/

v3 -> v4:

  * Use fchmodat(AT_SYMLINK_NOFOLLOW_ANY) instead of chmod().
[patch 2]

Christian Schoenebeck (6):
  9pfs: fix qemu_mknodat(S_IFREG) on macOS
  9pfs: fix qemu_mknodat(S_IFSOCK) on macOS
  9pfs: fix wrong encoding of rdev field in Rgetattr on macOS
  9pfs: fix wrong errno being sent to Linux client on macOS host
  9pfs: fix removing non-existent POSIX ACL xattr on macOS host
  9pfs: fix qemu_mknodat() to always return -1 on error on macOS host

 hw/9pfs/9p-posix-acl.c   | 12 +--
 hw/9pfs/9p-util-darwin.c | 57 +++--
 hw/9pfs/9p-util.h| 69 
 hw/9pfs/9p.c |  4 ++-
 4 files changed, 137 insertions(+), 5 deletions(-)

-- 
2.32.0 (Apple Git-132)




Re: [PULL 0/9] Kraxel 20220427 patches

2022-04-27 Thread Richard Henderson

On 4/27/22 10:29, Gerd Hoffmann wrote:

The following changes since commit a74782936dc6e979ce371dabda4b1c05624ea87f:

   Merge tag 'pull-migration-20220421a' of https://gitlab.com/dagrh/qemu into 
staging (2022-04-21 18:48:18 -0700)

are available in the Git repository at:

   git://git.kraxel.org/qemu tags/kraxel-20220427-pull-request

for you to fetch changes up to a8152c4e4613c70c2f0573a82babbc8acc00cf90:

   i386: firmware parsing and sev setup for -bios loaded firmware (2022-04-27 
07:51:01 +0200)


vnc: add display-update monitor command.
screendump: add png support.
vmsvga: screen update fix.
i386: sev setup for -bios loaded firmware


Applied, thanks.  Please update https://wiki.qemu.org/ChangeLog/7.1 as 
appropriate.


r~






Carwyn Ellis (1):
   hw/display/vmware_vga: do not discard screen updates

Gerd Hoffmann (3):
   i386: move bios load error message
   i386: factor out x86_firmware_configure()
   i386: firmware parsing and sev setup for -bios loaded firmware

Kshitij Suri (2):
   Replacing CONFIG_VNC_PNG with CONFIG_PNG
   Added parameter to take screenshot with screendump as PNG

Vladimir Sementsov-Ogievskiy (3):
   ui/vnc: refactor arrays of addresses to SocketAddressList
   qapi/ui: add 'display-update' command for changing listen address
   avocado/vnc: add test_change_listen

  meson_options.txt |   4 +-
  tests/avocado/vnc.py  |  63 +++
  include/hw/i386/x86.h |   3 +
  include/ui/console.h  |   1 +
  ui/vnc.h  |   2 +-
  hw/display/vmware_vga.c   |  45 ++---
  hw/i386/pc_sysfw.c|  36 ++--
  hw/i386/x86.c |  32 +++-
  monitor/hmp-cmds.c|  12 +-
  monitor/qmp-cmds.c|  15 ++
  ui/console.c  | 101 +++-
  ui/vnc-enc-tight.c|  18 +-
  ui/vnc.c  | 156 +-
  docs/about/removed-features.rst   |   3 +-
  hmp-commands.hx   |  11 +-
  hw/display/trace-events   |   1 +
  meson.build   |  12 +-
  qapi/ui.json  |  89 +-
  .../ci/org.centos/stream/8/x86_64/configure   |   4 +-
  scripts/coverity-scan/run-coverity-scan   |   2 +-
  scripts/meson-buildoptions.sh |   6 +-
  21 files changed, 458 insertions(+), 158 deletions(-)






Re: [Qemu-devel] [PATCH 2/7] target/openrisc: add shutdown logic

2022-04-27 Thread Peter Maydell
On Wed, 27 Apr 2022 at 18:46, Jason A. Donenfeld  wrote:
>
> Hey Stafford,
>
> On Mon, Apr 17, 2017 at 08:23:51AM +0900, Stafford Horne wrote:
> > In openrisc simulators we use hooks like 'l.nop 1' to cause the
> > simulator to exit.  Implement that for qemu too.
> >
> > Reported-by: Waldemar Brodkorb 
> > Signed-off-by: Stafford Horne 
>
> I'm curious as to why this never got merged. I noticed I'm entirely unable
> to shutdown or to reboot (which is mostly what I care about) Linux from
> OpenRISC. It just hangs.

This kind of thing needs to be either:
 (1) we're modelling real hardware and that real hardware has a
device or other mechanism guest code can prod to cause a power-off
or reboot. Then we model that device, and guest code triggers a
shutdown or reboot exactly as it would on the real hardware.
 (2) there is an architecturally defined ABI for simulators, debug
stubs, etc, that includes various operations typically including
an "exit the simulator" function. (Arm semihosting is an example
of this.) In that case we can implement that functionality,
guarded by and controlled by the appropriate command line options.
(This is generally not as nice as option 1, because the guest code
has to be compiled to have support for semihosting and also because
turning it on is usually also giving implicit permission for the
guest code to read and write arbitrary host files, etc.)

Either way, undocumented random hacks aren't a good idea, which
is why this wasn't merged.

thanks
-- PMM



Re: [PATCH v2 2/5] 9pfs: fix qemu_mknodat(S_IFSOCK) on macOS

2022-04-27 Thread Christian Schoenebeck
On Mittwoch, 27. April 2022 19:37:39 CEST Greg Kurz wrote:
> On Wed, 27 Apr 2022 18:18:31 +0200
> 
> Christian Schoenebeck  wrote:
> > On Mittwoch, 27. April 2022 15:31:42 CEST Greg Kurz wrote:
> > > On Wed, 27 Apr 2022 14:32:53 +0200
> > > 
> > > Christian Schoenebeck  wrote:
> > > > On Mittwoch, 27. April 2022 12:18:10 CEST Greg Kurz wrote:
> > > > > On Wed, 27 Apr 2022 11:27:28 +0900
> > > > > 
> > > > > Akihiko Odaki  wrote:
> > > > > > On 2022/04/26 21:38, Greg Kurz wrote:
> > [...]
> > 
> > > > > > Considering the transient states are tolerated in 9pfs, we need to
> > > > > > design this function to be tolerant with transient states as well.
> > > > > > The
> > > > > > use of chmod() is not safe when we consider about transient
> > > > > > states. A
> > > > > > malicious actor may replace the file at the path with a symlink
> > > > > > which
> > > > > > may escape the shared directory and chmod() will naively follow
> > > > > > it.
> > > > > 
> > > > > You get a point here. Thanks for your tenacity ! :-)
> > > > 
> > > > Yep, I send a v4 with fchmodat_nofollow() instead of chmod(), thanks!
> > > > 
> > > > BTW, why is it actually allowed for client to create a symlink
> > > > pointing
> > > > outside exported directory tree with security_model=passthrough/none?
> > > > Did
> > > > anybody want that?
> > > 
> > > The target argument to symlink() is merely a string that is stored in
> > > the inode. It is only evaluated as a path at the time an action is
> > > made on the link. Checking at symlink() time is thus useless.
> > > 
> > > Anyway, we're safe on this side since it's the client's job to
> > > resolve links and we explicitly don't follow them in the server.
> > 
> > I wouldn't call it useless, because it is easier to keep exactly 1 hole
> > stuffed instead of being forced to continuously stuff N holes as new ones
> > may popup up over time, as this case shows (mea culpa).
> > 
> > So you are trading (probably minor) performance for security.
> > 
> > But my question was actually meant seriously: whether there was anybody in
> > the past who probably actually wanted to create relative symlinks outside
> > the exported tree. For instance for another service with wider host
> > filesystem access.
> 
> I took the question seriously :-), the problem is that even if you
> do a check on the target at symlink() time, you don't know how it
> will be evaluated in the end.
> 
> Quick demonstration:
> 
> $ cd /var/tmp/
> $ mkdir foo
> $ cd foo/
> $ # Suppose foo is the jail
> $ mkdir bar
> $ ln -sf .. bar/link
> $ realpath bar/link
> /var/tmp/foo
> $ # Good, we're still under foo
> $ mv bar/link .
> $ realpath link
> /var/tmp
> $ # Ouch we've escaped
> 
> So in the end, the only real fix is to ban path based syscalls and
> pass AT_SYMLINK_NOFOLLOW everywhere. This was the justification for
> rewriting nearly all 9p local in order to fix CVE-2016-9602.
> 
> https://lists.gnu.org/archive/html/qemu-devel/2017-01/msg06225.html

Touché :) Agreed, it's not worth it.

I mean this simple example could still be addressed by catching the move, but 
if you have like several nested directories, each with a huge number of 
chained symlinks, on top of it non-atomic issues etc., then things would get 
way expensive to check, as you would actually have to traverse an entire tree 
and validate an even bigger amount of symlink paths for every single symlink 
modification attempt on guest, probably even with exclusive locks, and so on.

> > [...]
> > 
> > > > > This brings up a new problem I hadn't realized before : the
> > > > > fchmodat_nofollow() implementation in 9p-local.c is really
> > > > > a linux only thing to cope with AT_SYMLINK_NOFOLLOW not being
> > > > > supported with fchmodat(). It looks that this should move to
> > > > > 9p-util-linux.c and a proper version should be added for macOS
> > > > > in 9p-util-darwin.c
> > > > 
> > > > Like already agreed on the other thread, yes, that makes sense. But I
> > > > think
> > > > this can be handled with a follow-up, separate from this series.
> > > 
> > > Fair enough if you want to handle fchmodat_nofollow() later but you
> > > must at least use fchmodat(AT_SYMLINK_NOFOLLOW) in this patch
> > > instead of chmod().
> > 
> > Sounds like a quick and easy workaround. However looking at 'man fchmodat'
> > on macOS, this probably does not exactly do what you wanted it to, at
> > least not
> > in this particular case:
> >  AT_SYMLINK_NOFOLLOW
> >  
> >  If path names a symbolic link, then the mode of the symbolic
> >  link is changed.
> >  AT_SYMLINK_NOFOLLOW_ANY
> >  
> >  If path names a symbolic link, then the mode of the symbolic
> >  link is changed and if if the path has any other symbolic
> >  links, an error is returned.
> > So if somebody replaced the socket file by a 1st order symlink, you would
> > adjust the symlink's permissions with both AT_SYMLINK_NOFOLLOW as well as
> > wi

[PATCH] hw/arm: add versioning to sbsa-ref machine DT

2022-04-27 Thread Leif Lindholm
The sbsa-ref machine is continuously evolving. Some of the changes we
want to make in the near future, to align with real components (e.g.
the GIC-700), will break compatibility for existing firmware.

Introduce two new properties to the DT generated on machine generation:
- machine-version-major
  To be incremented when a platform change makes the machine
  incompatible with existing firmware.
- machine-version-minor
  To be incremented when functionality is added to the machine
  without causing incompatibility with existing firmware.
  To be reset to 0 when machine-version-major is incremented.

These properties are both introduced with the value 0.
(Hence, a machine where the DT is lacking these nodes is equivalent
to version 0.0.)

Signed-off-by: Leif Lindholm 
Cc: Peter Maydell 
Cc: Radoslaw Biernacki 
Cc: Cédric Le Goater 
---
 hw/arm/sbsa-ref.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/hw/arm/sbsa-ref.c b/hw/arm/sbsa-ref.c
index 2387401963..e05f6056c7 100644
--- a/hw/arm/sbsa-ref.c
+++ b/hw/arm/sbsa-ref.c
@@ -190,6 +190,9 @@ static void create_fdt(SBSAMachineState *sms)
 qemu_fdt_setprop_cell(fdt, "/", "#address-cells", 0x2);
 qemu_fdt_setprop_cell(fdt, "/", "#size-cells", 0x2);
 
+qemu_fdt_setprop_cell(fdt, "/", "machine-version-major", 0);
+qemu_fdt_setprop_cell(fdt, "/", "machine-version-minor", 0);
+
 if (ms->numa_state->have_numa_distance) {
 int size = nb_numa_nodes * nb_numa_nodes * 3 * sizeof(uint32_t);
 uint32_t *matrix = g_malloc0(size);
-- 
2.30.2




[PATCH] MAINTAINERS/.mailmap: update email for Leif Lindholm

2022-04-27 Thread Leif Lindholm
NUVIA was acquired by Qualcomm in March 2021, but kept functioning on
separate infrastructure for a transitional period. We've now switched
over to contributing as Qualcomm Innovation Center (quicinc), so update
my email address to reflect this.

Signed-off-by: Leif Lindholm 
Cc: Leif Lindholm 
Cc: Peter Maydell 
---
 .mailmap| 1 +
 MAINTAINERS | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/.mailmap b/.mailmap
index 2976a675ea..6b28c98a90 100644
--- a/.mailmap
+++ b/.mailmap
@@ -63,6 +63,7 @@ Huacai Chen  
 Huacai Chen  
 James Hogan  
 Leif Lindholm  
+Leif Lindholm  
 Radoslaw Biernacki  
 Paul Burton  
 Paul Burton  
diff --git a/MAINTAINERS b/MAINTAINERS
index 294c88ace9..02042a7955 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -890,7 +890,7 @@ F: include/hw/ssi/imx_spi.h
 SBSA-REF
 M: Radoslaw Biernacki 
 M: Peter Maydell 
-R: Leif Lindholm 
+R: Leif Lindholm 
 L: qemu-...@nongnu.org
 S: Maintained
 F: hw/arm/sbsa-ref.c
-- 
2.30.2




Re: [PATCH v2 2/5] 9pfs: fix qemu_mknodat(S_IFSOCK) on macOS

2022-04-27 Thread Christian Schoenebeck
On Mittwoch, 27. April 2022 19:12:15 CEST Will Cohen wrote:
> On Wed, Apr 27, 2022 at 12:18 PM Christian Schoenebeck <
> 
> qemu_...@crudebyte.com> wrote:
> > On Mittwoch, 27. April 2022 15:31:42 CEST Greg Kurz wrote:
> > > On Wed, 27 Apr 2022 14:32:53 +0200
> > > 
> > > Christian Schoenebeck  wrote:
> > > > On Mittwoch, 27. April 2022 12:18:10 CEST Greg Kurz wrote:
> > > > > On Wed, 27 Apr 2022 11:27:28 +0900
> > > > > 
> > > > > Akihiko Odaki  wrote:
> > > > > > On 2022/04/26 21:38, Greg Kurz wrote:
> > [...]
> > 
> > > > > > Considering the transient states are tolerated in 9pfs, we need to
> > > > > > design this function to be tolerant with transient states as well.
> > 
> > The
> > 
> > > > > > use of chmod() is not safe when we consider about transient
> > 
> > states. A
> > 
> > > > > > malicious actor may replace the file at the path with a symlink
> > 
> > which
> > 
> > > > > > may escape the shared directory and chmod() will naively follow
> > > > > > it.
> > > > > 
> > > > > You get a point here. Thanks for your tenacity ! :-)
> > > > 
> > > > Yep, I send a v4 with fchmodat_nofollow() instead of chmod(), thanks!
> > > > 
> > > > BTW, why is it actually allowed for client to create a symlink
> > > > pointing
> > > > outside exported directory tree with security_model=passthrough/none?
> > 
> > Did
> > 
> > > > anybody want that?
> > > 
> > > The target argument to symlink() is merely a string that is stored in
> > > the inode. It is only evaluated as a path at the time an action is
> > > made on the link. Checking at symlink() time is thus useless.
> > > 
> > > Anyway, we're safe on this side since it's the client's job to
> > > resolve links and we explicitly don't follow them in the server.
> > 
> > I wouldn't call it useless, because it is easier to keep exactly 1 hole
> > stuffed instead of being forced to continuously stuff N holes as new ones
> > may
> > popup up over time, as this case shows (mea culpa).
> > 
> > So you are trading (probably minor) performance for security.
> > 
> > But my question was actually meant seriously: whether there was anybody in
> > the
> > past who probably actually wanted to create relative symlinks outside the
> > exported tree. For instance for another service with wider host filesystem
> > access.
> 
> For what it's worth, the inability to follow symlinks read-write outside of
> the tree appears to be, at the moment, the primary pain point for new users
> of 9pfs in containerization software (see the later comments in
> https://github.com/lima-vm/lima/pull/726 and to a lesser extent
> https://github.com/containers/podman/issues/13784).
> 
> To my knowledge, neither podman nor lima have come up with conclusive
> preferred solutions for how to address this, much less had a robust
> discussion with the QEMU team.
> The lima discussion notes that it works read-only with passthrough/none, so
> I'd suggest that if there weren't users of it before, there are now! As I
> understand it, one partial solution for downstream software that allows for
> read-write may just be to more proactively mount larger directories to
> minimize the number of external paths that symlinks might get tripped up
> on. That said, this will stop working when it comes to linking to
> additional mounts, since /Volumes on darwin will never line up with /mnt.

That's a different thing. People in those discussions were using 
security_model=mapped where symlinks on guest are virtually mapped as textual 
file content (try 'cat ' on host). So in this mode symlinks on host 
and symlinks on guest are intentionally separated from each other.

The issue I was referring to was about security_model=passthrough|none which 
has the exact same symlinks accessible both on host and guest side, and more 
specifically I meant: symlinks created by guest that would point to a location 
*above* the 9p export root. E.g. say guest has access to the following host 
directory via 9p, that is access *below* the following directory on host:

  /vm/foo/

And say guest now mounts that host directory and creates a symlink like:

  mount -t 9p foo /mnt
  cd /mnt
  ln -s ../bar bar

That symlink would now point to /bar from guest's PoV, and to /vm/bar from 
host's PoV (i.e. a location on host where guest should not have access to at 
all).

BTW some of the other issues mentioned in the linked discussion, like the 
timeout errors, are fixed with this patch set.

Best regards,
Christian Schoenebeck





Re: [PATCH] MAINTAINERS/.mailmap: update email for Leif Lindholm

2022-04-27 Thread Leif Lindholm
On Wed, Apr 27, 2022 at 7:13 PM Leif Lindholm 
wrote:
>
> NUVIA was acquired by Qualcomm in March 2021, but kept functioning on
> separate infrastructure for a transitional period. We've now switched
> over to contributing as Qualcomm Innovation Center (quicinc), so update
> my email address to reflect this.
>
> Signed-off-by: Leif Lindholm 
> Cc: Leif Lindholm 
> Cc: Peter Maydell 

Reviewed-by: Leif Lindholm 

> ---
>  .mailmap| 1 +
>  MAINTAINERS | 2 +-
>  2 files changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/.mailmap b/.mailmap
> index 2976a675ea..6b28c98a90 100644
> --- a/.mailmap
> +++ b/.mailmap
> @@ -63,6 +63,7 @@ Huacai Chen  
>  Huacai Chen  
>  James Hogan  
>  Leif Lindholm  
> +Leif Lindholm  
>  Radoslaw Biernacki  
>  Paul Burton  
>  Paul Burton  
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 294c88ace9..02042a7955 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -890,7 +890,7 @@ F: include/hw/ssi/imx_spi.h
>  SBSA-REF
>  M: Radoslaw Biernacki 
>  M: Peter Maydell 
> -R: Leif Lindholm 
> +R: Leif Lindholm 
>  L: qemu-...@nongnu.org
>  S: Maintained
>  F: hw/arm/sbsa-ref.c
> --
> 2.30.2


Re: [Qemu-devel] [PATCH 2/7] target/openrisc: add shutdown logic

2022-04-27 Thread Jason A. Donenfeld
Hey Stafford,

On Mon, Apr 17, 2017 at 08:23:51AM +0900, Stafford Horne wrote:
> In openrisc simulators we use hooks like 'l.nop 1' to cause the
> simulator to exit.  Implement that for qemu too.
> 
> Reported-by: Waldemar Brodkorb 
> Signed-off-by: Stafford Horne 

I'm curious as to why this never got merged. I noticed I'm entirely unable
to shutdown or to reboot (which is mostly what I care about) Linux from
OpenRISC. It just hangs.

Thanks,
Jason



Re: [PATCH v2 2/5] 9pfs: fix qemu_mknodat(S_IFSOCK) on macOS

2022-04-27 Thread Greg Kurz
On Wed, 27 Apr 2022 18:18:31 +0200
Christian Schoenebeck  wrote:

> On Mittwoch, 27. April 2022 15:31:42 CEST Greg Kurz wrote:
> > On Wed, 27 Apr 2022 14:32:53 +0200
> > 
> > Christian Schoenebeck  wrote:
> > > On Mittwoch, 27. April 2022 12:18:10 CEST Greg Kurz wrote:
> > > > On Wed, 27 Apr 2022 11:27:28 +0900
> > > > 
> > > > Akihiko Odaki  wrote:
> > > > > On 2022/04/26 21:38, Greg Kurz wrote:
> [...]
> > > > > Considering the transient states are tolerated in 9pfs, we need to
> > > > > design this function to be tolerant with transient states as well. The
> > > > > use of chmod() is not safe when we consider about transient states. A
> > > > > malicious actor may replace the file at the path with a symlink which
> > > > > may escape the shared directory and chmod() will naively follow it.
> > > > 
> > > > You get a point here. Thanks for your tenacity ! :-)
> > > 
> > > Yep, I send a v4 with fchmodat_nofollow() instead of chmod(), thanks!
> > > 
> > > BTW, why is it actually allowed for client to create a symlink pointing
> > > outside exported directory tree with security_model=passthrough/none? Did
> > > anybody want that?
> > 
> > The target argument to symlink() is merely a string that is stored in
> > the inode. It is only evaluated as a path at the time an action is
> > made on the link. Checking at symlink() time is thus useless.
> > 
> > Anyway, we're safe on this side since it's the client's job to
> > resolve links and we explicitly don't follow them in the server.
> 
> I wouldn't call it useless, because it is easier to keep exactly 1 hole
> stuffed instead of being forced to continuously stuff N holes as new ones may
> popup up over time, as this case shows (mea culpa).
> 
> So you are trading (probably minor) performance for security.
> 
> But my question was actually meant seriously: whether there was anybody in the
> past who probably actually wanted to create relative symlinks outside the
> exported tree. For instance for another service with wider host filesystem
> access.
> 

I took the question seriously :-), the problem is that even if you
do a check on the target at symlink() time, you don't know how it
will be evaluated in the end.

Quick demonstration:

$ cd /var/tmp/
$ mkdir foo
$ cd foo/
$ # Suppose foo is the jail
$ mkdir bar
$ ln -sf .. bar/link
$ realpath bar/link
/var/tmp/foo
$ # Good, we're still under foo
$ mv bar/link .
$ realpath link
/var/tmp
$ # Ouch we've escaped

So in the end, the only real fix is to ban path based syscalls and
pass AT_SYMLINK_NOFOLLOW everywhere. This was the justification for
rewriting nearly all 9p local in order to fix CVE-2016-9602.

https://lists.gnu.org/archive/html/qemu-devel/2017-01/msg06225.html

> [...]
> > > > This brings up a new problem I hadn't realized before : the
> > > > fchmodat_nofollow() implementation in 9p-local.c is really
> > > > a linux only thing to cope with AT_SYMLINK_NOFOLLOW not being
> > > > supported with fchmodat(). It looks that this should move to
> > > > 9p-util-linux.c and a proper version should be added for macOS
> > > > in 9p-util-darwin.c
> > > 
> > > Like already agreed on the other thread, yes, that makes sense. But I
> > > think
> > > this can be handled with a follow-up, separate from this series.
> > 
> > Fair enough if you want to handle fchmodat_nofollow() later but you
> > must at least use fchmodat(AT_SYMLINK_NOFOLLOW) in this patch
> > instead of chmod().
> 
> Sounds like a quick and easy workaround. However looking at 'man fchmodat' on
> macOS, this probably does not exactly do what you wanted it to, at least not
> in this particular case:
> 
>  AT_SYMLINK_NOFOLLOW
>  If path names a symbolic link, then the mode of the symbolic 
> link is changed.
> 
>  AT_SYMLINK_NOFOLLOW_ANY
>  If path names a symbolic link, then the mode of the symbolic 
> link is changed and
>  if if the path has any other symbolic links, an error is 
> returned.
> 
> So if somebody replaced the socket file by a 1st order symlink, you would
> adjust the symlink's permissions with both AT_SYMLINK_NOFOLLOW as well as 
> with 
> AT_SYMLINK_NOFOLLOW_ANY. I mean it's better than chmod(), but acceptable?
> 

As long as the link is not followed outside, we're good : it will change the
symlink mode and then what ?

> Using our existing fchmodat_nofollow() instead is no viable alternative
> either, as it uses operations that are not supported on socket files on macOS
> (tested).
> 
> Best regards,
> Christian Schoenebeck
> 
> 




[PULL 9/9] i386: firmware parsing and sev setup for -bios loaded firmware

2022-04-27 Thread Gerd Hoffmann
Don't register firmware as rom, not needed (see comment).
Add x86_firmware_configure() call for proper sev initialization.

Signed-off-by: Gerd Hoffmann 
Tested-by: Xiaoyao Li 
Reviewed-by: Daniel P. Berrangé 
Tested-by: Daniel P. Berrangé 
Reviewed-by: Philippe Mathieu-Daudé 
Acked-by: Michael S. Tsirkin 
Message-Id: <20220425135051.551037-4-kra...@redhat.com>
---
 hw/i386/x86.c | 25 +++--
 1 file changed, 19 insertions(+), 6 deletions(-)

diff --git a/hw/i386/x86.c b/hw/i386/x86.c
index ced31f67b9a8..79ebdface6e2 100644
--- a/hw/i386/x86.c
+++ b/hw/i386/x86.c
@@ -1115,12 +1115,25 @@ void x86_bios_rom_init(MachineState *ms, const char 
*default_firmware,
 }
 bios = g_malloc(sizeof(*bios));
 memory_region_init_ram(bios, NULL, "pc.bios", bios_size, &error_fatal);
-if (!isapc_ram_fw) {
-memory_region_set_readonly(bios, true);
-}
-ret = rom_add_file_fixed(bios_name, (uint32_t)(-bios_size), -1);
-if (ret != 0) {
-goto bios_error;
+if (sev_enabled()) {
+/*
+ * The concept of a "reset" simply doesn't exist for
+ * confidential computing guests, we have to destroy and
+ * re-launch them instead.  So there is no need to register
+ * the firmware as rom to properly re-initialize on reset.
+ * Just go for a straight file load instead.
+ */
+void *ptr = memory_region_get_ram_ptr(bios);
+load_image_size(filename, ptr, bios_size);
+x86_firmware_configure(ptr, bios_size);
+} else {
+if (!isapc_ram_fw) {
+memory_region_set_readonly(bios, true);
+}
+ret = rom_add_file_fixed(bios_name, (uint32_t)(-bios_size), -1);
+if (ret != 0) {
+goto bios_error;
+}
 }
 g_free(filename);
 
-- 
2.35.1




[PULL 7/9] i386: move bios load error message

2022-04-27 Thread Gerd Hoffmann
Switch to usual goto-end-of-function error handling style.
No functional change.

Signed-off-by: Gerd Hoffmann 
Tested-by: Xiaoyao Li 
Reviewed-by: Daniel P. Berrangé 
Reviewed-by: Philippe Mathieu-Daudé 
Acked-by: Michael S. Tsirkin 
Message-Id: <20220425135051.551037-2-kra...@redhat.com>
---
 hw/i386/x86.c | 9 ++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/hw/i386/x86.c b/hw/i386/x86.c
index bb6727279097..ced31f67b9a8 100644
--- a/hw/i386/x86.c
+++ b/hw/i386/x86.c
@@ -1120,9 +1120,7 @@ void x86_bios_rom_init(MachineState *ms, const char 
*default_firmware,
 }
 ret = rom_add_file_fixed(bios_name, (uint32_t)(-bios_size), -1);
 if (ret != 0) {
-bios_error:
-fprintf(stderr, "qemu: could not load PC BIOS '%s'\n", bios_name);
-exit(1);
+goto bios_error;
 }
 g_free(filename);
 
@@ -1143,6 +1141,11 @@ void x86_bios_rom_init(MachineState *ms, const char 
*default_firmware,
 memory_region_add_subregion(rom_memory,
 (uint32_t)(-bios_size),
 bios);
+return;
+
+bios_error:
+fprintf(stderr, "qemu: could not load PC BIOS '%s'\n", bios_name);
+exit(1);
 }
 
 bool x86_machine_is_smm_enabled(const X86MachineState *x86ms)
-- 
2.35.1




[PULL 8/9] i386: factor out x86_firmware_configure()

2022-04-27 Thread Gerd Hoffmann
move sev firmware setup to separate function so it can be used from
other code paths.  No functional change.

Signed-off-by: Gerd Hoffmann 
Tested-by: Xiaoyao Li 
Reviewed-by: Daniel P. Berrangé 
Reviewed-by: Philippe Mathieu-Daudé 
Acked-by: Michael S. Tsirkin 
Message-Id: <20220425135051.551037-3-kra...@redhat.com>
---
 include/hw/i386/x86.h |  3 +++
 hw/i386/pc_sysfw.c| 36 ++--
 2 files changed, 25 insertions(+), 14 deletions(-)

diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h
index 916cc325eeb1..4841a49f86c0 100644
--- a/include/hw/i386/x86.h
+++ b/include/hw/i386/x86.h
@@ -140,4 +140,7 @@ void gsi_handler(void *opaque, int n, int level);
 void ioapic_init_gsi(GSIState *gsi_state, const char *parent_name);
 DeviceState *ioapic_init_secondary(GSIState *gsi_state);
 
+/* pc_sysfw.c */
+void x86_firmware_configure(void *ptr, int size);
+
 #endif
diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c
index 0540047bad22..c8d9e71b889b 100644
--- a/hw/i386/pc_sysfw.c
+++ b/hw/i386/pc_sysfw.c
@@ -147,7 +147,6 @@ static void pc_system_flash_map(PCMachineState *pcms,
 MemoryRegion *flash_mem;
 void *flash_ptr;
 int flash_size;
-int ret;
 
 assert(PC_MACHINE_GET_CLASS(pcms)->pci_enabled);
 
@@ -195,19 +194,7 @@ static void pc_system_flash_map(PCMachineState *pcms,
 if (sev_enabled()) {
 flash_ptr = memory_region_get_ram_ptr(flash_mem);
 flash_size = memory_region_size(flash_mem);
-/*
- * OVMF places a GUIDed structures in the flash, so
- * search for them
- */
-pc_system_parse_ovmf_flash(flash_ptr, flash_size);
-
-ret = sev_es_save_reset_vector(flash_ptr, flash_size);
-if (ret) {
-error_report("failed to locate and/or save reset vector");
-exit(1);
-}
-
-sev_encrypt_flash(flash_ptr, flash_size, &error_fatal);
+x86_firmware_configure(flash_ptr, flash_size);
 }
 }
 }
@@ -259,3 +246,24 @@ void pc_system_firmware_init(PCMachineState *pcms,
 
 pc_system_flash_cleanup_unused(pcms);
 }
+
+void x86_firmware_configure(void *ptr, int size)
+{
+int ret;
+
+/*
+ * OVMF places a GUIDed structures in the flash, so
+ * search for them
+ */
+pc_system_parse_ovmf_flash(ptr, size);
+
+if (sev_enabled()) {
+ret = sev_es_save_reset_vector(ptr, size);
+if (ret) {
+error_report("failed to locate and/or save reset vector");
+exit(1);
+}
+
+sev_encrypt_flash(ptr, size, &error_fatal);
+}
+}
-- 
2.35.1




[PULL 6/9] avocado/vnc: add test_change_listen

2022-04-27 Thread Gerd Hoffmann
From: Vladimir Sementsov-Ogievskiy 

Add simple test-case for new display-update qmp command.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Daniel P. Berrangé 
Message-Id: <20220401143936.356460-4-vsement...@openvz.org>
Signed-off-by: Gerd Hoffmann 
---
 tests/avocado/vnc.py | 63 
 1 file changed, 63 insertions(+)

diff --git a/tests/avocado/vnc.py b/tests/avocado/vnc.py
index 096432988fbb..187fd3febca4 100644
--- a/tests/avocado/vnc.py
+++ b/tests/avocado/vnc.py
@@ -8,9 +8,48 @@
 # This work is licensed under the terms of the GNU GPL, version 2 or
 # later.  See the COPYING file in the top-level directory.
 
+import socket
+from typing import List
+
 from avocado_qemu import QemuSystemTest
 
 
+VNC_ADDR = '127.0.0.1'
+VNC_PORT_START = 32768
+VNC_PORT_END = VNC_PORT_START + 1024
+
+
+def check_bind(port: int) -> bool:
+with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
+try:
+sock.bind((VNC_ADDR, port))
+except OSError:
+return False
+
+return True
+
+
+def check_connect(port: int) -> bool:
+with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
+try:
+sock.connect((VNC_ADDR, port))
+except ConnectionRefusedError:
+return False
+
+return True
+
+
+def find_free_ports(count: int) -> List[int]:
+result = []
+for port in range(VNC_PORT_START, VNC_PORT_END):
+if check_bind(port):
+result.append(port)
+if len(result) >= count:
+break
+assert len(result) == count
+return result
+
+
 class Vnc(QemuSystemTest):
 """
 :avocado: tags=vnc,quick
@@ -51,3 +90,27 @@ def test_change_password(self):
 set_password_response = self.vm.qmp('change-vnc-password',
 password='new_password')
 self.assertEqual(set_password_response['return'], {})
+
+def test_change_listen(self):
+a, b, c = find_free_ports(3)
+self.assertFalse(check_connect(a))
+self.assertFalse(check_connect(b))
+self.assertFalse(check_connect(c))
+
+self.vm.add_args('-nodefaults', '-S', '-vnc', f'{VNC_ADDR}:{a - 5900}')
+self.vm.launch()
+self.assertEqual(self.vm.qmp('query-vnc')['return']['service'], str(a))
+self.assertTrue(check_connect(a))
+self.assertFalse(check_connect(b))
+self.assertFalse(check_connect(c))
+
+res = self.vm.qmp('display-update', type='vnc',
+  addresses=[{'type': 'inet', 'host': VNC_ADDR,
+  'port': str(b)},
+ {'type': 'inet', 'host': VNC_ADDR,
+  'port': str(c)}])
+self.assertEqual(res['return'], {})
+self.assertEqual(self.vm.qmp('query-vnc')['return']['service'], str(b))
+self.assertFalse(check_connect(a))
+self.assertTrue(check_connect(b))
+self.assertTrue(check_connect(c))
-- 
2.35.1




[PULL 1/9] hw/display/vmware_vga: do not discard screen updates

2022-04-27 Thread Gerd Hoffmann
From: Carwyn Ellis 

In certain circumstances, typically when there is lots changing on the
screen, updates will be discarded resulting in garbled output.

This change simplifies the traversal of the display update FIFO queue
when applying updates. We just track the queue length and iterate up to
the end of the queue.

Additionally when adding updates to the queue, if the buffer reaches
capacity we force a flush before accepting further events.

Signed-off-by: Carwyn Ellis 
Message-Id: <20220206183956.10694-3-carwynel...@gmail.com>
Signed-off-by: Gerd Hoffmann 
---
 hw/display/vmware_vga.c | 45 +++--
 hw/display/trace-events |  1 +
 2 files changed, 26 insertions(+), 20 deletions(-)

diff --git a/hw/display/vmware_vga.c b/hw/display/vmware_vga.c
index 45d06cbe2544..cedbbde522ec 100644
--- a/hw/display/vmware_vga.c
+++ b/hw/display/vmware_vga.c
@@ -80,7 +80,7 @@ struct vmsvga_state_s {
 struct vmsvga_rect_s {
 int x, y, w, h;
 } redraw_fifo[REDRAW_FIFO_LEN];
-int redraw_fifo_first, redraw_fifo_last;
+int redraw_fifo_last;
 };
 
 #define TYPE_VMWARE_SVGA "vmware-svga"
@@ -380,35 +380,41 @@ static inline void vmsvga_update_rect(struct 
vmsvga_state_s *s,
 dpy_gfx_update(s->vga.con, x, y, w, h);
 }
 
+static inline void vmsvga_update_rect_flush(struct vmsvga_state_s *s)
+{
+struct vmsvga_rect_s *rect;
+
+if (s->invalidated) {
+s->redraw_fifo_last = 0;
+return;
+}
+/* Overlapping region updates can be optimised out here - if someone
+ * knows a smart algorithm to do that, please share.  */
+for (int i = 0; i < s->redraw_fifo_last; i++) {
+rect = &s->redraw_fifo[i];
+vmsvga_update_rect(s, rect->x, rect->y, rect->w, rect->h);
+}
+
+s->redraw_fifo_last = 0;
+}
+
 static inline void vmsvga_update_rect_delayed(struct vmsvga_state_s *s,
 int x, int y, int w, int h)
 {
+
+if (s->redraw_fifo_last >= REDRAW_FIFO_LEN) {
+trace_vmware_update_rect_delayed_flush();
+vmsvga_update_rect_flush(s);
+}
+
 struct vmsvga_rect_s *rect = &s->redraw_fifo[s->redraw_fifo_last++];
 
-s->redraw_fifo_last &= REDRAW_FIFO_LEN - 1;
 rect->x = x;
 rect->y = y;
 rect->w = w;
 rect->h = h;
 }
 
-static inline void vmsvga_update_rect_flush(struct vmsvga_state_s *s)
-{
-struct vmsvga_rect_s *rect;
-
-if (s->invalidated) {
-s->redraw_fifo_first = s->redraw_fifo_last;
-return;
-}
-/* Overlapping region updates can be optimised out here - if someone
- * knows a smart algorithm to do that, please share.  */
-while (s->redraw_fifo_first != s->redraw_fifo_last) {
-rect = &s->redraw_fifo[s->redraw_fifo_first++];
-s->redraw_fifo_first &= REDRAW_FIFO_LEN - 1;
-vmsvga_update_rect(s, rect->x, rect->y, rect->w, rect->h);
-}
-}
-
 #ifdef HW_RECT_ACCEL
 static inline int vmsvga_copy_rect(struct vmsvga_state_s *s,
 int x0, int y0, int x1, int y1, int w, int h)
@@ -1161,7 +1167,6 @@ static void vmsvga_reset(DeviceState *dev)
 s->config = 0;
 s->svgaid = SVGA_ID;
 s->cursor.on = 0;
-s->redraw_fifo_first = 0;
 s->redraw_fifo_last = 0;
 s->syncing = 0;
 
diff --git a/hw/display/trace-events b/hw/display/trace-events
index 91efc88f04f5..0c0ffcbe42c1 100644
--- a/hw/display/trace-events
+++ b/hw/display/trace-events
@@ -24,6 +24,7 @@ vmware_setmode(uint32_t w, uint32_t h, uint32_t bpp) "%dx%d @ 
%d bpp"
 vmware_verify_rect_less_than_zero(const char *name, const char *param, int x) 
"%s: %s was < 0 (%d)"
 vmware_verify_rect_greater_than_bound(const char *name, const char *param, int 
bound, int x) "%s: %s was > %d (%d)"
 vmware_verify_rect_surface_bound_exceeded(const char *name, const char 
*component, int bound, const char *param1, int value1, const char *param2, int 
value2) "%s: %s > %d (%s: %d, %s: %d)"
+vmware_update_rect_delayed_flush(void) "display update FIFO full - forcing 
flush"
 
 # virtio-gpu-base.c
 virtio_gpu_features(bool virgl) "virgl %d"
-- 
2.35.1




[PULL 5/9] qapi/ui: add 'display-update' command for changing listen address

2022-04-27 Thread Gerd Hoffmann
From: Vladimir Sementsov-Ogievskiy 

Add the possibility to change the addresses on which the VNC server
listens for new connections. Prior to 6.0 this functionality was available
through the 'change' QMP command, which has since been removed.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Daniel P. Berrangé 
Message-Id: <20220401143936.356460-3-vsement...@openvz.org>
Signed-off-by: Gerd Hoffmann 
---
 include/ui/console.h|  1 +
 monitor/qmp-cmds.c  | 15 
 ui/vnc.c| 23 
 docs/about/removed-features.rst |  3 +-
 qapi/ui.json| 65 +
 5 files changed, 106 insertions(+), 1 deletion(-)

diff --git a/include/ui/console.h b/include/ui/console.h
index 0f84861933e1..c44b28a972ca 100644
--- a/include/ui/console.h
+++ b/include/ui/console.h
@@ -518,6 +518,7 @@ int vnc_display_pw_expire(const char *id, time_t expires);
 void vnc_parse(const char *str);
 int vnc_init_func(void *opaque, QemuOpts *opts, Error **errp);
 bool vnc_display_reload_certs(const char *id,  Error **errp);
+bool vnc_display_update(DisplayUpdateOptionsVNC *arg, Error **errp);
 
 /* input.c */
 int index_from_key(const char *key, size_t key_length);
diff --git a/monitor/qmp-cmds.c b/monitor/qmp-cmds.c
index 5e7302cbb995..1ebb89f46c12 100644
--- a/monitor/qmp-cmds.c
+++ b/monitor/qmp-cmds.c
@@ -346,6 +346,21 @@ void qmp_display_reload(DisplayReloadOptions *arg, Error 
**errp)
 }
 }
 
+void qmp_display_update(DisplayUpdateOptions *arg, Error **errp)
+{
+switch (arg->type) {
+case DISPLAY_UPDATE_TYPE_VNC:
+#ifdef CONFIG_VNC
+vnc_display_update(&arg->u.vnc, errp);
+#else
+error_setg(errp, "vnc is invalid, missing 'CONFIG_VNC'");
+#endif
+break;
+default:
+abort();
+}
+}
+
 static int qmp_x_query_rdma_foreach(Object *obj, void *opaque)
 {
 RdmaProvider *rdma;
diff --git a/ui/vnc.c b/ui/vnc.c
index 77a660fccb3f..b02cb3f405b9 100644
--- a/ui/vnc.c
+++ b/ui/vnc.c
@@ -3981,6 +3981,29 @@ static int vnc_display_listen(VncDisplay *vd,
 return 0;
 }
 
+bool vnc_display_update(DisplayUpdateOptionsVNC *arg, Error **errp)
+{
+VncDisplay *vd = vnc_display_find(NULL);
+
+if (!vd) {
+error_setg(errp, "Can not find vnc display");
+return false;
+}
+
+if (arg->has_addresses) {
+if (vd->listener) {
+qio_net_listener_disconnect(vd->listener);
+object_unref(OBJECT(vd->listener));
+vd->listener = NULL;
+}
+
+if (vnc_display_listen(vd, arg->addresses, NULL, errp) < 0) {
+return false;
+}
+}
+
+return true;
+}
 
 void vnc_display_open(const char *id, Error **errp)
 {
diff --git a/docs/about/removed-features.rst b/docs/about/removed-features.rst
index 4b831ea29176..b367418ca7da 100644
--- a/docs/about/removed-features.rst
+++ b/docs/about/removed-features.rst
@@ -355,7 +355,8 @@ documentation of ``query-hotpluggable-cpus`` for additional 
details.
 ``change`` (removed in 6.0)
 '''
 
-Use ``blockdev-change-medium`` or ``change-vnc-password`` instead.
+Use ``blockdev-change-medium`` or ``change-vnc-password`` or
+``display-update`` instead.
 
 ``query-events`` (removed in 6.0)
 '
diff --git a/qapi/ui.json b/qapi/ui.json
index 596f37fc37aa..059302a5efcb 100644
--- a/qapi/ui.json
+++ b/qapi/ui.json
@@ -1468,3 +1468,68 @@
 { 'command': 'display-reload',
   'data': 'DisplayReloadOptions',
   'boxed' : true }
+
+##
+# @DisplayUpdateType:
+#
+# Available DisplayUpdate types.
+#
+# @vnc: VNC display
+#
+# Since: 7.1
+#
+##
+{ 'enum': 'DisplayUpdateType',
+  'data': ['vnc'] }
+
+##
+# @DisplayUpdateOptionsVNC:
+#
+# Specify the VNC update options.
+#
+# @addresses: If specified, change set of addresses
+# to listen for connections. Addresses configured
+# for websockets are not touched.
+#
+# Since: 7.1
+#
+##
+{ 'struct': 'DisplayUpdateOptionsVNC',
+  'data': { '*addresses': ['SocketAddress'] } }
+
+##
+# @DisplayUpdateOptions:
+#
+# Options of the display configuration update.
+#
+# @type: Specify the display type.
+#
+# Since: 7.1
+#
+##
+{ 'union': 'DisplayUpdateOptions',
+  'base': {'type': 'DisplayUpdateType'},
+  'discriminator': 'type',
+  'data': { 'vnc': 'DisplayUpdateOptionsVNC' } }
+
+##
+# @display-update:
+#
+# Update display configuration.
+#
+# Returns: Nothing on success.
+#
+# Since: 7.1
+#
+# Example:
+#
+# -> { "execute": "display-update",
+#  "arguments": { "type": "vnc", "addresses":
+# [ { "type": "inet", "host": "0.0.0.0",
+# "port": "5901" } ] } }
+# <- { "return": {} }
+#
+##
+{ 'command': 'display-update',
+  'data': 'DisplayUpdateOptions',
+  'boxed' : true }
-- 
2.35.1




[PULL 3/9] Added parameter to take screenshot with screendump as PNG

2022-04-27 Thread Gerd Hoffmann
From: Kshitij Suri 

Currently screendump only supports the PPM format, which is uncompressed. Add
a "format" parameter to the QMP and HMP screendump commands to support PNG image
capture using libpng.

QMP example usage:
{ "execute": "screendump", "arguments": { "filename": "/tmp/image",
"format":"png" } }

HMP example usage:
screendump /tmp/image -f png

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/718

Signed-off-by: Kshitij Suri 

Reviewed-by: Daniel P. Berrangé 
Acked-by: Markus Armbruster 
Acked-by: Dr. David Alan Gilbert 
Message-Id: <20220408071336.99839-3-kshitij.s...@nutanix.com>
Signed-off-by: Gerd Hoffmann 
---
 monitor/hmp-cmds.c |  12 +-
 ui/console.c   | 101 +++--
 hmp-commands.hx|  11 ++---
 qapi/ui.json   |  24 +--
 4 files changed, 136 insertions(+), 12 deletions(-)

diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index 634968498b58..2442bfa98984 100644
--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
@@ -1720,9 +1720,19 @@ hmp_screendump(Monitor *mon, const QDict *qdict)
 const char *filename = qdict_get_str(qdict, "filename");
 const char *id = qdict_get_try_str(qdict, "device");
 int64_t head = qdict_get_try_int(qdict, "head", 0);
+const char *input_format  = qdict_get_try_str(qdict, "format");
 Error *err = NULL;
+ImageFormat format;
 
-qmp_screendump(filename, id != NULL, id, id != NULL, head, &err);
+format = qapi_enum_parse(&ImageFormat_lookup, input_format,
+  IMAGE_FORMAT_PPM, &err);
+if (err) {
+goto end;
+}
+
+qmp_screendump(filename, id != NULL, id, id != NULL, head,
+   input_format != NULL, format, &err);
+end:
 hmp_handle_error(mon, err);
 }
 
diff --git a/ui/console.c b/ui/console.c
index 1752f2ec8897..15d0f6affd4c 100644
--- a/ui/console.c
+++ b/ui/console.c
@@ -37,6 +37,9 @@
 #include "exec/memory.h"
 #include "io/channel-file.h"
 #include "qom/object.h"
+#ifdef CONFIG_PNG
+#include 
+#endif
 
 #define DEFAULT_BACKSCROLL 512
 #define CONSOLE_CURSOR_PERIOD 500
@@ -291,6 +294,89 @@ void graphic_hw_invalidate(QemuConsole *con)
 }
 }
 
+#ifdef CONFIG_PNG
+/**
+ * png_save: Take a screenshot as PNG
+ *
+ * Saves screendump as a PNG file
+ *
+ * Returns true for success or false for error.
+ *
+ * @fd: File descriptor for PNG file.
+ * @image: Image data in pixman format.
+ * @errp: Pointer to an error.
+ */
+static bool png_save(int fd, pixman_image_t *image, Error **errp)
+{
+int width = pixman_image_get_width(image);
+int height = pixman_image_get_height(image);
+g_autofree png_struct *png_ptr = NULL;
+g_autofree png_info *info_ptr = NULL;
+g_autoptr(pixman_image_t) linebuf =
+qemu_pixman_linebuf_create(PIXMAN_a8r8g8b8, width);
+uint8_t *buf = (uint8_t *)pixman_image_get_data(linebuf);
+FILE *f = fdopen(fd, "wb");
+int y;
+if (!f) {
+error_setg_errno(errp, errno,
+ "Failed to create file from file descriptor");
+return false;
+}
+
+png_ptr = png_create_write_struct(PNG_LIBPNG_VER_STRING, NULL,
+  NULL, NULL);
+if (!png_ptr) {
+error_setg(errp, "PNG creation failed. Unable to write struct");
+fclose(f);
+return false;
+}
+
+info_ptr = png_create_info_struct(png_ptr);
+
+if (!info_ptr) {
+error_setg(errp, "PNG creation failed. Unable to write info");
+fclose(f);
+png_destroy_write_struct(&png_ptr, &info_ptr);
+return false;
+}
+
+png_init_io(png_ptr, f);
+
+png_set_IHDR(png_ptr, info_ptr, width, height, 8,
+ PNG_COLOR_TYPE_RGB_ALPHA, PNG_INTERLACE_NONE,
+ PNG_COMPRESSION_TYPE_BASE, PNG_FILTER_TYPE_BASE);
+
+png_write_info(png_ptr, info_ptr);
+
+for (y = 0; y < height; ++y) {
+qemu_pixman_linebuf_fill(linebuf, image, width, 0, y);
+png_write_row(png_ptr, buf);
+}
+qemu_pixman_image_unref(linebuf);
+
+png_write_end(png_ptr, NULL);
+
+png_destroy_write_struct(&png_ptr, &info_ptr);
+
+if (fclose(f) != 0) {
+error_setg_errno(errp, errno,
+ "PNG creation failed. Unable to close file");
+return false;
+}
+
+return true;
+}
+
+#else /* no png support */
+
+static bool png_save(int fd, pixman_image_t *image, Error **errp)
+{
+error_setg(errp, "Enable PNG support with libpng for screendump");
+return false;
+}
+
+#endif /* CONFIG_PNG */
+
 static bool ppm_save(int fd, pixman_image_t *image, Error **errp)
 {
 int width = pixman_image_get_width(image);
@@ -329,7 +415,8 @@ static void graphic_hw_update_bh(void *con)
 /* Safety: coroutine-only, concurrent-coroutine safe, main thread only */
 void coroutine_fn
 qmp_screendump(const char *filename, bool has_device, const char *device,
-   bool has_head, int64_t head, Error **errp)
+   

[PULL 4/9] ui/vnc: refactor arrays of addresses to SocketAddressList

2022-04-27 Thread Gerd Hoffmann
From: Vladimir Sementsov-Ogievskiy 

Let's use SocketAddressList instead of dynamic arrays.
Benefits:
 - Automatic cleanup: don't need specific freeing function and drop
   some gotos.
 - Less indirection: no triple asterisk anymore!
 - Prepare for the following commit, which will reuse new interface of
   vnc_display_listen().

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Marc-André Lureau 
Reviewed-by: Daniel P. Berrangé 
Message-Id: <20220401143936.356460-2-vsement...@openvz.org>
Signed-off-by: Gerd Hoffmann 
---
 ui/vnc.c | 129 ++-
 1 file changed, 51 insertions(+), 78 deletions(-)

diff --git a/ui/vnc.c b/ui/vnc.c
index 7d55e1500a42..77a660fccb3f 100644
--- a/ui/vnc.c
+++ b/ui/vnc.c
@@ -3820,30 +3820,19 @@ static int vnc_display_get_address(const char *addrstr,
 return ret;
 }
 
-static void vnc_free_addresses(SocketAddress ***retsaddr,
-   size_t *retnsaddr)
-{
-size_t i;
-
-for (i = 0; i < *retnsaddr; i++) {
-qapi_free_SocketAddress((*retsaddr)[i]);
-}
-g_free(*retsaddr);
-
-*retsaddr = NULL;
-*retnsaddr = 0;
-}
-
 static int vnc_display_get_addresses(QemuOpts *opts,
  bool reverse,
- SocketAddress ***retsaddr,
- size_t *retnsaddr,
- SocketAddress ***retwsaddr,
- size_t *retnwsaddr,
+ SocketAddressList **saddr_list_ret,
+ SocketAddressList **wsaddr_list_ret,
  Error **errp)
 {
 SocketAddress *saddr = NULL;
 SocketAddress *wsaddr = NULL;
+g_autoptr(SocketAddressList) saddr_list = NULL;
+SocketAddressList **saddr_tail = &saddr_list;
+SocketAddress *single_saddr = NULL;
+g_autoptr(SocketAddressList) wsaddr_list = NULL;
+SocketAddressList **wsaddr_tail = &wsaddr_list;
 QemuOptsIter addriter;
 const char *addr;
 int to = qemu_opt_get_number(opts, "to", 0);
@@ -3852,23 +3841,16 @@ static int vnc_display_get_addresses(QemuOpts *opts,
 bool ipv4 = qemu_opt_get_bool(opts, "ipv4", false);
 bool ipv6 = qemu_opt_get_bool(opts, "ipv6", false);
 int displaynum = -1;
-int ret = -1;
-
-*retsaddr = NULL;
-*retnsaddr = 0;
-*retwsaddr = NULL;
-*retnwsaddr = 0;
 
 addr = qemu_opt_get(opts, "vnc");
 if (addr == NULL || g_str_equal(addr, "none")) {
-ret = 0;
-goto cleanup;
+return 0;
 }
 if (qemu_opt_get(opts, "websocket") &&
 !qcrypto_hash_supports(QCRYPTO_HASH_ALG_SHA1)) {
 error_setg(errp,
"SHA1 hash support is required for websockets");
-goto cleanup;
+return -1;
 }
 
 qemu_opt_iter_init(&addriter, opts, "vnc");
@@ -3879,7 +3861,7 @@ static int vnc_display_get_addresses(QemuOpts *opts,
  ipv4, ipv6,
  &saddr, errp);
 if (rv < 0) {
-goto cleanup;
+return -1;
 }
 /* Historical compat - first listen address can be used
  * to set the default websocket port
@@ -3887,13 +3869,16 @@ static int vnc_display_get_addresses(QemuOpts *opts,
 if (displaynum == -1) {
 displaynum = rv;
 }
-*retsaddr = g_renew(SocketAddress *, *retsaddr, *retnsaddr + 1);
-(*retsaddr)[(*retnsaddr)++] = saddr;
+QAPI_LIST_APPEND(saddr_tail, saddr);
 }
 
-/* If we had multiple primary displays, we don't do defaults
- * for websocket, and require explicit config instead. */
-if (*retnsaddr > 1) {
+if (saddr_list && !saddr_list->next) {
+single_saddr = saddr_list->value;
+} else {
+/*
+ * If we had multiple primary displays, we don't do defaults
+ * for websocket, and require explicit config instead.
+ */
 displaynum = -1;
 }
 
@@ -3903,57 +3888,50 @@ static int vnc_display_get_addresses(QemuOpts *opts,
 has_ipv4, has_ipv6,
 ipv4, ipv6,
 &wsaddr, errp) < 0) {
-goto cleanup;
+return -1;
 }
 
 /* Historical compat - if only a single listen address was
  * provided, then this is used to set the default listen
  * address for websocket too
  */
-if (*retnsaddr == 1 &&
-(*retsaddr)[0]->type == SOCKET_ADDRESS_TYPE_INET &&
+if (single_saddr &&
+single_saddr->type == SOCKET_ADDRESS_TYPE_INET &&
 wsaddr->type == SOCKET_ADDRESS_TYPE_INET &&
 g_str_equal(wsaddr->u.inet.host, "") &&
-!g_str_equal((*retsaddr)[0]->u.inet.host, "")) {
+!g_str_equal(single_saddr->u.inet.host, "")) {
 

  1   2   3   >