date:20210818

Re: [Patch 2/2] hw/arm/xlnx-zynqmp: Add unimplemented APU mmio

2021-08-18 Thread Alistair Francis

On Thu, Aug 19, 2021 at 1:22 PM Tong Ho wrote:
>
> Add unimplemented APU mmio region to xlnx-zynqmp for booting
> bare-metal guests built with standalone bsp published at:
>   
> https://github.com/Xilinx/embeddedsw/tree/master/lib/bsp/standalone/src/arm/ARMv8/64bit
>
> Signed-off-by: Tong Ho 

Acked-by: Alistair Francis 

Alistair

> ---
>  hw/arm/xlnx-zynqmp.c | 32 
>  include/hw/arm/xlnx-zynqmp.h |  7 +++
>  2 files changed, 39 insertions(+)
>
> diff --git a/hw/arm/xlnx-zynqmp.c b/hw/arm/xlnx-zynqmp.c
> index 3597e8db4d..790df2b6f1 100644
> --- a/hw/arm/xlnx-zynqmp.c
> +++ b/hw/arm/xlnx-zynqmp.c
> @@ -20,6 +20,7 @@
>  #include "qemu/module.h"
>  #include "hw/arm/xlnx-zynqmp.h"
>  #include "hw/intc/arm_gic_common.h"
> +#include "hw/misc/unimp.h"
>  #include "hw/boards.h"
>  #include "sysemu/kvm.h"
>  #include "sysemu/sysemu.h"
> @@ -56,6 +57,9 @@
>  #define DPDMA_ADDR  0xfd4c
>  #define DPDMA_IRQ   116
>
> +#define APU_ADDR0xfd5c
> +#define APU_SIZE0x100
> +
>  #define IPI_ADDR0xFF30
>  #define IPI_IRQ 64
>
> @@ -222,6 +226,32 @@ static void xlnx_zynqmp_create_rpu(MachineState *ms, XlnxZynqMPState *s,
>  qdev_realize(DEVICE(&s->rpu_cluster), NULL, &error_fatal);
>  }
>
> +static void xlnx_zynqmp_create_unimp_mmio(XlnxZynqMPState *s)
> +{
> +static const struct UnimpInfo {
> +const char *name;
> +hwaddr base;
> +hwaddr size;
> +} unimp_areas[ARRAY_SIZE(s->mr_unimp)] = {
> +{ .name = "apu", APU_ADDR, APU_SIZE },
> +};
> +
> +unsigned nr;
> +
> +for (nr = 0; nr < ARRAY_SIZE(unimp_areas); nr++) {
> +const struct UnimpInfo *info = &unimp_areas[nr];
> +DeviceState *dev = qdev_new(TYPE_UNIMPLEMENTED_DEVICE);
> +SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
> +
> +qdev_prop_set_string(dev, "name", info->name);
> +qdev_prop_set_uint64(dev, "size", info->size);
> +object_property_add_child(OBJECT(s), info->name, OBJECT(dev));
> +
> +sysbus_realize_and_unref(sbd, &error_fatal);
> +sysbus_mmio_map(sbd, 0, info->base);
> +}
> +}
> +
>  static void xlnx_zynqmp_init(Object *obj)
>  {
>  MachineState *ms = MACHINE(qdev_get_machine());
> @@ -616,6 +646,8 @@ static void xlnx_zynqmp_realize(DeviceState *dev, Error **errp)
>  sysbus_mmio_map(SYS_BUS_DEVICE(&s->rtc), 0, RTC_ADDR);
>  sysbus_connect_irq(SYS_BUS_DEVICE(&s->rtc), 0, gic_spi[RTC_IRQ]);
>
> +xlnx_zynqmp_create_unimp_mmio(s);
> +
>  for (i = 0; i < XLNX_ZYNQMP_NUM_GDMA_CH; i++) {
>  if (!object_property_set_uint(OBJECT(&s->gdma[i]), "bus-width", 128,
>errp)) {
> diff --git a/include/hw/arm/xlnx-zynqmp.h b/include/hw/arm/xlnx-zynqmp.h
> index d3e2ef97f6..c84fe15996 100644
> --- a/include/hw/arm/xlnx-zynqmp.h
> +++ b/include/hw/arm/xlnx-zynqmp.h
> @@ -79,6 +79,11 @@ OBJECT_DECLARE_SIMPLE_TYPE(XlnxZynqMPState, XLNX_ZYNQMP)
>  #define XLNX_ZYNQMP_MAX_RAM_SIZE (XLNX_ZYNQMP_MAX_LOW_RAM_SIZE + \
>XLNX_ZYNQMP_MAX_HIGH_RAM_SIZE)
>
> +/*
> + * Unimplemented mmio regions needed to boot some images.
> + */
> +#define XLNX_ZYNQMP_NUM_UNIMP_AREAS 1
> +
>  struct XlnxZynqMPState {
>  /*< private >*/
>  DeviceState parent_obj;
> @@ -96,6 +101,8 @@ struct XlnxZynqMPState {
>  MemoryRegion *ddr_ram;
>  MemoryRegion ddr_ram_low, ddr_ram_high;
>
> +MemoryRegion mr_unimp[XLNX_ZYNQMP_NUM_UNIMP_AREAS];
> +
>  CadenceGEMState gem[XLNX_ZYNQMP_NUM_GEMS];
>  CadenceUARTState uart[XLNX_ZYNQMP_NUM_UARTS];
>  XlnxZynqMPCANState can[XLNX_ZYNQMP_NUM_CAN];
> --
> 2.25.1
>
>

Re: [Patch 1/2] hw/arm/xlnx-versal: Add unimplemented APU mmio

2021-08-18 Thread Alistair Francis

On Thu, Aug 19, 2021 at 1:20 PM Tong Ho wrote:
>
> Add unimplemented APU mmio region to xlnx-versal for booting
> bare-metal guests built with standalone bsp published at:
>   
> https://github.com/Xilinx/embeddedsw/tree/master/lib/bsp/standalone/src/arm/ARMv8/64bit
>
> Signed-off-by: Tong Ho 

Acked-by: Alistair Francis 

Alistair

> ---
>  hw/arm/xlnx-versal.c | 2 ++
>  include/hw/arm/xlnx-versal.h | 2 ++
>  2 files changed, 4 insertions(+)
>
> diff --git a/hw/arm/xlnx-versal.c b/hw/arm/xlnx-versal.c
> index fb776834f7..cb6ec0a4a0 100644
> --- a/hw/arm/xlnx-versal.c
> +++ b/hw/arm/xlnx-versal.c
> @@ -376,6 +376,8 @@ static void versal_unimp(Versal *s)
>  MM_CRL, MM_CRL_SIZE);
>  versal_unimp_area(s, "crf", &s->mr_ps,
>  MM_FPD_CRF, MM_FPD_CRF_SIZE);
> +versal_unimp_area(s, "apu", &s->mr_ps,
> +MM_FPD_FPD_APU, MM_FPD_FPD_APU_SIZE);
>  versal_unimp_area(s, "crp", &s->mr_ps,
>  MM_PMC_CRP, MM_PMC_CRP_SIZE);
>  versal_unimp_area(s, "iou-scntr", &s->mr_ps,
> diff --git a/include/hw/arm/xlnx-versal.h b/include/hw/arm/xlnx-versal.h
> index 22a8fa5d11..9b79051747 100644
> --- a/include/hw/arm/xlnx-versal.h
> +++ b/include/hw/arm/xlnx-versal.h
> @@ -167,6 +167,8 @@ struct Versal {
>  #define MM_IOU_SCNTRS_SIZE  0x1
>  #define MM_FPD_CRF  0xfd1aU
>  #define MM_FPD_CRF_SIZE 0x14
> +#define MM_FPD_FPD_APU  0xfd5c
> +#define MM_FPD_FPD_APU_SIZE 0x100
>
>  #define MM_PMC_SD0  0xf104U
>  #define MM_PMC_SD0_SIZE 0x1
> --
> 2.25.1
>
>

Re: [PATCH v2 07/21] target/riscv: Use gen_arith for mulh and mulhu

2021-08-18 Thread Alistair Francis

On Wed, Aug 18, 2021 at 7:23 AM Richard Henderson wrote:
>
> Split out gen_mulh and gen_mulhu and use the common helper.
>
> Signed-off-by: Richard Henderson 

Reviewed-by: Alistair Francis 

Alistair

> ---
>  target/riscv/insn_trans/trans_rvm.c.inc | 40 +++--
>  1 file changed, 18 insertions(+), 22 deletions(-)
>
> diff --git a/target/riscv/insn_trans/trans_rvm.c.inc b/target/riscv/insn_trans/trans_rvm.c.inc
> index 3d93b24c25..80552be7a3 100644
> --- a/target/riscv/insn_trans/trans_rvm.c.inc
> +++ b/target/riscv/insn_trans/trans_rvm.c.inc
> @@ -25,20 +25,18 @@ static bool trans_mul(DisasContext *ctx, arg_mul *a)
>  return gen_arith(ctx, a, EXT_NONE, tcg_gen_mul_tl);
>  }
>
> +static void gen_mulh(TCGv ret, TCGv s1, TCGv s2)
> +{
> +TCGv discard = tcg_temp_new();
> +
> +tcg_gen_muls2_tl(discard, ret, s1, s2);
> +tcg_temp_free(discard);
> +}
> +
>  static bool trans_mulh(DisasContext *ctx, arg_mulh *a)
>  {
>  REQUIRE_EXT(ctx, RVM);
> -TCGv source1 = tcg_temp_new();
> -TCGv source2 = tcg_temp_new();
> -gen_get_gpr(ctx, source1, a->rs1);
> -gen_get_gpr(ctx, source2, a->rs2);
> -
> -tcg_gen_muls2_tl(source2, source1, source1, source2);
> -
> -gen_set_gpr(ctx, a->rd, source1);
> -tcg_temp_free(source1);
> -tcg_temp_free(source2);
> -return true;
> +return gen_arith(ctx, a, EXT_NONE, gen_mulh);
>  }
>
>  static bool trans_mulhsu(DisasContext *ctx, arg_mulhsu *a)
> @@ -47,20 +45,18 @@ static bool trans_mulhsu(DisasContext *ctx, arg_mulhsu *a)
>  return gen_arith(ctx, a, EXT_NONE, gen_mulhsu);
>  }
>
> +static void gen_mulhu(TCGv ret, TCGv s1, TCGv s2)
> +{
> +TCGv discard = tcg_temp_new();
> +
> +tcg_gen_mulu2_tl(discard, ret, s1, s2);
> +tcg_temp_free(discard);
> +}
> +
>  static bool trans_mulhu(DisasContext *ctx, arg_mulhu *a)
>  {
>  REQUIRE_EXT(ctx, RVM);
> -TCGv source1 = tcg_temp_new();
> -TCGv source2 = tcg_temp_new();
> -gen_get_gpr(ctx, source1, a->rs1);
> -gen_get_gpr(ctx, source2, a->rs2);
> -
> -tcg_gen_mulu2_tl(source2, source1, source1, source2);
> -
> -gen_set_gpr(ctx, a->rd, source1);
> -tcg_temp_free(source1);
> -tcg_temp_free(source2);
> -return true;
> +return gen_arith(ctx, a, EXT_NONE, gen_mulhu);
>  }
>
>  static bool trans_div(DisasContext *ctx, arg_div *a)
> --
> 2.25.1
>
>

Re: [PATCH v2 06/21] target/riscv: Remove gen_arith_div*

2021-08-18 Thread Alistair Francis

On Wed, Aug 18, 2021 at 7:21 AM Richard Henderson wrote:
>
> Use ctx->w and the enhanced gen_arith function.
>
> Signed-off-by: Richard Henderson 

Reviewed-by: Alistair Francis 

Alistair

> ---
>  target/riscv/translate.c| 42 -
>  target/riscv/insn_trans/trans_rvm.c.inc | 16 +-
>  2 files changed, 8 insertions(+), 50 deletions(-)
>
> diff --git a/target/riscv/translate.c b/target/riscv/translate.c
> index 4819682bf1..e337dca01b 100644
> --- a/target/riscv/translate.c
> +++ b/target/riscv/translate.c
> @@ -491,48 +491,6 @@ static bool gen_arith_imm_tl(DisasContext *ctx, arg_i *a, DisasExtend ext,
>  return true;
>  }
>
> -static bool gen_arith_div_w(DisasContext *ctx, arg_r *a,
> -void(*func)(TCGv, TCGv, TCGv))
> -{
> -TCGv source1, source2;
> -source1 = tcg_temp_new();
> -source2 = tcg_temp_new();
> -
> -gen_get_gpr(ctx, source1, a->rs1);
> -gen_get_gpr(ctx, source2, a->rs2);
> -tcg_gen_ext32s_tl(source1, source1);
> -tcg_gen_ext32s_tl(source2, source2);
> -
> -(*func)(source1, source1, source2);
> -
> -tcg_gen_ext32s_tl(source1, source1);
> -gen_set_gpr(ctx, a->rd, source1);
> -tcg_temp_free(source1);
> -tcg_temp_free(source2);
> -return true;
> -}
> -
> -static bool gen_arith_div_uw(DisasContext *ctx, arg_r *a,
> -void(*func)(TCGv, TCGv, TCGv))
> -{
> -TCGv source1, source2;
> -source1 = tcg_temp_new();
> -source2 = tcg_temp_new();
> -
> -gen_get_gpr(ctx, source1, a->rs1);
> -gen_get_gpr(ctx, source2, a->rs2);
> -tcg_gen_ext32u_tl(source1, source1);
> -tcg_gen_ext32u_tl(source2, source2);
> -
> -(*func)(source1, source1, source2);
> -
> -tcg_gen_ext32s_tl(source1, source1);
> -gen_set_gpr(ctx, a->rd, source1);
> -tcg_temp_free(source1);
> -tcg_temp_free(source2);
> -return true;
> -}
> -
>  static void gen_pack(TCGv ret, TCGv arg1, TCGv arg2)
>  {
>  tcg_gen_deposit_tl(ret, arg1, arg2,
> diff --git a/target/riscv/insn_trans/trans_rvm.c.inc b/target/riscv/insn_trans/trans_rvm.c.inc
> index 013b3f7009..3d93b24c25 100644
> --- a/target/riscv/insn_trans/trans_rvm.c.inc
> +++ b/target/riscv/insn_trans/trans_rvm.c.inc
> @@ -99,30 +99,30 @@ static bool trans_divw(DisasContext *ctx, arg_divw *a)
>  {
>  REQUIRE_64BIT(ctx);
>  REQUIRE_EXT(ctx, RVM);
> -
> -return gen_arith_div_w(ctx, a, &gen_div);
> +ctx->w = true;
> +return gen_arith(ctx, a, EXT_SIGN, gen_div);
>  }
>
>  static bool trans_divuw(DisasContext *ctx, arg_divuw *a)
>  {
>  REQUIRE_64BIT(ctx);
>  REQUIRE_EXT(ctx, RVM);
> -
> -return gen_arith_div_uw(ctx, a, &gen_divu);
> +ctx->w = true;
> +return gen_arith(ctx, a, EXT_ZERO, gen_divu);
>  }
>
>  static bool trans_remw(DisasContext *ctx, arg_remw *a)
>  {
>  REQUIRE_64BIT(ctx);
>  REQUIRE_EXT(ctx, RVM);
> -
> -return gen_arith_div_w(ctx, a, &gen_rem);
> +ctx->w = true;
> +return gen_arith(ctx, a, EXT_SIGN, gen_rem);
>  }
>
>  static bool trans_remuw(DisasContext *ctx, arg_remuw *a)
>  {
>  REQUIRE_64BIT(ctx);
>  REQUIRE_EXT(ctx, RVM);
> -
> -return gen_arith_div_uw(ctx, a, &gen_remu);
> +ctx->w = true;
> +return gen_arith(ctx, a, EXT_ZERO, gen_remu);
>  }
> --
> 2.25.1
>
>

Re: [PATCH v2 05/21] target/riscv: Add DisasExtend to gen_arith*

2021-08-18 Thread Alistair Francis

On Wed, Aug 18, 2021 at 7:23 AM Richard Henderson wrote:
>
> Most arithmetic does not require extending the inputs.
> Exceptions include division, comparison and minmax.
>
> Begin using ctx->w, which allows elimination of gen_addw,
> gen_subw, gen_mulw.
>
> Signed-off-by: Richard Henderson 

Reviewed-by: Alistair Francis 

Alistair

> ---
>  target/riscv/translate.c| 69 +++--
>  target/riscv/insn_trans/trans_rvb.c.inc | 30 +--
>  target/riscv/insn_trans/trans_rvi.c.inc | 39 --
>  target/riscv/insn_trans/trans_rvm.c.inc | 16 +++---
>  4 files changed, 64 insertions(+), 90 deletions(-)
>
> diff --git a/target/riscv/translate.c b/target/riscv/translate.c
> index d5cf5e5826..4819682bf1 100644
> --- a/target/riscv/translate.c
> +++ b/target/riscv/translate.c
> @@ -229,7 +229,7 @@ static void gen_get_gpr(DisasContext *ctx, TCGv t, int reg_num)
>  tcg_gen_mov_tl(t, get_gpr(ctx, reg_num, EXT_NONE));
>  }
>
> -static TCGv __attribute__((unused)) dest_gpr(DisasContext *ctx, int reg_num)
> +static TCGv dest_gpr(DisasContext *ctx, int reg_num)
>  {
>  if (reg_num == 0 || ctx->w) {
>  return temp_new(ctx);
> @@ -466,57 +466,31 @@ static int ex_rvc_shifti(DisasContext *ctx, int imm)
>  /* Include the auto-generated decoder for 32 bit insn */
>  #include "decode-insn32.c.inc"
>
> -static bool gen_arith_imm_fn(DisasContext *ctx, arg_i *a,
> +static bool gen_arith_imm_fn(DisasContext *ctx, arg_i *a, DisasExtend ext,
>   void (*func)(TCGv, TCGv, target_long))
>  {
> -TCGv source1;
> -source1 = tcg_temp_new();
> +TCGv dest = dest_gpr(ctx, a->rd);
> +TCGv src1 = get_gpr(ctx, a->rs1, ext);
>
> -gen_get_gpr(ctx, source1, a->rs1);
> +func(dest, src1, a->imm);
>
> -(*func)(source1, source1, a->imm);
> -
> -gen_set_gpr(ctx, a->rd, source1);
> -tcg_temp_free(source1);
> +gen_set_gpr(ctx, a->rd, dest);
>  return true;
>  }
>
> -static bool gen_arith_imm_tl(DisasContext *ctx, arg_i *a,
> +static bool gen_arith_imm_tl(DisasContext *ctx, arg_i *a, DisasExtend ext,
>   void (*func)(TCGv, TCGv, TCGv))
>  {
> -TCGv source1, source2;
> -source1 = tcg_temp_new();
> -source2 = tcg_temp_new();
> +TCGv dest = dest_gpr(ctx, a->rd);
> +TCGv src1 = get_gpr(ctx, a->rs1, ext);
> +TCGv src2 = tcg_constant_tl(a->imm);
>
> -gen_get_gpr(ctx, source1, a->rs1);
> -tcg_gen_movi_tl(source2, a->imm);
> +func(dest, src1, src2);
>
> -(*func)(source1, source1, source2);
> -
> -gen_set_gpr(ctx, a->rd, source1);
> -tcg_temp_free(source1);
> -tcg_temp_free(source2);
> +gen_set_gpr(ctx, a->rd, dest);
>  return true;
>  }
>
> -static void gen_addw(TCGv ret, TCGv arg1, TCGv arg2)
> -{
> -tcg_gen_add_tl(ret, arg1, arg2);
> -tcg_gen_ext32s_tl(ret, ret);
> -}
> -
> -static void gen_subw(TCGv ret, TCGv arg1, TCGv arg2)
> -{
> -tcg_gen_sub_tl(ret, arg1, arg2);
> -tcg_gen_ext32s_tl(ret, ret);
> -}
> -
> -static void gen_mulw(TCGv ret, TCGv arg1, TCGv arg2)
> -{
> -tcg_gen_mul_tl(ret, arg1, arg2);
> -tcg_gen_ext32s_tl(ret, ret);
> -}
> -
>  static bool gen_arith_div_w(DisasContext *ctx, arg_r *a,
>  void(*func)(TCGv, TCGv, TCGv))
>  {
> @@ -782,21 +756,16 @@ static void gen_add_uw(TCGv ret, TCGv arg1, TCGv arg2)
>  tcg_gen_add_tl(ret, arg1, arg2);
>  }
>
> -static bool gen_arith(DisasContext *ctx, arg_r *a,
> -  void(*func)(TCGv, TCGv, TCGv))
> +static bool gen_arith(DisasContext *ctx, arg_r *a, DisasExtend ext,
> +  void (*func)(TCGv, TCGv, TCGv))
>  {
> -TCGv source1, source2;
> -source1 = tcg_temp_new();
> -source2 = tcg_temp_new();
> +TCGv dest = dest_gpr(ctx, a->rd);
> +TCGv src1 = get_gpr(ctx, a->rs1, ext);
> +TCGv src2 = get_gpr(ctx, a->rs2, ext);
>
> -gen_get_gpr(ctx, source1, a->rs1);
> -gen_get_gpr(ctx, source2, a->rs2);
> +func(dest, src1, src2);
>
> -(*func)(source1, source1, source2);
> -
> -gen_set_gpr(ctx, a->rd, source1);
> -tcg_temp_free(source1);
> -tcg_temp_free(source2);
> +gen_set_gpr(ctx, a->rd, dest);
>  return true;
>  }
>
> diff --git a/target/riscv/insn_trans/trans_rvb.c.inc b/target/riscv/insn_trans/trans_rvb.c.inc
> index 260e15b47d..217a7d1f26 100644
> --- a/target/riscv/insn_trans/trans_rvb.c.inc
> +++ b/target/riscv/insn_trans/trans_rvb.c.inc
> @@ -38,61 +38,61 @@ static bool trans_cpop(DisasContext *ctx, arg_cpop *a)
>  static bool trans_andn(DisasContext *ctx, arg_andn *a)
>  {
>  REQUIRE_EXT(ctx, RVB);
> -return gen_arith(ctx, a, tcg_gen_andc_tl);
> +return gen_arith(ctx, a, EXT_NONE, tcg_gen_andc_tl);
>  }
>
>  static bool trans_orn(DisasContext *ctx, arg_orn *a)
>  {
>  REQUIRE_EXT(ctx, RVB);
> -return gen_arith(ctx, a, tcg_gen_orc_tl);
> +return gen_arith(ctx, a, EXT_NONE, tcg_gen_orc_tl);
>  }
>
>  static bool trans_xnor(Dis

Re: [PATCH v2 04/21] target/riscv: Introduce DisasExtend and new helpers

2021-08-18 Thread Alistair Francis

On Wed, Aug 18, 2021 at 7:23 AM Richard Henderson wrote:
>
> Introduce get_gpr, dest_gpr, temp_new -- new helpers that do not force
> tcg globals into temps, returning a constant 0 for $zero as source and
> a new temp for $zero as destination.
>
> Introduce ctx->w for simplifying word operations, such as addw.
>
> Signed-off-by: Richard Henderson 

Reviewed-by: Alistair Francis 

Alistair

> ---
>  target/riscv/translate.c | 102 +++
>  1 file changed, 82 insertions(+), 20 deletions(-)
>
> diff --git a/target/riscv/translate.c b/target/riscv/translate.c
> index d540c85a1a..d5cf5e5826 100644
> --- a/target/riscv/translate.c
> +++ b/target/riscv/translate.c
> @@ -39,15 +39,25 @@ static TCGv load_val;
>
>  #include "exec/gen-icount.h"
>
> +/*
> + * If an operation is being performed on less than TARGET_LONG_BITS,
> + * it may require the inputs to be sign- or zero-extended; which will
> + * depend on the exact operation being performed.
> + */
> +typedef enum {
> +EXT_NONE,
> +EXT_SIGN,
> +EXT_ZERO,
> +} DisasExtend;
> +
>  typedef struct DisasContext {
>  DisasContextBase base;
>  /* pc_succ_insn points to the instruction following base.pc_next */
>  target_ulong pc_succ_insn;
>  target_ulong priv_ver;
> -bool virt_enabled;
> +target_ulong misa;
>  uint32_t opcode;
>  uint32_t mstatus_fs;
> -target_ulong misa;
>  uint32_t mem_idx;
>  /* Remember the rounding mode encoded in the previous fp instruction,
> which we have already installed into env->fp_status.  Or -1 for
> @@ -55,6 +65,8 @@ typedef struct DisasContext {
> to any system register, which includes CSR_FRM, so we do not have
> to reset this known value.  */
>  int frm;
> +bool w;
> +bool virt_enabled;
>  bool ext_ifencei;
>  bool hlsx;
>  /* vector extension */
> @@ -64,7 +76,10 @@ typedef struct DisasContext {
>  uint16_t vlen;
>  uint16_t mlen;
>  bool vl_eq_vlmax;
> +uint8_t ntemp;
>  CPUState *cs;
> +TCGv zero;
> +TCGv temp[4];
>  } DisasContext;
>
>  static inline bool has_ext(DisasContext *ctx, uint32_t ext)
> @@ -172,27 +187,64 @@ static void gen_goto_tb(DisasContext *ctx, int n, target_ulong dest)
>  }
>  }
>
> -/* Wrapper for getting reg values - need to check of reg is zero since
> - * cpu_gpr[0] is not actually allocated
> +/*
> + * Wrappers for getting reg values.
> + *
> + * The $zero register does not have cpu_gpr[0] allocated -- we supply the
> + * constant zero as a source, and an uninitialized sink as destination.
> + *
> + * Further, we may provide an extension for word operations.
>   */
> -static void gen_get_gpr(DisasContext *ctx, TCGv t, int reg_num)
> +static TCGv temp_new(DisasContext *ctx)
>  {
> -if (reg_num == 0) {
> -tcg_gen_movi_tl(t, 0);
> -} else {
> -tcg_gen_mov_tl(t, cpu_gpr[reg_num]);
> -}
> +assert(ctx->ntemp < ARRAY_SIZE(ctx->temp));
> +return ctx->temp[ctx->ntemp++] = tcg_temp_new();
>  }
>
> -/* Wrapper for setting reg values - need to check of reg is zero since
> - * cpu_gpr[0] is not actually allocated. this is more for safety purposes,
> - * since we usually avoid calling the OP_TYPE_gen function if we see a write to
> - * $zero
> - */
> -static void gen_set_gpr(DisasContext *ctx, int reg_num_dst, TCGv t)
> +static TCGv get_gpr(DisasContext *ctx, int reg_num, DisasExtend ext)
>  {
> -if (reg_num_dst != 0) {
> -tcg_gen_mov_tl(cpu_gpr[reg_num_dst], t);
> +TCGv t;
> +
> +if (reg_num == 0) {
> +return ctx->zero;
> +}
> +
> +switch (ctx->w ? ext : EXT_NONE) {
> +case EXT_NONE:
> +return cpu_gpr[reg_num];
> +case EXT_SIGN:
> +t = temp_new(ctx);
> +tcg_gen_ext32s_tl(t, cpu_gpr[reg_num]);
> +return t;
> +case EXT_ZERO:
> +t = temp_new(ctx);
> +tcg_gen_ext32u_tl(t, cpu_gpr[reg_num]);
> +return t;
> +}
> +g_assert_not_reached();
> +}
> +
> +static void gen_get_gpr(DisasContext *ctx, TCGv t, int reg_num)
> +{
> +tcg_gen_mov_tl(t, get_gpr(ctx, reg_num, EXT_NONE));
> +}
> +
> +static TCGv __attribute__((unused)) dest_gpr(DisasContext *ctx, int reg_num)
> +{
> +if (reg_num == 0 || ctx->w) {
> +return temp_new(ctx);
> +}
> +return cpu_gpr[reg_num];
> +}
> +
> +static void gen_set_gpr(DisasContext *ctx, int reg_num, TCGv t)
> +{
> +if (reg_num != 0) {
> +if (ctx->w) {
> +tcg_gen_ext32s_tl(cpu_gpr[reg_num], t);
> +} else {
> +tcg_gen_mov_tl(cpu_gpr[reg_num], t);
> +}
>  }
>  }
>
> @@ -927,8 +979,11 @@ static void riscv_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
>  ctx->cs = cs;
>  }
>
> -static void riscv_tr_tb_start(DisasContextBase *db, CPUState *cpu)
> +static void riscv_tr_tb_start(DisasContextBase *dcbase, CPUState *cpu)
>  {
> +DisasContext *ctx = container_of(dcbase, DisasContext, base);
> +

Re: [PATCH v2 14/21] target/riscv: Use {get, dest}_gpr for integer load/store

2021-08-18 Thread Bin Meng

On Wed, Aug 18, 2021 at 5:29 AM Richard Henderson wrote:
>
> Signed-off-by: Richard Henderson 
> ---
>  target/riscv/insn_trans/trans_rvi.c.inc | 36 +
>  1 file changed, 19 insertions(+), 17 deletions(-)
>

Reviewed-by: Bin Meng

Re: [PATCH v2 03/21] target/riscv: Add DisasContext to gen_get_gpr, gen_set_gpr

2021-08-18 Thread Alistair Francis

On Wed, Aug 18, 2021 at 7:21 AM Richard Henderson wrote:
>
> Signed-off-by: Richard Henderson 

Reviewed-by: Alistair Francis 

Alistair

> ---
>  target/riscv/translate.c| 58 -
>  target/riscv/insn_trans/trans_rva.c.inc | 18 
>  target/riscv/insn_trans/trans_rvb.c.inc |  4 +-
>  target/riscv/insn_trans/trans_rvd.c.inc | 32 +++---
>  target/riscv/insn_trans/trans_rvf.c.inc | 32 +++---
>  target/riscv/insn_trans/trans_rvh.c.inc | 52 +++---
>  target/riscv/insn_trans/trans_rvi.c.inc | 44 +--
>  target/riscv/insn_trans/trans_rvm.c.inc | 12 ++---
>  target/riscv/insn_trans/trans_rvv.c.inc | 36 +++
>  9 files changed, 144 insertions(+), 144 deletions(-)
>
> diff --git a/target/riscv/translate.c b/target/riscv/translate.c
> index 6ae7e140d0..d540c85a1a 100644
> --- a/target/riscv/translate.c
> +++ b/target/riscv/translate.c
> @@ -175,7 +175,7 @@ static void gen_goto_tb(DisasContext *ctx, int n, target_ulong dest)
>  /* Wrapper for getting reg values - need to check of reg is zero since
>   * cpu_gpr[0] is not actually allocated
>   */
> -static inline void gen_get_gpr(TCGv t, int reg_num)
> +static void gen_get_gpr(DisasContext *ctx, TCGv t, int reg_num)
>  {
>  if (reg_num == 0) {
>  tcg_gen_movi_tl(t, 0);
> @@ -189,7 +189,7 @@ static inline void gen_get_gpr(TCGv t, int reg_num)
>   * since we usually avoid calling the OP_TYPE_gen function if we see a write to
>   * $zero
>   */
> -static inline void gen_set_gpr(int reg_num_dst, TCGv t)
> +static void gen_set_gpr(DisasContext *ctx, int reg_num_dst, TCGv t)
>  {
>  if (reg_num_dst != 0) {
>  tcg_gen_mov_tl(cpu_gpr[reg_num_dst], t);
> @@ -420,11 +420,11 @@ static bool gen_arith_imm_fn(DisasContext *ctx, arg_i *a,
>  TCGv source1;
>  source1 = tcg_temp_new();
>
> -gen_get_gpr(source1, a->rs1);
> +gen_get_gpr(ctx, source1, a->rs1);
>
>  (*func)(source1, source1, a->imm);
>
> -gen_set_gpr(a->rd, source1);
> +gen_set_gpr(ctx, a->rd, source1);
>  tcg_temp_free(source1);
>  return true;
>  }
> @@ -436,12 +436,12 @@ static bool gen_arith_imm_tl(DisasContext *ctx, arg_i *a,
>  source1 = tcg_temp_new();
>  source2 = tcg_temp_new();
>
> -gen_get_gpr(source1, a->rs1);
> +gen_get_gpr(ctx, source1, a->rs1);
>  tcg_gen_movi_tl(source2, a->imm);
>
>  (*func)(source1, source1, source2);
>
> -gen_set_gpr(a->rd, source1);
> +gen_set_gpr(ctx, a->rd, source1);
>  tcg_temp_free(source1);
>  tcg_temp_free(source2);
>  return true;
> @@ -472,15 +472,15 @@ static bool gen_arith_div_w(DisasContext *ctx, arg_r *a,
>  source1 = tcg_temp_new();
>  source2 = tcg_temp_new();
>
> -gen_get_gpr(source1, a->rs1);
> -gen_get_gpr(source2, a->rs2);
> +gen_get_gpr(ctx, source1, a->rs1);
> +gen_get_gpr(ctx, source2, a->rs2);
>  tcg_gen_ext32s_tl(source1, source1);
>  tcg_gen_ext32s_tl(source2, source2);
>
>  (*func)(source1, source1, source2);
>
>  tcg_gen_ext32s_tl(source1, source1);
> -gen_set_gpr(a->rd, source1);
> +gen_set_gpr(ctx, a->rd, source1);
>  tcg_temp_free(source1);
>  tcg_temp_free(source2);
>  return true;
> @@ -493,15 +493,15 @@ static bool gen_arith_div_uw(DisasContext *ctx, arg_r *a,
>  source1 = tcg_temp_new();
>  source2 = tcg_temp_new();
>
> -gen_get_gpr(source1, a->rs1);
> -gen_get_gpr(source2, a->rs2);
> +gen_get_gpr(ctx, source1, a->rs1);
> +gen_get_gpr(ctx, source2, a->rs2);
>  tcg_gen_ext32u_tl(source1, source1);
>  tcg_gen_ext32u_tl(source2, source2);
>
>  (*func)(source1, source1, source2);
>
>  tcg_gen_ext32s_tl(source1, source1);
> -gen_set_gpr(a->rd, source1);
> +gen_set_gpr(ctx, a->rd, source1);
>  tcg_temp_free(source1);
>  tcg_temp_free(source2);
>  return true;
> @@ -591,7 +591,7 @@ static bool gen_grevi(DisasContext *ctx, arg_grevi *a)
>  TCGv source1 = tcg_temp_new();
>  TCGv source2;
>
> -gen_get_gpr(source1, a->rs1);
> +gen_get_gpr(ctx, source1, a->rs1);
>
>  if (a->shamt == (TARGET_LONG_BITS - 8)) {
>  /* rev8, byte swaps */
> @@ -603,7 +603,7 @@ static bool gen_grevi(DisasContext *ctx, arg_grevi *a)
>  tcg_temp_free(source2);
>  }
>
> -gen_set_gpr(a->rd, source1);
> +gen_set_gpr(ctx, a->rd, source1);
>  tcg_temp_free(source1);
>  return true;
>  }
> @@ -737,12 +737,12 @@ static bool gen_arith(DisasContext *ctx, arg_r *a,
>  source1 = tcg_temp_new();
>  source2 = tcg_temp_new();
>
> -gen_get_gpr(source1, a->rs1);
> -gen_get_gpr(source2, a->rs2);
> +gen_get_gpr(ctx, source1, a->rs1);
> +gen_get_gpr(ctx, source2, a->rs2);
>
>  (*func)(source1, source1, source2);
>
> -gen_set_gpr(a->rd, source1);
> +gen_set_gpr(ctx, a->rd, source1);
>  tcg_temp_free(source1);
>  tcg_temp_free(source2);
>  return true;
> @@ -754,13 +754,13

Re: [PATCH v2 13/21] target/riscv: Use get_gpr in branches

2021-08-18 Thread Bin Meng

On Wed, Aug 18, 2021 at 5:26 AM Richard Henderson wrote:
>
> Narrow the scope of t0 in trans_jalr.
>
> Signed-off-by: Richard Henderson 
> ---
>  target/riscv/insn_trans/trans_rvi.c.inc | 25 ++---
>  1 file changed, 10 insertions(+), 15 deletions(-)
>

Reviewed-by: Bin Meng

Re: [PATCH v3 58/66] include/exec: Move cpu_signal_handler declaration

2021-08-18 Thread Alistair Francis

On Thu, Aug 19, 2021 at 6:14 AM Richard Henderson wrote:
>
> There is nothing target specific about this.  The implementation
> is host specific, but the declaration is 100% common.
>
> Signed-off-by: Richard Henderson 

Reviewed-by: Alistair Francis 

Alistair

> ---
>  include/exec/exec-all.h | 13 +
>  target/alpha/cpu.h  |  6 --
>  target/arm/cpu.h|  7 ---
>  target/avr/cpu.h|  2 --
>  target/cris/cpu.h   |  8 
>  target/hexagon/cpu.h|  3 ---
>  target/hppa/cpu.h   |  3 ---
>  target/i386/cpu.h   |  7 ---
>  target/m68k/cpu.h   |  8 
>  target/microblaze/cpu.h |  7 ---
>  target/mips/cpu.h   |  3 ---
>  target/mips/internal.h  |  2 --
>  target/nios2/cpu.h  |  2 --
>  target/openrisc/cpu.h   |  2 --
>  target/ppc/cpu.h|  7 ---
>  target/riscv/cpu.h  |  2 --
>  target/rx/cpu.h |  4 
>  target/s390x/cpu.h  |  7 ---
>  target/sh4/cpu.h|  3 ---
>  target/sparc/cpu.h  |  2 --
>  target/tricore/cpu.h|  2 --
>  target/xtensa/cpu.h |  2 --
>  22 files changed, 13 insertions(+), 89 deletions(-)
>
> diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
> index 5d1b6d80fb..9d5987ba04 100644
> --- a/include/exec/exec-all.h
> +++ b/include/exec/exec-all.h
> @@ -662,6 +662,19 @@ static inline tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env,
>  }
>  return addr;
>  }
> +
> +/**
> + * cpu_signal_handler
> + * @signum: host signal number
> + * @pinfo: host siginfo_t
> + * @puc: host ucontext_t
> + *
> + * To be called from the SIGBUS and SIGSEGV signal handler to inform the
> + * virtual cpu of exceptions.  Returns true if the signal was handled by
> + * the virtual CPU.
> + */
> +int cpu_signal_handler(int signum, void *pinfo, void *puc);
> +
>  #else
>  static inline void mmap_lock(void) {}
>  static inline void mmap_unlock(void) {}
> diff --git a/target/alpha/cpu.h b/target/alpha/cpu.h
> index d9099ea188..dfa34f93b4 100644
> --- a/target/alpha/cpu.h
> +++ b/target/alpha/cpu.h
> @@ -287,7 +287,6 @@ void alpha_cpu_do_unaligned_access(CPUState *cpu, vaddr addr,
> uintptr_t retaddr) QEMU_NORETURN;
>
>  #define cpu_list alpha_cpu_list
> -#define cpu_signal_handler cpu_alpha_signal_handler
>
>  typedef CPUAlphaState CPUArchState;
>  typedef AlphaCPU ArchCPU;
> @@ -442,11 +441,6 @@ void alpha_translate_init(void);
>  #define CPU_RESOLVING_TYPE TYPE_ALPHA_CPU
>
>  void alpha_cpu_list(void);
> -/* you can call this signal handler from your SIGBUS and SIGSEGV
> -   signal handlers to inform the virtual CPU of exceptions. non zero
> -   is returned if the signal was handled by the virtual CPU.  */
> -int cpu_alpha_signal_handler(int host_signum, void *pinfo,
> - void *puc);
>  bool alpha_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
>  MMUAccessType access_type, int mmu_idx,
>  bool probe, uintptr_t retaddr);
> diff --git a/target/arm/cpu.h b/target/arm/cpu.h
> index 9f0a5f84d5..48f0cc490e 100644
> --- a/target/arm/cpu.h
> +++ b/target/arm/cpu.h
> @@ -1117,12 +1117,6 @@ static inline bool is_a64(CPUARMState *env)
>  return env->aarch64;
>  }
>
> -/* you can call this signal handler from your SIGBUS and SIGSEGV
> -   signal handlers to inform the virtual CPU of exceptions. non zero
> -   is returned if the signal was handled by the virtual CPU.  */
> -int cpu_arm_signal_handler(int host_signum, void *pinfo,
> -   void *puc);
> -
>  /**
>   * pmu_op_start/finish
>   * @env: CPUARMState
> @@ -3002,7 +2996,6 @@ bool write_cpustate_to_list(ARMCPU *cpu, bool kvm_sync);
>  #define ARM_CPU_TYPE_NAME(name) (name ARM_CPU_TYPE_SUFFIX)
>  #define CPU_RESOLVING_TYPE TYPE_ARM_CPU
>
> -#define cpu_signal_handler cpu_arm_signal_handler
>  #define cpu_list arm_cpu_list
>
>  /* ARM has the following "translation regimes" (as the ARM ARM calls them):
> diff --git a/target/avr/cpu.h b/target/avr/cpu.h
> index 93e3faa0a9..dceacf3cd7 100644
> --- a/target/avr/cpu.h
> +++ b/target/avr/cpu.h
> @@ -175,7 +175,6 @@ static inline void set_avr_feature(CPUAVRState *env, int feature)
>  }
>
>  #define cpu_list avr_cpu_list
> -#define cpu_signal_handler cpu_avr_signal_handler
>  #define cpu_mmu_index avr_cpu_mmu_index
>
>  static inline int avr_cpu_mmu_index(CPUAVRState *env, bool ifetch)
> @@ -187,7 +186,6 @@ void avr_cpu_tcg_init(void);
>
>  void avr_cpu_list(void);
>  int cpu_avr_exec(CPUState *cpu);
> -int cpu_avr_signal_handler(int host_signum, void *pinfo, void *puc);
>  int avr_cpu_memory_rw_debug(CPUState *cs, vaddr address, uint8_t *buf,
>  int len, bool is_write);
>
> diff --git a/target/cris/cpu.h b/target/cris/cpu.h
> index d3b6492909..c87987e95c 100644
> --- a/target/cris/cpu.h
> +++ b/target/cris/cpu.h
> @@ -199,12 +199,6 @@ int crisv10_cpu_gdb_read_register(CPUState *cpu, 
> GByteArray *buf,

Re: [PATCH v3 24/66] tcg: Rename TCGMemOpIdx to MemOpIdx

2021-08-18 Thread Alistair Francis

On Thu, Aug 19, 2021 at 5:45 AM Richard Henderson wrote:
>
> We're about to move this out of tcg.h, so rename it
> as we did when moving MemOp.
>
> Reviewed-by: Philippe Mathieu-Daudé 
> Signed-off-by: Richard Henderson 

Reviewed-by: Alistair Francis 

Alistair

> ---
>  accel/tcg/atomic_template.h   | 24 +--
>  include/tcg/tcg.h | 74 -
>  accel/tcg/cputlb.c| 78 +--
>  accel/tcg/user-exec.c |  2 +-
>  target/arm/helper-a64.c   | 16 +++
>  target/arm/m_helper.c |  2 +-
>  target/i386/tcg/mem_helper.c  |  4 +-
>  target/m68k/op_helper.c   |  2 +-
>  target/mips/tcg/msa_helper.c  |  6 +--
>  target/s390x/tcg/mem_helper.c | 20 -
>  target/sparc/ldst_helper.c|  2 +-
>  tcg/optimize.c|  2 +-
>  tcg/tcg-op.c  | 12 +++---
>  tcg/tcg.c |  2 +-
>  tcg/tci.c | 14 +++
>  accel/tcg/atomic_common.c.inc |  6 +--
>  tcg/aarch64/tcg-target.c.inc  | 14 +++
>  tcg/arm/tcg-target.c.inc  | 10 ++---
>  tcg/i386/tcg-target.c.inc | 10 ++---
>  tcg/mips/tcg-target.c.inc | 12 +++---
>  tcg/ppc/tcg-target.c.inc  | 10 ++---
>  tcg/riscv/tcg-target.c.inc| 16 +++
>  tcg/s390/tcg-target.c.inc | 10 ++---
>  tcg/sparc/tcg-target.c.inc|  4 +-
>  tcg/tcg-ldst.c.inc|  2 +-
>  25 files changed, 177 insertions(+), 177 deletions(-)
>
> diff --git a/accel/tcg/atomic_template.h b/accel/tcg/atomic_template.h
> index 8098a1be31..4230ff2957 100644
> --- a/accel/tcg/atomic_template.h
> +++ b/accel/tcg/atomic_template.h
> @@ -72,7 +72,7 @@
>
>  ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr,
>ABI_TYPE cmpv, ABI_TYPE newv,
> -  TCGMemOpIdx oi, uintptr_t retaddr)
> +  MemOpIdx oi, uintptr_t retaddr)
>  {
>  DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE,
>   PAGE_READ | PAGE_WRITE, retaddr);
> @@ -92,7 +92,7 @@ ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr,
>  #if DATA_SIZE >= 16
>  #if HAVE_ATOMIC128
>  ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr,
> - TCGMemOpIdx oi, uintptr_t retaddr)
> + MemOpIdx oi, uintptr_t retaddr)
>  {
>  DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE,
>   PAGE_READ, retaddr);
> @@ -106,7 +106,7 @@ ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong 
> addr,
>  }
>
>  void ATOMIC_NAME(st)(CPUArchState *env, target_ulong addr, ABI_TYPE val,
> - TCGMemOpIdx oi, uintptr_t retaddr)
> + MemOpIdx oi, uintptr_t retaddr)
>  {
>  DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE,
>   PAGE_WRITE, retaddr);
> @@ -119,7 +119,7 @@ void ATOMIC_NAME(st)(CPUArchState *env, target_ulong 
> addr, ABI_TYPE val,
>  #endif
>  #else
>  ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr, ABI_TYPE 
> val,
> -   TCGMemOpIdx oi, uintptr_t retaddr)
> +   MemOpIdx oi, uintptr_t retaddr)
>  {
>  DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE,
>   PAGE_READ | PAGE_WRITE, retaddr);
> @@ -134,7 +134,7 @@ ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, 
> target_ulong addr, ABI_TYPE val,
>
>  #define GEN_ATOMIC_HELPER(X)\
>  ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr,   \
> -ABI_TYPE val, TCGMemOpIdx oi, uintptr_t retaddr) \
> +ABI_TYPE val, MemOpIdx oi, uintptr_t retaddr) \
>  {   \
>  DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE,  \
>   PAGE_READ | PAGE_WRITE, retaddr); \
> @@ -167,7 +167,7 @@ GEN_ATOMIC_HELPER(xor_fetch)
>   */
>  #define GEN_ATOMIC_HELPER_FN(X, FN, XDATA_TYPE, RET)\
>  ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr,   \
> -ABI_TYPE xval, TCGMemOpIdx oi, uintptr_t retaddr) \
> +ABI_TYPE xval, MemOpIdx oi, uintptr_t retaddr) \
>  {   \
>  XDATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, \
>PAGE_READ | PAGE_WRITE, retaddr); \
> @@ -211,7 +211,7 @@ GEN_ATOMIC_HELPER_FN(umax_fetch, MAX,  DATA_TYPE, new)
>
>  ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr,
>ABI_TYPE cmpv, ABI_TYPE newv,
> -  TCGMemOpIdx oi, uintptr_t retaddr)
> +

Re: [PATCH v3 23/66] tcg: Expand MO_SIZE to 3 bits

2021-08-18 Thread Alistair Francis

On Thu, Aug 19, 2021 at 5:41 AM Richard Henderson
 wrote:
>
> We have lacked expressive support for memory sizes larger
> than 64-bits for a while.  Fixing that requires adjustment
> to several points where we used this for array indexing,
> and two places that develop -Wswitch warnings after the change.
>
> Reviewed-by: Philippe Mathieu-Daudé 
> Signed-off-by: Richard Henderson 

Reviewed-by: Alistair Francis 

Alistair

> ---
>  include/exec/memop.h| 14 +-
>  target/arm/translate-a64.c  |  2 +-
>  tcg/tcg-op.c| 13 -
>  target/s390x/tcg/translate_vx.c.inc |  2 +-
>  tcg/aarch64/tcg-target.c.inc|  4 ++--
>  tcg/arm/tcg-target.c.inc|  4 ++--
>  tcg/i386/tcg-target.c.inc   |  4 ++--
>  tcg/mips/tcg-target.c.inc   |  4 ++--
>  tcg/ppc/tcg-target.c.inc|  8 
>  tcg/riscv/tcg-target.c.inc  |  4 ++--
>  tcg/s390/tcg-target.c.inc   |  4 ++--
>  tcg/sparc/tcg-target.c.inc  | 16 
>  12 files changed, 43 insertions(+), 36 deletions(-)
>
> diff --git a/include/exec/memop.h b/include/exec/memop.h
> index 529d07b02d..04264ffd6b 100644
> --- a/include/exec/memop.h
> +++ b/include/exec/memop.h
> @@ -19,11 +19,15 @@ typedef enum MemOp {
>  MO_16= 1,
>  MO_32= 2,
>  MO_64= 3,
> -MO_SIZE  = 3,   /* Mask for the above.  */
> +MO_128   = 4,
> +MO_256   = 5,
> +MO_512   = 6,
> +MO_1024  = 7,
> +MO_SIZE  = 0x07,   /* Mask for the above.  */
>
> -MO_SIGN  = 4,   /* Sign-extended, otherwise zero-extended.  */
> +MO_SIGN  = 0x08,   /* Sign-extended, otherwise zero-extended.  */
>
> -MO_BSWAP = 8,   /* Host reverse endian.  */
> +MO_BSWAP = 0x10,   /* Host reverse endian.  */
>  #ifdef HOST_WORDS_BIGENDIAN
>  MO_LE= MO_BSWAP,
>  MO_BE= 0,
> @@ -59,8 +63,8 @@ typedef enum MemOp {
>   * - an alignment to a specified size, which may be more or less than
>   *   the access size (MO_ALIGN_x where 'x' is a size in bytes);
>   */
> -MO_ASHIFT = 4,
> -MO_AMASK = 7 << MO_ASHIFT,
> +MO_ASHIFT = 5,
> +MO_AMASK = 0x7 << MO_ASHIFT,
>  #ifdef NEED_CPU_H
>  #ifdef TARGET_ALIGNED_ONLY
>  MO_ALIGN = 0,
> diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
> index 422e2ac0c9..247c9672be 100644
> --- a/target/arm/translate-a64.c
> +++ b/target/arm/translate-a64.c
> @@ -1045,7 +1045,7 @@ static void read_vec_element(DisasContext *s, TCGv_i64 
> tcg_dest, int srcidx,
>   int element, MemOp memop)
>  {
>  int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
> -switch (memop) {
> +switch ((unsigned)memop) {
>  case MO_8:
>  tcg_gen_ld8u_i64(tcg_dest, cpu_env, vect_off);
>  break;
> diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
> index c754396575..e01f68f44d 100644
> --- a/tcg/tcg-op.c
> +++ b/tcg/tcg-op.c
> @@ -2780,10 +2780,13 @@ static inline MemOp tcg_canonicalize_memop(MemOp op, 
> bool is64, bool st)
>  }
>  break;
>  case MO_64:
> -if (!is64) {
> -tcg_abort();
> +if (is64) {
> +op &= ~MO_SIGN;
> +break;
>  }
> -break;
> +/* fall through */
> +default:
> +g_assert_not_reached();
>  }
>  if (st) {
>  op &= ~MO_SIGN;
> @@ -3095,7 +3098,7 @@ typedef void (*gen_atomic_op_i64)(TCGv_i64, TCGv_env, 
> TCGv,
>  # define WITH_ATOMIC64(X)
>  #endif
>
> -static void * const table_cmpxchg[16] = {
> +static void * const table_cmpxchg[(MO_SIZE | MO_BSWAP) + 1] = {
>  [MO_8] = gen_helper_atomic_cmpxchgb,
>  [MO_16 | MO_LE] = gen_helper_atomic_cmpxchgw_le,
>  [MO_16 | MO_BE] = gen_helper_atomic_cmpxchgw_be,
> @@ -3297,7 +3300,7 @@ static void do_atomic_op_i64(TCGv_i64 ret, TCGv addr, 
> TCGv_i64 val,
>  }
>
>  #define GEN_ATOMIC_HELPER(NAME, OP, NEW)\
> -static void * const table_##NAME[16] = {\
> +static void * const table_##NAME[(MO_SIZE | MO_BSWAP) + 1] = {  \
>  [MO_8] = gen_helper_atomic_##NAME##b,   \
>  [MO_16 | MO_LE] = gen_helper_atomic_##NAME##w_le,   \
>  [MO_16 | MO_BE] = gen_helper_atomic_##NAME##w_be,   \
> diff --git a/target/s390x/tcg/translate_vx.c.inc 
> b/target/s390x/tcg/translate_vx.c.inc
> index 0afa46e463..28bf5a23b6 100644
> --- a/target/s390x/tcg/translate_vx.c.inc
> +++ b/target/s390x/tcg/translate_vx.c.inc
> @@ -67,7 +67,7 @@ static void read_vec_element_i64(TCGv_i64 dst, uint8_t reg, 
> uint8_t enr,
>  {
>  const int offs = vec_reg_offset(reg, enr, memop & MO_SIZE);
>
> -switch (memop) {
> +switch ((unsigned)memop) {
>  case ES_8:
>  tcg_gen_ld8u_i64(dst, cpu_env, offs);
>  break;
> diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
> index 5924977b42

Re: [PATCH v3 02/66] hw/core: Make do_unaligned_access noreturn

2021-08-18 Thread Alistair Francis

On Thu, Aug 19, 2021 at 5:23 AM Richard Henderson
 wrote:
>
> While we may have had some thought of allowing system-mode
> to return from this hook, we have no guests that require this.
>
> Reviewed-by: Alex Bennée 
> Reviewed-by: Philippe Mathieu-Daudé 
> Signed-off-by: Richard Henderson 

Reviewed-by: Alistair Francis 

Alistair

> ---
>  include/hw/core/tcg-cpu-ops.h  | 3 ++-
>  target/alpha/cpu.h | 4 ++--
>  target/arm/internals.h | 2 +-
>  target/microblaze/cpu.h| 2 +-
>  target/mips/tcg/tcg-internal.h | 4 ++--
>  target/nios2/cpu.h | 4 ++--
>  target/ppc/internal.h  | 4 ++--
>  target/riscv/cpu.h | 2 +-
>  target/s390x/s390x-internal.h  | 4 ++--
>  target/sh4/cpu.h   | 4 ++--
>  target/xtensa/cpu.h| 4 ++--
>  target/hppa/cpu.c  | 7 ---
>  12 files changed, 23 insertions(+), 21 deletions(-)
>
> diff --git a/include/hw/core/tcg-cpu-ops.h b/include/hw/core/tcg-cpu-ops.h
> index eab27d0c03..ee0795def4 100644
> --- a/include/hw/core/tcg-cpu-ops.h
> +++ b/include/hw/core/tcg-cpu-ops.h
> @@ -72,10 +72,11 @@ struct TCGCPUOps {
>MemTxResult response, uintptr_t retaddr);
>  /**
>   * @do_unaligned_access: Callback for unaligned access handling
> + * The callback must exit via raising an exception.
>   */
>  void (*do_unaligned_access)(CPUState *cpu, vaddr addr,
>  MMUAccessType access_type,
> -int mmu_idx, uintptr_t retaddr);
> +int mmu_idx, uintptr_t retaddr) 
> QEMU_NORETURN;
>
>  /**
>   * @adjust_watchpoint_address: hack for cpu_check_watchpoint used by ARM
> diff --git a/target/alpha/cpu.h b/target/alpha/cpu.h
> index 82df108967..6eb3fcc63e 100644
> --- a/target/alpha/cpu.h
> +++ b/target/alpha/cpu.h
> @@ -283,8 +283,8 @@ hwaddr alpha_cpu_get_phys_page_debug(CPUState *cpu, vaddr 
> addr);
>  int alpha_cpu_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg);
>  int alpha_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);
>  void alpha_cpu_do_unaligned_access(CPUState *cpu, vaddr addr,
> -   MMUAccessType access_type,
> -   int mmu_idx, uintptr_t retaddr);
> +   MMUAccessType access_type, int mmu_idx,
> +   uintptr_t retaddr) QEMU_NORETURN;
>
>  #define cpu_list alpha_cpu_list
>  #define cpu_signal_handler cpu_alpha_signal_handler
> diff --git a/target/arm/internals.h b/target/arm/internals.h
> index cd2ea8a388..8a77929793 100644
> --- a/target/arm/internals.h
> +++ b/target/arm/internals.h
> @@ -594,7 +594,7 @@ bool arm_s1_regime_using_lpae_format(CPUARMState *env, 
> ARMMMUIdx mmu_idx);
>  /* Raise a data fault alignment exception for the specified virtual address 
> */
>  void arm_cpu_do_unaligned_access(CPUState *cs, vaddr vaddr,
>   MMUAccessType access_type,
> - int mmu_idx, uintptr_t retaddr);
> + int mmu_idx, uintptr_t retaddr) 
> QEMU_NORETURN;
>
>  /* arm_cpu_do_transaction_failed: handle a memory system error response
>   * (eg "no device/memory present at address") by raising an external abort
> diff --git a/target/microblaze/cpu.h b/target/microblaze/cpu.h
> index e4bba8a755..620c3742e1 100644
> --- a/target/microblaze/cpu.h
> +++ b/target/microblaze/cpu.h
> @@ -359,7 +359,7 @@ void mb_cpu_do_interrupt(CPUState *cs);
>  bool mb_cpu_exec_interrupt(CPUState *cs, int int_req);
>  void mb_cpu_do_unaligned_access(CPUState *cs, vaddr vaddr,
>  MMUAccessType access_type,
> -int mmu_idx, uintptr_t retaddr);
> +int mmu_idx, uintptr_t retaddr) 
> QEMU_NORETURN;
>  void mb_cpu_dump_state(CPUState *cpu, FILE *f, int flags);
>  hwaddr mb_cpu_get_phys_page_attrs_debug(CPUState *cpu, vaddr addr,
>  MemTxAttrs *attrs);
> diff --git a/target/mips/tcg/tcg-internal.h b/target/mips/tcg/tcg-internal.h
> index 81b14eb219..7ac1e578d1 100644
> --- a/target/mips/tcg/tcg-internal.h
> +++ b/target/mips/tcg/tcg-internal.h
> @@ -24,8 +24,8 @@ bool mips_cpu_tlb_fill(CPUState *cs, vaddr address, int 
> size,
> MMUAccessType access_type, int mmu_idx,
> bool probe, uintptr_t retaddr);
>  void mips_cpu_do_unaligned_access(CPUState *cpu, vaddr addr,
> -  MMUAccessType access_type,
> -  int mmu_idx, uintptr_t retaddr);
> +  MMUAccessType access_type, int mmu_idx,
> +  uintptr_t retaddr) QEMU_NORETURN;
>
>  const char *mips_exception_name(int32_t exception);
>
> diff --git a/target/nios2/cpu.h b/target/nios2/cpu.h
> index 2ab82fdc71

Re: [PATCH RFC v6 08/12] target/riscv: Handle KVM_EXIT_RISCV_SBI exit

2021-08-18 Thread Alistair Francis

On Tue, Aug 17, 2021 at 1:25 PM Yifei Jiang  wrote:
>
> Use char-fe to handle the console SBI calls, which implement early
> console I/O when 'earlycon=sbi' is added to the kernel parameters.
>
> Signed-off-by: Yifei Jiang 
> Signed-off-by: Mingwang Li 
> ---
>  target/riscv/kvm.c | 42 -
>  target/riscv/sbi_ecall_interface.h | 72 ++
>  2 files changed, 113 insertions(+), 1 deletion(-)
>  create mode 100644 target/riscv/sbi_ecall_interface.h
>
> diff --git a/target/riscv/kvm.c b/target/riscv/kvm.c
> index bc9cb5d8f9..a68f31c2f3 100644
> --- a/target/riscv/kvm.c
> +++ b/target/riscv/kvm.c
> @@ -38,6 +38,8 @@
>  #include "qemu/log.h"
>  #include "hw/loader.h"
>  #include "kvm_riscv.h"
> +#include "sbi_ecall_interface.h"
> +#include "chardev/char-fe.h"
>
>  static uint64_t kvm_riscv_reg_id(CPURISCVState *env, uint64_t type, uint64_t 
> idx)
>  {
> @@ -435,9 +437,47 @@ bool kvm_arch_stop_on_emulation_error(CPUState *cs)
>  return true;
>  }
>
> +static int kvm_riscv_handle_sbi(struct kvm_run *run)
> +{
> +int ret = 0;
> +unsigned char ch;
> +switch (run->riscv_sbi.extension_id) {
> +case SBI_EXT_0_1_CONSOLE_PUTCHAR:
> +ch = run->riscv_sbi.args[0];
> +qemu_chr_fe_write(serial_hd(0)->be, &ch, sizeof(ch));
> +break;
> +case SBI_EXT_0_1_CONSOLE_GETCHAR:
> +ret = qemu_chr_fe_read_all(serial_hd(0)->be, &ch, sizeof(ch));
> +if (ret == sizeof(ch)) {
> +run->riscv_sbi.args[0] = ch;
> +} else {
> +run->riscv_sbi.args[0] = -1;
> +}
> +break;

These have been deprecated (see
https://github.com/riscv/riscv-sbi-doc/blob/master/riscv-sbi.adoc#4-legacy-extensions-eids-0x00---0x0f).
Is it even worth supporting them?

> +default:
> +qemu_log_mask(LOG_UNIMP,
> +  "%s: un-handled SBI EXIT, specific reasons is %lu\n",
> +  __func__, run->riscv_sbi.extension_id);
> +ret = -1;
> +break;
> +}
> +return ret;
> +}
> +
>  int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
>  {
> -return 0;
> +int ret = 0;
> +switch (run->exit_reason) {
> +case KVM_EXIT_RISCV_SBI:
> +ret = kvm_riscv_handle_sbi(run);
> +break;
> +default:
> +qemu_log_mask(LOG_UNIMP, "%s: un-handled exit reason %d\n",
> +  __func__, run->exit_reason);
> +ret = -1;
> +break;
> +}
> +return ret;
>  }
>
>  void kvm_riscv_reset_vcpu(RISCVCPU *cpu)
> diff --git a/target/riscv/sbi_ecall_interface.h 
> b/target/riscv/sbi_ecall_interface.h
> new file mode 100644
> index 00..fb1a3fa8f2
> --- /dev/null
> +++ b/target/riscv/sbi_ecall_interface.h
> @@ -0,0 +1,72 @@
> +/*
> + * SPDX-License-Identifier: BSD-2-Clause
> + *
> + * Copyright (c) 2019 Western Digital Corporation or its affiliates.
> + *
> + * Authors:
> + *   Anup Patel 
> + */
> +
> +#ifndef __SBI_ECALL_INTERFACE_H__
> +#define __SBI_ECALL_INTERFACE_H__
> +
> +/* clang-format off */
> +
> +/* SBI Extension IDs */
> +#define SBI_EXT_0_1_SET_TIMER   0x0
> +#define SBI_EXT_0_1_CONSOLE_PUTCHAR 0x1
> +#define SBI_EXT_0_1_CONSOLE_GETCHAR 0x2
> +#define SBI_EXT_0_1_CLEAR_IPI   0x3
> +#define SBI_EXT_0_1_SEND_IPI0x4
> +#define SBI_EXT_0_1_REMOTE_FENCE_I  0x5
> +#define SBI_EXT_0_1_REMOTE_SFENCE_VMA   0x6
> +#define SBI_EXT_0_1_REMOTE_SFENCE_VMA_ASID 0x7
> +#define SBI_EXT_0_1_SHUTDOWN0x8
> +#define SBI_EXT_BASE0x10
> +#define SBI_EXT_TIME0x54494D45
> +#define SBI_EXT_IPI 0x735049
> +#define SBI_EXT_RFENCE  0x52464E43
> +#define SBI_EXT_HSM 0x48534D
> +
> +/* SBI function IDs for BASE extension*/
> +#define SBI_EXT_BASE_GET_SPEC_VERSION   0x0
> +#define SBI_EXT_BASE_GET_IMP_ID 0x1
> +#define SBI_EXT_BASE_GET_IMP_VERSION0x2
> +#define SBI_EXT_BASE_PROBE_EXT  0x3
> +#define SBI_EXT_BASE_GET_MVENDORID  0x4
> +#define SBI_EXT_BASE_GET_MARCHID0x5
> +#define SBI_EXT_BASE_GET_MIMPID 0x6
> +
> +/* SBI function IDs for TIME extension*/
> +#define SBI_EXT_TIME_SET_TIMER  0x0
> +
> +/* SBI function IDs for IPI extension*/
> +#define SBI_EXT_IPI_SEND_IPI0x0
> +
> +/* SBI function IDs for RFENCE extension*/
> +#define SBI_EXT_RFENCE_REMOTE_FENCE_I   0x0
> +#define SBI_EXT_RFENCE_REMOTE_SFENCE_VMA0x1
> +#define SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID  0x2
> +#define SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA   0x3
> +#define SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA_VMID 0x4
> +#define SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA   0x5
> +#define SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA_ASID 0x6
> +
> +/* SBI function IDs for HSM extension */
> +#define SBI_EXT_HSM_HART_START  0x0
> +#define SBI_EXT_HSM_HART_STOP   0x1
> +#define SBI_EXT_HSM_HART_GET_STATUS 0x2
> +
> +#define SBI_HSM_HART_STATUS_STARTED 0x0
> +#

Re: [PATCH v2 11/21] target/riscv: Use DisasExtend in shift operations

2021-08-18 Thread Bin Meng

On Wed, Aug 18, 2021 at 5:26 AM Richard Henderson
 wrote:
>
> These operations are greatly simplified by ctx->w, which allows
> us to fold gen_shiftw into gen_shift.  Split gen_shifti into
> gen_shift_imm_{fn,tl} like we do for gen_arith_imm_{fn,tl}.
>
> Signed-off-by: Richard Henderson 
> ---
>  target/riscv/translate.c| 110 +---
>  target/riscv/insn_trans/trans_rvb.c.inc | 129 +++-
>  target/riscv/insn_trans/trans_rvi.c.inc |  88 
>  3 files changed, 125 insertions(+), 202 deletions(-)
>

Reviewed-by: Bin Meng

[PATCH v3] hw/intc/sifive_clint: Fix overflow in sifive_clint_write_timecmp()

2021-08-18 Thread David Hoppenbrouwers

`muldiv64` would overflow in cases where the final 96-bit value does not
fit in a `uint64_t`. This would result in small values that cause an
interrupt to be triggered much sooner than intended.

The overflow can be detected in most cases by checking if the new value is
smaller than the previous value. If the final result is larger than
`diff` it is either correct or it doesn't matter as it is effectively
infinite anyways.

`next` is a `uint64_t` value, but `timer_mod` takes an `int64_t`. This
resulted in high values such as `UINT64_MAX` being converted to `-1`,
which caused an immediate timer interrupt.

By limiting `next` to `INT64_MAX` no overflow will happen while the
timer will still be effectively set to "infinitely" far in the future.

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/493
Signed-off-by: David Hoppenbrouwers 
---
I did not account for the multiplication overflow mentioned in the bug
report. I've amended the patch and I do not spot any erroneous interrupts
anymore.

I see that the previous patch already got applied to
riscv-to-apply.next. Do I need to create a new patch?

David

 hw/intc/sifive_clint.c | 19 +--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/hw/intc/sifive_clint.c b/hw/intc/sifive_clint.c
index 0f41e5ea1c..aa76e639a9 100644
--- a/hw/intc/sifive_clint.c
+++ b/hw/intc/sifive_clint.c
@@ -59,8 +59,23 @@ static void sifive_clint_write_timecmp(RISCVCPU *cpu, 
uint64_t value,
 riscv_cpu_update_mip(cpu, MIP_MTIP, BOOL_TO_MASK(0));
 diff = cpu->env.timecmp - rtc_r;
 /* back to ns (note args switched in muldiv64) */
-next = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
-muldiv64(diff, NANOSECONDS_PER_SECOND, timebase_freq);
+uint64_t ns_diff = muldiv64(diff, NANOSECONDS_PER_SECOND, timebase_freq);
+
+/*
+ * check if ns_diff overflowed and check if the addition would potentially
+ * overflow
+ */
+if ((NANOSECONDS_PER_SECOND > timebase_freq && ns_diff < diff) ||
+ns_diff > INT64_MAX) {
+next = INT64_MAX;
+} else {
+/*
+ * as it is very unlikely qemu_clock_get_ns will return a value
+ * greater than INT64_MAX, no additional check is needed.
+ */
+next = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + ns_diff;
+}
+
 timer_mod(cpu->env.timer, next);
 }
 
-- 
2.20.1

RE: [PATCH v4 1/3] target-arm: Add support for Fujitsu A64FX

2021-08-18 Thread ishii.shuuic...@fujitsu.com

> I think this will be more clear once I get the patch posted (which I haven't 
> started
> writing yet). I'll try to get it posted by tomorrow evening though, since I 
> have
> vacation on Friday.

While Andrew is working on the patch in a hurry, 
I'm sorry, I'll be on vacation for a while starting Friday too,
so my reply will be delayed.

Best regards.


> -Original Message-
> From: Andrew Jones 
> Sent: Wednesday, August 18, 2021 5:58 PM
> To: Ishii, Shuuichirou/石井 周一郎 
> Cc: Richard Henderson ;
> peter.mayd...@linaro.org; qemu-...@nongnu.org; qemu-devel@nongnu.org
> Subject: Re: [PATCH v4 1/3] target-arm: Add support for Fujitsu A64FX
> 
> On Wed, Aug 18, 2021 at 08:29:15AM +, ishii.shuuic...@fujitsu.com wrote:
> >
> > We appreciate everyone's comments.
> > Before making the V5 patch, please let me check the patch contents.
> >
> > > This looks reasonable to me, but you also need the 'sve' property
> > > that states sve is supported at all.
> > > > > So maybe we should just go ahead and add all sve* properties,
> >
> > In response to the above comment,
> > We understood that the sve property will be added to the v4 patch.
> >
> > i.e.
> > (QEMU) query-cpu-model-expansion type=full model={"name":"a64fx"}
> > {"return": {"model": {"name": "a64fx", "props": {"sve128": false,
> > "sve256": true, "sve": true, "sve512": true, "aarch64": true, "pmu":
> > true
> >
> > > > > but
> > > > > then make sure the default vq map is correct.
> >
> > Furthermore, we understood that I need to add the above process as well, is
> > that correct?
> >
> > > That's a good idea. I'll send a patch with your suggested-by.
> >
> > If that's correct,
> > In the current v4 patch, in the aarch64_a64fx_initfn function, the
> > a64fx_cpu_set_sve function is executed to set the SVE property, and
> > the arm_cpu_sve_finalize function is not called.
> >
> > In which function is it appropriate to execute the modulo max_vq
> > function (or equivalent process)?
> >
> > If We are not understanding you correctly, We would appreciate your
> > comments.
> 
> Richard's suggestion is to generalize the "supported" bitmap concept, which is
> currently only used for KVM, in order to also use it for TCG cpu models. The 
> 'max'
> cpu type will have the trivial all-set supported bitmap, but the a64fx will 
> have a
> specific one. I plan to do this "supported" bitmap generalization and apply 
> it to the
> TCG max cpu type. You'll need to rebase this series on those patches and 
> provide
> the a64fx supported bitmap.
> 
> I think this will be more clear once I get the patch posted (which I haven't 
> started
> writing yet). I'll try to get it posted by tomorrow evening though, since I 
> have
> vacation on Friday.
> 
> Thanks,
> drew
> 
> 
> >
> > Best regards.
> >
> > > -Original Message-
> > > From: Andrew Jones 
> > > Sent: Wednesday, August 18, 2021 1:28 AM
> > > To: Richard Henderson 
> > > Cc: Ishii, Shuuichirou/石井 周一郎 ;
> > > peter.mayd...@linaro.org; qemu-...@nongnu.org; qemu-devel@nongnu.org
> > > Subject: Re: [PATCH v4 1/3] target-arm: Add support for Fujitsu
> > > A64FX
> > >
> > > On Tue, Aug 17, 2021 at 05:53:34AM -1000, Richard Henderson wrote:
> > > > On 8/17/21 5:36 AM, Andrew Jones wrote:
> > > > > On Tue, Aug 17, 2021 at 05:23:17AM -1000, Richard Henderson wrote:
> > > > > > On 8/17/21 1:56 AM, Andrew Jones wrote:
> > > > > > > I guess it's fine. You could easily create a new
> > > > > > > cpu_arm_set_sve_vq() which would forbid changing the
> > > > > > > properties if you wanted to, but then we need to answer
> > > > > > > Peter's question in order to see if there's a precedent for that 
> > > > > > > type of
> property.
> > > > > >
> > > > > > I don't see the point in read-only properties.  If the user
> > > > > > wants to set non-standard values on the command-line, let
> > > > > > them.  What is most important is getting the correct default from 
> > > > > > '-cpu
> a64fx'.
> > > > > >
> > > > >
> > > > > So maybe we should just go ahead and add all sve* properties,
> > > > > but then make sure the default vq map is correct.
> > > >
> > > > I think that's the right answer.
> > > >
> > > > Presently we have a kvm_supported variable that's initialized by
> > > > kvm_arm_sve_get_vls().  I think we want to rename that variable
> > > > and provide a version of that function for tcg. Probably we should
> > > > have done that before, with a trivial function for -cpu max to set all 
> > > > bits.
> > > >
> > > > Then eliminate most of the other kvm_enabled() checks in
> > > > arm_cpu_sve_finalize.  I think the only one we keep is the last,
> > > > where we verify that the final sve_vq_map matches kvm_enabled
> > > > exactly, modulo
> > > max_vq.
> > > >
> > > > This should minimize the differences in behaviour between tcg and kvm.
> > >
> > > That's a good idea. I'll send a patch with your suggested-by.
> > >
> > > Thanks,
> > > drew
> >

[PATCH 4/9] hw/nvram: Introduce Xilinx ZynqMP eFuse device

2021-08-18 Thread Tong Ho

This implements the Xilinx ZynqMP eFuse, a one-time
field-programmable non-volatile storage device.  There is
only one such device in the Xilinx ZynqMP product family.

The command argument:
  -drive if=pflash,index=N,...
Can be used to optionally connect the storage array to a
backend storage, such that field-programmed values in one
invocation can be made available to the next invocation.

The backend storage must be a seekable binary file, and
its size must be 768 bytes or larger. A file with all
binary 0's is a 'blank'.

The drive 'index' value N has a default value of 3, but
can be changed using command argument:
  -global xlnx,efuse.drive-index=N

Co-authored-by: Edgar E. Iglesias 
Co-authored-by: Sai Pavan Boddu 

Signed-off-by: Edgar E. Iglesias 
Signed-off-by: Sai Pavan Boddu 
Signed-off-by: Tong Ho 
---
 hw/nvram/Kconfig |   5 +
 hw/nvram/meson.build |   2 +
 hw/nvram/xlnx-zynqmp-efuse.c | 861 +++
 include/hw/nvram/xlnx-zynqmp-efuse.h |  45 ++
 4 files changed, 913 insertions(+)
 create mode 100644 hw/nvram/xlnx-zynqmp-efuse.c
 create mode 100644 include/hw/nvram/xlnx-zynqmp-efuse.h

diff --git a/hw/nvram/Kconfig b/hw/nvram/Kconfig
index e96749ced3..cc3ed789fe 100644
--- a/hw/nvram/Kconfig
+++ b/hw/nvram/Kconfig
@@ -23,3 +23,8 @@ config XLNX_EFUSE_VERSAL
 bool
 default y if XLNX_VERSAL
 select XLNX_EFUSE
+
+config XLNX_EFUSE_ZYNQMP
+bool
+default y if XLNX_ZYNQMP
+select XLNX_EFUSE
diff --git a/hw/nvram/meson.build b/hw/nvram/meson.build
index a432665158..f364520ad5 100644
--- a/hw/nvram/meson.build
+++ b/hw/nvram/meson.build
@@ -13,6 +13,8 @@ softmmu_ss.add(when: 'CONFIG_XLNX_EFUSE', if_true: 
files('xlnx-efuse.c'))
 softmmu_ss.add(when: 'CONFIG_XLNX_EFUSE_VERSAL', if_true: files(
'xlnx-versal-efuse-cache.c',
'xlnx-versal-efuse-ctrl.c'))
+softmmu_ss.add(when: 'CONFIG_XLNX_EFUSE_ZYNQMP', if_true: files(
+   'xlnx-zynqmp-efuse.c'))
 if 'CONFIG_XLNX_BBRAM' in config_all or \
'CONFIG_XLNX_EFUSE' in config_all
   softmmu_ss.add(files('xlnx-efuse-crc.c'))
diff --git a/hw/nvram/xlnx-zynqmp-efuse.c b/hw/nvram/xlnx-zynqmp-efuse.c
new file mode 100644
index 00..3591577498
--- /dev/null
+++ b/hw/nvram/xlnx-zynqmp-efuse.c
@@ -0,0 +1,861 @@
+/*
+ * QEMU model of the ZynqMP eFuse
+ *
+ * Copyright (c) 2015 Xilinx Inc.
+ *
+ * Written by Edgar E. Iglesias 
+ * Partially autogenerated by xregqemu.py 2015-01-02.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to 
deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "hw/nvram/xlnx-zynqmp-efuse.h"
+
+#include "qemu/log.h"
+#include "qapi/error.h"
+#include "migration/vmstate.h"
+#include "hw/qdev-properties.h"
+
+#ifndef ZYNQMP_EFUSE_ERR_DEBUG
+#define ZYNQMP_EFUSE_ERR_DEBUG 0
+#endif
+
+#define ZYNQMP_EFUSE(obj) \
+ OBJECT_CHECK(XlnxZynqMPEFuse, (obj), TYPE_XLNX_ZYNQMP_EFUSE)
+
+REG32(WR_LOCK, 0x0)
+FIELD(WR_LOCK, LOCK, 0, 16)
+REG32(CFG, 0x4)
+FIELD(CFG, SLVERR_ENABLE, 5, 1)
+FIELD(CFG, MARGIN_RD, 2, 2)
+FIELD(CFG, PGM_EN, 1, 1)
+FIELD(CFG, EFUSE_CLK_SEL, 0, 1)
+REG32(STATUS, 0x8)
+FIELD(STATUS, AES_CRC_PASS, 7, 1)
+FIELD(STATUS, AES_CRC_DONE, 6, 1)
+FIELD(STATUS, CACHE_DONE, 5, 1)
+FIELD(STATUS, CACHE_LOAD, 4, 1)
+FIELD(STATUS, EFUSE_3_TBIT, 2, 1)
+FIELD(STATUS, EFUSE_2_TBIT, 1, 1)
+FIELD(STATUS, EFUSE_0_TBIT, 0, 1)
+REG32(EFUSE_PGM_ADDR, 0xc)
+FIELD(EFUSE_PGM_ADDR, EFUSE, 11, 2)
+FIELD(EFUSE_PGM_ADDR, ROW, 5, 6)
+FIELD(EFUSE_PGM_ADDR, COLUMN, 0, 5)
+REG32(EFUSE_RD_ADDR, 0x10)
+FIELD(EFUSE_RD_ADDR, EFUSE, 11, 2)
+FIELD(EFUSE_RD_ADDR, ROW, 5, 6)
+REG32(EFUSE_RD_DATA, 0x14)
+REG32(TPGM, 0x18)
+FIELD(TPGM, VALUE, 0, 16)
+REG32(TRD, 0x1c)
+FIELD(TRD, VALUE, 0, 8)
+REG32(TSU_H_PS, 0x20)
+FIELD(TSU_H_PS, VALUE, 0, 8)
+REG32(

[PATCH 0/9] hw/nvram: hw/arm: Introduce Xilinx eFUSE and BBRAM

2021-08-18 Thread Tong Ho

This series implements the Xilinx eFUSE and BBRAM devices for
the Versal and ZynqMP product families.

Furthermore, both new devices are connected to the xlnx-versal-virt
board and the xlnx-zcu102 board.

See changes in docs/system/arm/xlnx-versal-virt.rst for detail.

Tong Ho (9):
  docs/system/arm: xlnx-versal-virt: BBRAM and eFUSE Usage
  hw/nvram: Introduce Xilinx eFuse QOM
  hw/nvram: Introduce Xilinx Versal eFuse device
  hw/nvram: Introduce Xilinx ZynqMP eFuse device
  hw/nvram: Introduce Xilinx battery-backed ram
  hw/arm: xlnx-versal: Add Xilinx BBRAM device
  hw/arm: xlnx-versal: Add Xilinx eFUSE device
  hw/arm: xlnx-zynqmp: Add Xilinx BBRAM device
  hw/arm: xlnx-zynqmp: Add Xilinx eFUSE device

 docs/system/arm/xlnx-versal-virt.rst |  49 ++
 hw/arm/xlnx-versal-virt.c|  57 ++
 hw/arm/xlnx-versal.c |  57 ++
 hw/arm/xlnx-zynqmp.c |  50 ++
 hw/nvram/Kconfig |  17 +
 hw/nvram/meson.build |  11 +
 hw/nvram/xlnx-bbram.c| 536 +
 hw/nvram/xlnx-efuse-crc.c| 118 
 hw/nvram/xlnx-efuse.c| 253 
 hw/nvram/xlnx-versal-efuse-cache.c   | 141 +
 hw/nvram/xlnx-versal-efuse-ctrl.c| 786 
 hw/nvram/xlnx-zynqmp-efuse.c | 861 +++
 include/hw/arm/xlnx-versal.h |  17 +
 include/hw/arm/xlnx-zynqmp.h |   5 +
 include/hw/nvram/xlnx-bbram.h|  55 ++
 include/hw/nvram/xlnx-efuse.h|  80 +++
 include/hw/nvram/xlnx-versal-efuse.h |  62 ++
 include/hw/nvram/xlnx-zynqmp-efuse.h |  45 ++
 18 files changed, 3200 insertions(+)
 create mode 100644 hw/nvram/xlnx-bbram.c
 create mode 100644 hw/nvram/xlnx-efuse-crc.c
 create mode 100644 hw/nvram/xlnx-efuse.c
 create mode 100644 hw/nvram/xlnx-versal-efuse-cache.c
 create mode 100644 hw/nvram/xlnx-versal-efuse-ctrl.c
 create mode 100644 hw/nvram/xlnx-zynqmp-efuse.c
 create mode 100644 include/hw/nvram/xlnx-bbram.h
 create mode 100644 include/hw/nvram/xlnx-efuse.h
 create mode 100644 include/hw/nvram/xlnx-versal-efuse.h
 create mode 100644 include/hw/nvram/xlnx-zynqmp-efuse.h

-- 
2.25.1

[PATCH 3/9] hw/nvram: Introduce Xilinx Versal eFuse device

2021-08-18 Thread Tong Ho

This implements the Xilinx Versal eFuse, a one-time
field-programmable non-volatile storage device.  There is
only one such device in the Xilinx Versal product family.

The command argument:
  -drive if=pflash,index=N,...
Can be used to optionally connect the storage array to a
backend storage, such that field-programmed values in one
invocation can be made available to the next invocation.

The backend storage must be a seekable binary file, and
its size must be 3072 bytes or larger. A file with all
binary 0's is a 'blank'.

The drive 'index' value N has a default value of 1, but
can be changed using command argument:
  -global xlnx,efuse.drive-index=N

This device has two separate MMIO interfaces: a controller
and a flattened readback.

The controller provides interfaces for field-programming,
configuration, control, and status.

The flattened readback is a cache that provides a byte-accessible
read-only interface for efficiently reading the efuse array.

Co-authored-by: Edgar E. Iglesias 
Co-authored-by: Sai Pavan Boddu 

Signed-off-by: Edgar E. Iglesias 
Signed-off-by: Sai Pavan Boddu 
Signed-off-by: Tong Ho 
---
 hw/nvram/Kconfig |   8 +
 hw/nvram/meson.build |   8 +
 hw/nvram/xlnx-versal-efuse-cache.c   | 141 +
 hw/nvram/xlnx-versal-efuse-ctrl.c| 786 +++
 include/hw/nvram/xlnx-versal-efuse.h |  62 +++
 5 files changed, 1005 insertions(+)
 create mode 100644 hw/nvram/xlnx-versal-efuse-cache.c
 create mode 100644 hw/nvram/xlnx-versal-efuse-ctrl.c
 create mode 100644 include/hw/nvram/xlnx-versal-efuse.h

diff --git a/hw/nvram/Kconfig b/hw/nvram/Kconfig
index e872fcb194..e96749ced3 100644
--- a/hw/nvram/Kconfig
+++ b/hw/nvram/Kconfig
@@ -15,3 +15,11 @@ config NMC93XX_EEPROM
 
 config CHRP_NVRAM
 bool
+
+config XLNX_EFUSE
+bool
+
+config XLNX_EFUSE_VERSAL
+bool
+default y if XLNX_VERSAL
+select XLNX_EFUSE
diff --git a/hw/nvram/meson.build b/hw/nvram/meson.build
index fd2951a860..a432665158 100644
--- a/hw/nvram/meson.build
+++ b/hw/nvram/meson.build
@@ -9,5 +9,13 @@ softmmu_ss.add(when: 'CONFIG_AT24C', if_true: 
files('eeprom_at24c.c'))
 softmmu_ss.add(when: 'CONFIG_MAC_NVRAM', if_true: files('mac_nvram.c'))
 softmmu_ss.add(when: 'CONFIG_NPCM7XX', if_true: files('npcm7xx_otp.c'))
 softmmu_ss.add(when: 'CONFIG_NRF51_SOC', if_true: files('nrf51_nvm.c'))
+softmmu_ss.add(when: 'CONFIG_XLNX_EFUSE', if_true: files('xlnx-efuse.c'))
+softmmu_ss.add(when: 'CONFIG_XLNX_EFUSE_VERSAL', if_true: files(
+   'xlnx-versal-efuse-cache.c',
+   'xlnx-versal-efuse-ctrl.c'))
+if 'CONFIG_XLNX_BBRAM' in config_all or \
+   'CONFIG_XLNX_EFUSE' in config_all
+  softmmu_ss.add(files('xlnx-efuse-crc.c'))
+endif
 
 specific_ss.add(when: 'CONFIG_PSERIES', if_true: files('spapr_nvram.c'))
diff --git a/hw/nvram/xlnx-versal-efuse-cache.c 
b/hw/nvram/xlnx-versal-efuse-cache.c
new file mode 100644
index 00..8b4eca7a39
--- /dev/null
+++ b/hw/nvram/xlnx-versal-efuse-cache.c
@@ -0,0 +1,141 @@
+/*
+ * QEMU model of the EFuse_Cache
+ *
+ * Copyright (c) 2017 Xilinx Inc.
+ *
+ * Partially generated by xregqemu.py 2017-06-05.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to 
deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "hw/nvram/xlnx-versal-efuse.h"
+
+#include "qemu/log.h"
+#include "migration/vmstate.h"
+#include "hw/qdev-properties.h"
+
+#ifndef XILINX_EFUSE_CACHE_ERR_DEBUG
+#define XILINX_EFUSE_CACHE_ERR_DEBUG 0
+#endif
+
+#define XILINX_EFUSE_CACHE(obj) \
+ OBJECT_CHECK(XlnxVersalEFuseCache, (obj), TYPE_XLNX_VERSAL_EFUSE_CACHE)
+
+#define DPRINT(...) \
+if (XILINX_EFUSE_CACHE_ERR_DEBUG) {  \
+qemu_log(__VA_ARGS__); \
+}
+
+#define DPRINT_GE(args, ...) \
+ qemu_log_mask(LOG_GUEST_ERROR, "%s: " args, __func__, ## __VA_ARGS__);
+
+#define MR_SIZE 0xC00
+
+static uint64_t efuse_cache_read(void *opaque, hwaddr addr, unsig

[PATCH 5/9] hw/nvram: Introduce Xilinx battery-backed ram

2021-08-18 Thread Tong Ho

This device is present in Versal and ZynqMP product
families to store a 256-bit encryption key.

Co-authored-by: Edgar E. Iglesias 
Co-authored-by: Sai Pavan Boddu 

Signed-off-by: Edgar E. Iglesias 
Signed-off-by: Sai Pavan Boddu 
Signed-off-by: Tong Ho 
---
 hw/nvram/Kconfig  |   4 +
 hw/nvram/meson.build  |   1 +
 hw/nvram/xlnx-bbram.c | 536 ++
 include/hw/nvram/xlnx-bbram.h |  55 
 4 files changed, 596 insertions(+)
 create mode 100644 hw/nvram/xlnx-bbram.c
 create mode 100644 include/hw/nvram/xlnx-bbram.h

diff --git a/hw/nvram/Kconfig b/hw/nvram/Kconfig
index cc3ed789fe..a8c5e9227e 100644
--- a/hw/nvram/Kconfig
+++ b/hw/nvram/Kconfig
@@ -28,3 +28,7 @@ config XLNX_EFUSE_ZYNQMP
 bool
 default y if XLNX_ZYNQMP
 select XLNX_EFUSE
+
+config XLNX_BBRAM
+bool
+default y if (XLNX_VERSAL || XLNX_ZYNQMP)
diff --git a/hw/nvram/meson.build b/hw/nvram/meson.build
index f364520ad5..0a1676d37a 100644
--- a/hw/nvram/meson.build
+++ b/hw/nvram/meson.build
@@ -9,6 +9,7 @@ softmmu_ss.add(when: 'CONFIG_AT24C', if_true: 
files('eeprom_at24c.c'))
 softmmu_ss.add(when: 'CONFIG_MAC_NVRAM', if_true: files('mac_nvram.c'))
 softmmu_ss.add(when: 'CONFIG_NPCM7XX', if_true: files('npcm7xx_otp.c'))
 softmmu_ss.add(when: 'CONFIG_NRF51_SOC', if_true: files('nrf51_nvm.c'))
+softmmu_ss.add(when: 'CONFIG_XLNX_BBRAM', if_true: files('xlnx-bbram.c', 
'xlnx-efuse-crc.c'))
 softmmu_ss.add(when: 'CONFIG_XLNX_EFUSE', if_true: files('xlnx-efuse.c'))
 softmmu_ss.add(when: 'CONFIG_XLNX_EFUSE_VERSAL', if_true: files(
'xlnx-versal-efuse-cache.c',
diff --git a/hw/nvram/xlnx-bbram.c b/hw/nvram/xlnx-bbram.c
new file mode 100644
index 00..d560dcdfa8
--- /dev/null
+++ b/hw/nvram/xlnx-bbram.c
@@ -0,0 +1,536 @@
+/*
+ * QEMU model of the Xilinx BBRAM Battery Backed RAM
+ *
+ * Copyright (c) 2014-2021 Xilinx Inc.
+ *
+ * Autogenerated by xregqemu.py 2020-02-06.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to 
deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "hw/nvram/xlnx-bbram.h"
+
+#include "qemu/error-report.h"
+#include "qemu/log.h"
+#include "qapi/error.h"
+#include "sysemu/blockdev.h"
+#include "migration/vmstate.h"
+#include "hw/qdev-properties.h"
+#include "hw/qdev-properties-system.h"
+#include "hw/nvram/xlnx-efuse.h"
+
+#ifndef XLNX_BBRAM_ERR_DEBUG
+#define XLNX_BBRAM_ERR_DEBUG 0
+#endif
+
+#define XLNX_BBRAM(obj) \
+ OBJECT_CHECK(XlnxBBRam, (obj), TYPE_XLNX_BBRAM)
+
+#define DB_PRINT_L(lvl, fmt, args...) do { \
+if (XLNX_BBRAM_ERR_DEBUG >= lvl) { \
+qemu_log("%s: " fmt, __func__, ## args); \
+} \
+} while (0)
+
+#define DB_PRINT(fmt, args...) DB_PRINT_L(1, fmt, ## args)
+
+REG32(BBRAM_STATUS, 0x0)
+FIELD(BBRAM_STATUS, AES_CRC_PASS, 9, 1)
+FIELD(BBRAM_STATUS, AES_CRC_DONE, 8, 1)
+FIELD(BBRAM_STATUS, BBRAM_ZEROIZED, 4, 1)
+FIELD(BBRAM_STATUS, PGM_MODE, 0, 1)
+REG32(BBRAM_CTRL, 0x4)
+FIELD(BBRAM_CTRL, ZEROIZE, 0, 1)
+REG32(PGM_MODE, 0x8)
+REG32(BBRAM_AES_CRC, 0xc)
+REG32(BBRAM_0, 0x10)
+REG32(BBRAM_1, 0x14)
+REG32(BBRAM_2, 0x18)
+REG32(BBRAM_3, 0x1c)
+REG32(BBRAM_4, 0x20)
+REG32(BBRAM_5, 0x24)
+REG32(BBRAM_6, 0x28)
+REG32(BBRAM_7, 0x2c)
+REG32(BBRAM_8, 0x30)
+REG32(BBRAM_SLVERR, 0x34)
+FIELD(BBRAM_SLVERR, ENABLE, 0, 1)
+REG32(BBRAM_ISR, 0x38)
+FIELD(BBRAM_ISR, APB_SLVERR, 0, 1)
+REG32(BBRAM_IMR, 0x3c)
+FIELD(BBRAM_IMR, APB_SLVERR, 0, 1)
+REG32(BBRAM_IER, 0x40)
+FIELD(BBRAM_IER, APB_SLVERR, 0, 1)
+REG32(BBRAM_IDR, 0x44)
+FIELD(BBRAM_IDR, APB_SLVERR, 0, 1)
+REG32(BBRAM_MSW_LOCK, 0x4c)
+FIELD(BBRAM_MSW_LOCK, VAL, 0, 1)
+
+#define R_MAX (R_BBRAM_MSW_LOCK + 1)
+
+#define RAM_MAX (A_BBRAM_8 + 4 - A_BBRAM_0)
+
+#define BBRAM_PGM_MAGIC 0x757bdf0d
+
+QEMU_BUILD_BUG_ON(R_MAX != ARRAY_SIZE(((XlnxBBRam *)0)->regs));
+
+static bool bbram_msw_locked(XlnxBBRam *s)
+{
+return ARRAY_FIELD_EX32(s->regs, BBRAM_MSW_LOCK, VAL) != 0;
+}
+
+st

[PATCH 1/9] docs/system/arm: xlnx-versal-virt: BBRAM and eFUSE Usage

2021-08-18 Thread Tong Ho

Add BBRAM and eFUSE usage to the Xilinx Versal Virt board
document.

Signed-off-by: Tong Ho 
---
 docs/system/arm/xlnx-versal-virt.rst | 49 
 1 file changed, 49 insertions(+)

diff --git a/docs/system/arm/xlnx-versal-virt.rst 
b/docs/system/arm/xlnx-versal-virt.rst
index 27f73500d9..84afac3734 100644
--- a/docs/system/arm/xlnx-versal-virt.rst
+++ b/docs/system/arm/xlnx-versal-virt.rst
@@ -32,6 +32,8 @@ Implemented devices:
 - OCM (256KB of On Chip Memory)
 - XRAM (4MB of on chip Accelerator RAM)
 - DDR memory
+- BBRAM (36 bytes of Battery-backed RAM)
+- eFUSE (3072 bytes of one-time field-programmable bit array)
 
 QEMU does not yet model any other devices, including the PL and the AI Engine.
 
@@ -175,3 +177,50 @@ Run the following at the U-Boot prompt:
   fdt set /chosen/dom0 reg <0x 0x4000 0x0 0x0310>
   booti 3000 - 2000
 
+BBRAM File Backend
+""
+BBRAM can have an optional file backend, which must be a seekable
+binary file with a size of 36 bytes or larger. A file with all
+binary 0s is a 'blank'.
+
+To add a file-backend for the BBRAM:
+
+.. code-block:: bash
+
+  -drive if=pflash,index=0,file=versal-bbram.bin,format=raw
+
+To use a different index value, N, from the default of 0, add:
+
+.. code-block:: bash
+
+  -global xlnx,bbram-ctrl.drive-index=N
+
+eFUSE File Backend
+""
+eFUSE can have an optional file backend, which must be a seekable
+binary file with a size of 3072 bytes or larger. A file with all
+binary 0s is a 'blank'.
+
+To add a file-backend for the eFUSE:
+
+.. code-block:: bash
+
+  -drive if=pflash,index=1,file=versal-efuse.bin,format=raw
+
+To use a different index value, N, from the default of 1, add:
+
+.. code-block:: bash
+
+  -global xlnx,efuse.drive-index=N
+
+.. warning::
+  In actual physical Versal, BBRAM and eFUSE contain sensitive data.
+  The QEMU device models do **not** encrypt nor obfuscate any data
+  when holding them in models' memory or when writing them to their
+  file backends.
+
+  Thus, a file backend should be used with caution, and 'format=luks'
+  is highly recommended (albeit with usage complexity).
+
+  Better yet, do not use actual product data when running guest image
+  on this Xilinx Versal Virt board.
-- 
2.25.1

[PATCH 9/9] hw/arm: xlnx-zynqmp: Add Xilinx eFUSE device

2021-08-18 Thread Tong Ho

Connect the support for ZynqMP eFUSE one-time field-programmable
bit array.

Signed-off-by: Tong Ho 
---
 hw/arm/xlnx-zynqmp.c | 29 +
 include/hw/arm/xlnx-zynqmp.h |  3 +++
 2 files changed, 32 insertions(+)

diff --git a/hw/arm/xlnx-zynqmp.c b/hw/arm/xlnx-zynqmp.c
index 8e39b7d6c7..9e458ad1c0 100644
--- a/hw/arm/xlnx-zynqmp.c
+++ b/hw/arm/xlnx-zynqmp.c
@@ -65,6 +65,9 @@
 #define BBRAM_ADDR  0xffcd
 #define BBRAM_IRQ   11
 
+#define EFUSE_ADDR  0xffcc
+#define EFUSE_IRQ   87
+
 #define SDHCI_CAPABILITIES  0x280737ec6481 /* Datasheet: UG1085 (v1.7) */
 
 static const uint64_t gem_addr[XLNX_ZYNQMP_NUM_GEMS] = {
@@ -241,6 +244,31 @@ static void xlnx_zynqmp_create_bbram(XlnxZynqMPState *s, 
qemu_irq *gic)
 sysbus_connect_irq(sbd, 0, gic[BBRAM_IRQ]);
 }
 
+static void xlnx_zynqmp_create_efuse(XlnxZynqMPState *s, qemu_irq *gic)
+{
+Object *bits = OBJECT(&s->efuse_bits);
+Object *ctrl = OBJECT(&s->efuse);
+SysBusDevice *sbd;
+
+object_initialize_child(OBJECT(s), "efuse", &s->efuse,
+TYPE_XLNX_ZYNQMP_EFUSE);
+
+object_initialize_child_with_props(ctrl, "efuse-bits", bits,
+   sizeof(s->efuse_bits),
+   TYPE_XLNX_EFUSE, &error_abort,
+   "efuse-nr", "3",
+   "efuse-size", "2048",
+   NULL);
+
+qdev_realize(DEVICE(bits), NULL, &error_abort);
+object_property_set_link(ctrl, "efuse", bits, &error_abort);
+
+sbd = SYS_BUS_DEVICE(ctrl);
+sysbus_realize(sbd, &error_abort);
+sysbus_mmio_map(sbd, 0, EFUSE_ADDR);
+sysbus_connect_irq(sbd, 0, gic[EFUSE_IRQ]);
+}
+
 static void xlnx_zynqmp_init(Object *obj)
 {
 MachineState *ms = MACHINE(qdev_get_machine());
@@ -636,6 +664,7 @@ static void xlnx_zynqmp_realize(DeviceState *dev, Error 
**errp)
 sysbus_connect_irq(SYS_BUS_DEVICE(&s->rtc), 0, gic_spi[RTC_IRQ]);
 
 xlnx_zynqmp_create_bbram(s, gic_spi);
+xlnx_zynqmp_create_efuse(s, gic_spi);
 
 for (i = 0; i < XLNX_ZYNQMP_NUM_GDMA_CH; i++) {
 if (!object_property_set_uint(OBJECT(&s->gdma[i]), "bus-width", 128,
diff --git a/include/hw/arm/xlnx-zynqmp.h b/include/hw/arm/xlnx-zynqmp.h
index 07ebcefbab..876e8bf4e3 100644
--- a/include/hw/arm/xlnx-zynqmp.h
+++ b/include/hw/arm/xlnx-zynqmp.h
@@ -37,6 +37,7 @@
 #include "net/can_emu.h"
 #include "hw/dma/xlnx_csu_dma.h"
 #include "hw/nvram/xlnx-bbram.h"
+#include "hw/nvram/xlnx-zynqmp-efuse.h"
 
 #define TYPE_XLNX_ZYNQMP "xlnx-zynqmp"
 OBJECT_DECLARE_SIMPLE_TYPE(XlnxZynqMPState, XLNX_ZYNQMP)
@@ -97,6 +98,8 @@ struct XlnxZynqMPState {
 MemoryRegion *ddr_ram;
 MemoryRegion ddr_ram_low, ddr_ram_high;
 XlnxBBRam bbram;
+XlnxZynqMPEFuse efuse;
+XLNXEFuse efuse_bits;
 
 CadenceGEMState gem[XLNX_ZYNQMP_NUM_GEMS];
 CadenceUARTState uart[XLNX_ZYNQMP_NUM_UARTS];
-- 
2.25.1

[PATCH 7/9] hw/arm: xlnx-versal: Add Xilinx eFUSE device

2021-08-18 Thread Tong Ho

Connect the support for Versal eFUSE one-time field-programmable
bit array.

Signed-off-by: Tong Ho 
---
 hw/arm/xlnx-versal-virt.c| 36 +
 hw/arm/xlnx-versal.c | 39 
 include/hw/arm/xlnx-versal.h | 12 +++
 3 files changed, 87 insertions(+)

diff --git a/hw/arm/xlnx-versal-virt.c b/hw/arm/xlnx-versal-virt.c
index d9e2a6a853..04da6c4517 100644
--- a/hw/arm/xlnx-versal-virt.c
+++ b/hw/arm/xlnx-versal-virt.c
@@ -376,6 +376,40 @@ static void fdt_add_bbram_node(VersalVirt *s)
 g_free(name);
 }
 
+static void fdt_add_efuse_ctrl_node(VersalVirt *s)
+{
+const char compat[] = TYPE_XLNX_VERSAL_EFUSE_CTRL;
+const char interrupt_names[] = "pmc_efuse";
+char *name = g_strdup_printf("/pmc_efuse@%x", MM_PMC_EFUSE_CTRL);
+
+qemu_fdt_add_subnode(s->fdt, name);
+
+qemu_fdt_setprop_cells(s->fdt, name, "interrupts",
+   GIC_FDT_IRQ_TYPE_SPI, VERSAL_EFUSE_IRQ,
+   GIC_FDT_IRQ_FLAGS_LEVEL_HI);
+qemu_fdt_setprop(s->fdt, name, "interrupt-names",
+ interrupt_names, sizeof(interrupt_names));
+qemu_fdt_setprop_sized_cells(s->fdt, name, "reg",
+ 2, MM_PMC_EFUSE_CTRL,
+ 2, MM_PMC_EFUSE_CTRL_SIZE);
+qemu_fdt_setprop(s->fdt, name, "compatible", compat, sizeof(compat));
+g_free(name);
+}
+
+static void fdt_add_efuse_cache_node(VersalVirt *s)
+{
+const char compat[] = TYPE_XLNX_VERSAL_EFUSE_CACHE;
+char *name = g_strdup_printf("/xlnx_pmc_efuse_cache@%x", 
MM_PMC_EFUSE_CACHE);
+
+qemu_fdt_add_subnode(s->fdt, name);
+
+qemu_fdt_setprop_sized_cells(s->fdt, name, "reg",
+ 2, MM_PMC_EFUSE_CACHE,
+ 2, MM_PMC_EFUSE_CACHE_SIZE);
+qemu_fdt_setprop(s->fdt, name, "compatible", compat, sizeof(compat));
+g_free(name);
+}
+
 static void fdt_nop_memory_nodes(void *fdt, Error **errp)
 {
 Error *err = NULL;
@@ -591,6 +625,8 @@ static void versal_virt_init(MachineState *machine)
 fdt_add_sd_nodes(s);
 fdt_add_rtc_node(s);
 fdt_add_bbram_node(s);
+fdt_add_efuse_ctrl_node(s);
+fdt_add_efuse_cache_node(s);
 fdt_add_cpu_nodes(s, psci_conduit);
 fdt_add_clk_node(s, "/clk125", 12500, s->phandle.clk_125Mhz);
 fdt_add_clk_node(s, "/clk25", 2500, s->phandle.clk_25Mhz);
diff --git a/hw/arm/xlnx-versal.c b/hw/arm/xlnx-versal.c
index 46d7f42a6b..d278d6e0f4 100644
--- a/hw/arm/xlnx-versal.c
+++ b/hw/arm/xlnx-versal.c
@@ -329,6 +329,44 @@ static void versal_create_bbram(Versal *s, qemu_irq *pic)
 sysbus_connect_irq(sbd, 0, pic[VERSAL_BBRAM_APB_IRQ_0]);
 }
 
+static void versal_realize_efuse_part(Versal *s, Object *dev, hwaddr base)
+{
+SysBusDevice *part = SYS_BUS_DEVICE(dev);
+
+object_property_set_link(OBJECT(part), "efuse",
+ OBJECT(&s->pmc.efuse.bits), &error_abort);
+
+sysbus_realize(part, &error_abort);
+memory_region_add_subregion(&s->mr_ps, base,
+sysbus_mmio_get_region(part, 0));
+}
+
+static void versal_create_efuse(Versal *s, qemu_irq *pic)
+{
+Object *bits = OBJECT(&s->pmc.efuse.bits);
+Object *ctrl = OBJECT(&s->pmc.efuse.ctrl);
+Object *cache = OBJECT(&s->pmc.efuse.cache);
+
+object_initialize_child(OBJECT(s), "efuse-ctrl", &s->pmc.efuse.ctrl,
+TYPE_XLNX_VERSAL_EFUSE_CTRL);
+
+object_initialize_child(OBJECT(s), "efuse-cache", &s->pmc.efuse.cache,
+TYPE_XLNX_VERSAL_EFUSE_CACHE);
+
+object_initialize_child_with_props(ctrl, "efuse-bits", bits,
+   sizeof(s->pmc.efuse.bits),
+   TYPE_XLNX_EFUSE, &error_abort,
+   "efuse-nr", "3",
+   "efuse-size", "8192",
+   NULL);
+
+qdev_realize(DEVICE(bits), NULL, &error_abort);
+versal_realize_efuse_part(s, ctrl, MM_PMC_EFUSE_CTRL);
+versal_realize_efuse_part(s, cache, MM_PMC_EFUSE_CACHE);
+
+sysbus_connect_irq(SYS_BUS_DEVICE(ctrl), 0, pic[VERSAL_EFUSE_IRQ]);
+}
+
 /* This takes the board allocated linear DDR memory and creates aliases
  * for each split DDR range/aperture on the Versal address map.
  */
@@ -416,6 +454,7 @@ static void versal_realize(DeviceState *dev, Error **errp)
 versal_create_rtc(s, pic);
 versal_create_xrams(s, pic);
 versal_create_bbram(s, pic);
+versal_create_efuse(s, pic);
 versal_map_ddr(s);
 versal_unimp(s);
 
diff --git a/include/hw/arm/xlnx-versal.h b/include/hw/arm/xlnx-versal.h
index 7719e8c4d2..33b89f00b6 100644
--- a/include/hw/arm/xlnx-versal.h
+++ b/include/hw/arm/xlnx-versal.h
@@ -25,6 +25,7 @@
 #include "hw/usb/xlnx-usb-subsystem.h"
 #include "hw/misc/xlnx-versal-xramc.h"
 #include "hw/nvram/xlnx-bbram.h"
+#include "hw/nvram/xlnx-

[PATCH 2/9] hw/nvram: Introduce Xilinx eFuse QOM

2021-08-18 Thread Tong Ho

This introduces the QOM for Xilinx eFuse, a one-time
field-programmable storage bit array.

The actual mmio interface to the array varies by device
families and will be provided in different change-sets.

Co-authored-by: Edgar E. Iglesias 
Co-authored-by: Sai Pavan Boddu 

Signed-off-by: Edgar E. Iglesias 
Signed-off-by: Sai Pavan Boddu 
Signed-off-by: Tong Ho 
---
 hw/nvram/xlnx-efuse-crc.c | 118 
 hw/nvram/xlnx-efuse.c | 253 ++
 include/hw/nvram/xlnx-efuse.h |  80 +++
 3 files changed, 451 insertions(+)
 create mode 100644 hw/nvram/xlnx-efuse-crc.c
 create mode 100644 hw/nvram/xlnx-efuse.c
 create mode 100644 include/hw/nvram/xlnx-efuse.h

diff --git a/hw/nvram/xlnx-efuse-crc.c b/hw/nvram/xlnx-efuse-crc.c
new file mode 100644
index 00..bc12c39e00
--- /dev/null
+++ b/hw/nvram/xlnx-efuse-crc.c
@@ -0,0 +1,118 @@
+/*
+ * Xilinx eFuse/bbram CRC calculator
+ *
+ * Copyright (c) 2021 Xilinx Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to 
deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "hw/nvram/xlnx-efuse.h"
+
+static uint32_t xlnx_efuse_u37_crc(uint32_t prev_crc, uint32_t data,
+   uint32_t addr)
+{
+/* A table for 7-bit slicing */
+static const uint32_t crc_tab[128] = {
+0x, 0xe13b70f7, 0xc79a971f, 0x26a1e7e8,
+0x8ad958cf, 0x6be22838, 0x4d43cfd0, 0xac78bf27,
+0x105ec76f, 0xf165b798, 0xd7c45070, 0x36ff2087,
+0x9a879fa0, 0x7bbcef57, 0x5d1d08bf, 0xbc267848,
+0x20bd8ede, 0xc186fe29, 0xe72719c1, 0x061c6936,
+0xaa64d611, 0x4b5fa6e6, 0x6dfe410e, 0x8cc531f9,
+0x30e349b1, 0xd1d83946, 0xf779deae, 0x1642ae59,
+0xba3a117e, 0x5b016189, 0x7da08661, 0x9c9bf696,
+0x417b1dbc, 0xa0406d4b, 0x86e18aa3, 0x67dafa54,
+0xcba24573, 0x2a993584, 0x0c38d26c, 0xed03a29b,
+0x5125dad3, 0xb01eaa24, 0x96bf4dcc, 0x77843d3b,
+0xdbfc821c, 0x3ac7f2eb, 0x1c661503, 0xfd5d65f4,
+0x61c69362, 0x80fde395, 0xa65c047d, 0x4767748a,
+0xeb1fcbad, 0x0a24bb5a, 0x2c855cb2, 0xcdbe2c45,
+0x7198540d, 0x90a324fa, 0xb602c312, 0x5739b3e5,
+0xfb410cc2, 0x1a7a7c35, 0x3cdb9bdd, 0xdde0eb2a,
+0x82f63b78, 0x63cd4b8f, 0x456cac67, 0xa457dc90,
+0x082f63b7, 0xe9141340, 0xcfb5f4a8, 0x2e8e845f,
+0x92a8fc17, 0x73938ce0, 0x55326b08, 0xb4091bff,
+0x1871a4d8, 0xf94ad42f, 0xdfeb33c7, 0x3ed04330,
+0xa24bb5a6, 0x4370c551, 0x65d122b9, 0x84ea524e,
+0x2892ed69, 0xc9a99d9e, 0xef087a76, 0x0e330a81,
+0xb21572c9, 0x532e023e, 0x758fe5d6, 0x94b49521,
+0x38cc2a06, 0xd9f75af1, 0xff56bd19, 0x1e6dcdee,
+0xc38d26c4, 0x22b65633, 0x0417b1db, 0xe52cc12c,
+0x49547e0b, 0xa86f0efc, 0x8ecee914, 0x6ff599e3,
+0xd3d3e1ab, 0x32e8915c, 0x144976b4, 0xf5720643,
+0x590ab964, 0xb831c993, 0x9e902e7b, 0x7fab5e8c,
+0xe330a81a, 0x020bd8ed, 0x24aa3f05, 0xc5914ff2,
+0x69e9f0d5, 0x88d28022, 0xae7367ca, 0x4f48173d,
+0xf36e6f75, 0x12551f82, 0x34f4f86a, 0xd5cf889d,
+0x79b737ba, 0x988c474d, 0xbe2da0a5, 0x5f16d052
+};
+
+/*
+ * eFuse calculation is shown here:
+ *  
https://github.com/Xilinx/embeddedsw/blob/release-2019.2/lib/sw_services/xilskey/src/xilskey_utils.c#L1496
+ *
+ * Each u32 word is appended a 5-bit value, for a total of 37 bits; see:
+ *  
https://github.com/Xilinx/embeddedsw/blob/release-2019.2/lib/sw_services/xilskey/src/xilskey_utils.c#L1356
+ */
+uint32_t crc = prev_crc;
+const unsigned rshf = 7;
+const uint32_t im = (1 << rshf) - 1;
+const uint32_t rm = (1 << (32 - rshf)) - 1;
+const uint32_t i2 = (1 << 2) - 1;
+const uint32_t r2 = (1 << 30) - 1;
+
+unsigned j;
+uint32_t i, r;
+uint64_t w;
+
+w = (uint64_t)(addr) << 32;
+w |= data;
+
+/* Feed 35 bits, in 5 rounds, each a slice of 7 bits */
+for (j = 0; j < 5; j++) {
+r = rm & (crc >> rshf);
+

[PATCH 6/9] hw/arm: xlnx-versal: Add Xilinx BBRAM device

2021-08-18 Thread Tong Ho

Connect the support for Versal Battery-Backed RAM (BBRAM)

Signed-off-by: Tong Ho 
---
 hw/arm/xlnx-versal-virt.c| 21 +
 hw/arm/xlnx-versal.c | 18 ++
 include/hw/arm/xlnx-versal.h |  5 +
 3 files changed, 44 insertions(+)

diff --git a/hw/arm/xlnx-versal-virt.c b/hw/arm/xlnx-versal-virt.c
index 5bca360dce..d9e2a6a853 100644
--- a/hw/arm/xlnx-versal-virt.c
+++ b/hw/arm/xlnx-versal-virt.c
@@ -356,6 +356,26 @@ static void fdt_add_rtc_node(VersalVirt *s)
 g_free(name);
 }
 
+static void fdt_add_bbram_node(VersalVirt *s)
+{
+const char compat[] = TYPE_XLNX_BBRAM;
+const char interrupt_names[] = "bbram-error";
+char *name = g_strdup_printf("/bbram@%x", MM_PMC_BBRAM_CTRL);
+
+qemu_fdt_add_subnode(s->fdt, name);
+
+qemu_fdt_setprop_cells(s->fdt, name, "interrupts",
+   GIC_FDT_IRQ_TYPE_SPI, VERSAL_BBRAM_APB_IRQ_0,
+   GIC_FDT_IRQ_FLAGS_LEVEL_HI);
+qemu_fdt_setprop(s->fdt, name, "interrupt-names",
+ interrupt_names, sizeof(interrupt_names));
+qemu_fdt_setprop_sized_cells(s->fdt, name, "reg",
+ 2, MM_PMC_BBRAM_CTRL,
+ 2, MM_PMC_BBRAM_CTRL_SIZE);
+qemu_fdt_setprop(s->fdt, name, "compatible", compat, sizeof(compat));
+g_free(name);
+}
+
 static void fdt_nop_memory_nodes(void *fdt, Error **errp)
 {
 Error *err = NULL;
@@ -570,6 +590,7 @@ static void versal_virt_init(MachineState *machine)
 fdt_add_usb_xhci_nodes(s);
 fdt_add_sd_nodes(s);
 fdt_add_rtc_node(s);
+fdt_add_bbram_node(s);
 fdt_add_cpu_nodes(s, psci_conduit);
 fdt_add_clk_node(s, "/clk125", 12500, s->phandle.clk_125Mhz);
 fdt_add_clk_node(s, "/clk25", 2500, s->phandle.clk_25Mhz);
diff --git a/hw/arm/xlnx-versal.c b/hw/arm/xlnx-versal.c
index fb776834f7..46d7f42a6b 100644
--- a/hw/arm/xlnx-versal.c
+++ b/hw/arm/xlnx-versal.c
@@ -312,6 +312,23 @@ static void versal_create_xrams(Versal *s, qemu_irq *pic)
 }
 }
 
+static void versal_create_bbram(Versal *s, qemu_irq *pic)
+{
+SysBusDevice *sbd;
+
+object_initialize_child_with_props(OBJECT(s), "bbram", &s->pmc.bbram,
+   sizeof(s->pmc.bbram), TYPE_XLNX_BBRAM,
+   &error_fatal,
+   "crc-zpads", "0",
+   NULL);
+sbd = SYS_BUS_DEVICE(&s->pmc.bbram);
+
+sysbus_realize(sbd, &error_fatal);
+memory_region_add_subregion(&s->mr_ps, MM_PMC_BBRAM_CTRL,
+sysbus_mmio_get_region(sbd, 0));
+sysbus_connect_irq(sbd, 0, pic[VERSAL_BBRAM_APB_IRQ_0]);
+}
+
 /* This takes the board allocated linear DDR memory and creates aliases
  * for each split DDR range/aperture on the Versal address map.
  */
@@ -398,6 +415,7 @@ static void versal_realize(DeviceState *dev, Error **errp)
 versal_create_sds(s, pic);
 versal_create_rtc(s, pic);
 versal_create_xrams(s, pic);
+versal_create_bbram(s, pic);
 versal_map_ddr(s);
 versal_unimp(s);
 
diff --git a/include/hw/arm/xlnx-versal.h b/include/hw/arm/xlnx-versal.h
index 22a8fa5d11..7719e8c4d2 100644
--- a/include/hw/arm/xlnx-versal.h
+++ b/include/hw/arm/xlnx-versal.h
@@ -24,6 +24,7 @@
 #include "qom/object.h"
 #include "hw/usb/xlnx-usb-subsystem.h"
 #include "hw/misc/xlnx-versal-xramc.h"
+#include "hw/nvram/xlnx-bbram.h"
 
 #define TYPE_XLNX_VERSAL "xlnx-versal"
 OBJECT_DECLARE_SIMPLE_TYPE(Versal, XLNX_VERSAL)
@@ -79,6 +80,7 @@ struct Versal {
 } iou;
 
 XlnxZynqMPRTC rtc;
+XlnxBBRam bbram;
 } pmc;
 
 struct {
@@ -105,6 +107,7 @@ struct Versal {
 #define VERSAL_GEM1_WAKE_IRQ_0 59
 #define VERSAL_ADMA_IRQ_0  60
 #define VERSAL_XRAM_IRQ_0  79
+#define VERSAL_BBRAM_APB_IRQ_0 121
 #define VERSAL_RTC_APB_ERR_IRQ 121
 #define VERSAL_SD0_IRQ_0   126
 #define VERSAL_RTC_ALARM_IRQ   142
@@ -170,6 +173,8 @@ struct Versal {
 
 #define MM_PMC_SD0  0xf104U
 #define MM_PMC_SD0_SIZE 0x1
+#define MM_PMC_BBRAM_CTRL   0xf11f
+#define MM_PMC_BBRAM_CTRL_SIZE  0x00050
 #define MM_PMC_CRP  0xf126U
 #define MM_PMC_CRP_SIZE 0x1
 #define MM_PMC_RTC  0xf12a
-- 
2.25.1

[PATCH 8/9] hw/arm: xlnx-zynqmp: Add Xilinx BBRAM device

2021-08-18 Thread Tong Ho

Connect the support for Xilinx ZynqMP Battery-Backed RAM (BBRAM)

Signed-off-by: Tong Ho 
---
 hw/arm/xlnx-zynqmp.c | 21 +
 include/hw/arm/xlnx-zynqmp.h |  2 ++
 2 files changed, 23 insertions(+)

diff --git a/hw/arm/xlnx-zynqmp.c b/hw/arm/xlnx-zynqmp.c
index 3597e8db4d..8e39b7d6c7 100644
--- a/hw/arm/xlnx-zynqmp.c
+++ b/hw/arm/xlnx-zynqmp.c
@@ -62,6 +62,9 @@
 #define RTC_ADDR0xffa6
 #define RTC_IRQ 26
 
+#define BBRAM_ADDR  0xffcd
+#define BBRAM_IRQ   11
+
 #define SDHCI_CAPABILITIES  0x280737ec6481 /* Datasheet: UG1085 (v1.7) */
 
 static const uint64_t gem_addr[XLNX_ZYNQMP_NUM_GEMS] = {
@@ -222,6 +225,22 @@ static void xlnx_zynqmp_create_rpu(MachineState *ms, 
XlnxZynqMPState *s,
 qdev_realize(DEVICE(&s->rpu_cluster), NULL, &error_fatal);
 }
 
+static void xlnx_zynqmp_create_bbram(XlnxZynqMPState *s, qemu_irq *gic)
+{
+SysBusDevice *sbd;
+
+object_initialize_child_with_props(OBJECT(s), "bbram", &s->bbram,
+   sizeof(s->bbram), TYPE_XLNX_BBRAM,
+   &error_fatal,
+   "crc-zpads", "1",
+   NULL);
+sbd = SYS_BUS_DEVICE(&s->bbram);
+
+sysbus_realize(sbd, &error_fatal);
+sysbus_mmio_map(sbd, 0, BBRAM_ADDR);
+sysbus_connect_irq(sbd, 0, gic[BBRAM_IRQ]);
+}
+
 static void xlnx_zynqmp_init(Object *obj)
 {
 MachineState *ms = MACHINE(qdev_get_machine());
@@ -616,6 +635,8 @@ static void xlnx_zynqmp_realize(DeviceState *dev, Error 
**errp)
 sysbus_mmio_map(SYS_BUS_DEVICE(&s->rtc), 0, RTC_ADDR);
 sysbus_connect_irq(SYS_BUS_DEVICE(&s->rtc), 0, gic_spi[RTC_IRQ]);
 
+xlnx_zynqmp_create_bbram(s, gic_spi);
+
 for (i = 0; i < XLNX_ZYNQMP_NUM_GDMA_CH; i++) {
 if (!object_property_set_uint(OBJECT(&s->gdma[i]), "bus-width", 128,
   errp)) {
diff --git a/include/hw/arm/xlnx-zynqmp.h b/include/hw/arm/xlnx-zynqmp.h
index d3e2ef97f6..07ebcefbab 100644
--- a/include/hw/arm/xlnx-zynqmp.h
+++ b/include/hw/arm/xlnx-zynqmp.h
@@ -36,6 +36,7 @@
 #include "qom/object.h"
 #include "net/can_emu.h"
 #include "hw/dma/xlnx_csu_dma.h"
+#include "hw/nvram/xlnx-bbram.h"
 
 #define TYPE_XLNX_ZYNQMP "xlnx-zynqmp"
 OBJECT_DECLARE_SIMPLE_TYPE(XlnxZynqMPState, XLNX_ZYNQMP)
@@ -95,6 +96,7 @@ struct XlnxZynqMPState {
 
 MemoryRegion *ddr_ram;
 MemoryRegion ddr_ram_low, ddr_ram_high;
+XlnxBBRam bbram;
 
 CadenceGEMState gem[XLNX_ZYNQMP_NUM_GEMS];
 CadenceUARTState uart[XLNX_ZYNQMP_NUM_UARTS];
-- 
2.25.1

[PATCH v6 14/16] machine: Put all sanity-check in the generic SMP parser

2021-08-18 Thread Yanan Wang

Put the sanity checks for both the input SMP configuration and the
output SMP configuration in the generic parser. machine_set_smp()
then becomes cleaner, and all the invalid scenarios can be tested
just by calling the parser.

Signed-off-by: Yanan Wang 
Reviewed-by: Andrew Jones 
Reviewed-by: Pankaj Gupta 
---
 hw/core/machine.c | 63 +++
 1 file changed, 31 insertions(+), 32 deletions(-)

diff --git a/hw/core/machine.c b/hw/core/machine.c
index 1bdeff32b3..5b62ba7e34 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -813,6 +813,20 @@ static void smp_parse(MachineState *ms, SMPConfiguration 
*config, Error **errp)
 unsigned threads = config->has_threads ? config->threads : 0;
 unsigned maxcpus = config->has_maxcpus ? config->maxcpus : 0;
 
+/*
+ * Specified CPU topology parameters must be greater than zero,
+ * explicit configuration like "cpus=0" is not allowed.
+ */
+if ((config->has_cpus && config->cpus == 0) ||
+(config->has_sockets && config->sockets == 0) ||
+(config->has_dies && config->dies == 0) ||
+(config->has_cores && config->cores == 0) ||
+(config->has_threads && config->threads == 0) ||
+(config->has_maxcpus && config->maxcpus == 0)) {
+warn_report("Invalid CPU topology deprecated: "
+"CPU topology parameters must be greater than zero");
+}
+
 /*
  * If not supported by the machine, a topology parameter must be
  * omitted or specified equal to 1.
@@ -892,6 +906,22 @@ static void smp_parse(MachineState *ms, SMPConfiguration 
*config, Error **errp)
topo_msg, maxcpus, cpus);
 return;
 }
+
+if (ms->smp.cpus < mc->min_cpus) {
+error_setg(errp, "Invalid SMP CPUs %d. The min CPUs "
+   "supported by machine '%s' is %d",
+   ms->smp.cpus,
+   mc->name, mc->min_cpus);
+return;
+}
+
+if (ms->smp.max_cpus > mc->max_cpus) {
+error_setg(errp, "Invalid SMP CPUs %d. The max CPUs "
+   "supported by machine '%s' is %d",
+   ms->smp.max_cpus,
+   mc->name, mc->max_cpus);
+return;
+}
 }
 
 static void machine_get_smp(Object *obj, Visitor *v, const char *name,
@@ -914,7 +944,6 @@ static void machine_get_smp(Object *obj, Visitor *v, const 
char *name,
 static void machine_set_smp(Object *obj, Visitor *v, const char *name,
 void *opaque, Error **errp)
 {
-MachineClass *mc = MACHINE_GET_CLASS(obj);
 MachineState *ms = MACHINE(obj);
 SMPConfiguration *config;
 ERRP_GUARD();
@@ -923,40 +952,10 @@ static void machine_set_smp(Object *obj, Visitor *v, 
const char *name,
 return;
 }
 
-/*
- * Specified CPU topology parameters must be greater than zero,
- * explicit configuration like "cpus=0" is not allowed.
- */
-if ((config->has_cpus && config->cpus == 0) ||
-(config->has_sockets && config->sockets == 0) ||
-(config->has_dies && config->dies == 0) ||
-(config->has_cores && config->cores == 0) ||
-(config->has_threads && config->threads == 0) ||
-(config->has_maxcpus && config->maxcpus == 0)) {
-warn_report("Invalid CPU topology deprecated: "
-"CPU topology parameters must be greater than zero");
-}
-
 smp_parse(ms, config, errp);
 if (*errp) {
-goto out_free;
-}
-
-/* sanity-check smp_cpus and max_cpus against mc */
-if (ms->smp.cpus < mc->min_cpus) {
-error_setg(errp, "Invalid SMP CPUs %d. The min CPUs "
-   "supported by machine '%s' is %d",
-   ms->smp.cpus,
-   mc->name, mc->min_cpus);
-} else if (ms->smp.max_cpus > mc->max_cpus) {
-error_setg(errp, "Invalid SMP CPUs %d. The max CPUs "
-   "supported by machine '%s' is %d",
-   ms->smp.max_cpus,
-   mc->name, mc->max_cpus);
+qapi_free_SMPConfiguration(config);
 }
-
-out_free:
-qapi_free_SMPConfiguration(config);
 }
 
 static void machine_class_init(ObjectClass *oc, void *data)
-- 
2.19.1

[PATCH v6 12/16] machine: Remove smp_parse callback from MachineClass

2021-08-18 Thread Yanan Wang

Now we have a generic smp parser for all arches, and there will
not be any other arch specific ones, so let's remove the callback
from MachineClass and call the parser directly.

Signed-off-by: Yanan Wang 
Reviewed-by: Andrew Jones 
---
 hw/core/machine.c   | 3 +--
 include/hw/boards.h | 5 -
 2 files changed, 1 insertion(+), 7 deletions(-)

diff --git a/hw/core/machine.c b/hw/core/machine.c
index 4b5c943f8e..ca7ca68ae3 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -937,7 +937,7 @@ static void machine_set_smp(Object *obj, Visitor *v, const 
char *name,
 "CPU topology parameters must be greater than zero");
 }
 
-mc->smp_parse(ms, config, errp);
+smp_parse(ms, config, errp);
 if (*errp) {
 goto out_free;
 }
@@ -966,7 +966,6 @@ static void machine_class_init(ObjectClass *oc, void *data)
 /* Default 128 MB as guest ram size */
 mc->default_ram_size = 128 * MiB;
 mc->rom_file_has_mr = true;
-mc->smp_parse = smp_parse;
 
 /* numa node memory size aligned on 8MB by default.
  * On Linux, each node's border has to be 8MB aligned
diff --git a/include/hw/boards.h b/include/hw/boards.h
index 72a23e4e0f..fa284e01e9 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -177,10 +177,6 @@ typedef struct {
  *kvm-type may be NULL if it is not needed.
  * @numa_mem_supported:
  *true if '--numa node.mem' option is supported and false otherwise
- * @smp_parse:
- *The function pointer to hook different machine specific functions for
- *parsing "smp-opts" from QemuOpts to MachineState::CpuTopology and more
- *machine specific topology fields, such as smp_dies for PCMachine.
  * @hotplug_allowed:
  *If the hook is provided, then it'll be called for each device
  *hotplug to check whether the device hotplug is allowed.  Return
@@ -217,7 +213,6 @@ struct MachineClass {
 void (*reset)(MachineState *state);
 void (*wakeup)(MachineState *state);
 int (*kvm_type)(MachineState *machine, const char *arg);
-void (*smp_parse)(MachineState *ms, SMPConfiguration *config, Error 
**errp);
 
 BlockInterfaceType block_default_type;
 int units_per_default_bus;
-- 
2.19.1

[PATCH v6 16/16] tests/unit: Add a unit test for smp parsing

2021-08-18 Thread Yanan Wang

Add a QEMU unit test for the parsing of given SMP configuration.
Since all the parsing logic is in generic function smp_parse(),
this test passes different SMP configurations to the function
and compares the parsing result with what is expected.

In the test, all possible combinations of the topology parameters
and the corresponding expected results are listed, including both
valid and invalid ones.

The preference of sockets over cores and the preference of cores
over sockets, and the support of multi-dies are also considered.

Signed-off-by: Yanan Wang 
Reviewed-by: Andrew Jones 
---
 MAINTAINERS |   1 +
 tests/unit/meson.build  |   1 +
 tests/unit/test-smp-parse.c | 866 
 3 files changed, 868 insertions(+)
 create mode 100644 tests/unit/test-smp-parse.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 1e03352501..64255fecd4 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1639,6 +1639,7 @@ F: include/hw/boards.h
 F: include/hw/core/cpu.h
 F: include/hw/cpu/cluster.h
 F: include/sysemu/numa.h
+F: tests/unit/test-smp-parse.c
 T: git https://gitlab.com/ehabkost/qemu.git machine-next
 
 Xtensa Machines
diff --git a/tests/unit/meson.build b/tests/unit/meson.build
index 5736d285b2..e208173970 100644
--- a/tests/unit/meson.build
+++ b/tests/unit/meson.build
@@ -45,6 +45,7 @@ tests = {
   'test-uuid': [],
   'ptimer-test': ['ptimer-test-stubs.c', meson.source_root() / 
'hw/core/ptimer.c'],
   'test-qapi-util': [],
+  'test-smp-parse': [qom, meson.source_root() / 'hw/core/machine-smp.c'],
 }
 
 if have_system or have_tools
diff --git a/tests/unit/test-smp-parse.c b/tests/unit/test-smp-parse.c
new file mode 100644
index 00..836dfd320f
--- /dev/null
+++ b/tests/unit/test-smp-parse.c
@@ -0,0 +1,866 @@
+/*
+ * SMP parsing unit-tests
+ *
+ * Copyright (c) 2021 Huawei Technologies Co., Ltd
+ *
+ * Authors:
+ *  Yanan Wang 
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qom/object.h"
+#include "qemu/module.h"
+#include "qapi/error.h"
+
+#include "hw/boards.h"
+
+#define T true
+#define F false
+
+#define MIN_CPUS 1
+#define MAX_CPUS 512
+
+/* define a CPU topology hierarchy of sockets/cores/threads */
+#define SMP_CONFIG_GENERIC(ha, a, hb, b, hc, c, hd, d, he, e) \
+{ \
+.has_cpus= ha, .cpus= a,  \
+.has_sockets = hb, .sockets = b,  \
+.has_cores   = hc, .cores   = c,  \
+.has_threads = hd, .threads = d,  \
+.has_maxcpus = he, .maxcpus = e,  \
+}
+
+#define CPU_TOPOLOGY_GENERIC(a, b, c, d, e)   \
+{ \
+.cpus = a,\
+.sockets  = b,\
+.cores= c,\
+.threads  = d,\
+.max_cpus = e,\
+}
+
+/* define a CPU topology hierarchy of sockets/dies/cores/threads */
+#define SMP_CONFIG_WITH_DIES(ha, a, hb, b, hc, c, hd, d, he, e, hf, f) \
+{ \
+.has_cpus= ha, .cpus= a,  \
+.has_sockets = hb, .sockets = b,  \
+.has_dies= hc, .dies= c,  \
+.has_cores   = hd, .cores   = d,  \
+.has_threads = he, .threads = e,  \
+.has_maxcpus = hf, .maxcpus = f,  \
+}
+
+#define CPU_TOPOLOGY_WITH_DIES(a, b, c, d, e, f)  \
+{ \
+.cpus = a,\
+.sockets  = b,\
+.dies = c,\
+.cores= d,\
+.threads  = e,\
+.max_cpus = f,\
+}
+
+/**
+ * SMPTestData:
+ * @config - the given SMP configuration
+ * @expect_prefer_sockets - expected topology result for the valid
+ * configuration, when sockets are preferred over cores in parsing
+ * @expect_prefer_cores - expected topology result for the valid
+ * configuration, when cores are preferred over sockets in parsing
+ * @expect_error - expected error report for the invalid configuration
+ */
+typedef struct SMPTestData {
+SMPConfiguration config;
+CpuTopology expect_prefer_sockets;
+CpuTopology expect_prefer_cores;
+const char *expect_error;
+} SMPTestData;
+
+

[PATCH v6 13/16] machine: Move smp_prefer_sockets to struct SMPCompatProps

2021-08-18 Thread Yanan Wang

Now that we have a common structure, SMPCompatProps, used to store
SMP compatibility information, we can also move smp_prefer_sockets
there for cleaner code.

No functional change intended.

Signed-off-by: Yanan Wang 
Acked-by: David Gibson 
Reviewed-by: Andrew Jones 
---
 hw/arm/virt.c  | 2 +-
 hw/core/machine.c  | 2 +-
 hw/i386/pc_piix.c  | 2 +-
 hw/i386/pc_q35.c   | 2 +-
 hw/ppc/spapr.c | 2 +-
 hw/s390x/s390-virtio-ccw.c | 2 +-
 include/hw/boards.h| 3 ++-
 7 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 7babea40dc..ae029680da 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -2797,7 +2797,7 @@ static void virt_machine_6_1_options(MachineClass *mc)
 {
 virt_machine_6_2_options(mc);
 compat_props_add(mc->compat_props, hw_compat_6_1, hw_compat_6_1_len);
-mc->smp_prefer_sockets = true;
+mc->smp_props.prefer_sockets = true;
 }
 DEFINE_VIRT_MACHINE(6, 1)
 
diff --git a/hw/core/machine.c b/hw/core/machine.c
index ca7ca68ae3..1bdeff32b3 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -836,7 +836,7 @@ static void smp_parse(MachineState *ms, SMPConfiguration 
*config, Error **errp)
 } else {
 maxcpus = maxcpus > 0 ? maxcpus : cpus;
 
-if (mc->smp_prefer_sockets) {
+if (mc->smp_props.prefer_sockets) {
 /* prefer sockets over cores before 6.2 */
 if (sockets == 0) {
 cores = cores > 0 ? cores : 1;
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 9b811fc6ca..a60ebfc2c1 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -432,7 +432,7 @@ static void pc_i440fx_6_1_machine_options(MachineClass *m)
 m->is_default = false;
 compat_props_add(m->compat_props, hw_compat_6_1, hw_compat_6_1_len);
 compat_props_add(m->compat_props, pc_compat_6_1, pc_compat_6_1_len);
-m->smp_prefer_sockets = true;
+m->smp_props.prefer_sockets = true;
 }
 
 DEFINE_I440FX_MACHINE(v6_1, "pc-i440fx-6.1", NULL,
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index 88efb7fde4..4b622ffb82 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -372,7 +372,7 @@ static void pc_q35_6_1_machine_options(MachineClass *m)
 m->alias = NULL;
 compat_props_add(m->compat_props, hw_compat_6_1, hw_compat_6_1_len);
 compat_props_add(m->compat_props, pc_compat_6_1, pc_compat_6_1_len);
-m->smp_prefer_sockets = true;
+m->smp_props.prefer_sockets = true;
 }
 
 DEFINE_Q35_MACHINE(v6_1, "pc-q35-6.1", NULL,
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index a481fade51..efdea43c0d 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -4702,7 +4702,7 @@ static void spapr_machine_6_1_class_options(MachineClass 
*mc)
 {
 spapr_machine_6_2_class_options(mc);
 compat_props_add(mc->compat_props, hw_compat_6_1, hw_compat_6_1_len);
-mc->smp_prefer_sockets = true;
+mc->smp_props.prefer_sockets = true;
 }
 
 DEFINE_SPAPR_MACHINE(6_1, "6.1", false);
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index b40e647883..5bdef9b4d7 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -809,7 +809,7 @@ static void ccw_machine_6_1_class_options(MachineClass *mc)
 {
 ccw_machine_6_2_class_options(mc);
 compat_props_add(mc->compat_props, hw_compat_6_1, hw_compat_6_1_len);
-mc->smp_prefer_sockets = true;
+mc->smp_props.prefer_sockets = true;
 }
 DEFINE_CCW_MACHINE(6_1, "6.1", false);
 
diff --git a/include/hw/boards.h b/include/hw/boards.h
index fa284e01e9..5adbcbb99b 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -110,9 +110,11 @@ typedef struct {
 
 /**
  * SMPCompatProps:
+ * @prefer_sockets - whether sockets are preferred over cores in smp parsing
  * @dies_supported - whether dies are supported by the machine
  */
 typedef struct {
+bool prefer_sockets;
 bool dies_supported;
 } SMPCompatProps;
 
@@ -250,7 +252,6 @@ struct MachineClass {
 bool nvdimm_supported;
 bool numa_mem_supported;
 bool auto_enable_numa;
-bool smp_prefer_sockets;
 SMPCompatProps smp_props;
 const char *default_ram_id;
 
-- 
2.19.1

[Patch 2/2] hw/arm/xlnx-zynqmp: Add unimplemented APU mmio

2021-08-18 Thread Tong Ho

Add an unimplemented APU mmio region to xlnx-zynqmp for booting
bare-metal guests built with the standalone bsp published at:
  
https://github.com/Xilinx/embeddedsw/tree/master/lib/bsp/standalone/src/arm/ARMv8/64bit

Signed-off-by: Tong Ho 
---
 hw/arm/xlnx-zynqmp.c | 32 
 include/hw/arm/xlnx-zynqmp.h |  7 +++
 2 files changed, 39 insertions(+)

diff --git a/hw/arm/xlnx-zynqmp.c b/hw/arm/xlnx-zynqmp.c
index 3597e8db4d..790df2b6f1 100644
--- a/hw/arm/xlnx-zynqmp.c
+++ b/hw/arm/xlnx-zynqmp.c
@@ -20,6 +20,7 @@
 #include "qemu/module.h"
 #include "hw/arm/xlnx-zynqmp.h"
 #include "hw/intc/arm_gic_common.h"
+#include "hw/misc/unimp.h"
 #include "hw/boards.h"
 #include "sysemu/kvm.h"
 #include "sysemu/sysemu.h"
@@ -56,6 +57,9 @@
 #define DPDMA_ADDR  0xfd4c
 #define DPDMA_IRQ   116
 
+#define APU_ADDR0xfd5c
+#define APU_SIZE0x100
+
 #define IPI_ADDR0xFF30
 #define IPI_IRQ 64
 
@@ -222,6 +226,32 @@ static void xlnx_zynqmp_create_rpu(MachineState *ms, 
XlnxZynqMPState *s,
 qdev_realize(DEVICE(&s->rpu_cluster), NULL, &error_fatal);
 }
 
+static void xlnx_zynqmp_create_unimp_mmio(XlnxZynqMPState *s)
+{
+static const struct UnimpInfo {
+const char *name;
+hwaddr base;
+hwaddr size;
+} unimp_areas[ARRAY_SIZE(s->mr_unimp)] = {
+{ .name = "apu", APU_ADDR, APU_SIZE },
+};
+
+unsigned nr;
+
+for (nr = 0; nr < ARRAY_SIZE(unimp_areas); nr++) {
+const struct UnimpInfo *info = &unimp_areas[nr];
+DeviceState *dev = qdev_new(TYPE_UNIMPLEMENTED_DEVICE);
+SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
+
+qdev_prop_set_string(dev, "name", info->name);
+qdev_prop_set_uint64(dev, "size", info->size);
+object_property_add_child(OBJECT(s), info->name, OBJECT(dev));
+
+sysbus_realize_and_unref(sbd, &error_fatal);
+sysbus_mmio_map(sbd, 0, info->base);
+}
+}
+
 static void xlnx_zynqmp_init(Object *obj)
 {
 MachineState *ms = MACHINE(qdev_get_machine());
@@ -616,6 +646,8 @@ static void xlnx_zynqmp_realize(DeviceState *dev, Error 
**errp)
 sysbus_mmio_map(SYS_BUS_DEVICE(&s->rtc), 0, RTC_ADDR);
 sysbus_connect_irq(SYS_BUS_DEVICE(&s->rtc), 0, gic_spi[RTC_IRQ]);
 
+xlnx_zynqmp_create_unimp_mmio(s);
+
 for (i = 0; i < XLNX_ZYNQMP_NUM_GDMA_CH; i++) {
 if (!object_property_set_uint(OBJECT(&s->gdma[i]), "bus-width", 128,
   errp)) {
diff --git a/include/hw/arm/xlnx-zynqmp.h b/include/hw/arm/xlnx-zynqmp.h
index d3e2ef97f6..c84fe15996 100644
--- a/include/hw/arm/xlnx-zynqmp.h
+++ b/include/hw/arm/xlnx-zynqmp.h
@@ -79,6 +79,11 @@ OBJECT_DECLARE_SIMPLE_TYPE(XlnxZynqMPState, XLNX_ZYNQMP)
 #define XLNX_ZYNQMP_MAX_RAM_SIZE (XLNX_ZYNQMP_MAX_LOW_RAM_SIZE + \
   XLNX_ZYNQMP_MAX_HIGH_RAM_SIZE)
 
+/*
+ * Unimplemented mmio regions needed to boot some images.
+ */
+#define XLNX_ZYNQMP_NUM_UNIMP_AREAS 1
+
 struct XlnxZynqMPState {
 /*< private >*/
 DeviceState parent_obj;
@@ -96,6 +101,8 @@ struct XlnxZynqMPState {
 MemoryRegion *ddr_ram;
 MemoryRegion ddr_ram_low, ddr_ram_high;
 
+MemoryRegion mr_unimp[XLNX_ZYNQMP_NUM_UNIMP_AREAS];
+
 CadenceGEMState gem[XLNX_ZYNQMP_NUM_GEMS];
 CadenceUARTState uart[XLNX_ZYNQMP_NUM_UARTS];
 XlnxZynqMPCANState can[XLNX_ZYNQMP_NUM_CAN];
-- 
2.25.1

[PATCH v6 10/16] machine: Tweak the order of topology members in struct CpuTopology

2021-08-18 Thread Yanan Wang

Now that all the possible topology parameters are integrated in struct
CpuTopology, tweak the order of topology members to be "cpus/sockets/
dies/cores/threads/maxcpus" for readability and consistency. We also
tweak the comment by adding an explanation of the dies parameter.

Signed-off-by: Yanan Wang 
Reviewed-by: Andrew Jones 
Reviewed-by: Pankaj Gupta 
---
 hw/core/machine.c   | 8 
 include/hw/boards.h | 7 ---
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/hw/core/machine.c b/hw/core/machine.c
index 1ad5dac3e8..a21fcd7700 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -829,11 +829,11 @@ static void machine_get_smp(Object *obj, Visitor *v, 
const char *name,
 {
 MachineState *ms = MACHINE(obj);
 SMPConfiguration *config = &(SMPConfiguration){
-.has_cores = true, .cores = ms->smp.cores,
+.has_cpus = true, .cpus = ms->smp.cpus,
 .has_sockets = true, .sockets = ms->smp.sockets,
 .has_dies = true, .dies = ms->smp.dies,
+.has_cores = true, .cores = ms->smp.cores,
 .has_threads = true, .threads = ms->smp.threads,
-.has_cpus = true, .cpus = ms->smp.cpus,
 .has_maxcpus = true, .maxcpus = ms->smp.max_cpus,
 };
 if (!visit_type_SMPConfiguration(v, name, &config, &error_abort)) {
@@ -1060,10 +1060,10 @@ static void machine_initfn(Object *obj)
 /* default to mc->default_cpus */
 ms->smp.cpus = mc->default_cpus;
 ms->smp.max_cpus = mc->default_cpus;
-ms->smp.cores = 1;
+ms->smp.sockets = 1;
 ms->smp.dies = 1;
+ms->smp.cores = 1;
 ms->smp.threads = 1;
-ms->smp.sockets = 1;
 }
 
 static void machine_finalize(Object *obj)
diff --git a/include/hw/boards.h b/include/hw/boards.h
index 2ae039b74f..2a1bba86c0 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -275,17 +275,18 @@ typedef struct DeviceMemoryState {
 /**
  * CpuTopology:
  * @cpus: the number of present logical processors on the machine
- * @cores: the number of cores in one package
- * @threads: the number of threads in one core
  * @sockets: the number of sockets on the machine
+ * @dies: the number of dies in one socket
+ * @cores: the number of cores in one die
+ * @threads: the number of threads in one core
  * @max_cpus: the maximum number of logical processors on the machine
  */
 typedef struct CpuTopology {
 unsigned int cpus;
+unsigned int sockets;
 unsigned int dies;
 unsigned int cores;
 unsigned int threads;
-unsigned int sockets;
 unsigned int max_cpus;
 } CpuTopology;
 
-- 
2.19.1

[PATCH v6 11/16] machine: Make smp_parse generic enough for all arches

2021-08-18 Thread Yanan Wang

Currently the only difference between smp_parse and pc_smp_parse
is the support for the dies parameter and the related error reporting.
With some arch compat variables like "bool dies_supported", we can
make smp_parse generic enough for all arches and the PC specific
one can be removed.

Making smp_parse() generic enough can reduce code duplication and
ease the code maintenance, and also allows extending the topology
with more arch specific members (e.g., clusters) in the future.

Suggested-by: Andrew Jones 
Signed-off-by: Yanan Wang 
Reviewed-by: Andrew Jones 
---
 hw/core/machine.c   | 110 
 hw/i386/pc.c|  84 +
 include/hw/boards.h |   9 
 3 files changed, 100 insertions(+), 103 deletions(-)

diff --git a/hw/core/machine.c b/hw/core/machine.c
index a21fcd7700..4b5c943f8e 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -15,6 +15,7 @@
 #include "qapi/qmp/qerror.h"
 #include "sysemu/replay.h"
 #include "qemu/units.h"
+#include "qemu/cutils.h"
 #include "hw/boards.h"
 #include "hw/loader.h"
 #include "qapi/error.h"
@@ -746,20 +747,87 @@ void machine_set_cpu_numa_node(MachineState *machine,
 }
 }
 
+static char *cpu_topology_hierarchy(MachineState *ms)
+{
+MachineClass *mc = MACHINE_GET_CLASS(ms);
+SMPCompatProps *smp_props = &mc->smp_props;
+char topo_msg[256] = "";
+
+/*
+ * Topology members should be ordered from the largest to the smallest.
+ * Concept of sockets/cores/threads is supported by default and will be
+ * reported in the hierarchy. Unsupported members will not be reported.
+ */
+g_autofree char *sockets_msg = g_strdup_printf(
+" * sockets (%u)", ms->smp.sockets);
+pstrcat(topo_msg, sizeof(topo_msg), sockets_msg);
+
+if (smp_props->dies_supported) {
+g_autofree char *dies_msg = g_strdup_printf(
+" * dies (%u)", ms->smp.dies);
+pstrcat(topo_msg, sizeof(topo_msg), dies_msg);
+}
+
+g_autofree char *cores_msg = g_strdup_printf(
+" * cores (%u)", ms->smp.cores);
+pstrcat(topo_msg, sizeof(topo_msg), cores_msg);
+
+g_autofree char *threads_msg = g_strdup_printf(
+" * threads (%u)", ms->smp.threads);
+pstrcat(topo_msg, sizeof(topo_msg), threads_msg);
+
+return g_strdup_printf("%s", topo_msg + 3);
+}
+
+/*
+ * smp_parse - Generic function used to parse the given SMP configuration
+ *
+ * If not supported by the machine, a topology parameter must be omitted
+ * or specified equal to 1. Concept of sockets/cores/threads is supported
+ * by default. Unsupported members will not be reported in the topology
+ * hierarchy message.
+ *
+ * For compatibility, omitted arch-specific members (e.g. dies) will not
+ * be computed, but will directly default to 1 instead. This logic should
+ * also apply to future introduced ones.
+ *
+ * Omitted arch-neutral parameters (i.e. cpus/sockets/cores/threads/maxcpus)
+ * will be computed based on the provided ones. When both maxcpus and cpus
+ * are omitted, maxcpus will be computed from the given parameters and cpus
+ * will be set equal to maxcpus. When only one of maxcpus and cpus is given
+ * then the omitted one will be set to its given counterpart's value.
+ * Both maxcpus and cpus may be specified, but maxcpus must be equal to or
+ * greater than cpus.
+ *
+ * In calculation of omitted sockets/cores/threads, we prefer sockets over
+ * cores over threads before 6.2, while preferring cores over sockets over
+ * threads since 6.2.
+ */
 static void smp_parse(MachineState *ms, SMPConfiguration *config, Error **errp)
 {
 MachineClass *mc = MACHINE_GET_CLASS(ms);
 unsigned cpus= config->has_cpus ? config->cpus : 0;
 unsigned sockets = config->has_sockets ? config->sockets : 0;
+unsigned dies= config->has_dies ? config->dies : 0;
 unsigned cores   = config->has_cores ? config->cores : 0;
 unsigned threads = config->has_threads ? config->threads : 0;
 unsigned maxcpus = config->has_maxcpus ? config->maxcpus : 0;
 
-if (config->has_dies && config->dies > 1) {
+/*
+ * If not supported by the machine, a topology parameter must be
+ * omitted or specified equal to 1.
+ */
+if (!mc->smp_props.dies_supported && dies > 1) {
 error_setg(errp, "dies not supported by this machine's CPU topology");
 return;
 }
 
+/*
+ * Omitted arch-specific members will not be computed, but will
+ * directly default to 1 instead.
+ */
+dies = dies > 0 ? dies : 1;
+
 /* compute missing values based on the provided ones */
 if (cpus == 0 && maxcpus == 0) {
 sockets = sockets > 0 ? sockets : 1;
@@ -773,55 +841,57 @@ static void smp_parse(MachineState *ms, SMPConfiguration 
*config, Error **errp)
 if (sockets == 0) {
 cores = cores > 0 ? cores : 1;
 threads = threads > 0 ? threads : 1;
-sockets = maxcpus /

[Patch 1/2] hw/arm/xlnx-versal: Add unimplemented APU mmio

2021-08-18 Thread Tong Ho

Add an unimplemented APU mmio region to xlnx-versal for booting
bare-metal guests built with the standalone bsp published at:
  
https://github.com/Xilinx/embeddedsw/tree/master/lib/bsp/standalone/src/arm/ARMv8/64bit

Signed-off-by: Tong Ho 
---
 hw/arm/xlnx-versal.c | 2 ++
 include/hw/arm/xlnx-versal.h | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/hw/arm/xlnx-versal.c b/hw/arm/xlnx-versal.c
index fb776834f7..cb6ec0a4a0 100644
--- a/hw/arm/xlnx-versal.c
+++ b/hw/arm/xlnx-versal.c
@@ -376,6 +376,8 @@ static void versal_unimp(Versal *s)
 MM_CRL, MM_CRL_SIZE);
 versal_unimp_area(s, "crf", &s->mr_ps,
 MM_FPD_CRF, MM_FPD_CRF_SIZE);
+versal_unimp_area(s, "apu", &s->mr_ps,
+MM_FPD_FPD_APU, MM_FPD_FPD_APU_SIZE);
 versal_unimp_area(s, "crp", &s->mr_ps,
 MM_PMC_CRP, MM_PMC_CRP_SIZE);
 versal_unimp_area(s, "iou-scntr", &s->mr_ps,
diff --git a/include/hw/arm/xlnx-versal.h b/include/hw/arm/xlnx-versal.h
index 22a8fa5d11..9b79051747 100644
--- a/include/hw/arm/xlnx-versal.h
+++ b/include/hw/arm/xlnx-versal.h
@@ -167,6 +167,8 @@ struct Versal {
 #define MM_IOU_SCNTRS_SIZE  0x1
 #define MM_FPD_CRF  0xfd1aU
 #define MM_FPD_CRF_SIZE 0x14
+#define MM_FPD_FPD_APU  0xfd5c
+#define MM_FPD_FPD_APU_SIZE 0x100
 
 #define MM_PMC_SD0  0xf104U
 #define MM_PMC_SD0_SIZE 0x1
-- 
2.25.1

[Patch 0/2] hw/arm/xlnx-versal: hw/arm/xlnx-zynqmp: Add unimplemented mmio

2021-08-18 Thread Tong Ho

This series adds the APU mmio region as an unimplemented device
to each of two Xilinx SoCs to support booting guests built with
the standalone bsp published at:
  
https://github.com/Xilinx/embeddedsw/tree/master/lib/bsp/standalone/src/arm/ARMv8/64bit

Tong Ho (2):
  hw/arm/xlnx-versal: Add unimplemented APU mmio
  hw/arm/xlnx-zynqmp: Add unimplemented APU mmio

 hw/arm/xlnx-versal.c |  2 ++
 hw/arm/xlnx-zynqmp.c | 32 
 include/hw/arm/xlnx-versal.h |  2 ++
 include/hw/arm/xlnx-zynqmp.h |  7 +++
 4 files changed, 43 insertions(+)

-- 
2.25.1

[PATCH v6 08/16] machine: Prefer cores over sockets in smp parsing since 6.2

2021-08-18 Thread Yanan Wang

In real-world SMP hardware topologies, it is much more likely to
have a high cores-per-socket count and few sockets in total. However,
the current preference of sockets over cores in smp parsing results
in a virtual cpu topology with a low cores-per-socket count and a
large number of sockets, which is just the opposite of the real world.

Since it is better for the virtual cpu topology to reflect the real
world, and also for the sake of compatibility, we prefer cores over
sockets over threads in smp parsing starting with machine type 6.2
for different arches.

In this patch, a boolean "smp_prefer_sockets" is added, and we only
enable the old preference on older machines and enable the new one
starting with type 6.2 for all arches by using the machine compat
mechanism.

Suggested-by: Daniel P. Berrange 
Signed-off-by: Yanan Wang 
Acked-by: David Gibson 
Acked-by: Cornelia Huck 
Reviewed-by: Andrew Jones 
Reviewed-by: Pankaj Gupta 
---
 hw/arm/virt.c  |  1 +
 hw/core/machine.c  | 35 ++-
 hw/i386/pc.c   | 35 ++-
 hw/i386/pc_piix.c  |  1 +
 hw/i386/pc_q35.c   |  1 +
 hw/ppc/spapr.c |  1 +
 hw/s390x/s390-virtio-ccw.c |  1 +
 include/hw/boards.h|  1 +
 qemu-options.hx|  3 ++-
 9 files changed, 60 insertions(+), 19 deletions(-)

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 01165f7f53..7babea40dc 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -2797,6 +2797,7 @@ static void virt_machine_6_1_options(MachineClass *mc)
 {
 virt_machine_6_2_options(mc);
 compat_props_add(mc->compat_props, hw_compat_6_1, hw_compat_6_1_len);
+mc->smp_prefer_sockets = true;
 }
 DEFINE_VIRT_MACHINE(6, 1)
 
diff --git a/hw/core/machine.c b/hw/core/machine.c
index f1b30b3101..0df597f99c 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -748,6 +748,7 @@ void machine_set_cpu_numa_node(MachineState *machine,
 
 static void smp_parse(MachineState *ms, SMPConfiguration *config, Error **errp)
 {
+MachineClass *mc = MACHINE_GET_CLASS(ms);
 unsigned cpus= config->has_cpus ? config->cpus : 0;
 unsigned sockets = config->has_sockets ? config->sockets : 0;
 unsigned cores   = config->has_cores ? config->cores : 0;
@@ -759,7 +760,7 @@ static void smp_parse(MachineState *ms, SMPConfiguration 
*config, Error **errp)
 return;
 }
 
-/* compute missing values, prefer sockets over cores over threads */
+/* compute missing values based on the provided ones */
 if (cpus == 0 && maxcpus == 0) {
 sockets = sockets > 0 ? sockets : 1;
 cores = cores > 0 ? cores : 1;
@@ -767,14 +768,30 @@ static void smp_parse(MachineState *ms, SMPConfiguration 
*config, Error **errp)
 } else {
 maxcpus = maxcpus > 0 ? maxcpus : cpus;
 
-if (sockets == 0) {
-cores = cores > 0 ? cores : 1;
-threads = threads > 0 ? threads : 1;
-sockets = maxcpus / (cores * threads);
-} else if (cores == 0) {
-threads = threads > 0 ? threads : 1;
-cores = maxcpus / (sockets * threads);
-} else if (threads == 0) {
+if (mc->smp_prefer_sockets) {
+/* prefer sockets over cores before 6.2 */
+if (sockets == 0) {
+cores = cores > 0 ? cores : 1;
+threads = threads > 0 ? threads : 1;
+sockets = maxcpus / (cores * threads);
+} else if (cores == 0) {
+threads = threads > 0 ? threads : 1;
+cores = maxcpus / (sockets * threads);
+}
+} else {
+/* prefer cores over sockets since 6.2 */
+if (cores == 0) {
+sockets = sockets > 0 ? sockets : 1;
+threads = threads > 0 ? threads : 1;
+cores = maxcpus / (sockets * threads);
+} else if (sockets == 0) {
+threads = threads > 0 ? threads : 1;
+sockets = maxcpus / (cores * threads);
+}
+}
+
+/* try to calculate omitted threads at last */
+if (threads == 0) {
 threads = maxcpus / (sockets * cores);
 }
 }
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index afd8b9c283..4b05ff7160 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -717,6 +717,7 @@ void pc_acpi_smi_interrupt(void *opaque, int irq, int level)
  */
 static void pc_smp_parse(MachineState *ms, SMPConfiguration *config, Error 
**errp)
 {
+MachineClass *mc = MACHINE_GET_CLASS(ms);
 unsigned cpus= config->has_cpus ? config->cpus : 0;
 unsigned sockets = config->has_sockets ? config->sockets : 0;
 unsigned dies= config->has_dies ? config->dies : 0;
@@ -727,7 +728,7 @@ static void pc_smp_parse(MachineState *ms, SMPConfiguration 
*config, Error **err
 /* directly default dies to 1 if it's omitted */
 dies = dies > 0 ? dies : 1;
 
-/* compute missing val

[PATCH v6 07/16] hw: Add compat machines for 6.2

2021-08-18 Thread Yanan Wang

Add 6.2 machine types for arm/i440fx/q35/s390x/spapr.

Signed-off-by: Yanan Wang 
Acked-by: David Gibson 
Reviewed-by: Andrew Jones 
Reviewed-by: Cornelia Huck 
Reviewed-by: Pankaj Gupta 
---
 hw/arm/virt.c  |  9 -
 hw/core/machine.c  |  3 +++
 hw/i386/pc.c   |  3 +++
 hw/i386/pc_piix.c  | 14 +-
 hw/i386/pc_q35.c   | 13 -
 hw/ppc/spapr.c | 15 +--
 hw/s390x/s390-virtio-ccw.c | 14 +-
 include/hw/boards.h|  3 +++
 include/hw/i386/pc.h   |  3 +++
 9 files changed, 71 insertions(+), 6 deletions(-)

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 81eda46b0b..01165f7f53 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -2788,10 +2788,17 @@ static void machvirt_machine_init(void)
 }
 type_init(machvirt_machine_init);
 
+static void virt_machine_6_2_options(MachineClass *mc)
+{
+}
+DEFINE_VIRT_MACHINE_AS_LATEST(6, 2)
+
 static void virt_machine_6_1_options(MachineClass *mc)
 {
+virt_machine_6_2_options(mc);
+compat_props_add(mc->compat_props, hw_compat_6_1, hw_compat_6_1_len);
 }
-DEFINE_VIRT_MACHINE_AS_LATEST(6, 1)
+DEFINE_VIRT_MACHINE(6, 1)
 
 static void virt_machine_6_0_options(MachineClass *mc)
 {
diff --git a/hw/core/machine.c b/hw/core/machine.c
index 093c0d382d..f1b30b3101 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -37,6 +37,9 @@
 #include "hw/virtio/virtio.h"
 #include "hw/virtio/virtio-pci.h"
 
+GlobalProperty hw_compat_6_1[] = {};
+const size_t hw_compat_6_1_len = G_N_ELEMENTS(hw_compat_6_1);
+
 GlobalProperty hw_compat_6_0[] = {
 { "gpex-pcihost", "allow-unmapped-accesses", "false" },
 { "i8042", "extended-state", "false"},
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index fcf6905219..afd8b9c283 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -94,6 +94,9 @@
 #include "trace.h"
 #include CONFIG_DEVICES
 
+GlobalProperty pc_compat_6_1[] = {};
+const size_t pc_compat_6_1_len = G_N_ELEMENTS(pc_compat_6_1);
+
 GlobalProperty pc_compat_6_0[] = {
 { "qemu64" "-" TYPE_X86_CPU, "family", "6" },
 { "qemu64" "-" TYPE_X86_CPU, "model", "6" },
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 30b8bd6ea9..fd5c2277f2 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -413,7 +413,7 @@ static void pc_i440fx_machine_options(MachineClass *m)
 machine_class_allow_dynamic_sysbus_dev(m, TYPE_VMBUS_BRIDGE);
 }
 
-static void pc_i440fx_6_1_machine_options(MachineClass *m)
+static void pc_i440fx_6_2_machine_options(MachineClass *m)
 {
 PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
 pc_i440fx_machine_options(m);
@@ -422,6 +422,18 @@ static void pc_i440fx_6_1_machine_options(MachineClass *m)
 pcmc->default_cpu_version = 1;
 }
 
+DEFINE_I440FX_MACHINE(v6_2, "pc-i440fx-6.2", NULL,
+  pc_i440fx_6_2_machine_options);
+
+static void pc_i440fx_6_1_machine_options(MachineClass *m)
+{
+pc_i440fx_6_2_machine_options(m);
+m->alias = NULL;
+m->is_default = false;
+compat_props_add(m->compat_props, hw_compat_6_1, hw_compat_6_1_len);
+compat_props_add(m->compat_props, pc_compat_6_1, pc_compat_6_1_len);
+}
+
 DEFINE_I440FX_MACHINE(v6_1, "pc-i440fx-6.1", NULL,
   pc_i440fx_6_1_machine_options);
 
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index 04b4a4788d..b45903b15e 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -355,7 +355,7 @@ static void pc_q35_machine_options(MachineClass *m)
 m->max_cpus = 288;
 }
 
-static void pc_q35_6_1_machine_options(MachineClass *m)
+static void pc_q35_6_2_machine_options(MachineClass *m)
 {
 PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
 pc_q35_machine_options(m);
@@ -363,6 +363,17 @@ static void pc_q35_6_1_machine_options(MachineClass *m)
 pcmc->default_cpu_version = 1;
 }
 
+DEFINE_Q35_MACHINE(v6_2, "pc-q35-6.2", NULL,
+   pc_q35_6_2_machine_options);
+
+static void pc_q35_6_1_machine_options(MachineClass *m)
+{
+pc_q35_6_2_machine_options(m);
+m->alias = NULL;
+compat_props_add(m->compat_props, hw_compat_6_1, hw_compat_6_1_len);
+compat_props_add(m->compat_props, pc_compat_6_1, pc_compat_6_1_len);
+}
+
 DEFINE_Q35_MACHINE(v6_1, "pc-q35-6.1", NULL,
pc_q35_6_1_machine_options);
 
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 81699d4f8b..d39fd4e644 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -4685,15 +4685,26 @@ static void 
spapr_machine_latest_class_options(MachineClass *mc)
 }\
 type_init(spapr_machine_register_##suffix)
 
+/*
+ * pseries-6.2
+ */
+static void spapr_machine_6_2_class_options(MachineClass *mc)
+{
+/* Defaults for the latest behaviour inherited from the base class */
+}
+
+DEFINE_SPAPR_MACHINE(6_2, "6.2", true);
+
 /*
  * pseries-6.1
  */
 static void spapr_machine_6_1_class_options(MachineClass *mc)
 {
-/* Defaults for the latest behaviour inherited from the base class */
+

[PATCH v6 15/16] machine: Split out the smp parsing code

2021-08-18 Thread Yanan Wang

We are going to introduce a unit test for the parser smp_parse()
in hw/core/machine.c, but currently machine.c is only built for softmmu.

In order to solve the build dependency on the smp parsing code and
avoid building unrelated stuff for the unit tests, move the related
code from machine.c into a new common file, i.e., machine-smp.c.

Signed-off-by: Yanan Wang 
Reviewed-by: Andrew Jones 
---
 MAINTAINERS   |   1 +
 hw/core/machine-smp.c | 200 ++
 hw/core/machine.c | 178 -
 hw/core/meson.build   |   1 +
 include/hw/boards.h   |   1 +
 5 files changed, 203 insertions(+), 178 deletions(-)
 create mode 100644 hw/core/machine-smp.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 6b3697962c..1e03352501 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1629,6 +1629,7 @@ F: cpu.c
 F: hw/core/cpu.c
 F: hw/core/machine-qmp-cmds.c
 F: hw/core/machine.c
+F: hw/core/machine-smp.c
 F: hw/core/null-machine.c
 F: hw/core/numa.c
 F: hw/cpu/cluster.c
diff --git a/hw/core/machine-smp.c b/hw/core/machine-smp.c
new file mode 100644
index 00..07604aef8d
--- /dev/null
+++ b/hw/core/machine-smp.c
@@ -0,0 +1,200 @@
+/*
+ * QEMU Machine (related to SMP)
+ *
+ * Copyright (c) 2021 Huawei Technologies Co., Ltd
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License,
+ * or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "hw/boards.h"
+#include "qapi/error.h"
+#include "qemu/cutils.h"
+
+static char *cpu_topology_hierarchy(MachineState *ms)
+{
+MachineClass *mc = MACHINE_GET_CLASS(ms);
+SMPCompatProps *smp_props = &mc->smp_props;
+char topo_msg[256] = "";
+
+/*
+ * Topology members should be ordered from the largest to the smallest.
+ * Concept of sockets/cores/threads is supported by default and will be
+ * reported in the hierarchy. Unsupported members will not be reported.
+ */
+g_autofree char *sockets_msg = g_strdup_printf(
+" * sockets (%u)", ms->smp.sockets);
+pstrcat(topo_msg, sizeof(topo_msg), sockets_msg);
+
+if (smp_props->dies_supported) {
+g_autofree char *dies_msg = g_strdup_printf(
+" * dies (%u)", ms->smp.dies);
+pstrcat(topo_msg, sizeof(topo_msg), dies_msg);
+}
+
+g_autofree char *cores_msg = g_strdup_printf(
+" * cores (%u)", ms->smp.cores);
+pstrcat(topo_msg, sizeof(topo_msg), cores_msg);
+
+g_autofree char *threads_msg = g_strdup_printf(
+" * threads (%u)", ms->smp.threads);
+pstrcat(topo_msg, sizeof(topo_msg), threads_msg);
+
+return g_strdup_printf("%s", topo_msg + 3);
+}
+
+/*
+ * smp_parse - Generic function used to parse the given SMP configuration
+ *
+ * If not supported by the machine, a topology parameter must be omitted
+ * or specified equal to 1. Concept of sockets/cores/threads is supported
+ * by default. Unsupported members will not be reported in the topology
+ * hierarchy message.
+ *
+ * For compatibility, omitted arch-specific members (e.g. dies) will not
+ * be computed, but will directly default to 1 instead. This logic should
+ * also apply to future introduced ones.
+ *
+ * Omitted arch-neutral parameters (i.e. cpus/sockets/cores/threads/maxcpus)
+ * will be computed based on the provided ones. When both maxcpus and cpus
+ * are omitted, maxcpus will be computed from the given parameters and cpus
+ * will be set equal to maxcpus. When only one of maxcpus and cpus is given
+ * then the omitted one will be set to its given counterpart's value.
+ * Both maxcpus and cpus may be specified, but maxcpus must be equal to or
+ * greater than cpus.
+ *
+ * In calculation of omitted sockets/cores/threads, we prefer sockets over
+ * cores over threads before 6.2, while preferring cores over sockets over
+ * threads since 6.2.
+ */
+void smp_parse(MachineState *ms, SMPConfiguration *config, Error **errp)
+{
+MachineClass *mc = MACHINE_GET_CLASS(ms);
+unsigned cpus= config->has_cpus ? config->cpus : 0;
+unsigned sockets = config->has_sockets ? config->sockets : 0;
+unsigned dies= config->has_dies ? config->dies : 0;
+unsigned cores   = config->has_cores ? config->cores : 0;
+unsigned threads = config->has_threads ? config->threads : 0;
+unsigned maxcpus = config->has_maxcpus ? config->maxcpus : 0;
+
+/*
+ * Specified CPU topology parameters must be greater than ze

[PATCH v6 01/16] docs/about/removed-features: Remove duplicated doc about -smp

2021-08-18 Thread Yanan Wang

There are two places describing the same thing about deprecation
of invalid topologies of -smp CLI, so remove the duplicated one.

Signed-off-by: Yanan Wang 
---
 docs/about/removed-features.rst | 21 -
 1 file changed, 4 insertions(+), 17 deletions(-)

diff --git a/docs/about/removed-features.rst b/docs/about/removed-features.rst
index cbfa1a8e31..f5d6e2ea9c 100644
--- a/docs/about/removed-features.rst
+++ b/docs/about/removed-features.rst
@@ -194,7 +194,7 @@ by the ``tls-authz`` and ``sasl-authz`` options.
 The ``pretty=on|off`` switch has no effect for HMP monitors and
 its use is rejected.
 
-``-drive file=json:{...{'driver':'file'}}`` (removed 6.0)
+``-drive file=json:{...{'driver':'file'}}`` (removed in 6.0)
 '
 
 The 'file' driver for drives is no longer appropriate for character or host
@@ -593,7 +593,7 @@ error when ``-u`` is not used.
 Command line options
 
 
-``-smp`` (invalid topologies) (removed 5.2)
+``-smp`` (invalid topologies) (removed in 5.2)
 '''
 
 CPU topology properties should describe whole machine topology including
@@ -606,7 +606,7 @@ Support for invalid topologies is removed, the user must 
ensure
 topologies described with -smp include all possible cpus, i.e.
 *sockets* * *cores* * *threads* = *maxcpus*.
 
-``-numa`` node (without memory specified) (removed 5.2)
+``-numa`` node (without memory specified) (removed in 5.2)
 '''
 
 Splitting RAM by default between NUMA nodes had the same issues as ``mem``
@@ -647,20 +647,7 @@ as ignored. Currently, users are responsible for making 
sure the backing storage
 specified with ``-mem-path`` can actually provide the guest RAM configured with
 ``-m`` and QEMU fails to start up if RAM allocation is unsuccessful.
 
-``-smp`` (invalid topologies) (removed 5.2)
-'''
-
-CPU topology properties should describe whole machine topology including
-possible CPUs.
-
-However, historically it was possible to start QEMU with an incorrect topology
-where *n* <= *sockets* * *cores* * *threads* < *maxcpus*,
-which could lead to an incorrect topology enumeration by the guest.
-Support for invalid topologies is removed, the user must ensure
-topologies described with -smp include all possible cpus, i.e.
-*sockets* * *cores* * *threads* = *maxcpus*.
-
-``-machine enforce-config-section=on|off`` (removed 5.2)
+``-machine enforce-config-section=on|off`` (removed in 5.2)
 
 
 The ``enforce-config-section`` property was replaced by the
-- 
2.19.1

[PATCH v6 04/16] machine: Uniformly use maxcpus to calculate the omitted parameters

2021-08-18 Thread Yanan Wang

We are currently using maxcpus to calculate the omitted sockets
but using cpus to calculate the omitted cores/threads. This makes
cmdlines like:
  -smp cpus=8,maxcpus=16
  -smp cpus=8,cores=4,maxcpus=16
  -smp cpus=8,threads=2,maxcpus=16
work fine but the ones like:
  -smp cpus=8,sockets=2,maxcpus=16
  -smp cpus=8,sockets=2,cores=4,maxcpus=16
  -smp cpus=8,sockets=2,threads=2,maxcpus=16
break the sanity check.

Since we require for a valid config that the product of "sockets * cores
* threads" should be equal to maxcpus, we should uniformly use maxcpus
to calculate their omitted values.

Also the if-branch of "cpus == 0 || sockets == 0" was split into two
branches of "cpus == 0" and "sockets == 0" so that we can clearly read
that we are parsing the configuration with a preference on cpus over
sockets over cores over threads.

Note: change in this patch won't affect any existing working cmdlines
but improves consistency and allows more incomplete configs to be valid.

Signed-off-by: Yanan Wang 
Reviewed-by: Andrew Jones 
Reviewed-by: Pankaj Gupta 
---
 hw/core/machine.c | 30 +++---
 hw/i386/pc.c  | 30 +++---
 2 files changed, 30 insertions(+), 30 deletions(-)

diff --git a/hw/core/machine.c b/hw/core/machine.c
index bcced1e1c4..dc12b5ec4e 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -757,24 +757,26 @@ static void smp_parse(MachineState *ms, SMPConfiguration 
*config, Error **errp)
 }
 
 /* compute missing values, prefer sockets over cores over threads */
-if (cpus == 0 || sockets == 0) {
+maxcpus = maxcpus > 0 ? maxcpus : cpus;
+
+if (cpus == 0) {
+sockets = sockets > 0 ? sockets : 1;
 cores = cores > 0 ? cores : 1;
 threads = threads > 0 ? threads : 1;
-if (cpus == 0) {
-sockets = sockets > 0 ? sockets : 1;
-cpus = cores * threads * sockets;
-} else {
-maxcpus = maxcpus > 0 ? maxcpus : cpus;
-sockets = maxcpus / (cores * threads);
-}
+cpus = sockets * cores * threads;
+maxcpus = maxcpus > 0 ? maxcpus : cpus;
+} else if (sockets == 0) {
+cores = cores > 0 ? cores : 1;
+threads = threads > 0 ? threads : 1;
+sockets = maxcpus / (cores * threads);
 } else if (cores == 0) {
 threads = threads > 0 ? threads : 1;
-cores = cpus / (sockets * threads);
-cores = cores > 0 ? cores : 1;
+cores = maxcpus / (sockets * threads);
 } else if (threads == 0) {
-threads = cpus / (cores * sockets);
-threads = threads > 0 ? threads : 1;
-} else if (sockets * cores * threads < cpus) {
+threads = maxcpus / (sockets * cores);
+}
+
+if (sockets * cores * threads < cpus) {
 error_setg(errp, "cpu topology: "
"sockets (%u) * cores (%u) * threads (%u) < "
"smp_cpus (%u)",
@@ -782,8 +784,6 @@ static void smp_parse(MachineState *ms, SMPConfiguration 
*config, Error **errp)
 return;
 }
 
-maxcpus = maxcpus > 0 ? maxcpus : cpus;
-
 if (maxcpus < cpus) {
 error_setg(errp, "maxcpus must be equal to or greater than smp");
 return;
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index acd31af452..a9ff9ef52c 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -725,24 +725,26 @@ static void pc_smp_parse(MachineState *ms, 
SMPConfiguration *config, Error **err
 dies = dies > 0 ? dies : 1;
 
 /* compute missing values, prefer sockets over cores over threads */
-if (cpus == 0 || sockets == 0) {
+maxcpus = maxcpus > 0 ? maxcpus : cpus;
+
+if (cpus == 0) {
+sockets = sockets > 0 ? sockets : 1;
 cores = cores > 0 ? cores : 1;
 threads = threads > 0 ? threads : 1;
-if (cpus == 0) {
-sockets = sockets > 0 ? sockets : 1;
-cpus = cores * threads * dies * sockets;
-} else {
-maxcpus = maxcpus > 0 ? maxcpus : cpus;
-sockets = maxcpus / (dies * cores * threads);
-}
+cpus = sockets * dies * cores * threads;
+maxcpus = maxcpus > 0 ? maxcpus : cpus;
+} else if (sockets == 0) {
+cores = cores > 0 ? cores : 1;
+threads = threads > 0 ? threads : 1;
+sockets = maxcpus / (dies * cores * threads);
 } else if (cores == 0) {
 threads = threads > 0 ? threads : 1;
-cores = cpus / (sockets * dies * threads);
-cores = cores > 0 ? cores : 1;
+cores = maxcpus / (sockets * dies * threads);
 } else if (threads == 0) {
-threads = cpus / (cores * dies * sockets);
-threads = threads > 0 ? threads : 1;
-} else if (sockets * dies * cores * threads < cpus) {
+threads = maxcpus / (sockets * dies * cores);
+}
+
+if (sockets * dies * cores * threads < cpus) {
 error_setg(errp, "cpu topology: "
"sockets (%u) * dies (%u) * cores (%u) * threads (%u) < "

[PATCH v6 02/16] machine: Deprecate "parameter=0" SMP configurations

2021-08-18 Thread Yanan Wang

In the SMP configuration, we should either provide a topology
parameter with a reasonable value (greater than zero) or just
omit it and QEMU will compute the missing value.

Users shouldn't provide a configuration in which any parameter
is specified as zero (e.g. -smp 8,sockets=0), which could
possibly cause unexpected results in the -smp parsing. So we
deprecate this kind of configuration since 6.2 by adding an
explicit sanity check.

Signed-off-by: Yanan Wang 
---
 docs/about/deprecated.rst | 15 +++
 hw/core/machine.c | 14 ++
 qapi/machine.json |  2 +-
 qemu-options.hx   | 12 +++-
 4 files changed, 37 insertions(+), 6 deletions(-)

diff --git a/docs/about/deprecated.rst b/docs/about/deprecated.rst
index 6d438f1c8d..8dbb027dbb 100644
--- a/docs/about/deprecated.rst
+++ b/docs/about/deprecated.rst
@@ -138,6 +138,21 @@ an underscore between "window" and "close").
 The ``-no-quit`` is a synonym for ``-display ...,window-close=off`` which
 should be used instead.
 
+``-smp`` ("parameter=0" SMP configurations) (since 6.2)
+'''
+
+Specified CPU topology parameters must be greater than zero.
+
+In the SMP configuration, users should either provide a CPU topology
+parameter with a reasonable value (greater than zero) or just omit it
+and QEMU will compute the missing value.
+
+However, historically it was implicitly allowed for users to provide
+a parameter with zero value, which is meaningless and could also possibly
+cause unexpected results in the -smp parsing. So support for this kind of
+configurations (e.g. -smp 8,sockets=0) is deprecated since 6.2 and will
+be removed in the near future, users have to ensure that all the topology
+members described with -smp are greater than zero.
 
 QEMU Machine Protocol (QMP) commands
 
diff --git a/hw/core/machine.c b/hw/core/machine.c
index 54e040587d..3b5df9b002 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -832,6 +832,20 @@ static void machine_set_smp(Object *obj, Visitor *v, const 
char *name,
 return;
 }
 
+/*
+ * Specified CPU topology parameters must be greater than zero,
+ * explicit configuration like "cpus=0" is not allowed.
+ */
+if ((config->has_cpus && config->cpus == 0) ||
+(config->has_sockets && config->sockets == 0) ||
+(config->has_dies && config->dies == 0) ||
+(config->has_cores && config->cores == 0) ||
+(config->has_threads && config->threads == 0) ||
+(config->has_maxcpus && config->maxcpus == 0)) {
+warn_report("Invalid CPU topology deprecated: "
+"CPU topology parameters must be greater than zero");
+}
+
 mc->smp_parse(ms, config, errp);
 if (*errp) {
 goto out_free;
diff --git a/qapi/machine.json b/qapi/machine.json
index 157712f006..10a97837d3 100644
--- a/qapi/machine.json
+++ b/qapi/machine.json
@@ -1297,7 +1297,7 @@
 #
 # @dies: number of dies per socket in the CPU topology
 #
-# @cores: number of cores per thread in the CPU topology
+# @cores: number of cores per die in the CPU topology
 #
 # @threads: number of threads per core in the CPU topology
 #
diff --git a/qemu-options.hx b/qemu-options.hx
index 83aa59a920..aee622f577 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -227,11 +227,13 @@ SRST
 of computing the CPU maximum count.
 
 Either the initial CPU count, or at least one of the topology parameters
-must be specified. Values for any omitted parameters will be computed
-from those which are given. Historically preference was given to the
-coarsest topology parameters when computing missing values (ie sockets
-preferred over cores, which were preferred over threads), however, this
-behaviour is considered liable to change.
+must be specified. The specified parameters must be greater than zero,
+explicit configuration like "cpus=0" is not allowed. Values for any
+omitted parameters will be computed from those which are given.
+Historically preference was given to the coarsest topology parameters
+when computing missing values (ie sockets preferred over cores, which
+were preferred over threads), however, this behaviour is considered
+liable to change.
 ERST
 
 DEF("numa", HAS_ARG, QEMU_OPTION_numa,
-- 
2.19.1

[PATCH v6 09/16] machine: Use ms instead of global current_machine in sanity-check

2021-08-18 Thread Yanan Wang

In the sanity-check of smp_cpus and max_cpus against mc in function
machine_set_smp(), we are now using ms->smp.max_cpus for the check
but using current_machine->smp.max_cpus in the error message.
Tweak this by uniformly using the local ms.

Signed-off-by: Yanan Wang 
Reviewed-by: Andrew Jones 
Reviewed-by: Pankaj Gupta 
Reviewed-by: Cornelia Huck 
---
 hw/core/machine.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/core/machine.c b/hw/core/machine.c
index 0df597f99c..1ad5dac3e8 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -881,7 +881,7 @@ static void machine_set_smp(Object *obj, Visitor *v, const 
char *name,
 } else if (ms->smp.max_cpus > mc->max_cpus) {
 error_setg(errp, "Invalid SMP CPUs %d. The max CPUs "
"supported by machine '%s' is %d",
-   current_machine->smp.max_cpus,
+   ms->smp.max_cpus,
mc->name, mc->max_cpus);
 }
 
-- 
2.19.1

[PATCH v6 03/16] machine: Minor refactor/fix for the smp parsers

2021-08-18 Thread Yanan Wang

To pave the way for the functional improvement in later patches,
do some refactoring/cleanup of the smp parsers, including using
local maxcpus instead of ms->smp.max_cpus in the calculation,
defaulting dies to 0 initially like other members, and cleaning
up the sanity check for dies.

We actually also fix a hidden defect by avoiding directly using
the provided *zero value* in the calculation, which could cause
a segmentation fault (e.g. using dies=0 in the calculation).

Signed-off-by: Yanan Wang 
Reviewed-by: Andrew Jones 
---
 hw/core/machine.c | 18 ++
 hw/i386/pc.c  | 23 ++-
 2 files changed, 24 insertions(+), 17 deletions(-)

diff --git a/hw/core/machine.c b/hw/core/machine.c
index 3b5df9b002..bcced1e1c4 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -749,8 +749,9 @@ static void smp_parse(MachineState *ms, SMPConfiguration 
*config, Error **errp)
 unsigned sockets = config->has_sockets ? config->sockets : 0;
 unsigned cores   = config->has_cores ? config->cores : 0;
 unsigned threads = config->has_threads ? config->threads : 0;
+unsigned maxcpus = config->has_maxcpus ? config->maxcpus : 0;
 
-if (config->has_dies && config->dies != 0 && config->dies != 1) {
+if (config->has_dies && config->dies > 1) {
 error_setg(errp, "dies not supported by this machine's CPU topology");
 return;
 }
@@ -763,8 +764,8 @@ static void smp_parse(MachineState *ms, SMPConfiguration 
*config, Error **errp)
 sockets = sockets > 0 ? sockets : 1;
 cpus = cores * threads * sockets;
 } else {
-ms->smp.max_cpus = config->has_maxcpus ? config->maxcpus : cpus;
-sockets = ms->smp.max_cpus / (cores * threads);
+maxcpus = maxcpus > 0 ? maxcpus : cpus;
+sockets = maxcpus / (cores * threads);
 }
 } else if (cores == 0) {
 threads = threads > 0 ? threads : 1;
@@ -781,26 +782,27 @@ static void smp_parse(MachineState *ms, SMPConfiguration 
*config, Error **errp)
 return;
 }
 
-ms->smp.max_cpus = config->has_maxcpus ? config->maxcpus : cpus;
+maxcpus = maxcpus > 0 ? maxcpus : cpus;
 
-if (ms->smp.max_cpus < cpus) {
+if (maxcpus < cpus) {
 error_setg(errp, "maxcpus must be equal to or greater than smp");
 return;
 }
 
-if (sockets * cores * threads != ms->smp.max_cpus) {
+if (sockets * cores * threads != maxcpus) {
 error_setg(errp, "Invalid CPU topology: "
"sockets (%u) * cores (%u) * threads (%u) "
"!= maxcpus (%u)",
sockets, cores, threads,
-   ms->smp.max_cpus);
+   maxcpus);
 return;
 }
 
 ms->smp.cpus = cpus;
+ms->smp.sockets = sockets;
 ms->smp.cores = cores;
 ms->smp.threads = threads;
-ms->smp.sockets = sockets;
+ms->smp.max_cpus = maxcpus;
 }
 
 static void machine_get_smp(Object *obj, Visitor *v, const char *name,
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index c2b9d62a35..acd31af452 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -716,9 +716,13 @@ static void pc_smp_parse(MachineState *ms, 
SMPConfiguration *config, Error **err
 {
 unsigned cpus= config->has_cpus ? config->cpus : 0;
 unsigned sockets = config->has_sockets ? config->sockets : 0;
-unsigned dies= config->has_dies ? config->dies : 1;
+unsigned dies= config->has_dies ? config->dies : 0;
 unsigned cores   = config->has_cores ? config->cores : 0;
 unsigned threads = config->has_threads ? config->threads : 0;
+unsigned maxcpus = config->has_maxcpus ? config->maxcpus : 0;
+
+/* directly default dies to 1 if it's omitted */
+dies = dies > 0 ? dies : 1;
 
 /* compute missing values, prefer sockets over cores over threads */
 if (cpus == 0 || sockets == 0) {
@@ -728,8 +732,8 @@ static void pc_smp_parse(MachineState *ms, SMPConfiguration 
*config, Error **err
 sockets = sockets > 0 ? sockets : 1;
 cpus = cores * threads * dies * sockets;
 } else {
-ms->smp.max_cpus = config->has_maxcpus ? config->maxcpus : cpus;
-sockets = ms->smp.max_cpus / (cores * threads * dies);
+maxcpus = maxcpus > 0 ? maxcpus : cpus;
+sockets = maxcpus / (dies * cores * threads);
 }
 } else if (cores == 0) {
 threads = threads > 0 ? threads : 1;
@@ -746,27 +750,28 @@ static void pc_smp_parse(MachineState *ms, 
SMPConfiguration *config, Error **err
 return;
 }
 
-ms->smp.max_cpus = config->has_maxcpus ? config->maxcpus : cpus;
+maxcpus = maxcpus > 0 ? maxcpus : cpus;
 
-if (ms->smp.max_cpus < cpus) {
+if (maxcpus < cpus) {
 error_setg(errp, "maxcpus must be equal to or greater than smp");
 return;
 }
 
-if (sockets * dies * cores * threads != ms->smp.max_cpus) {
+if (sockets * dies * cores * threads != maxcpus) {
 error_set

[PATCH v6 06/16] machine: Improve the error reporting of smp parsing

2021-08-18 Thread Yanan Wang

We have two requirements for a valid SMP configuration:
the product of "sockets * cores * threads" must represent all the
possible cpus, i.e., max_cpus, and then must include the initially
present cpus, i.e., smp_cpus.

So we only need to ensure 1) "sockets * cores * threads == maxcpus"
at first and then ensure 2) "maxcpus >= cpus". With a reasonable
order of the sanity check, we can simplify the error reporting code.
When reporting an error message we also report the exact value of
each topology member to make users easily see what's going on.

Signed-off-by: Yanan Wang 
Reviewed-by: Andrew Jones 
Reviewed-by: Pankaj Gupta 
---
 hw/core/machine.c | 22 +-
 hw/i386/pc.c  | 24 ++--
 2 files changed, 19 insertions(+), 27 deletions(-)

diff --git a/hw/core/machine.c b/hw/core/machine.c
index 85908abc77..093c0d382d 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -779,25 +779,21 @@ static void smp_parse(MachineState *ms, SMPConfiguration 
*config, Error **errp)
 maxcpus = maxcpus > 0 ? maxcpus : sockets * cores * threads;
 cpus = cpus > 0 ? cpus : maxcpus;
 
-if (sockets * cores * threads < cpus) {
-error_setg(errp, "cpu topology: "
-   "sockets (%u) * cores (%u) * threads (%u) < "
-   "smp_cpus (%u)",
-   sockets, cores, threads, cpus);
+if (sockets * cores * threads != maxcpus) {
+error_setg(errp, "Invalid CPU topology: "
+   "product of the hierarchy must match maxcpus: "
+   "sockets (%u) * cores (%u) * threads (%u) "
+   "!= maxcpus (%u)",
+   sockets, cores, threads, maxcpus);
 return;
 }
 
 if (maxcpus < cpus) {
-error_setg(errp, "maxcpus must be equal to or greater than smp");
-return;
-}
-
-if (sockets * cores * threads != maxcpus) {
 error_setg(errp, "Invalid CPU topology: "
+   "maxcpus must be equal to or greater than smp: "
"sockets (%u) * cores (%u) * threads (%u) "
-   "!= maxcpus (%u)",
-   sockets, cores, threads,
-   maxcpus);
+   "== maxcpus (%u) < smp_cpus (%u)",
+   sockets, cores, threads, maxcpus, cpus);
 return;
 }
 
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 9ad7ae5254..fcf6905219 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -747,25 +747,21 @@ static void pc_smp_parse(MachineState *ms, 
SMPConfiguration *config, Error **err
 maxcpus = maxcpus > 0 ? maxcpus : sockets * dies * cores * threads;
 cpus = cpus > 0 ? cpus : maxcpus;
 
-if (sockets * dies * cores * threads < cpus) {
-error_setg(errp, "cpu topology: "
-   "sockets (%u) * dies (%u) * cores (%u) * threads (%u) < "
-   "smp_cpus (%u)",
-   sockets, dies, cores, threads, cpus);
+if (sockets * dies * cores * threads != maxcpus) {
+error_setg(errp, "Invalid CPU topology: "
+   "product of the hierarchy must match maxcpus: "
+   "sockets (%u) * dies (%u) * cores (%u) * threads (%u) "
+   "!= maxcpus (%u)",
+   sockets, dies, cores, threads, maxcpus);
 return;
 }
 
 if (maxcpus < cpus) {
-error_setg(errp, "maxcpus must be equal to or greater than smp");
-return;
-}
-
-if (sockets * dies * cores * threads != maxcpus) {
-error_setg(errp, "Invalid CPU topology deprecated: "
+error_setg(errp, "Invalid CPU topology: "
+   "maxcpus must be equal to or greater than smp: "
"sockets (%u) * dies (%u) * cores (%u) * threads (%u) "
-   "!= maxcpus (%u)",
-   sockets, dies, cores, threads,
-   maxcpus);
+   "== maxcpus (%u) < smp_cpus (%u)",
+   sockets, dies, cores, threads, maxcpus, cpus);
 return;
 }
 
-- 
2.19.1

[PATCH v6 00/16] machine: smp parsing fixes and improvement

2021-08-18 Thread Yanan Wang

Rebased on upstream v6.1.0-rc4 with two more patches added.

This series introduces some fixes and improvements for the SMP parsing.
Behavior of specifying a CPU topology parameter as zero was implicitly
allowed but undocumented before, while now it's explicitly deprecated.
maxcpus is now uniformly used to calculate the omitted topology members.
It's also suggested that we should start to prefer cores over sockets
over threads on the newer machine types, which will make the computed
virtual topology more reflective of the real hardware.

In order to reduce code duplication and ease the code maintenance,
smp_parse() is converted into a generic enough parser for all arches,
so that the arch-specific ones (e.g. pc_smp_parse) can be removed.
It's also convenient to introduce more topology members to the generic
parser in the future.

A unit test for the SMP parsing is added. In the test, all possible
collections of the topology parameters and the corresponding expected
results are listed, including the valid and invalid ones. The preference
of sockets over cores and the preference of cores over sockets, and the
support of dies are also taken into consideration.

---

Changelogs:

v5->v6:
- deprecate "parameter=0" SMP configurations (patch #1 and #2 added)
- rebased on upstream v6.1.0-rc4
- v5: 
https://lore.kernel.org/qemu-devel/20210813023912.105880-1-wangyana...@huawei.com/

v4->v5:
- refactor out the duplicated "threads == 0" case in patch #6 (Pankaj)
- pick up more R-b tags from v4 (thanks very much for the review!)
- v4: 
https://lore.kernel.org/qemu-devel/20210803080527.156556-1-wangyana...@huawei.com/

v3->v4:
- put all the sanity check into the parser
- refine the unit test and add it back to the series
- add the R-b/A-b tags for the reviewed/acked patches
- v3: 
https://lore.kernel.org/qemu-devel/20210728034848.75228-1-wangyana...@huawei.com/

v2->v3:
- apply the calculation improvement to smp_parse and pc_smp_parse
  separately and then convert the finally improved parsers into a
  generic one, so that patches can be reviewed separately.
- to ease review, drop the unit test part for a while until we have
  a good enough generic parser.
- send the patch "machine: Disallow specifying topology parameters as zero"
  for 6.1 separately.
- v2: 
https://lore.kernel.org/qemu-devel/20210719032043.25416-1-wangyana...@huawei.com/

v1->v2:
- disallow "anything=0" in the smp configuration (Andrew)
- make function smp_parse() a generic helper for all arches
- improve the error reporting in the parser
- start to prefer cores over sockets since 6.2 (Daniel)
- add a unit test for the smp parsing (Daniel)
- v1: 
https://lore.kernel.org/qemu-devel/20210702100739.13672-1-wangyana...@huawei.com/

---

Yanan Wang (16):
  docs/about/removed-features: Remove duplicated doc about -smp
  machine: Deprecate "parameter=0" SMP configurations
  machine: Minor refactor/fix for the smp parsers
  machine: Uniformly use maxcpus to calculate the omitted parameters
  machine: Set the value of cpus to match maxcpus if it's omitted
  machine: Improve the error reporting of smp parsing
  hw: Add compat machines for 6.2
  machine: Prefer cores over sockets in smp parsing since 6.2
  machine: Use ms instead of global current_machine in sanity-check
  machine: Tweak the order of topology members in struct CpuTopology
  machine: Make smp_parse generic enough for all arches
  machine: Remove smp_parse callback from MachineClass
  machine: Move smp_prefer_sockets to struct SMPCompatProps
  machine: Put all sanity-check in the generic SMP parser
  machine: Split out the smp parsing code
  tests/unit: Add a unit test for smp parsing

 MAINTAINERS |   2 +
 docs/about/deprecated.rst   |  15 +
 docs/about/removed-features.rst |  21 +-
 hw/arm/virt.c   |  10 +-
 hw/core/machine-smp.c   | 200 
 hw/core/machine.c   |  93 +---
 hw/core/meson.build |   1 +
 hw/i386/pc.c|  66 +--
 hw/i386/pc_piix.c   |  15 +-
 hw/i386/pc_q35.c|  14 +-
 hw/ppc/spapr.c  |  16 +-
 hw/s390x/s390-virtio-ccw.c  |  15 +-
 include/hw/boards.h |  27 +-
 include/hw/i386/pc.h|   3 +
 qapi/machine.json   |   2 +-
 qemu-options.hx |  24 +-
 tests/unit/meson.build  |   1 +
 tests/unit/test-smp-parse.c | 866 
 18 files changed, 1205 insertions(+), 186 deletions(-)
 create mode 100644 hw/core/machine-smp.c
 create mode 100644 tests/unit/test-smp-parse.c

--
2.19.1

[PATCH v6 05/16] machine: Set the value of cpus to match maxcpus if it's omitted

2021-08-18 Thread Yanan Wang

Currently we directly calculate the omitted cpus based on the given
incomplete collection of parameters. This makes some cmdlines like:
  -smp maxcpus=16
  -smp sockets=2,maxcpus=16
  -smp sockets=2,dies=2,maxcpus=16
  -smp sockets=2,cores=4,maxcpus=16
not work. We should probably set the value of cpus to match maxcpus
if it's omitted, which will make the above configs start to work.

So the calculation logic of cpus/maxcpus after this patch will be:
When both maxcpus and cpus are omitted, maxcpus will be calculated
from the given parameters and cpus will be set equal to maxcpus.
When only one of maxcpus and cpus is given then the omitted one
will be set to its counterpart's value. Both maxcpus and cpus may
be specified, but maxcpus must be equal to or greater than cpus.

Note: change in this patch won't affect any existing working cmdlines
but allows more incomplete configs to be valid.

Signed-off-by: Yanan Wang 
Reviewed-by: Andrew Jones 
---
 hw/core/machine.c | 29 -
 hw/i386/pc.c  | 29 -
 qemu-options.hx   | 11 ---
 3 files changed, 40 insertions(+), 29 deletions(-)

diff --git a/hw/core/machine.c b/hw/core/machine.c
index dc12b5ec4e..85908abc77 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -757,25 +757,28 @@ static void smp_parse(MachineState *ms, SMPConfiguration 
*config, Error **errp)
 }
 
 /* compute missing values, prefer sockets over cores over threads */
-maxcpus = maxcpus > 0 ? maxcpus : cpus;
-
-if (cpus == 0) {
+if (cpus == 0 && maxcpus == 0) {
 sockets = sockets > 0 ? sockets : 1;
 cores = cores > 0 ? cores : 1;
 threads = threads > 0 ? threads : 1;
-cpus = sockets * cores * threads;
+} else {
 maxcpus = maxcpus > 0 ? maxcpus : cpus;
-} else if (sockets == 0) {
-cores = cores > 0 ? cores : 1;
-threads = threads > 0 ? threads : 1;
-sockets = maxcpus / (cores * threads);
-} else if (cores == 0) {
-threads = threads > 0 ? threads : 1;
-cores = maxcpus / (sockets * threads);
-} else if (threads == 0) {
-threads = maxcpus / (sockets * cores);
+
+if (sockets == 0) {
+cores = cores > 0 ? cores : 1;
+threads = threads > 0 ? threads : 1;
+sockets = maxcpus / (cores * threads);
+} else if (cores == 0) {
+threads = threads > 0 ? threads : 1;
+cores = maxcpus / (sockets * threads);
+} else if (threads == 0) {
+threads = maxcpus / (sockets * cores);
+}
 }
 
+maxcpus = maxcpus > 0 ? maxcpus : sockets * cores * threads;
+cpus = cpus > 0 ? cpus : maxcpus;
+
 if (sockets * cores * threads < cpus) {
 error_setg(errp, "cpu topology: "
"sockets (%u) * cores (%u) * threads (%u) < "
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index a9ff9ef52c..9ad7ae5254 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -725,25 +725,28 @@ static void pc_smp_parse(MachineState *ms, 
SMPConfiguration *config, Error **err
 dies = dies > 0 ? dies : 1;
 
 /* compute missing values, prefer sockets over cores over threads */
-maxcpus = maxcpus > 0 ? maxcpus : cpus;
-
-if (cpus == 0) {
+if (cpus == 0 && maxcpus == 0) {
 sockets = sockets > 0 ? sockets : 1;
 cores = cores > 0 ? cores : 1;
 threads = threads > 0 ? threads : 1;
-cpus = sockets * dies * cores * threads;
+} else {
 maxcpus = maxcpus > 0 ? maxcpus : cpus;
-} else if (sockets == 0) {
-cores = cores > 0 ? cores : 1;
-threads = threads > 0 ? threads : 1;
-sockets = maxcpus / (dies * cores * threads);
-} else if (cores == 0) {
-threads = threads > 0 ? threads : 1;
-cores = maxcpus / (sockets * dies * threads);
-} else if (threads == 0) {
-threads = maxcpus / (sockets * dies * cores);
+
+if (sockets == 0) {
+cores = cores > 0 ? cores : 1;
+threads = threads > 0 ? threads : 1;
+sockets = maxcpus / (dies * cores * threads);
+} else if (cores == 0) {
+threads = threads > 0 ? threads : 1;
+cores = maxcpus / (sockets * dies * threads);
+} else if (threads == 0) {
+threads = maxcpus / (sockets * dies * cores);
+}
 }
 
+maxcpus = maxcpus > 0 ? maxcpus : sockets * dies * cores * threads;
+cpus = cpus > 0 ? cpus : maxcpus;
+
 if (sockets * dies * cores * threads < cpus) {
 error_setg(errp, "cpu topology: "
"sockets (%u) * dies (%u) * cores (%u) * threads (%u) < "
diff --git a/qemu-options.hx b/qemu-options.hx
index aee622f577..06f819177e 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -214,9 +214,14 @@ SRST
 Simulate a SMP system with '\ ``n``\ ' CPUs initially present on
 the machine type board. On boards supporting CPU hotplug, the optional
 '\ ``maxcpus``\ ' parame

Re: [PATCH v2 08/21] target/riscv: Move gen_* helpers for RVM

2021-08-18 Thread Bin Meng

On Wed, Aug 18, 2021 at 5:18 AM Richard Henderson
 wrote:
>
> Move these helpers near their use by the trans_*
> functions within insn_trans/trans_rvm.c.inc.
>
> Signed-off-by: Richard Henderson 
> ---
>  target/riscv/translate.c| 112 
>  target/riscv/insn_trans/trans_rvm.c.inc | 112 
>  2 files changed, 112 insertions(+), 112 deletions(-)
>

Reviewed-by: Bin Meng

Re: [PATCH v2 09/21] target/riscv: Move gen_* helpers for RVB

2021-08-18 Thread Bin Meng

On Wed, Aug 18, 2021 at 5:21 AM Richard Henderson
 wrote:
>
> Move these helpers near their use by the trans_*
> functions within insn_trans/trans_rvb.c.inc.
>
> Signed-off-by: Richard Henderson 
> ---
>  target/riscv/translate.c| 233 ---
>  target/riscv/insn_trans/trans_rvb.c.inc | 234 
>  2 files changed, 234 insertions(+), 233 deletions(-)
>

Reviewed-by: Bin Meng

Re: [PATCH v2 07/21] target/riscv: Use gen_arith for mulh and mulhu

2021-08-18 Thread Bin Meng

On Wed, Aug 18, 2021 at 5:23 AM Richard Henderson
 wrote:
>
> Split out gen_mulh and gen_mulhu and use the common helper.
>
> Signed-off-by: Richard Henderson 
> ---
>  target/riscv/insn_trans/trans_rvm.c.inc | 40 +++--
>  1 file changed, 18 insertions(+), 22 deletions(-)
>

Reviewed-by: Bin Meng

Re: [PATCH v2 06/21] target/riscv: Remove gen_arith_div*

2021-08-18 Thread Bin Meng

On Wed, Aug 18, 2021 at 5:20 AM Richard Henderson
 wrote:
>
> Use ctx->w and the enhanced gen_arith function.
>
> Signed-off-by: Richard Henderson 
> ---
>  target/riscv/translate.c| 42 -
>  target/riscv/insn_trans/trans_rvm.c.inc | 16 +-
>  2 files changed, 8 insertions(+), 50 deletions(-)
>

Reviewed-by: Bin Meng

Re: [PATCH v2 05/21] target/riscv: Add DisasExtend to gen_arith*

2021-08-18 Thread Bin Meng

On Wed, Aug 18, 2021 at 5:23 AM Richard Henderson
 wrote:
>
> Most arithmetic does not require extending the inputs.
> Exceptions include division, comparison and minmax.
>
> Begin using ctx->w, which allows elimination of gen_addw,
> gen_subw, gen_mulw.
>
> Signed-off-by: Richard Henderson 
> ---
>  target/riscv/translate.c| 69 +++--
>  target/riscv/insn_trans/trans_rvb.c.inc | 30 +--
>  target/riscv/insn_trans/trans_rvi.c.inc | 39 --
>  target/riscv/insn_trans/trans_rvm.c.inc | 16 +++---
>  4 files changed, 64 insertions(+), 90 deletions(-)
>

Reviewed-by: Bin Meng

Re: [PATCH v2 04/21] target/riscv: Introduce DisasExtend and new helpers

2021-08-18 Thread Richard Henderson


On 8/18/21 12:58 AM, Bin Meng wrote:

+TCGv temp[4];


Why 4? Is it enough? Perhaps a comment is needed here?


It's a round number that will cover three operands plus an extra for address 
computation.

r~

[PATCH v2] net/colo: check vnet_hdr_support flag when using virtio-net

2021-08-18 Thread Tao Xu

When COLO uses only one vnet_hdr_support parameter between the
COLO network filter (filter-mirror, filter-redirector or
filter-rewriter) and colo-compare, packets will not be parsed
correctly. Acquire the network driver related to COLO; if it is
virtio-net, check the vnet_hdr_support flag of the COLO network
filter and colo-compare.

Signed-off-by: Tao Xu 
Signed-off-by: Zhang Chen 
---

Changelog:
v2:
 Detect virtio-net driver and apply vnet_hdr_support
 automatically. (Jason)
---
 net/colo-compare.c| 57 +++
 net/colo.c| 20 +++
 net/colo.h|  4 +++
 net/filter-mirror.c   | 21 
 net/filter-rewriter.c | 10 
 qapi/qom.json |  6 +
 qemu-options.hx   |  6 +++--
 7 files changed, 122 insertions(+), 2 deletions(-)

diff --git a/net/colo-compare.c b/net/colo-compare.c
index b100e7b51f..870bd05a41 100644
--- a/net/colo-compare.c
+++ b/net/colo-compare.c
@@ -110,6 +110,7 @@ struct CompareState {
 char *sec_indev;
 char *outdev;
 char *notify_dev;
+char *netdev;
 CharBackend chr_pri_in;
 CharBackend chr_sec_in;
 CharBackend chr_out;
@@ -838,6 +839,28 @@ static int compare_chr_can_read(void *opaque)
 return COMPARE_READ_LEN_MAX;
 }
 
+static int colo_set_default_netdev(void *opaque, QemuOpts *opts, Error **errp)
+{
+const char *colo_obj_type, *netdev_from_filter;
+char **netdev = (char **)opaque;
+
+colo_obj_type = qemu_opt_get(opts, "qom-type");
+
+if (colo_obj_type &&
+(strcmp(colo_obj_type, "filter-mirror") == 0 ||
+ strcmp(colo_obj_type, "filter-redirector") == 0 ||
+ strcmp(colo_obj_type, "filter-rewriter") == 0)) {
+netdev_from_filter = qemu_opt_get(opts, "netdev");
+if (*netdev == NULL) {
+*netdev = g_strdup(netdev_from_filter);
+} else if (strcmp(*netdev, netdev_from_filter) != 0) {
+warn_report("%s is using a different netdev from other COLO "
+"component", colo_obj_type);
+}
+}
+return 0;
+}
+
 /*
  * Called from the main thread on the primary for packets
  * arriving over the socket from the primary.
@@ -1050,6 +1073,21 @@ static void compare_set_vnet_hdr(Object *obj,
 s->vnet_hdr = value;
 }
 
+static char *compare_get_netdev(Object *obj, Error **errp)
+{
+CompareState *s = COLO_COMPARE(obj);
+
+return g_strdup(s->netdev);
+}
+
+static void compare_set_netdev(Object *obj, const char *value, Error **errp)
+{
+CompareState *s = COLO_COMPARE(obj);
+
+g_free(s->netdev);
+s->netdev = g_strdup(value);
+}
+
 static char *compare_get_notify_dev(Object *obj, Error **errp)
 {
 CompareState *s = COLO_COMPARE(obj);
@@ -1274,6 +1312,12 @@ static void colo_compare_complete(UserCreatable *uc, 
Error **errp)
 max_queue_size = MAX_QUEUE_SIZE;
 }
 
+if (!s->netdev) {
+/* Set default netdev as the first colo netfilter found */
+qemu_opts_foreach(qemu_find_opts("object"),
+  colo_set_default_netdev, &s->netdev, NULL);
+}
+
 if (find_and_check_chardev(&chr, s->pri_indev, errp) ||
 !qemu_chr_fe_init(&s->chr_pri_in, chr, errp)) {
 return;
@@ -1289,6 +1333,16 @@ static void colo_compare_complete(UserCreatable *uc, 
Error **errp)
 return;
 }
 
+if (!s->vnet_hdr &&
+qemu_opts_foreach(qemu_find_opts("device"),
+  vnet_driver_check, s->netdev, NULL)) {
+/*
+ * colo compare needs 'vnet_hdr_support' when it works on virtio-net,
+ * add 'vnet_hdr_support' automatically
+ */
+s->vnet_hdr = true;
+}
+
 net_socket_rs_init(&s->pri_rs, compare_pri_rs_finalize, s->vnet_hdr);
 net_socket_rs_init(&s->sec_rs, compare_sec_rs_finalize, s->vnet_hdr);
 
@@ -1400,6 +1454,9 @@ static void colo_compare_init(Object *obj)
 s->vnet_hdr = false;
 object_property_add_bool(obj, "vnet_hdr_support", compare_get_vnet_hdr,
  compare_set_vnet_hdr);
+/* colo compare can't verify that netdev is correct */
+object_property_add_str(obj, "netdev", compare_get_netdev,
+compare_set_netdev);
 }
 
 void colo_compare_cleanup(void)
diff --git a/net/colo.c b/net/colo.c
index 3a3e6e89a0..4a03780f45 100644
--- a/net/colo.c
+++ b/net/colo.c
@@ -243,3 +243,23 @@ bool connection_has_tracked(GHashTable 
*connection_track_table,
 
 return conn ? true : false;
 }
+
+/* check the network driver related to COLO, return 1 if it is virtio-net */
+int vnet_driver_check(void *opaque, QemuOpts *opts, Error **errp)
+{
+const char *driver_type, *netdev_from_driver;
+char *netdev_from_filter = (char *)opaque;
+
+driver_type = qemu_opt_get(opts, "driver");
+netdev_from_driver = qemu_opt_get(opts, "netdev");
+
+if (!driver_type || !netdev_from_driver || !netdev_from_filter) {
+return 0;
+}
+
+if (g_str_has_prefix(driv

Re: [PATCH v2 04/21] target/riscv: Introduce DisasExtend and new helpers

2021-08-18 Thread Richard Henderson


On 8/18/21 12:58 AM, Bin Meng wrote:

+static void gen_set_gpr(DisasContext *ctx, int reg_num, TCGv t)
+{
+if (reg_num != 0) {
+if (ctx->w) {
+tcg_gen_ext32s_tl(cpu_gpr[reg_num], t);


What about zero extension?


All of the RV64 word instructions sign-extend the result.


  void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
  {
-DisasContext ctx;
+DisasContext ctx = { };


Why this change? I believe we should explicitly initialize the ctx
in riscv_tr_init_disas_context().


I considered it easier to zero-init the whole thing here.

r~

Re: [PATCH v2 2/3] target/riscv: update Zb[abcs] to 1.0.0 (public review) specification

2021-08-18 Thread Richard Henderson


On 8/18/21 10:32 AM, Philipp Tomsich wrote:

The ratification package for Zb[abcs] does not contain all instructions
that have been added to QEMU and does not define misa.B for these: the
individual extensions are now Zba, Zbb, Zbc and Zbs.

Some of the instructions that had previously been added and now need to
be dropped are:
  - shift-one instructions
  - generalized reverse and or-combine
  - w-forms of single-bit instructions
  - w-form of rev8



Do not try to do this all in one patch.  It's too large to review that way.


The following have been adjusted:
  - rori and slli.uw only accept a 6-bit shamt field
(if the bit that is reserved for a future 7-bit shamt for RV128 is
 set, the encoding is illegal on RV64)


The gen_shifti helper should be taking care of testing that the shamt is in range.  You 
really should match the base shift instructions here.



 
-static bool trans_grevi(DisasContext *ctx, arg_grevi *a)

+static void gen_orc_b(TCGv ret, TCGv source1)
 {
-REQUIRE_EXT(ctx, RVB);
-
-if (a->shamt >= TARGET_LONG_BITS) {
-return false;
-}
-
-return gen_grevi(ctx, a);
+TCGv  tmp = tcg_temp_new();
+tcg_gen_andi_tl(tmp, source1, (TARGET_LONG_BITS == 64) ? 
0xLL
+   : 0x);
+tcg_gen_shli_tl(tmp, tmp, 1);
+tcg_gen_or_tl(source1, source1, tmp);
+tcg_gen_andi_tl(tmp, source1, (TARGET_LONG_BITS == 64) ? 
0xLL
+   : 0x);
+tcg_gen_shri_tl(tmp, tmp, 1);
+tcg_gen_or_tl(source1, source1, tmp);
+tcg_gen_andi_tl(tmp, source1, (TARGET_LONG_BITS == 64) ? 
0xLL
+   : 0x);
+tcg_gen_shli_tl(tmp, tmp, 2);
+tcg_gen_or_tl(source1, source1, tmp);
+tcg_gen_andi_tl(tmp, source1, (TARGET_LONG_BITS == 64) ? 
0xLL
+   : 0x);
+tcg_gen_shri_tl(tmp, tmp, 2);
+tcg_gen_or_tl(source1, source1, tmp);
+tcg_gen_andi_tl(tmp, source1, (TARGET_LONG_BITS == 64) ? 
0x0f0f0f0f0f0f0f0fLL
+   : 0x0f0f0f0f);
+tcg_gen_shli_tl(tmp, tmp, 4);
+tcg_gen_or_tl(source1, source1, tmp);
+tcg_gen_andi_tl(tmp, source1, (TARGET_LONG_BITS == 64) ? 
0xf0f0f0f0f0f0f0f0LL
+   : 0xf0f0f0f0);
+tcg_gen_shri_tl(tmp, tmp, 4);
+tcg_gen_or_tl(ret, source1, tmp);
 }


You can use the simpler algorithm from
 https://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord

  /* Set msb in each byte if the byte was zero. */
  tcg_gen_subi_tl(tmp, src1, dup_const(MO_8, 0x01));
  tcg_gen_andc_tl(tmp, tmp, src1);
  tcg_gen_andi_tl(tmp, tmp, dup_const(MO_8, 0x80));
  /* Replicate the msb of each byte across the byte. */
  tcg_gen_shri_tl(tmp, tmp, 7);
  tcg_gen_muli_tl(dest, tmp, 0xff);




+static void gen_clmulx(DisasContext *ctx, arg_r *a, bool reverse)
+{
+TCGv source1 = tcg_temp_new();
+TCGv source2 = tcg_temp_new();
+TCGv zeroreg = tcg_const_tl(0);
+TCGv t0 = tcg_temp_new();
+TCGv t1 = tcg_temp_new();
+TCGv result = tcg_temp_new();
+
+gen_get_gpr(source1, a->rs1);
+gen_get_gpr(source2, a->rs2);
+tcg_gen_movi_tl(result, 0);
+
+for (int i = 0; i < TARGET_LONG_BITS; i++) {
+tcg_gen_shri_tl(t0, source2, i);
+if (reverse) {
+tcg_gen_shri_tl(t1, source1, TARGET_LONG_BITS - i - 1);
+} else {
+tcg_gen_shli_tl(t1, source1, i);
+}
+tcg_gen_andi_tl(t0, t0, 1);
+tcg_gen_xor_tl(t1, result, t1);
+tcg_gen_movcond_tl(TCG_COND_NE, result, t0, zeroreg, t1, result);
+}
+
+gen_set_gpr(a->rd, result);
+tcg_temp_free(source1);
+tcg_temp_free(source2);
+tcg_temp_free(t0);
+tcg_temp_free(t1);
+tcg_temp_free(zeroreg);
+tcg_temp_free(result);
+}


This inline is way too large -- up to 384 instructions.
Use a couple of out-of-line helpers.


r~

Re: [PATCH v2] hw/intc/sifive_clint: Fix overflow in sifive_clint_write_timecmp()

2021-08-18 Thread Alistair Francis

On Tue, Aug 17, 2021 at 6:00 PM Bin Meng  wrote:
>
> On Tue, Aug 17, 2021 at 2:38 AM David Hoppenbrouwers  
> wrote:
> >
> > `next` is an `uint64_t` value, but `timer_mod` takes an `int64_t`. This
> > resulted in high values such as `UINT64_MAX` being converted to `-1`,
> > which caused an immediate timer interrupt.
> >
> > By limiting `next` to `INT64_MAX` no overflow will happen while the
> > timer will still be effectively set to "infinitely" far in the future.
> >
> > Resolves: https://gitlab.com/qemu-project/qemu/-/issues/493
> > Signed-off-by: David Hoppenbrouwers 
> > ---
> > I wrongly used `MAX` instead of `MIN`. I've amended the patch.
> >
> >  hw/intc/sifive_clint.c | 2 ++
> >  1 file changed, 2 insertions(+)
> >
>
> Reviewed-by: Bin Meng 

Thanks!

Applied to riscv-to-apply.next

Alistair

>

Re: [PATCH v2 1/3] target/riscv: Add x-zba, x-zbb, x-zbc and x-zbs properties

2021-08-18 Thread Philipp Tomsich

I will provide a v3 to restore bisectability,

On Thu, 19 Aug 2021 at 00:39, Richard Henderson <
richard.hender...@linaro.org> wrote:

> On 8/18/21 10:32 AM, Philipp Tomsich wrote:
> > +++ b/target/riscv/cpu.h
> > @@ -67,7 +67,6 @@
> >   #define RVS RV('S')
> >   #define RVU RV('U')
> >   #define RVH RV('H')
> > -#define RVB RV('B')
>
> This patch does not compile by itself, because RVB is still used in
> insn_trans/trans_rvb.c.inc.
>
>
> r~
>

Re: [PATCH v2 1/3] target/riscv: Add x-zba, x-zbb, x-zbc and x-zbs properties

2021-08-18 Thread Richard Henderson


On 8/18/21 10:32 AM, Philipp Tomsich wrote:

+++ b/target/riscv/cpu.h
@@ -67,7 +67,6 @@
  #define RVS RV('S')
  #define RVU RV('U')
  #define RVH RV('H')
-#define RVB RV('B')


This patch does not compile by itself, because RVB is still used in 
insn_trans/trans_rvb.c.inc.



r~

Re: [PATCH v2 2/5] target/mips: Replace GET_LMASK() macro by get_lmask(32) function

2021-08-18 Thread Richard Henderson


On 8/18/21 11:55 AM, Philippe Mathieu-Daudé wrote:

The target endianness information is stored in the BigEndian
bit of the Config0 register in CP0.

Replace the GET_LMASK() macro by an inlined get_lmask() function,
passing CPUMIPSState and the word size as argument.

We can remove one use of the TARGET_WORDS_BIGENDIAN definition.

Signed-off-by: Philippe Mathieu-Daudé
---
  target/mips/tcg/ldst_helper.c | 32 +---
  1 file changed, 21 insertions(+), 11 deletions(-)


Reviewed-by: Richard Henderson 

r~

Re: [PATCH v2 3/5] target/mips: Replace GET_LMASK64() macro by get_lmask(64) function

2021-08-18 Thread Richard Henderson


On 8/18/21 11:55 AM, Philippe Mathieu-Daudé wrote:

The target endianness information is stored in the BigEndian
bit of the Config0 register in CP0.

Replace the GET_LMASK64() macro by an inlined get_lmask() function,
passing CPUMIPSState and the word size as argument.

We can remove another use of the TARGET_WORDS_BIGENDIAN definition.

Signed-off-by: Philippe Mathieu-Daudé
---
  target/mips/tcg/ldst_helper.c | 35 ---
  1 file changed, 16 insertions(+), 19 deletions(-)


Reviewed-by: Richard Henderson 

r~

Re: [PATCH v2 1/5] target/mips: Call cpu_is_bigendian & inline GET_OFFSET in ld/st helpers

2021-08-18 Thread Richard Henderson


On 8/18/21 11:55 AM, Philippe Mathieu-Daudé wrote:

The target endianness information is stored in the BigEndian
bit of the Config0 register in CP0.

As a first step, inline the GET_OFFSET() macro, calling
cpu_is_bigendian() to get the 'direction' of the offset.

Signed-off-by: Philippe Mathieu-Daudé
---
  target/mips/tcg/ldst_helper.c | 55 +--
  1 file changed, 33 insertions(+), 22 deletions(-)


Reviewed-by: Richard Henderson 

r~

Re: [PATCH 1/5] target/mips: Replace GET_OFFSET() macro by get_offset() function

2021-08-18 Thread Richard Henderson


On 8/18/21 11:31 AM, Philippe Mathieu-Daudé wrote:

   I think you should drop
get_offset() entirely and replace it with

     int dir = cpu_is_bigendian(env) ? 1 : -1;

     stb(env, arg2 + 1 * dir, data);

     stb(env, arg2 + 2 * dir, data);

Alternately, bite the bullet and split the function(s) into two,
explicitly endian versions: helper_swl_be, helper_swl_le, etc.


I'll go for the easier path ;)


It's not really more difficult.

static inline void do_swl(env, uint32_t val, target_ulong addr, int midx,
  int dir, unsigned lmask, uintptr_t ra)
{
cpu_stb_mmuidx_ra(env, addr, val >> 24, midx, ra);

if (lmask <= 2) {
cpu_stb_mmuidx_ra(env, addr + 1 * dir, val >> 16, midx, ra);
}
if (lmask <= 1) {
cpu_stb_mmuidx_ra(env, addr + 1 * dir, val >> 8, midx, ra);
}
if (lmask == 0) {
cpu_stb_mmuidx_ra(env, addr + 1 * dir, val, midx, ra);
}
}

void helper_swl_be(env, val, addr, midx)
{
do_swl(env, val, addr, midx, 1, addr & 3, GETPC());
}

void helper_swl_le(env, val, addr, midx)
{
do_swl(env, val, addr, midx, -1, ~addr & 3, GETPC());
}

Although I do wonder if this is strictly correct with respect to atomicity.  In my 
tcg/mips unaligned patch set, I assumed that lwl+lwr of an aligned address produces two 
atomic 32-bit loads, which result in a complete atomic load at the end.


Should we be doing something like

void helper_swl_be(env, val, addr, midx)
{
uintptr_t ra = GETPC();

switch (addr & 3) {
case 0:
cpu_stl_be_mmuidx_ra(env, val, addr, midx, ra);
break;
case 1:
cpu_stb_mmuidx_ra(env, val >> 24, addr, midx, ra);
cpu_stw_be_mmuidx_ra(env, val >> 16, addr + 1, midx, ra);
break;
case 2:
cpu_stw_be_mmuidx_ra(env, val >> 16, addr, midx, ra);
break;
case 3:
cpu_stb_mmuidx_ra(env, val >> 24, addr, midx, ra);
break;
}
}

void helper_swl_le(env, val, addr, midx)
{
uintptr_t ra = GETPC();

/*
 * We want to use stw and stl for atomicity, but want any
 * fault to report ADDR, not the aligned address.
 */
probe_write(env, addr, 0, midx, ra);

switch (addr & 3) {
case 3:
cpu_stl_le_mmuidx_ra(env, val, addr - 3, midx, ra);
break;
case 1:
cpu_stw_le_mmuidx_ra(env, val >> 16, addr - 1, midx, ra);
break;
case 2:
cpu_stw_le_mmuidx_ra(env, val >> 8, addr - 2, midx, ra);
/* fall through */
case 0:
cpu_stb_mmuidx_ra(env, val >> 24, addr, midx, ra);
break;
}
}

etc.


r~

Re: [PATCH v3 09/16] tcg/mips: Drop special alignment for code_gen_buffer

2021-08-18 Thread Philippe Mathieu-Daudé

On 8/18/21 10:19 PM, Richard Henderson wrote:
> Signed-off-by: Richard Henderson 
> ---
>  tcg/region.c | 91 
>  1 file changed, 91 deletions(-)

Yay!

Reviewed-by: Philippe Mathieu-Daudé

Re: [PATCH v3 08/16] tcg/mips: Unset TCG_TARGET_HAS_direct_jump

2021-08-18 Thread Philippe Mathieu-Daudé

On 8/18/21 10:19 PM, Richard Henderson wrote:
> Only use indirect jumps.  Finish weaning away from the
> unique alignment requirements for code_gen_buffer.
> 
> Signed-off-by: Richard Henderson 
> ---
>  tcg/mips/tcg-target.h | 12 +---
>  tcg/mips/tcg-target.c.inc | 23 +--
>  2 files changed, 10 insertions(+), 25 deletions(-)

Reviewed-by: Philippe Mathieu-Daudé

Re: [PATCH v3 05/16] tcg/mips: Move TCG_GUEST_BASE_REG to S7

2021-08-18 Thread Philippe Mathieu-Daudé

On 8/18/21 10:19 PM, Richard Henderson wrote:
> No functional change; just moving the saved reserved regs to the end.
> 
> Signed-off-by: Richard Henderson 
> ---
>  tcg/mips/tcg-target.c.inc | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
> index 92bde50704..b3a2cc88ab 100644
> --- a/tcg/mips/tcg-target.c.inc
> +++ b/tcg/mips/tcg-target.c.inc
> @@ -86,7 +86,7 @@ static const char * const 
> tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
>  #define TCG_TMP3  TCG_REG_T7
>  
>  #ifndef CONFIG_SOFTMMU
> -#define TCG_GUEST_BASE_REG TCG_REG_S1
> +#define TCG_GUEST_BASE_REG TCG_REG_S7
>  #endif

Maybe add a comment in tcg_target_callee_save_regs[]?

Reviewed-by: Philippe Mathieu-Daudé

Re: [PATCH v3 04/16] tcg/mips: Move TCG_AREG0 to S8

2021-08-18 Thread Philippe Mathieu-Daudé

On 8/18/21 10:19 PM, Richard Henderson wrote:
> No functional change; just moving the saved reserved regs to the end.
> 
> Signed-off-by: Richard Henderson 
> ---
>  tcg/mips/tcg-target.h | 2 +-
>  tcg/mips/tcg-target.c.inc | 4 ++--
>  2 files changed, 3 insertions(+), 3 deletions(-)

Reviewed-by: Philippe Mathieu-Daudé

Re: [PATCH v3 00/16] tcg/mips: Unaligned access and other cleanup

2021-08-18 Thread Philippe Mathieu-Daudé

Sorry, use Huacai's newer email .

On Thu, Aug 19, 2021 at 12:07 AM Philippe Mathieu-Daudé  wrote:
>
> Cc'ing Jiaxun & Huacai.
>
> On 8/18/21 10:19 PM, Richard Henderson wrote:
> > Based-on: <20210818191920.390759-1-richard.hender...@linaro.org>
> > ("[PATCH v3 00/66] Unaligned access for user-only")
> >
> > Important points:
> >   * Support unaligned accesses.
> >   * Drop requirement for 256MB alignment of code_gen_buffer.
> >   * Improvements to tcg_out_movi:
> > - Have a tb-relative register for mips64, reducing the
> >   code size for most pointers,
> > - Try a few 3-insn sequences,
> > - Drop everything else into a constant pool.
> >
> >
> > r~
> >
> >
> > Richard Henderson (16):
> >   tcg/mips: Support unaligned access for user-only
> >   tcg/mips: Support unaligned access for softmmu
> >   tcg/mips: Drop inline markers
> >   tcg/mips: Move TCG_AREG0 to S8
> >   tcg/mips: Move TCG_GUEST_BASE_REG to S7
> >   tcg/mips: Unify TCG_GUEST_BASE_REG tests
> >   tcg/mips: Allow JAL to be out of range in tcg_out_bswap_subr
> >   tcg/mips: Unset TCG_TARGET_HAS_direct_jump
> >   tcg/mips: Drop special alignment for code_gen_buffer
> >   tcg/mips: Create and use TCG_REG_TB
> >   tcg/mips: Split out tcg_out_movi_one
> >   tcg/mips: Split out tcg_out_movi_two
> >   tcg/mips: Use the constant pool for 64-bit constants
> >   tcg/mips: Aggressively use the constant pool for n64 calls
> >   tcg/mips: Try tb-relative addresses in tcg_out_movi
> >   tcg/mips: Try three insns with shift and add in tcg_out_movi
> >
> >  tcg/mips/tcg-target.h |  17 +-
> >  tcg/region.c  |  91 -
> >  tcg/mips/tcg-target.c.inc | 730 +++---
> >  3 files changed, 604 insertions(+), 234 deletions(-)
> >
>

Re: [PATCH v3 00/16] tcg/mips: Unaligned access and other cleanup

2021-08-18 Thread Philippe Mathieu-Daudé

Cc'ing Jiaxun & Huacai.

On 8/18/21 10:19 PM, Richard Henderson wrote:
> Based-on: <20210818191920.390759-1-richard.hender...@linaro.org>
> ("[PATCH v3 00/66] Unaligned access for user-only")
> 
> Important points:
>   * Support unaligned accesses.
>   * Drop requirement for 256MB alignment of code_gen_buffer.
>   * Improvements to tcg_out_movi:
> - Have a tb-relative register for mips64, reducing the
>   code size for most pointers,
> - Try a few 3-insn sequences,
> - Drop everything else into a constant pool.
> 
> 
> r~
> 
> 
> Richard Henderson (16):
>   tcg/mips: Support unaligned access for user-only
>   tcg/mips: Support unaligned access for softmmu
>   tcg/mips: Drop inline markers
>   tcg/mips: Move TCG_AREG0 to S8
>   tcg/mips: Move TCG_GUEST_BASE_REG to S7
>   tcg/mips: Unify TCG_GUEST_BASE_REG tests
>   tcg/mips: Allow JAL to be out of range in tcg_out_bswap_subr
>   tcg/mips: Unset TCG_TARGET_HAS_direct_jump
>   tcg/mips: Drop special alignment for code_gen_buffer
>   tcg/mips: Create and use TCG_REG_TB
>   tcg/mips: Split out tcg_out_movi_one
>   tcg/mips: Split out tcg_out_movi_two
>   tcg/mips: Use the constant pool for 64-bit constants
>   tcg/mips: Aggressively use the constant pool for n64 calls
>   tcg/mips: Try tb-relative addresses in tcg_out_movi
>   tcg/mips: Try three insns with shift and add in tcg_out_movi
> 
>  tcg/mips/tcg-target.h |  17 +-
>  tcg/region.c  |  91 -
>  tcg/mips/tcg-target.c.inc | 730 +++---
>  3 files changed, 604 insertions(+), 234 deletions(-)
>

Re: [PATCH v3 12/14] tcg/arm: More use of the TCGReg enum

2021-08-18 Thread Philippe Mathieu-Daudé

On 8/18/21 11:29 PM, Richard Henderson wrote:
> Signed-off-by: Richard Henderson 
> ---
>  tcg/arm/tcg-target.c.inc | 65 +---
>  1 file changed, 35 insertions(+), 30 deletions(-)

I like it :)

Reviewed-by: Philippe Mathieu-Daudé

Re: [PATCH v3 11/14] tcg/arm: More use of the ARMInsn enum

2021-08-18 Thread Philippe Mathieu-Daudé

On 8/18/21 11:29 PM, Richard Henderson wrote:
> Signed-off-by: Richard Henderson 
> ---
>  tcg/arm/tcg-target.c.inc | 20 ++--
>  1 file changed, 10 insertions(+), 10 deletions(-)

Reviewed-by: Philippe Mathieu-Daudé

Re: [PATCH v3 10/14] tcg/arm: Give enum arm_cond_code_e a typedef and use it

2021-08-18 Thread Philippe Mathieu-Daudé

On 8/18/21 11:29 PM, Richard Henderson wrote:
> Signed-off-by: Richard Henderson 
> ---
>  tcg/arm/tcg-target.c.inc | 136 +++
>  1 file changed, 68 insertions(+), 68 deletions(-)

Reviewed-by: Philippe Mathieu-Daudé

Re: [PATCH v3 09/14] tcg/arm: Drop inline markers

2021-08-18 Thread Philippe Mathieu-Daudé

On 8/18/21 11:29 PM, Richard Henderson wrote:
> Let the compiler decide about inlining.
> Remove tcg_out_nop as unused.
> 
> Signed-off-by: Richard Henderson 
> ---
>  tcg/arm/tcg-target.c.inc | 234 +++
>  1 file changed, 114 insertions(+), 120 deletions(-)

Reviewed-by: Philippe Mathieu-Daudé

Re: [PATCH v3 02/14] tcg/arm: Standardize on tcg_out__{reg,imm}

2021-08-18 Thread Philippe Mathieu-Daudé

On 8/18/21 11:29 PM, Richard Henderson wrote:
> Some of the functions specified _reg, some _imm, and some
> left it blank.  Make it clearer to which we are referring.
> 
> Split tcg_out_b_reg from tcg_out_bx_reg, to indicate when
> we do not actually require BX semantics.
> 
> Signed-off-by: Richard Henderson 
> ---
>  tcg/arm/tcg-target.c.inc | 38 ++
>  1 file changed, 22 insertions(+), 16 deletions(-)

Appreciated cleanup :)

Reviewed-by: Philippe Mathieu-Daudé

[PATCH v2 4/5] target/mips: Store CP0_Config0 in DisasContext

2021-08-18 Thread Philippe Mathieu-Daudé

Most TCG helpers only have access to a DisasContext pointer,
not CPUMIPSState. Store a copy of CPUMIPSState::CP0_Config0
in DisasContext so we can access it from TCG helpers.

Signed-off-by: Philippe Mathieu-Daudé 
Reviewed-by: Richard Henderson 
Message-Id: <20210818164321.2474534-5-f4...@amsat.org>
---
 target/mips/tcg/translate.h | 1 +
 target/mips/tcg/translate.c | 1 +
 2 files changed, 2 insertions(+)

diff --git a/target/mips/tcg/translate.h b/target/mips/tcg/translate.h
index bb0a6b8d74f..9d325c836aa 100644
--- a/target/mips/tcg/translate.h
+++ b/target/mips/tcg/translate.h
@@ -18,6 +18,7 @@ typedef struct DisasContext {
 target_ulong page_start;
 uint32_t opcode;
 uint64_t insn_flags;
+int32_t CP0_Config0;
 int32_t CP0_Config1;
 int32_t CP0_Config2;
 int32_t CP0_Config3;
diff --git a/target/mips/tcg/translate.c b/target/mips/tcg/translate.c
index a58d50e40e2..572104e2cc2 100644
--- a/target/mips/tcg/translate.c
+++ b/target/mips/tcg/translate.c
@@ -16034,6 +16034,7 @@ static void mips_tr_init_disas_context(DisasContextBase 
*dcbase, CPUState *cs)
 ctx->page_start = ctx->base.pc_first & TARGET_PAGE_MASK;
 ctx->saved_pc = -1;
 ctx->insn_flags = env->insn_flags;
+ctx->CP0_Config0 = env->CP0_Config0;
 ctx->CP0_Config1 = env->CP0_Config1;
 ctx->CP0_Config2 = env->CP0_Config2;
 ctx->CP0_Config3 = env->CP0_Config3;
-- 
2.31.1

[PATCH v2 5/5] target/mips: Replace TARGET_WORDS_BIGENDIAN by cpu_is_bigendian()

2021-08-18 Thread Philippe Mathieu-Daudé

Add the inlined cpu_is_bigendian() function in "translate.h".

Replace the TARGET_WORDS_BIGENDIAN #ifdef'ry by calls to
cpu_is_bigendian().

Signed-off-by: Philippe Mathieu-Daudé 
Reviewed-by: Richard Henderson 
Message-Id: <20210818164321.2474534-6-f4...@amsat.org>
---
 target/mips/tcg/translate.h  |  5 ++
 target/mips/tcg/translate.c  | 70 
 target/mips/tcg/nanomips_translate.c.inc | 20 +++
 3 files changed, 50 insertions(+), 45 deletions(-)

diff --git a/target/mips/tcg/translate.h b/target/mips/tcg/translate.h
index 9d325c836aa..dfb1552c2fc 100644
--- a/target/mips/tcg/translate.h
+++ b/target/mips/tcg/translate.h
@@ -212,4 +212,9 @@ bool decode_ext_vr54xx(DisasContext *ctx, uint32_t insn);
 static bool trans_##NAME(DisasContext *ctx, arg_##NAME *a) \
 { return FUNC(ctx, a, __VA_ARGS__); }
 
+static inline bool cpu_is_bigendian(DisasContext *ctx)
+{
+return extract32(ctx->CP0_Config0, CP0C0_BE, 1);
+}
+
 #endif
diff --git a/target/mips/tcg/translate.c b/target/mips/tcg/translate.c
index 572104e2cc2..f182e64643d 100644
--- a/target/mips/tcg/translate.c
+++ b/target/mips/tcg/translate.c
@@ -2093,9 +2093,9 @@ static void gen_ld(DisasContext *ctx, uint32_t opc,
  */
 tcg_gen_qemu_ld_tl(t1, t0, mem_idx, MO_UB);
 tcg_gen_andi_tl(t1, t0, 7);
-#ifndef TARGET_WORDS_BIGENDIAN
-tcg_gen_xori_tl(t1, t1, 7);
-#endif
+if (!cpu_is_bigendian(ctx)) {
+tcg_gen_xori_tl(t1, t1, 7);
+}
 tcg_gen_shli_tl(t1, t1, 3);
 tcg_gen_andi_tl(t0, t0, ~7);
 tcg_gen_qemu_ld_tl(t0, t0, mem_idx, MO_TEQ);
@@ -2117,9 +2117,9 @@ static void gen_ld(DisasContext *ctx, uint32_t opc,
  */
 tcg_gen_qemu_ld_tl(t1, t0, mem_idx, MO_UB);
 tcg_gen_andi_tl(t1, t0, 7);
-#ifdef TARGET_WORDS_BIGENDIAN
-tcg_gen_xori_tl(t1, t1, 7);
-#endif
+if (cpu_is_bigendian(ctx)) {
+tcg_gen_xori_tl(t1, t1, 7);
+}
 tcg_gen_shli_tl(t1, t1, 3);
 tcg_gen_andi_tl(t0, t0, ~7);
 tcg_gen_qemu_ld_tl(t0, t0, mem_idx, MO_TEQ);
@@ -2198,9 +2198,9 @@ static void gen_ld(DisasContext *ctx, uint32_t opc,
  */
 tcg_gen_qemu_ld_tl(t1, t0, mem_idx, MO_UB);
 tcg_gen_andi_tl(t1, t0, 3);
-#ifndef TARGET_WORDS_BIGENDIAN
-tcg_gen_xori_tl(t1, t1, 3);
-#endif
+if (!cpu_is_bigendian(ctx)) {
+tcg_gen_xori_tl(t1, t1, 3);
+}
 tcg_gen_shli_tl(t1, t1, 3);
 tcg_gen_andi_tl(t0, t0, ~3);
 tcg_gen_qemu_ld_tl(t0, t0, mem_idx, MO_TEUL);
@@ -2226,9 +2226,9 @@ static void gen_ld(DisasContext *ctx, uint32_t opc,
  */
 tcg_gen_qemu_ld_tl(t1, t0, mem_idx, MO_UB);
 tcg_gen_andi_tl(t1, t0, 3);
-#ifdef TARGET_WORDS_BIGENDIAN
-tcg_gen_xori_tl(t1, t1, 3);
-#endif
+if (cpu_is_bigendian(ctx)) {
+tcg_gen_xori_tl(t1, t1, 3);
+}
 tcg_gen_shli_tl(t1, t1, 3);
 tcg_gen_andi_tl(t0, t0, ~3);
 tcg_gen_qemu_ld_tl(t0, t0, mem_idx, MO_TEUL);
@@ -4445,9 +4445,9 @@ static void gen_loongson_lswc2(DisasContext *ctx, int rt,
 t1 = tcg_temp_new();
 tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB);
 tcg_gen_andi_tl(t1, t0, 3);
-#ifndef TARGET_WORDS_BIGENDIAN
-tcg_gen_xori_tl(t1, t1, 3);
-#endif
+if (!cpu_is_bigendian(ctx)) {
+tcg_gen_xori_tl(t1, t1, 3);
+}
 tcg_gen_shli_tl(t1, t1, 3);
 tcg_gen_andi_tl(t0, t0, ~3);
 tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TEUL);
@@ -4475,9 +4475,9 @@ static void gen_loongson_lswc2(DisasContext *ctx, int rt,
 t1 = tcg_temp_new();
 tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB);
 tcg_gen_andi_tl(t1, t0, 3);
-#ifdef TARGET_WORDS_BIGENDIAN
-tcg_gen_xori_tl(t1, t1, 3);
-#endif
+if (cpu_is_bigendian(ctx)) {
+tcg_gen_xori_tl(t1, t1, 3);
+}
 tcg_gen_shli_tl(t1, t1, 3);
 tcg_gen_andi_tl(t0, t0, ~3);
 tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TEUL);
@@ -4507,9 +4507,9 @@ static void gen_loongson_lswc2(DisasContext *ctx, int rt,
 t1 = tcg_temp_new();
 tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB);
 tcg_gen_andi_tl(t1, t0, 7);
-#ifndef TARGET_WORDS_BIGENDIAN
-tcg_gen_xori_tl(t1, t1, 7);
-#endif
+if (!cpu_is_bigendian(ctx)) {
+tcg_gen_xori_tl(t1, t1, 7);
+}
 tcg_gen_shli_tl(t1, t1, 3);
 tcg_gen_andi_tl(t0, t0, ~7);
 tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_TEQ);
@@ -4529,9 +4529,9 @@ static void gen_loongson_lswc2(DisasContext *ctx, int rt,
 t1 = tcg_temp_new();
 tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB);
 tcg_gen_andi_tl(t1, t0, 7);
-#ifdef TARGET_WORDS_BIGENDIAN
-tcg_gen_xori_tl(t1, t1, 7);
-#end

[PATCH v2 3/5] target/mips: Replace GET_LMASK64() macro by get_lmask(64) function

2021-08-18 Thread Philippe Mathieu-Daudé

The target endianness information is stored in the BigEndian
bit of the Config0 register in CP0.

Replace the GET_LMASK64() macro by the inlined get_lmask() function,
passing CPUMIPSState and the word size (64) as argument.

We can remove another use of the TARGET_WORDS_BIGENDIAN definition.

Signed-off-by: Philippe Mathieu-Daudé 
---
 target/mips/tcg/ldst_helper.c | 35 ---
 1 file changed, 16 insertions(+), 19 deletions(-)

diff --git a/target/mips/tcg/ldst_helper.c b/target/mips/tcg/ldst_helper.c
index c48a2818681..139f4d833bd 100644
--- a/target/mips/tcg/ldst_helper.c
+++ b/target/mips/tcg/ldst_helper.c
@@ -124,50 +124,46 @@ void helper_swr(CPUMIPSState *env, target_ulong arg1, 
target_ulong arg2,
  * "half" load and stores.  We must do the memory access inline,
  * or fault handling won't work.
  */
-#ifdef TARGET_WORDS_BIGENDIAN
-#define GET_LMASK64(v) ((v) & 7)
-#else
-#define GET_LMASK64(v) (((v) & 7) ^ 7)
-#endif
 
 void helper_sdl(CPUMIPSState *env, target_ulong arg1, target_ulong arg2,
 int mem_idx)
 {
+target_ulong lmask = get_lmask(env, arg2, 64);
 int dir = cpu_is_bigendian(env) ? 1 : -1;
 
 cpu_stb_mmuidx_ra(env, arg2, (uint8_t)(arg1 >> 56), mem_idx, GETPC());
 
-if (GET_LMASK64(arg2) <= 6) {
+if (lmask <= 6) {
 cpu_stb_mmuidx_ra(env, arg2 + 1 * dir, (uint8_t)(arg1 >> 48),
   mem_idx, GETPC());
 }
 
-if (GET_LMASK64(arg2) <= 5) {
+if (lmask <= 5) {
 cpu_stb_mmuidx_ra(env, arg2 + 2 * dir, (uint8_t)(arg1 >> 40),
   mem_idx, GETPC());
 }
 
-if (GET_LMASK64(arg2) <= 4) {
+if (lmask <= 4) {
 cpu_stb_mmuidx_ra(env, arg2 + 3 * dir, (uint8_t)(arg1 >> 32),
   mem_idx, GETPC());
 }
 
-if (GET_LMASK64(arg2) <= 3) {
+if (lmask <= 3) {
 cpu_stb_mmuidx_ra(env, arg2 + 4 * dir, (uint8_t)(arg1 >> 24),
   mem_idx, GETPC());
 }
 
-if (GET_LMASK64(arg2) <= 2) {
+if (lmask <= 2) {
 cpu_stb_mmuidx_ra(env, arg2 + 5 * dir, (uint8_t)(arg1 >> 16),
   mem_idx, GETPC());
 }
 
-if (GET_LMASK64(arg2) <= 1) {
+if (lmask <= 1) {
 cpu_stb_mmuidx_ra(env, arg2 + 6 * dir, (uint8_t)(arg1 >> 8),
   mem_idx, GETPC());
 }
 
-if (GET_LMASK64(arg2) <= 0) {
+if (lmask <= 0) {
 cpu_stb_mmuidx_ra(env, arg2 + 7 * dir, (uint8_t)arg1,
   mem_idx, GETPC());
 }
@@ -176,41 +172,42 @@ void helper_sdl(CPUMIPSState *env, target_ulong arg1, 
target_ulong arg2,
 void helper_sdr(CPUMIPSState *env, target_ulong arg1, target_ulong arg2,
 int mem_idx)
 {
+target_ulong lmask = get_lmask(env, arg2, 64);
 int dir = cpu_is_bigendian(env) ? 1 : -1;
 
 cpu_stb_mmuidx_ra(env, arg2, (uint8_t)arg1, mem_idx, GETPC());
 
-if (GET_LMASK64(arg2) >= 1) {
+if (lmask >= 1) {
 cpu_stb_mmuidx_ra(env, arg2 - 1 * dir, (uint8_t)(arg1 >> 8),
   mem_idx, GETPC());
 }
 
-if (GET_LMASK64(arg2) >= 2) {
+if (lmask >= 2) {
 cpu_stb_mmuidx_ra(env, arg2 - 2 * dir, (uint8_t)(arg1 >> 16),
   mem_idx, GETPC());
 }
 
-if (GET_LMASK64(arg2) >= 3) {
+if (lmask >= 3) {
 cpu_stb_mmuidx_ra(env, arg2 - 3 * dir, (uint8_t)(arg1 >> 24),
   mem_idx, GETPC());
 }
 
-if (GET_LMASK64(arg2) >= 4) {
+if (lmask >= 4) {
 cpu_stb_mmuidx_ra(env, arg2 - 4 * dir, (uint8_t)(arg1 >> 32),
   mem_idx, GETPC());
 }
 
-if (GET_LMASK64(arg2) >= 5) {
+if (lmask >= 5) {
 cpu_stb_mmuidx_ra(env, arg2 - 5 * dir, (uint8_t)(arg1 >> 40),
   mem_idx, GETPC());
 }
 
-if (GET_LMASK64(arg2) >= 6) {
+if (lmask >= 6) {
 cpu_stb_mmuidx_ra(env, arg2 - 6 * dir, (uint8_t)(arg1 >> 48),
   mem_idx, GETPC());
 }
 
-if (GET_LMASK64(arg2) == 7) {
+if (lmask == 7) {
 cpu_stb_mmuidx_ra(env, arg2 - 7 * dir, (uint8_t)(arg1 >> 56),
   mem_idx, GETPC());
 }
-- 
2.31.1

[PATCH v2 2/5] target/mips: Replace GET_LMASK() macro by get_lmask(32) function

2021-08-18 Thread Philippe Mathieu-Daudé

The target endianness information is stored in the BigEndian
bit of the Config0 register in CP0.

Replace the GET_LMASK() macro by an inlined get_lmask() function,
passing CPUMIPSState and the word size as argument.

We can remove one use of the TARGET_WORDS_BIGENDIAN definition.

Signed-off-by: Philippe Mathieu-Daudé 
---
 target/mips/tcg/ldst_helper.c | 32 +---
 1 file changed, 21 insertions(+), 11 deletions(-)

diff --git a/target/mips/tcg/ldst_helper.c b/target/mips/tcg/ldst_helper.c
index 8d1dfea6766..c48a2818681 100644
--- a/target/mips/tcg/ldst_helper.c
+++ b/target/mips/tcg/ldst_helper.c
@@ -57,30 +57,39 @@ static inline bool cpu_is_bigendian(CPUMIPSState *env)
 return extract32(env->CP0_Config0, CP0C0_BE, 1);
 }
 
-#ifdef TARGET_WORDS_BIGENDIAN
-#define GET_LMASK(v) ((v) & 3)
-#else
-#define GET_LMASK(v) (((v) & 3) ^ 3)
-#endif
+static inline target_ulong get_lmask(CPUMIPSState *env,
+ target_ulong value, unsigned bits)
+{
+unsigned mask = (bits / BITS_PER_BYTE) - 1; /* byte-in-word index mask */
+
+value &= mask;
+
+if (!cpu_is_bigendian(env)) { /* LE flips, as the removed GET_LMASK did */
+value ^= mask;
+}
+
+return value;
+}
 
 void helper_swl(CPUMIPSState *env, target_ulong arg1, target_ulong arg2,
 int mem_idx)
 {
+target_ulong lmask = get_lmask(env, arg2, 32);
 int dir = cpu_is_bigendian(env) ? 1 : -1;
 
 cpu_stb_mmuidx_ra(env, arg2, (uint8_t)(arg1 >> 24), mem_idx, GETPC());
 
-if (GET_LMASK(arg2) <= 2) {
+if (lmask <= 2) {
 cpu_stb_mmuidx_ra(env, arg2 + 1 * dir, (uint8_t)(arg1 >> 16),
   mem_idx, GETPC());
 }
 
-if (GET_LMASK(arg2) <= 1) {
+if (lmask <= 1) {
 cpu_stb_mmuidx_ra(env, arg2 + 2 * dir, (uint8_t)(arg1 >> 8),
   mem_idx, GETPC());
 }
 
-if (GET_LMASK(arg2) == 0) {
+if (lmask == 0) {
 cpu_stb_mmuidx_ra(env, arg2 + 3 * dir, (uint8_t)arg1,
   mem_idx, GETPC());
 }
@@ -89,21 +98,22 @@ void helper_swl(CPUMIPSState *env, target_ulong arg1, 
target_ulong arg2,
 void helper_swr(CPUMIPSState *env, target_ulong arg1, target_ulong arg2,
 int mem_idx)
 {
+target_ulong lmask = get_lmask(env, arg2, 32);
 int dir = cpu_is_bigendian(env) ? 1 : -1;
 
 cpu_stb_mmuidx_ra(env, arg2, (uint8_t)arg1, mem_idx, GETPC());
 
-if (GET_LMASK(arg2) >= 1) {
+if (lmask >= 1) {
 cpu_stb_mmuidx_ra(env, arg2 - 1 * dir, (uint8_t)(arg1 >> 8),
   mem_idx, GETPC());
 }
 
-if (GET_LMASK(arg2) >= 2) {
+if (lmask >= 2) {
 cpu_stb_mmuidx_ra(env, arg2 - 2 * dir, (uint8_t)(arg1 >> 16),
   mem_idx, GETPC());
 }
 
-if (GET_LMASK(arg2) == 3) {
+if (lmask == 3) {
 cpu_stb_mmuidx_ra(env, arg2 - 3 * dir, (uint8_t)(arg1 >> 24),
   mem_idx, GETPC());
 }
-- 
2.31.1

[PATCH v2 1/5] target/mips: Call cpu_is_bigendian & inline GET_OFFSET in ld/st helpers

2021-08-18 Thread Philippe Mathieu-Daudé

The target endianness information is stored in the BigEndian
bit of the Config0 register in CP0.

As a first step, inline the GET_OFFSET() macro, calling
cpu_is_bigendian() to get the 'direction' of the offset.

Signed-off-by: Philippe Mathieu-Daudé 
---
 target/mips/tcg/ldst_helper.c | 55 +--
 1 file changed, 33 insertions(+), 22 deletions(-)

diff --git a/target/mips/tcg/ldst_helper.c b/target/mips/tcg/ldst_helper.c
index d42812b8a6a..8d1dfea6766 100644
--- a/target/mips/tcg/ldst_helper.c
+++ b/target/mips/tcg/ldst_helper.c
@@ -52,31 +52,36 @@ HELPER_LD_ATOMIC(lld, ldq, 0x7, (target_ulong))
 
 #endif /* !CONFIG_USER_ONLY */
 
+static inline bool cpu_is_bigendian(CPUMIPSState *env)
+{
+return extract32(env->CP0_Config0, CP0C0_BE, 1);
+}
+
 #ifdef TARGET_WORDS_BIGENDIAN
 #define GET_LMASK(v) ((v) & 3)
-#define GET_OFFSET(addr, offset) (addr + (offset))
 #else
 #define GET_LMASK(v) (((v) & 3) ^ 3)
-#define GET_OFFSET(addr, offset) (addr - (offset))
 #endif
 
 void helper_swl(CPUMIPSState *env, target_ulong arg1, target_ulong arg2,
 int mem_idx)
 {
+int dir = cpu_is_bigendian(env) ? 1 : -1;
+
 cpu_stb_mmuidx_ra(env, arg2, (uint8_t)(arg1 >> 24), mem_idx, GETPC());
 
 if (GET_LMASK(arg2) <= 2) {
-cpu_stb_mmuidx_ra(env, GET_OFFSET(arg2, 1), (uint8_t)(arg1 >> 16),
+cpu_stb_mmuidx_ra(env, arg2 + 1 * dir, (uint8_t)(arg1 >> 16),
   mem_idx, GETPC());
 }
 
 if (GET_LMASK(arg2) <= 1) {
-cpu_stb_mmuidx_ra(env, GET_OFFSET(arg2, 2), (uint8_t)(arg1 >> 8),
+cpu_stb_mmuidx_ra(env, arg2 + 2 * dir, (uint8_t)(arg1 >> 8),
   mem_idx, GETPC());
 }
 
 if (GET_LMASK(arg2) == 0) {
-cpu_stb_mmuidx_ra(env, GET_OFFSET(arg2, 3), (uint8_t)arg1,
+cpu_stb_mmuidx_ra(env, arg2 + 3 * dir, (uint8_t)arg1,
   mem_idx, GETPC());
 }
 }
@@ -84,20 +89,22 @@ void helper_swl(CPUMIPSState *env, target_ulong arg1, 
target_ulong arg2,
 void helper_swr(CPUMIPSState *env, target_ulong arg1, target_ulong arg2,
 int mem_idx)
 {
+int dir = cpu_is_bigendian(env) ? 1 : -1;
+
 cpu_stb_mmuidx_ra(env, arg2, (uint8_t)arg1, mem_idx, GETPC());
 
 if (GET_LMASK(arg2) >= 1) {
-cpu_stb_mmuidx_ra(env, GET_OFFSET(arg2, -1), (uint8_t)(arg1 >> 8),
+cpu_stb_mmuidx_ra(env, arg2 - 1 * dir, (uint8_t)(arg1 >> 8),
   mem_idx, GETPC());
 }
 
 if (GET_LMASK(arg2) >= 2) {
-cpu_stb_mmuidx_ra(env, GET_OFFSET(arg2, -2), (uint8_t)(arg1 >> 16),
+cpu_stb_mmuidx_ra(env, arg2 - 2 * dir, (uint8_t)(arg1 >> 16),
   mem_idx, GETPC());
 }
 
 if (GET_LMASK(arg2) == 3) {
-cpu_stb_mmuidx_ra(env, GET_OFFSET(arg2, -3), (uint8_t)(arg1 >> 24),
+cpu_stb_mmuidx_ra(env, arg2 - 3 * dir, (uint8_t)(arg1 >> 24),
   mem_idx, GETPC());
 }
 }
@@ -116,40 +123,42 @@ void helper_swr(CPUMIPSState *env, target_ulong arg1, 
target_ulong arg2,
 void helper_sdl(CPUMIPSState *env, target_ulong arg1, target_ulong arg2,
 int mem_idx)
 {
+int dir = cpu_is_bigendian(env) ? 1 : -1;
+
 cpu_stb_mmuidx_ra(env, arg2, (uint8_t)(arg1 >> 56), mem_idx, GETPC());
 
 if (GET_LMASK64(arg2) <= 6) {
-cpu_stb_mmuidx_ra(env, GET_OFFSET(arg2, 1), (uint8_t)(arg1 >> 48),
+cpu_stb_mmuidx_ra(env, arg2 + 1 * dir, (uint8_t)(arg1 >> 48),
   mem_idx, GETPC());
 }
 
 if (GET_LMASK64(arg2) <= 5) {
-cpu_stb_mmuidx_ra(env, GET_OFFSET(arg2, 2), (uint8_t)(arg1 >> 40),
+cpu_stb_mmuidx_ra(env, arg2 + 2 * dir, (uint8_t)(arg1 >> 40),
   mem_idx, GETPC());
 }
 
 if (GET_LMASK64(arg2) <= 4) {
-cpu_stb_mmuidx_ra(env, GET_OFFSET(arg2, 3), (uint8_t)(arg1 >> 32),
+cpu_stb_mmuidx_ra(env, arg2 + 3 * dir, (uint8_t)(arg1 >> 32),
   mem_idx, GETPC());
 }
 
 if (GET_LMASK64(arg2) <= 3) {
-cpu_stb_mmuidx_ra(env, GET_OFFSET(arg2, 4), (uint8_t)(arg1 >> 24),
+cpu_stb_mmuidx_ra(env, arg2 + 4 * dir, (uint8_t)(arg1 >> 24),
   mem_idx, GETPC());
 }
 
 if (GET_LMASK64(arg2) <= 2) {
-cpu_stb_mmuidx_ra(env, GET_OFFSET(arg2, 5), (uint8_t)(arg1 >> 16),
+cpu_stb_mmuidx_ra(env, arg2 + 5 * dir, (uint8_t)(arg1 >> 16),
   mem_idx, GETPC());
 }
 
 if (GET_LMASK64(arg2) <= 1) {
-cpu_stb_mmuidx_ra(env, GET_OFFSET(arg2, 6), (uint8_t)(arg1 >> 8),
+cpu_stb_mmuidx_ra(env, arg2 + 6 * dir, (uint8_t)(arg1 >> 8),
   mem_idx, GETPC());
 }
 
 if (GET_LMASK64(arg2) <= 0) {
-cpu_stb_mmuidx_ra(env, GET_OFFSET(arg2, 7), (uint8_t)arg1,
+cpu_stb_mmuidx_ra(env, arg2 + 7 * dir, (uint8_t)arg1,
   mem_idx, GETPC());
 }
 }
@@ -157,40 +166,42 @@ void helper_sdl(CPUMIPSState *env, target_ulong a

[PATCH v2 0/5] target/mips: Replace TARGET_WORDS_BIGENDIAN by cpu_is_bigendian()

2021-08-18 Thread Philippe Mathieu-Daudé

Missing review: 1-3

MIPS CPUs store their endianness in the CP0 Config0 register.
Use that runtime information instead of #ifdef'ry checking
TARGET_WORDS_BIGENDIAN, by introducing the cpu_is_bigendian()
helper.

Since v1:
- Addressed rth's comments (call cpu_is_bigendian/get_lmask once)
- Add rth R-b on patches 4-5

Philippe Mathieu-Daudé (5):
  target/mips: Call cpu_is_bigendian & inline GET_OFFSET in ld/st
helpers
  target/mips: Replace GET_LMASK() macro by get_lmask(32) function
  target/mips: Replace GET_LMASK64() macro by get_lmask(64) function
  target/mips: Store CP0_Config0 in DisasContext
  target/mips: Replace TARGET_WORDS_BIGENDIAN by cpu_is_bigendian()

 target/mips/tcg/translate.h  |   6 ++
 target/mips/tcg/ldst_helper.c| 122 +--
 target/mips/tcg/translate.c  |  71 ++---
 target/mips/tcg/nanomips_translate.c.inc |  20 ++--
 4 files changed, 122 insertions(+), 97 deletions(-)

-- 
2.31.1

Re: [PATCH 1/5] target/mips: Replace GET_OFFSET() macro by get_offset() function

2021-08-18 Thread Philippe Mathieu-Daudé

On 8/18/21 6:56 PM, Richard Henderson wrote:
> On 8/18/21 6:43 AM, Philippe Mathieu-Daudé wrote:
>> The target endianess information is stored in the BigEndian
>> bit of the Config0 register in CP0.
>>
>> As a first step, replace the GET_OFFSET() macro by an inlined
>> get_offset() function, passing CPUMIPSState as argument.
>>
>> Signed-off-by: Philippe Mathieu-Daudé 
>> ---
>>   target/mips/tcg/ldst_helper.c | 57 +--
>>   1 file changed, 35 insertions(+), 22 deletions(-)
>>
>> diff --git a/target/mips/tcg/ldst_helper.c
>> b/target/mips/tcg/ldst_helper.c
>> index d42812b8a6a..97e7ad7d7a4 100644
>> --- a/target/mips/tcg/ldst_helper.c
>> +++ b/target/mips/tcg/ldst_helper.c
>> @@ -52,31 +52,44 @@ HELPER_LD_ATOMIC(lld, ldq, 0x7, (target_ulong))
>>     #endif /* !CONFIG_USER_ONLY */
>>   +static inline bool cpu_is_bigendian(CPUMIPSState *env)
>> +{
>> +    return extract32(env->CP0_Config0, CP0C0_BE, 1);
>> +}
>> +
>>   #ifdef TARGET_WORDS_BIGENDIAN
>>   #define GET_LMASK(v) ((v) & 3)
>> -#define GET_OFFSET(addr, offset) (addr + (offset))
>>   #else
>>   #define GET_LMASK(v) (((v) & 3) ^ 3)
>> -#define GET_OFFSET(addr, offset) (addr - (offset))
>>   #endif
>>   +static inline target_ulong get_offset(CPUMIPSState *env,
>> +  target_ulong addr, int offset)
>> +{
>> +    if (cpu_is_bigendian(env)) {
>> +    return addr + offset;
>> +    } else {
>> +    return addr - offset;
>> +    }
>> +}
>> +
>>   void helper_swl(CPUMIPSState *env, target_ulong arg1, target_ulong
>> arg2,
>>   int mem_idx)
>>   {
>>   cpu_stb_mmuidx_ra(env, arg2, (uint8_t)(arg1 >> 24), mem_idx,
>> GETPC());
>>     if (GET_LMASK(arg2) <= 2) {
>> -    cpu_stb_mmuidx_ra(env, GET_OFFSET(arg2, 1), (uint8_t)(arg1 >>
>> 16),
>> +    cpu_stb_mmuidx_ra(env, get_offset(env, arg2, 1),
>> (uint8_t)(arg1 >> 16),
>>     mem_idx, GETPC());
>>   }
>>     if (GET_LMASK(arg2) <= 1) {
>> -    cpu_stb_mmuidx_ra(env, GET_OFFSET(arg2, 2), (uint8_t)(arg1 >>
>> 8),
>> +    cpu_stb_mmuidx_ra(env, get_offset(env, arg2, 2),
>> (uint8_t)(arg1 >> 8),
>>     mem_idx, GETPC());
> 
> So... yes, this is an improvement, but it's now substituting a constant
> for a runtime variable many times over.

Oops indeed.

>  I think you should drop
> get_offset() entirely and replace it with
> 
>     int dir = cpu_is_bigendian(env) ? 1 : -1;
> 
>     stb(env, arg2 + 1 * dir, data);
> 
>     stb(env, arg2 + 2 * dir, data);
> 
> Alternately, bite the bullet and split the function(s) into two,
> explicitly endian versions: helper_swl_be, helper_swl_le, etc.

I'll go for the easier path ;)

[PATCH v3 12/14] tcg/arm: More use of the TCGReg enum

2021-08-18 Thread Richard Henderson

Signed-off-by: Richard Henderson 
---
 tcg/arm/tcg-target.c.inc | 65 +---
 1 file changed, 35 insertions(+), 30 deletions(-)

diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
index 2f55b94ada..35bd4c68d6 100644
--- a/tcg/arm/tcg-target.c.inc
+++ b/tcg/arm/tcg-target.c.inc
@@ -552,7 +552,7 @@ static void tcg_out_bl_imm(TCGContext *s, ARMCond cond, 
int32_t offset)
 (((offset - 8) >> 2) & 0x00ff));
 }
 
-static void tcg_out_blx_reg(TCGContext *s, ARMCond cond, int rn)
+static void tcg_out_blx_reg(TCGContext *s, ARMCond cond, TCGReg rn)
 {
 tcg_out32(s, (cond << 28) | 0x012fff30 | rn);
 }
@@ -563,14 +563,14 @@ static void tcg_out_blx_imm(TCGContext *s, int32_t offset)
 (((offset - 8) >> 2) & 0x00ff));
 }
 
-static void tcg_out_dat_reg(TCGContext *s, ARMCond cond, ARMInsn opc, int rd,
-int rn, int rm, int shift)
+static void tcg_out_dat_reg(TCGContext *s, ARMCond cond, ARMInsn opc,
+TCGReg rd, TCGReg rn, TCGReg rm, int shift)
 {
 tcg_out32(s, (cond << 28) | (0 << 25) | opc |
 (rn << 16) | (rd << 12) | shift | rm);
 }
 
-static void tcg_out_mov_reg(TCGContext *s, ARMCond cond, int rd, int rm)
+static void tcg_out_mov_reg(TCGContext *s, ARMCond cond, TCGReg rd, TCGReg rm)
 {
 /* Simple reg-reg move, optimising out the 'do nothing' case */
 if (rd != rm) {
@@ -597,7 +597,7 @@ static void tcg_out_b_reg(TCGContext *s, ARMCond cond, 
TCGReg rn)
 }
 
 static void tcg_out_dat_imm(TCGContext *s, ARMCond cond, ARMInsn opc,
-int rd, int rn, int im)
+TCGReg rd, TCGReg rn, int im)
 {
 tcg_out32(s, (cond << 28) | (1 << 25) | opc |
 (rn << 16) | (rd << 12) | im);
@@ -781,13 +781,15 @@ static void tcg_out_ld8s_r(TCGContext *s, ARMCond cond, 
TCGReg rt,
 tcg_out_memop_r(s, cond, INSN_LDRSB_REG, rt, rn, rm, 1, 1, 0);
 }
 
-static void tcg_out_movi_pool(TCGContext *s, ARMCond cond, int rd, uint32_t 
arg)
+static void tcg_out_movi_pool(TCGContext *s, ARMCond cond,
+  TCGReg rd, uint32_t arg)
 {
 new_pool_label(s, arg, R_ARM_PC13, s->code_ptr, 0);
 tcg_out_ld32_12(s, cond, rd, TCG_REG_PC, 0);
 }
 
-static void tcg_out_movi32(TCGContext *s, ARMCond cond, int rd, uint32_t arg)
+static void tcg_out_movi32(TCGContext *s, ARMCond cond,
+   TCGReg rd, uint32_t arg)
 {
 int imm12, diff, opc, sh1, sh2;
 uint32_t tt0, tt1, tt2;
@@ -866,8 +868,8 @@ static void tcg_out_movi32(TCGContext *s, ARMCond cond, int 
rd, uint32_t arg)
  * Emit either the reg,imm or reg,reg form of a data-processing insn.
  * rhs must satisfy the "rI" constraint.
  */
-static void tcg_out_dat_rI(TCGContext *s, ARMCond cond, ARMInsn opc, TCGArg 
dst,
-   TCGArg lhs, TCGArg rhs, int rhs_is_const)
+static void tcg_out_dat_rI(TCGContext *s, ARMCond cond, ARMInsn opc,
+   TCGReg dst, TCGReg lhs, TCGArg rhs, int 
rhs_is_const)
 {
 if (rhs_is_const) {
 tcg_out_dat_imm(s, cond, opc, dst, lhs, encode_imm_nofail(rhs));
@@ -897,7 +899,7 @@ static void tcg_out_dat_rIK(TCGContext *s, ARMCond cond, 
ARMInsn opc,
 }
 
 static void tcg_out_dat_rIN(TCGContext *s, ARMCond cond, ARMInsn opc,
-ARMInsn opneg, TCGArg dst, TCGArg lhs, TCGArg rhs,
+ARMInsn opneg, TCGReg dst, TCGReg lhs, TCGArg rhs,
 bool rhs_is_const)
 {
 /* Emit either the reg,imm or reg,reg form of a data-processing insn.
@@ -971,17 +973,19 @@ static void tcg_out_smull32(TCGContext *s, ARMCond cond, 
TCGReg rd0,
   (rd1 << 16) | (rd0 << 12) | (rm << 8) | rn);
 }
 
-static void tcg_out_sdiv(TCGContext *s, ARMCond cond, int rd, int rn, int rm)
+static void tcg_out_sdiv(TCGContext *s, ARMCond cond,
+ TCGReg rd, TCGReg rn, TCGReg rm)
 {
 tcg_out32(s, 0x0710f010 | (cond << 28) | (rd << 16) | rn | (rm << 8));
 }
 
-static void tcg_out_udiv(TCGContext *s, ARMCond cond, int rd, int rn, int rm)
+static void tcg_out_udiv(TCGContext *s, ARMCond cond,
+ TCGReg rd, TCGReg rn, TCGReg rm)
 {
 tcg_out32(s, 0x0730f010 | (cond << 28) | (rd << 16) | rn | (rm << 8));
 }
 
-static void tcg_out_ext8s(TCGContext *s, ARMCond cond, int rd, int rn)
+static void tcg_out_ext8s(TCGContext *s, ARMCond cond, TCGReg rd, TCGReg rn)
 {
 if (use_armv6_instructions) {
 /* sxtb */
@@ -995,12 +999,12 @@ static void tcg_out_ext8s(TCGContext *s, ARMCond cond, 
int rd, int rn)
 }
 
 static void __attribute__((unused))
-tcg_out_ext8u(TCGContext *s, ARMCond cond, int rd, int rn)
+tcg_out_ext8u(TCGContext *s, ARMCond cond, TCGReg rd, TCGReg rn)
 {
 tcg_out_dat_imm(s, cond, ARITH_AND, rd, rn, 0xff);
 }
 
-static void tcg_out_ext16s(TCGContext *s, ARMCond cond, int rd, int rn)
+static void tcg_out_ext16s(T

Re: [RFC PATCH 00/13] Add support for Mirror VM.

2021-08-18 Thread Tobin Feldman-Fitzthum


On 8/18/21 3:04 PM, Dr. David Alan Gilbert wrote:

* Tobin Feldman-Fitzthum (to...@linux.ibm.com) wrote:

On 8/17/21 6:04 PM, Steve Rutherford wrote:

Ahh, It sounds like you are looking into sidestepping the existing
AMD-SP flows for migration. I assume the idea is to spin up a VM on
the target side, and have the two VMs attest to each other. How do the
two sides know if the other is legitimate? I take it that the source
is directing the LAUNCH flows?

Yeah we don't use PSP migration flows at all. We don't need to send the MH
code from the source to the target because the MH lives in firmware, which
is common between the two.

Are you relying on the target firmware to be *identical* or purely for
it to be *compatible* ?  It's normal for a migration to be the result of
wanting to do an upgrade; and that means the destination build of OVMF
might be newer (or older, or ...).

Dave


This is a good point. The migration handler on the source and target 
must have the same memory footprint or bad things will happen. Using the 
same firmware on the source and target is an easy way to guarantee this. 
Since the MH in OVMF is not a contiguous region of memory, but a group 
of functions scattered around OVMF, it is a bit difficult to guarantee 
that the memory footprint is the same if the build is different.


-Tobin





We start the target like a normal VM rather than
waiting for an incoming migration. The plan is to treat the target like a
normal VM for attestation as well. The guest owner will attest the target VM
just like they would any other VM that is started on their behalf. Secret
injection can be used to establish a shared key for the source and target.

-Tobin


--Steve

[PATCH v3 14/14] tcg/arm: Support raising sigbus for user-only

2021-08-18 Thread Richard Henderson

For v6+, use ldm/stm, ldrd/strd for the normal case of alignment
matching the access size.  Otherwise, emit a test + branch sequence
invoking helper_unaligned_{ld,st}.

For v4+v5, use piecewise load and stores to implement misalignment.

Signed-off-by: Richard Henderson 
---
 tcg/arm/tcg-target.h |   2 -
 tcg/arm/tcg-target.c.inc | 364 ---
 2 files changed, 340 insertions(+), 26 deletions(-)

diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
index e47720a85b..fa75fd3626 100644
--- a/tcg/arm/tcg-target.h
+++ b/tcg/arm/tcg-target.h
@@ -159,9 +159,7 @@ extern bool use_neon_instructions;
 /* not defined -- call should be eliminated at compile time */
 void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
 
-#ifdef CONFIG_SOFTMMU
 #define TCG_TARGET_NEED_LDST_LABELS
-#endif
 #define TCG_TARGET_NEED_POOL_LABELS
 
 #endif
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
index 2728035177..278639be44 100644
--- a/tcg/arm/tcg-target.c.inc
+++ b/tcg/arm/tcg-target.c.inc
@@ -23,6 +23,7 @@
  */
 
 #include "elf.h"
+#include "../tcg-ldst.c.inc"
 #include "../tcg-pool.c.inc"
 
 int arm_arch = __ARM_ARCH;
@@ -86,6 +87,7 @@ static const int tcg_target_call_oarg_regs[2] = {
 #define TCG_VEC_TMP  TCG_REG_Q15
 #ifndef CONFIG_SOFTMMU
 #define TCG_REG_GUEST_BASE  TCG_REG_R11
+#define TCG_REG_TMP2TCG_REG_R14
 #endif
 
 typedef enum {
@@ -137,7 +139,9 @@ typedef enum {
 INSN_CLZ   = 0x016f0f10,
 INSN_RBIT  = 0x06ff0f30,
 
+INSN_LDM   = 0x0890,
 INSN_LDMIA = 0x08b0,
+INSN_STM   = 0x0880,
 INSN_STMDB = 0x0920,
 
 INSN_LDR_IMM   = 0x0410,
@@ -1428,8 +1432,6 @@ static void tcg_out_vldst(TCGContext *s, ARMInsn insn,
 }
 
 #ifdef CONFIG_SOFTMMU
-#include "../tcg-ldst.c.inc"
-
 /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
  * int mmu_idx, uintptr_t ra)
  */
@@ -1762,6 +1764,74 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, 
TCGLabelQemuLdst *lb)
 tcg_out_goto(s, COND_AL, qemu_st_helpers[opc & MO_SIZE]);
 return true;
 }
+#else
+
+static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addrlo,
+   TCGReg addrhi, unsigned a_bits)
+{
+unsigned a_mask = (1 << a_bits) - 1;
+TCGLabelQemuLdst *label = new_ldst_label(s);
+
+label->is_ld = is_ld;
+label->addrlo_reg = addrlo;
+label->addrhi_reg = addrhi;
+
+/* We are expecting a_bits to max out at 7, and can easily support 8. */
+tcg_debug_assert(a_mask <= 0xff);
+/* tst addr, #mask */
+tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, a_mask);
+
+/* blne slow_path */
+label->label_ptr[0] = s->code_ptr;
+tcg_out_bl_imm(s, COND_NE, 0);
+
+label->raddr = tcg_splitwx_to_rx(s->code_ptr);
+}
+
+static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
+{
+if (!reloc_pc24(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
+return false;
+}
+
+if (TARGET_LONG_BITS == 64) {
+/* 64-bit target address is aligned into R2:R3. */
+if (l->addrhi_reg != TCG_REG_R2) {
+tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R2, l->addrlo_reg);
+tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R3, l->addrhi_reg);
+} else if (l->addrlo_reg != TCG_REG_R3) {
+tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R3, l->addrhi_reg);
+tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R2, l->addrlo_reg);
+} else {
+tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R1, TCG_REG_R2);
+tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R2, TCG_REG_R3);
+tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R3, TCG_REG_R1);
+}
+} else {
+tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R1, l->addrlo_reg);
+}
+tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_AREG0);
+
+/*
+ * Tail call to the helper, with the return address back inline,
+ * just for the clarity of the debugging traceback -- the helper
+ * cannot return.  We have used BLNE to arrive here, so LR is
+ * already set.
+ */
+tcg_out_goto(s, COND_AL, (const void *)
+ (l->is_ld ? helper_unaligned_ld : helper_unaligned_st));
+return true;
+}
+
+static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
+{
+return tcg_out_fail_alignment(s, l);
+}
+
+static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
+{
+return tcg_out_fail_alignment(s, l);
+}
 #endif /* SOFTMMU */
 
 static void tcg_out_qemu_ld_index(TCGContext *s, MemOp opc,
@@ -1811,45 +1881,175 @@ static void tcg_out_qemu_ld_index(TCGContext *s, MemOp 
opc,
 
 #ifndef CONFIG_SOFTMMU
 static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc, TCGReg datalo,
-   TCGReg datahi, TCGReg addrlo)
+   TCGReg datahi, TCGReg addrlo, uint8_t ofs)
 {
 /* Byte swapping is lef

[PATCH v3 05/14] tcg/arm: Examine QEMU_TCG_DEBUG environment variable

2021-08-18 Thread Richard Henderson

Use the environment variable to test an older ISA than
the one supported by the host.

Signed-off-by: Richard Henderson 
---
 tcg/arm/tcg-target.h |  8 +++-
 tcg/arm/tcg-target.c.inc | 32 
 2 files changed, 39 insertions(+), 1 deletion(-)

diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
index f41b809554..e47720a85b 100644
--- a/tcg/arm/tcg-target.h
+++ b/tcg/arm/tcg-target.h
@@ -28,9 +28,15 @@
 
 extern int arm_arch;
 
+#ifdef CONFIG_DEBUG_TCG
+#define use_armv5t_instructions (arm_arch >= 5)
+#define use_armv6_instructions  (arm_arch >= 6)
+#define use_armv7_instructions  (arm_arch >= 7)
+#else
 #define use_armv5t_instructions (__ARM_ARCH >= 5 || arm_arch >= 5)
 #define use_armv6_instructions  (__ARM_ARCH >= 6 || arm_arch >= 6)
 #define use_armv7_instructions  (__ARM_ARCH >= 7 || arm_arch >= 7)
+#endif
 
 #undef TCG_TARGET_STACK_GROWSUP
 #define TCG_TARGET_INSN_UNIT_SIZE 4
@@ -83,7 +89,7 @@ typedef enum {
 #else
 extern bool use_idiv_instructions;
 #endif
-#ifdef __ARM_NEON__
+#if defined(__ARM_NEON__) && !defined(CONFIG_DEBUG_TCG)
 #define use_neon_instructions  1
 #else
 extern bool use_neon_instructions;
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
index 87df812bb5..0c7e4f8411 100644
--- a/tcg/arm/tcg-target.c.inc
+++ b/tcg/arm/tcg-target.c.inc
@@ -2455,6 +2455,38 @@ static void tcg_target_init(TCGContext *s)
 }
 }
 
+/*
+ * For debugging/testing purposes, allow the ISA to be reduced
+ * (but not extended) from the set detected above.
+ */
+#ifdef CONFIG_DEBUG_TCG
+{
+char *opt = g_strdup(getenv("QEMU_TCG_DEBUG"));
+if (opt) {
+for (char *o = strtok(opt, ","); o ; o = strtok(NULL, ",")) {
+if (o[0] == 'v' &&
+o[1] >= '4' &&
+o[1] <= '0' + arm_arch &&
+o[2] == 0) {
+arm_arch = o[1] - '0';
+continue;
+}
+if (strcmp(o, "!neon") == 0) {
+use_neon_instructions = false;
+continue;
+}
+if (strcmp(o, "help") == 0) {
+printf("QEMU_TCG_DEBUG={,} where  is\n"
+   "  v   select ARMv\n"
+   "  !neon  disable ARM NEON\n");
+exit(0);
+}
+}
+g_free(opt);
+}
+}
+#endif
+
 tcg_target_available_regs[TCG_TYPE_I32] = ALL_GENERAL_REGS;
 
 tcg_target_call_clobber_regs = 0;
-- 
2.25.1

[PATCH v3 11/14] tcg/arm: More use of the ARMInsn enum

2021-08-18 Thread Richard Henderson

Signed-off-by: Richard Henderson 
---
 tcg/arm/tcg-target.c.inc | 20 ++--
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
index b20c313615..2f55b94ada 100644
--- a/tcg/arm/tcg-target.c.inc
+++ b/tcg/arm/tcg-target.c.inc
@@ -563,7 +563,7 @@ static void tcg_out_blx_imm(TCGContext *s, int32_t offset)
 (((offset - 8) >> 2) & 0x00ff));
 }
 
-static void tcg_out_dat_reg(TCGContext *s, ARMCond cond, int opc, int rd,
+static void tcg_out_dat_reg(TCGContext *s, ARMCond cond, ARMInsn opc, int rd,
 int rn, int rm, int shift)
 {
 tcg_out32(s, (cond << 28) | (0 << 25) | opc |
@@ -596,14 +596,14 @@ static void tcg_out_b_reg(TCGContext *s, ARMCond cond, 
TCGReg rn)
 }
 }
 
-static void tcg_out_dat_imm(TCGContext *s, ARMCond cond, int opc,
+static void tcg_out_dat_imm(TCGContext *s, ARMCond cond, ARMInsn opc,
 int rd, int rn, int im)
 {
 tcg_out32(s, (cond << 28) | (1 << 25) | opc |
 (rn << 16) | (rd << 12) | im);
 }
 
-static void tcg_out_ldstm(TCGContext *s, ARMCond cond, int opc,
+static void tcg_out_ldstm(TCGContext *s, ARMCond cond, ARMInsn opc,
   TCGReg rn, uint16_t mask)
 {
 tcg_out32(s, (cond << 28) | opc | (rn << 16) | mask);
@@ -630,8 +630,8 @@ static void tcg_out_memop_8(TCGContext *s, ARMCond cond, 
ARMInsn opc, TCGReg rt,
   (rn << 16) | (rt << 12) | ((imm8 & 0xf0) << 4) | (imm8 & 0xf));
 }
 
-static void tcg_out_memop_12(TCGContext *s, ARMCond cond, ARMInsn opc, TCGReg 
rt,
- TCGReg rn, int imm12, bool p, bool w)
+static void tcg_out_memop_12(TCGContext *s, ARMCond cond, ARMInsn opc,
+ TCGReg rt, TCGReg rn, int imm12, bool p, bool w)
 {
 bool u = 1;
 if (imm12 < 0) {
@@ -866,7 +866,7 @@ static void tcg_out_movi32(TCGContext *s, ARMCond cond, int 
rd, uint32_t arg)
  * Emit either the reg,imm or reg,reg form of a data-processing insn.
  * rhs must satisfy the "rI" constraint.
  */
-static void tcg_out_dat_rI(TCGContext *s, ARMCond cond, int opc, TCGArg dst,
+static void tcg_out_dat_rI(TCGContext *s, ARMCond cond, ARMInsn opc, TCGArg 
dst,
TCGArg lhs, TCGArg rhs, int rhs_is_const)
 {
 if (rhs_is_const) {
@@ -880,8 +880,8 @@ static void tcg_out_dat_rI(TCGContext *s, ARMCond cond, int 
opc, TCGArg dst,
  * Emit either the reg,imm or reg,reg form of a data-processing insn.
  * rhs must satisfy the "rIK" constraint.
  */
-static void tcg_out_dat_rIK(TCGContext *s, ARMCond cond, int opc, int opinv,
-TCGReg dst, TCGReg lhs, TCGArg rhs,
+static void tcg_out_dat_rIK(TCGContext *s, ARMCond cond, ARMInsn opc,
+ARMInsn opinv, TCGReg dst, TCGReg lhs, TCGArg rhs,
 bool rhs_is_const)
 {
 if (rhs_is_const) {
@@ -896,8 +896,8 @@ static void tcg_out_dat_rIK(TCGContext *s, ARMCond cond, 
int opc, int opinv,
 }
 }
 
-static void tcg_out_dat_rIN(TCGContext *s, ARMCond cond, int opc, int opneg,
-TCGArg dst, TCGArg lhs, TCGArg rhs,
+static void tcg_out_dat_rIN(TCGContext *s, ARMCond cond, ARMInsn opc,
+ARMInsn opneg, TCGArg dst, TCGArg lhs, TCGArg rhs,
 bool rhs_is_const)
 {
 /* Emit either the reg,imm or reg,reg form of a data-processing insn.
-- 
2.25.1

[PATCH v3 02/14] tcg/arm: Standardize on tcg_out_<branch>_{reg,imm}

2021-08-18 Thread Richard Henderson

Some of the functions specified _reg, some _imm, and some
left it blank.  Make it clearer to which we are referring.

Split tcg_out_b_reg from tcg_out_bx_reg, to indicate when
we do not actually require BX semantics.

Signed-off-by: Richard Henderson 
---
 tcg/arm/tcg-target.c.inc | 38 ++
 1 file changed, 22 insertions(+), 16 deletions(-)

diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
index cbe3057a9d..0578f9749b 100644
--- a/tcg/arm/tcg-target.c.inc
+++ b/tcg/arm/tcg-target.c.inc
@@ -525,19 +525,19 @@ static bool tcg_target_const_match(int64_t val, TCGType 
type, int ct)
 return 0;
 }
 
-static inline void tcg_out_b(TCGContext *s, int cond, int32_t offset)
+static inline void tcg_out_b_imm(TCGContext *s, int cond, int32_t offset)
 {
 tcg_out32(s, (cond << 28) | 0x0a00 |
 (((offset - 8) >> 2) & 0x00ff));
 }
 
-static inline void tcg_out_bl(TCGContext *s, int cond, int32_t offset)
+static inline void tcg_out_bl_imm(TCGContext *s, int cond, int32_t offset)
 {
 tcg_out32(s, (cond << 28) | 0x0b00 |
 (((offset - 8) >> 2) & 0x00ff));
 }
 
-static inline void tcg_out_blx(TCGContext *s, int cond, int rn)
+static inline void tcg_out_blx_reg(TCGContext *s, int cond, int rn)
 {
 tcg_out32(s, (cond << 28) | 0x012fff30 | rn);
 }
@@ -568,13 +568,19 @@ static inline void tcg_out_mov_reg(TCGContext *s, int 
cond, int rd, int rm)
 }
 }
 
-static inline void tcg_out_bx(TCGContext *s, int cond, TCGReg rn)
+static void tcg_out_bx_reg(TCGContext *s, int cond, TCGReg rn)
 {
-/* Unless the C portion of QEMU is compiled as thumb, we don't
-   actually need true BX semantics; merely a branch to an address
-   held in a register.  */
+tcg_out32(s, (cond << 28) | 0x012fff10 | rn);
+}
+
+static void tcg_out_b_reg(TCGContext *s, int cond, TCGReg rn)
+{
+/*
+ * Unless the C portion of QEMU is compiled as thumb, we don't need
+ * true BX semantics; merely a branch to an address held in a register.
+ */
 if (use_armv5t_instructions) {
-tcg_out32(s, (cond << 28) | 0x012fff10 | rn);
+tcg_out_bx_reg(s, cond, rn);
 } else {
 tcg_out_mov_reg(s, cond, TCG_REG_PC, rn);
 }
@@ -1215,7 +1221,7 @@ static void tcg_out_goto(TCGContext *s, int cond, const 
tcg_insn_unit *addr)
 ptrdiff_t disp = tcg_pcrel_diff(s, addr);
 
 if ((addri & 1) == 0 && disp - 8 < 0x01fd && disp - 8 > -0x01fd) {
-tcg_out_b(s, cond, disp);
+tcg_out_b_imm(s, cond, disp);
 return;
 }
 tcg_out_movi_pool(s, cond, TCG_REG_PC, addri);
@@ -1236,11 +1242,11 @@ static void tcg_out_call(TCGContext *s, const 
tcg_insn_unit *addr)
 }
 tcg_out_blx_imm(s, disp);
 } else {
-tcg_out_bl(s, COND_AL, disp);
+tcg_out_bl_imm(s, COND_AL, disp);
 }
 } else if (use_armv7_instructions) {
 tcg_out_movi32(s, COND_AL, TCG_REG_TMP, addri);
-tcg_out_blx(s, COND_AL, TCG_REG_TMP);
+tcg_out_blx_reg(s, COND_AL, TCG_REG_TMP);
 } else {
 /* ??? Know that movi_pool emits exactly 1 insn.  */
 tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R14, TCG_REG_PC, 0);
@@ -1254,7 +1260,7 @@ static inline void tcg_out_goto_label(TCGContext *s, int 
cond, TCGLabel *l)
 tcg_out_goto(s, cond, l->u.value_ptr);
 } else {
 tcg_out_reloc(s, s->code_ptr, R_ARM_PC24, l, 0);
-tcg_out_b(s, cond, 0);
+tcg_out_b_imm(s, cond, 0);
 }
 }
 
@@ -1823,7 +1829,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg 
*args, bool is64)
 /* This a conditional BL only to load a pointer within this opcode into LR
for the slow path.  We will not be using the value for a tail call.  */
 label_ptr = s->code_ptr;
-tcg_out_bl(s, COND_NE, 0);
+tcg_out_bl_imm(s, COND_NE, 0);
 
 tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, addend);
 
@@ -1929,7 +1935,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg 
*args, bool is64)
 
 /* The conditional call must come last, as we're going to return here.  */
 label_ptr = s->code_ptr;
-tcg_out_bl(s, COND_NE, 0);
+tcg_out_bl_imm(s, COND_NE, 0);
 
 add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi,
 s->code_ptr, label_ptr);
@@ -1982,7 +1988,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode 
opc,
 }
 break;
 case INDEX_op_goto_ptr:
-tcg_out_bx(s, COND_AL, args[0]);
+tcg_out_b_reg(s, COND_AL, args[0]);
 break;
 case INDEX_op_br:
 tcg_out_goto_label(s, COND_AL, arg_label(args[0]));
@@ -3065,7 +3071,7 @@ static void tcg_target_qemu_prologue(TCGContext *s)
 
 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
 
-tcg_out_bx(s, COND_AL, tcg_target_call_iarg_regs[1]);
+tcg_out_b_reg(s, COND_AL, tcg_target_call_iarg_regs[1]);
 
 /*

[PATCH v3 08/14] tcg/arm: Simplify usage of encode_imm

2021-08-18 Thread Richard Henderson

We have already computed the rotated value of the imm8
portion of the complete imm12 encoding.  No sense leaving
the combination of rot + rotation to the caller.

Create an encode_imm_nofail helper that performs an assert.

This removes the final use of the local "rotl" function,
which duplicated our generic "rol32" function.

Signed-off-by: Richard Henderson 
---
 tcg/arm/tcg-target.c.inc | 141 +--
 1 file changed, 77 insertions(+), 64 deletions(-)

diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
index 63b786a3e5..265370b2ee 100644
--- a/tcg/arm/tcg-target.c.inc
+++ b/tcg/arm/tcg-target.c.inc
@@ -305,10 +305,10 @@ static bool reloc_pc8(tcg_insn_unit *src_rw, const 
tcg_insn_unit *target)
 {
 const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
 ptrdiff_t offset = tcg_ptr_byte_diff(target, src_rx) - 8;
-int rot = encode_imm(offset);
+int imm12 = encode_imm(offset);
 
-if (rot >= 0) {
-*src_rw = deposit32(*src_rw, 0, 12, rol32(offset, rot) | (rot << 7));
+if (imm12 >= 0) {
+*src_rw = deposit32(*src_rw, 0, 12, imm12);
 return true;
 }
 return false;
@@ -362,33 +362,52 @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
 (ALL_GENERAL_REGS & ~((1 << TCG_REG_R0) | (1 << TCG_REG_R1)))
 #endif
 
-static inline uint32_t rotl(uint32_t val, int n)
-{
-  return (val << n) | (val >> (32 - n));
-}
-
-/* ARM immediates for ALU instructions are made of an unsigned 8-bit
-   right-rotated by an even amount between 0 and 30. */
+/*
+ * ARM immediates for ALU instructions are made of an unsigned 8-bit
+ * right-rotated by an even amount between 0 and 30.
+ *
+ * Return < 0 if @imm cannot be encoded, else the entire imm12 field.
+ */
 static int encode_imm(uint32_t imm)
 {
-int shift;
+uint32_t rot, imm8;
 
-/* simple case, only lower bits */
-if ((imm & ~0xff) == 0)
-return 0;
-/* then try a simple even shift */
-shift = ctz32(imm) & ~1;
-if (((imm >> shift) & ~0xff) == 0)
-return 32 - shift;
-/* now try harder with rotations */
-if ((rotl(imm, 2) & ~0xff) == 0)
-return 2;
-if ((rotl(imm, 4) & ~0xff) == 0)
-return 4;
-if ((rotl(imm, 6) & ~0xff) == 0)
-return 6;
-/* imm can't be encoded */
+/* Simple case, no rotation required. */
+if ((imm & ~0xff) == 0) {
+return imm;
+}
+
+/* Next, try a simple even shift.  */
+rot = ctz32(imm) & ~1;
+imm8 = imm >> rot;
+rot = 32 - rot;
+if ((imm8 & ~0xff) == 0) {
+goto found;
+}
+
+/*
+ * Finally, try harder with rotations.
+ * The ctz test above will have taken care of rotates >= 8.
+ */
+for (rot = 2; rot < 8; rot += 2) {
+imm8 = rol32(imm, rot);
+if ((imm8 & ~0xff) == 0) {
+goto found;
+}
+}
+/* Fail: imm cannot be encoded. */
 return -1;
+
+ found:
+/* Note that rot is even, and we discard bit 0 by shifting by 7. */
+return rot << 7 | imm8;
+}
+
+static int encode_imm_nofail(uint32_t imm)
+{
+int ret = encode_imm(imm);
+tcg_debug_assert(ret >= 0);
+return ret;
 }
 
 static inline int check_fit_imm(uint32_t imm)
@@ -775,20 +794,18 @@ static void tcg_out_movi_pool(TCGContext *s, int cond, 
int rd, uint32_t arg)
 
 static void tcg_out_movi32(TCGContext *s, int cond, int rd, uint32_t arg)
 {
-int rot, diff, opc, sh1, sh2;
+int imm12, diff, opc, sh1, sh2;
 uint32_t tt0, tt1, tt2;
 
 /* Check a single MOV/MVN before anything else.  */
-rot = encode_imm(arg);
-if (rot >= 0) {
-tcg_out_dat_imm(s, cond, ARITH_MOV, rd, 0,
-rotl(arg, rot) | (rot << 7));
+imm12 = encode_imm(arg);
+if (imm12 >= 0) {
+tcg_out_dat_imm(s, cond, ARITH_MOV, rd, 0, imm12);
 return;
 }
-rot = encode_imm(~arg);
-if (rot >= 0) {
-tcg_out_dat_imm(s, cond, ARITH_MVN, rd, 0,
-rotl(~arg, rot) | (rot << 7));
+imm12 = encode_imm(~arg);
+if (imm12 >= 0) {
+tcg_out_dat_imm(s, cond, ARITH_MVN, rd, 0, imm12);
 return;
 }
 
@@ -796,17 +813,15 @@ static void tcg_out_movi32(TCGContext *s, int cond, int 
rd, uint32_t arg)
or within the TB, which is immediately before the code block.  */
 diff = tcg_pcrel_diff(s, (void *)arg) - 8;
 if (diff >= 0) {
-rot = encode_imm(diff);
-if (rot >= 0) {
-tcg_out_dat_imm(s, cond, ARITH_ADD, rd, TCG_REG_PC,
-rotl(diff, rot) | (rot << 7));
+imm12 = encode_imm(diff);
+if (imm12 >= 0) {
+tcg_out_dat_imm(s, cond, ARITH_ADD, rd, TCG_REG_PC, imm12);
 return;
 }
 } else {
-rot = encode_imm(-diff);
-if (rot >= 0) {
-tcg_out_dat_imm(s, cond, ARITH_SUB, rd, TCG_REG_PC,
-rotl(-diff, rot) | (rot << 7));
+imm12 = encode_imm(-diff);

[PATCH v3 10/14] tcg/arm: Give enum arm_cond_code_e a typedef and use it

2021-08-18 Thread Richard Henderson

Signed-off-by: Richard Henderson 
---
 tcg/arm/tcg-target.c.inc | 136 +++
 1 file changed, 68 insertions(+), 68 deletions(-)

diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
index 327032f0df..b20c313615 100644
--- a/tcg/arm/tcg-target.c.inc
+++ b/tcg/arm/tcg-target.c.inc
@@ -85,7 +85,7 @@ static const int tcg_target_call_oarg_regs[2] = {
 #define TCG_REG_TMP  TCG_REG_R12
 #define TCG_VEC_TMP  TCG_REG_Q15
 
-enum arm_cond_code_e {
+typedef enum {
 COND_EQ = 0x0,
 COND_NE = 0x1,
 COND_CS = 0x2, /* Unsigned greater or equal */
@@ -101,7 +101,7 @@ enum arm_cond_code_e {
 COND_GT = 0xc,
 COND_LE = 0xd,
 COND_AL = 0xe,
-};
+} ARMCond;
 
 #define TO_CPSR (1 << 20)
 
@@ -540,19 +540,19 @@ static bool tcg_target_const_match(int64_t val, TCGType 
type, int ct)
 return 0;
 }
 
-static void tcg_out_b_imm(TCGContext *s, int cond, int32_t offset)
+static void tcg_out_b_imm(TCGContext *s, ARMCond cond, int32_t offset)
 {
 tcg_out32(s, (cond << 28) | 0x0a00 |
 (((offset - 8) >> 2) & 0x00ff));
 }
 
-static void tcg_out_bl_imm(TCGContext *s, int cond, int32_t offset)
+static void tcg_out_bl_imm(TCGContext *s, ARMCond cond, int32_t offset)
 {
 tcg_out32(s, (cond << 28) | 0x0b00 |
 (((offset - 8) >> 2) & 0x00ff));
 }
 
-static void tcg_out_blx_reg(TCGContext *s, int cond, int rn)
+static void tcg_out_blx_reg(TCGContext *s, ARMCond cond, int rn)
 {
 tcg_out32(s, (cond << 28) | 0x012fff30 | rn);
 }
@@ -563,14 +563,14 @@ static void tcg_out_blx_imm(TCGContext *s, int32_t offset)
 (((offset - 8) >> 2) & 0x00ff));
 }
 
-static void tcg_out_dat_reg(TCGContext *s,
-int cond, int opc, int rd, int rn, int rm, int shift)
+static void tcg_out_dat_reg(TCGContext *s, ARMCond cond, int opc, int rd,
+int rn, int rm, int shift)
 {
 tcg_out32(s, (cond << 28) | (0 << 25) | opc |
 (rn << 16) | (rd << 12) | shift | rm);
 }
 
-static void tcg_out_mov_reg(TCGContext *s, int cond, int rd, int rm)
+static void tcg_out_mov_reg(TCGContext *s, ARMCond cond, int rd, int rm)
 {
 /* Simple reg-reg move, optimising out the 'do nothing' case */
 if (rd != rm) {
@@ -578,12 +578,12 @@ static void tcg_out_mov_reg(TCGContext *s, int cond, int 
rd, int rm)
 }
 }
 
-static void tcg_out_bx_reg(TCGContext *s, int cond, TCGReg rn)
+static void tcg_out_bx_reg(TCGContext *s, ARMCond cond, TCGReg rn)
 {
 tcg_out32(s, (cond << 28) | 0x012fff10 | rn);
 }
 
-static void tcg_out_b_reg(TCGContext *s, int cond, TCGReg rn)
+static void tcg_out_b_reg(TCGContext *s, ARMCond cond, TCGReg rn)
 {
 /*
  * Unless the C portion of QEMU is compiled as thumb, we don't need
@@ -596,14 +596,14 @@ static void tcg_out_b_reg(TCGContext *s, int cond, TCGReg 
rn)
 }
 }
 
-static void tcg_out_dat_imm(TCGContext *s, int cond, int opc,
+static void tcg_out_dat_imm(TCGContext *s, ARMCond cond, int opc,
 int rd, int rn, int im)
 {
 tcg_out32(s, (cond << 28) | (1 << 25) | opc |
 (rn << 16) | (rd << 12) | im);
 }
 
-static void tcg_out_ldstm(TCGContext *s, int cond, int opc,
+static void tcg_out_ldstm(TCGContext *s, ARMCond cond, int opc,
   TCGReg rn, uint16_t mask)
 {
 tcg_out32(s, (cond << 28) | opc | (rn << 16) | mask);
@@ -611,14 +611,14 @@ static void tcg_out_ldstm(TCGContext *s, int cond, int 
opc,
 
 /* Note that this routine is used for both LDR and LDRH formats, so we do
not wish to include an immediate shift at this point.  */
-static void tcg_out_memop_r(TCGContext *s, int cond, ARMInsn opc, TCGReg rt,
+static void tcg_out_memop_r(TCGContext *s, ARMCond cond, ARMInsn opc, TCGReg 
rt,
 TCGReg rn, TCGReg rm, bool u, bool p, bool w)
 {
 tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24)
   | (w << 21) | (rn << 16) | (rt << 12) | rm);
 }
 
-static void tcg_out_memop_8(TCGContext *s, int cond, ARMInsn opc, TCGReg rt,
+static void tcg_out_memop_8(TCGContext *s, ARMCond cond, ARMInsn opc, TCGReg 
rt,
 TCGReg rn, int imm8, bool p, bool w)
 {
 bool u = 1;
@@ -630,7 +630,7 @@ static void tcg_out_memop_8(TCGContext *s, int cond, 
ARMInsn opc, TCGReg rt,
   (rn << 16) | (rt << 12) | ((imm8 & 0xf0) << 4) | (imm8 & 0xf));
 }
 
-static void tcg_out_memop_12(TCGContext *s, int cond, ARMInsn opc, TCGReg rt,
+static void tcg_out_memop_12(TCGContext *s, ARMCond cond, ARMInsn opc, TCGReg 
rt,
  TCGReg rn, int imm12, bool p, bool w)
 {
 bool u = 1;
@@ -642,152 +642,152 @@ static void tcg_out_memop_12(TCGContext *s, int cond, 
ARMInsn opc, TCGReg rt,
   (rn << 16) | (rt << 12) | imm12);
 }
 
-static void tcg_out_ld32_12(TCGContext *s, int cond, TCGReg rt,
+static void tcg_out_ld32_12(TCGContext *s, ARMCond cond, TCGReg rt,

[PATCH v3 04/14] tcg/arm: Support armv4t in tcg_out_goto and tcg_out_call

2021-08-18 Thread Richard Henderson

ARMv4T has BX as its only interworking instruction.  In order
to support testing of different architecture revisions with a
qemu binary that may have been built for, say, ARMv6T2, fill in
the blank required to make calls to helpers in thumb mode.

Signed-off-by: Richard Henderson 
---
 tcg/arm/tcg-target.c.inc | 49 
 1 file changed, 34 insertions(+), 15 deletions(-)

diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
index 0578f9749b..87df812bb5 100644
--- a/tcg/arm/tcg-target.c.inc
+++ b/tcg/arm/tcg-target.c.inc
@@ -1211,7 +1211,8 @@ static inline void tcg_out_st8(TCGContext *s, int cond,
 tcg_out_st8_12(s, cond, rd, rn, offset);
 }
 
-/* The _goto case is normally between TBs within the same code buffer, and
+/*
+ * The _goto case is normally between TBs within the same code buffer, and
  * with the code buffer limited to 16MB we wouldn't need the long case.
  * But we also use it for the tail-call to the qemu_ld/st helpers, which does.
  */
@@ -1219,38 +1220,56 @@ static void tcg_out_goto(TCGContext *s, int cond, const 
tcg_insn_unit *addr)
 {
 intptr_t addri = (intptr_t)addr;
 ptrdiff_t disp = tcg_pcrel_diff(s, addr);
+bool arm_mode = !(addri & 1);
 
-if ((addri & 1) == 0 && disp - 8 < 0x01fd && disp - 8 > -0x01fd) {
+if (arm_mode && disp - 8 < 0x01fd && disp - 8 > -0x01fd) {
 tcg_out_b_imm(s, cond, disp);
 return;
 }
-tcg_out_movi_pool(s, cond, TCG_REG_PC, addri);
+
+/* LDR is interworking from v5t. */
+if (arm_mode || use_armv5t_instructions) {
+tcg_out_movi_pool(s, cond, TCG_REG_PC, addri);
+return;
+}
+
+/* else v4t */
+tcg_out_movi32(s, COND_AL, TCG_REG_TMP, addri);
+tcg_out_bx_reg(s, COND_AL, TCG_REG_TMP);
 }
 
-/* The call case is mostly used for helpers - so it's not unreasonable
- * for them to be beyond branch range */
+/*
+ * The call case is mostly used for helpers - so it's not unreasonable
+ * for them to be beyond branch range.
+ */
 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *addr)
 {
 intptr_t addri = (intptr_t)addr;
 ptrdiff_t disp = tcg_pcrel_diff(s, addr);
+bool arm_mode = !(addri & 1);
 
 if (disp - 8 < 0x0200 && disp - 8 >= -0x0200) {
-if (addri & 1) {
-/* Use BLX if the target is in Thumb mode */
-if (!use_armv5t_instructions) {
-tcg_abort();
-}
-tcg_out_blx_imm(s, disp);
-} else {
+if (arm_mode) {
 tcg_out_bl_imm(s, COND_AL, disp);
+return;
 }
-} else if (use_armv7_instructions) {
+if (use_armv5t_instructions) {
+tcg_out_blx_imm(s, disp);
+return;
+}
+}
+
+if (use_armv5t_instructions) {
 tcg_out_movi32(s, COND_AL, TCG_REG_TMP, addri);
 tcg_out_blx_reg(s, COND_AL, TCG_REG_TMP);
-} else {
+} else if (arm_mode) {
 /* ??? Know that movi_pool emits exactly 1 insn.  */
-tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R14, TCG_REG_PC, 0);
+tcg_out_mov_reg(s, COND_AL, TCG_REG_R14, TCG_REG_PC);
 tcg_out_movi_pool(s, COND_AL, TCG_REG_PC, addri);
+} else {
+tcg_out_movi32(s, COND_AL, TCG_REG_TMP, addri);
+tcg_out_mov_reg(s, COND_AL, TCG_REG_R14, TCG_REG_PC);
+tcg_out_bx_reg(s, COND_AL, TCG_REG_TMP);
 }
 }
 
-- 
2.25.1

[PATCH v3 07/14] tcg/arm: Split out tcg_out_ldstm

2021-08-18 Thread Richard Henderson

Expand these hard-coded instructions symbolically.

Signed-off-by: Richard Henderson 
---
 tcg/arm/tcg-target.c.inc | 19 +--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
index c55167cc84..63b786a3e5 100644
--- a/tcg/arm/tcg-target.c.inc
+++ b/tcg/arm/tcg-target.c.inc
@@ -134,6 +134,9 @@ typedef enum {
 INSN_CLZ   = 0x016f0f10,
 INSN_RBIT  = 0x06ff0f30,
 
+INSN_LDMIA = 0x08b0,
+INSN_STMDB = 0x0920,
+
 INSN_LDR_IMM   = 0x0410,
 INSN_LDR_REG   = 0x0610,
 INSN_STR_IMM   = 0x0400,
@@ -586,6 +589,12 @@ static inline void tcg_out_dat_imm(TCGContext *s,
 (rn << 16) | (rd << 12) | im);
 }
 
+static void tcg_out_ldstm(TCGContext *s, int cond, int opc,
+  TCGReg rn, uint16_t mask)
+{
+tcg_out32(s, (cond << 28) | opc | (rn << 16) | mask);
+}
+
 /* Note that this routine is used for both LDR and LDRH formats, so we do
not wish to include an immediate shift at this point.  */
 static void tcg_out_memop_r(TCGContext *s, int cond, ARMInsn opc, TCGReg rt,
@@ -3119,7 +3128,10 @@ static void tcg_target_qemu_prologue(TCGContext *s)
 {
 /* Calling convention requires us to save r4-r11 and lr.  */
 /* stmdb sp!, { r4 - r11, lr } */
-tcg_out32(s, (COND_AL << 28) | 0x092d4ff0);
+tcg_out_ldstm(s, COND_AL, INSN_STMDB, TCG_REG_CALL_STACK,
+  (1 << TCG_REG_R4) | (1 << TCG_REG_R5) | (1 << TCG_REG_R6) |
+  (1 << TCG_REG_R7) | (1 << TCG_REG_R8) | (1 << TCG_REG_R9) |
+  (1 << TCG_REG_R10) | (1 << TCG_REG_R11) | (1 << 
TCG_REG_R14));
 
 /* Reserve callee argument and tcg temp space.  */
 tcg_out_dat_rI(s, COND_AL, ARITH_SUB, TCG_REG_CALL_STACK,
@@ -3147,7 +3159,10 @@ static void tcg_out_epilogue(TCGContext *s)
TCG_REG_CALL_STACK, STACK_ADDEND, 1);
 
 /* ldmia sp!, { r4 - r11, pc } */
-tcg_out32(s, (COND_AL << 28) | 0x08bd8ff0);
+tcg_out_ldstm(s, COND_AL, INSN_LDMIA, TCG_REG_CALL_STACK,
+  (1 << TCG_REG_R4) | (1 << TCG_REG_R5) | (1 << TCG_REG_R6) |
+  (1 << TCG_REG_R7) | (1 << TCG_REG_R8) | (1 << TCG_REG_R9) |
+  (1 << TCG_REG_R10) | (1 << TCG_REG_R11) | (1 << TCG_REG_PC));
 }
 
 typedef struct {
-- 
2.25.1

Re: [PATCH 2/5] target/mips: Replace GET_LMASK() macro by get_lmask(32) function

2021-08-18 Thread Philippe Mathieu-Daudé

On 8/18/21 7:09 PM, Richard Henderson wrote:
> On 8/18/21 6:43 AM, Philippe Mathieu-Daudé wrote:
>> -    if (GET_LMASK(arg2) <= 2) {
>> +    if (get_lmask(env, arg2, 32) <= 2) {
> 
> Whatever you decide to do with respect to the previous patch, the result
> of get_lmask is constant across the function and should be computed only
> once.

Oops I missed that, thanks.

[PATCH v3 09/14] tcg/arm: Drop inline markers

2021-08-18 Thread Richard Henderson

Let the compiler decide about inlining.
Remove tcg_out_nop as unused.

Signed-off-by: Richard Henderson 
---
 tcg/arm/tcg-target.c.inc | 234 +++
 1 file changed, 114 insertions(+), 120 deletions(-)

diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
index 265370b2ee..327032f0df 100644
--- a/tcg/arm/tcg-target.c.inc
+++ b/tcg/arm/tcg-target.c.inc
@@ -410,7 +410,7 @@ static int encode_imm_nofail(uint32_t imm)
 return ret;
 }
 
-static inline int check_fit_imm(uint32_t imm)
+static bool check_fit_imm(uint32_t imm)
 {
 return encode_imm(imm) >= 0;
 }
@@ -540,42 +540,37 @@ static bool tcg_target_const_match(int64_t val, TCGType 
type, int ct)
 return 0;
 }
 
-static inline void tcg_out_b_imm(TCGContext *s, int cond, int32_t offset)
+static void tcg_out_b_imm(TCGContext *s, int cond, int32_t offset)
 {
 tcg_out32(s, (cond << 28) | 0x0a00 |
 (((offset - 8) >> 2) & 0x00ff));
 }
 
-static inline void tcg_out_bl_imm(TCGContext *s, int cond, int32_t offset)
+static void tcg_out_bl_imm(TCGContext *s, int cond, int32_t offset)
 {
 tcg_out32(s, (cond << 28) | 0x0b00 |
 (((offset - 8) >> 2) & 0x00ff));
 }
 
-static inline void tcg_out_blx_reg(TCGContext *s, int cond, int rn)
+static void tcg_out_blx_reg(TCGContext *s, int cond, int rn)
 {
 tcg_out32(s, (cond << 28) | 0x012fff30 | rn);
 }
 
-static inline void tcg_out_blx_imm(TCGContext *s, int32_t offset)
+static void tcg_out_blx_imm(TCGContext *s, int32_t offset)
 {
 tcg_out32(s, 0xfa00 | ((offset & 2) << 23) |
 (((offset - 8) >> 2) & 0x00ff));
 }
 
-static inline void tcg_out_dat_reg(TCGContext *s,
+static void tcg_out_dat_reg(TCGContext *s,
 int cond, int opc, int rd, int rn, int rm, int shift)
 {
 tcg_out32(s, (cond << 28) | (0 << 25) | opc |
 (rn << 16) | (rd << 12) | shift | rm);
 }
 
-static inline void tcg_out_nop(TCGContext *s)
-{
-tcg_out32(s, INSN_NOP);
-}
-
-static inline void tcg_out_mov_reg(TCGContext *s, int cond, int rd, int rm)
+static void tcg_out_mov_reg(TCGContext *s, int cond, int rd, int rm)
 {
 /* Simple reg-reg move, optimising out the 'do nothing' case */
 if (rd != rm) {
@@ -601,8 +596,8 @@ static void tcg_out_b_reg(TCGContext *s, int cond, TCGReg 
rn)
 }
 }
 
-static inline void tcg_out_dat_imm(TCGContext *s,
-int cond, int opc, int rd, int rn, int im)
+static void tcg_out_dat_imm(TCGContext *s, int cond, int opc,
+int rd, int rn, int im)
 {
 tcg_out32(s, (cond << 28) | (1 << 25) | opc |
 (rn << 16) | (rd << 12) | im);
@@ -647,141 +642,141 @@ static void tcg_out_memop_12(TCGContext *s, int cond, 
ARMInsn opc, TCGReg rt,
   (rn << 16) | (rt << 12) | imm12);
 }
 
-static inline void tcg_out_ld32_12(TCGContext *s, int cond, TCGReg rt,
-   TCGReg rn, int imm12)
+static void tcg_out_ld32_12(TCGContext *s, int cond, TCGReg rt,
+TCGReg rn, int imm12)
 {
 tcg_out_memop_12(s, cond, INSN_LDR_IMM, rt, rn, imm12, 1, 0);
 }
 
-static inline void tcg_out_st32_12(TCGContext *s, int cond, TCGReg rt,
-   TCGReg rn, int imm12)
+static void tcg_out_st32_12(TCGContext *s, int cond, TCGReg rt,
+TCGReg rn, int imm12)
 {
 tcg_out_memop_12(s, cond, INSN_STR_IMM, rt, rn, imm12, 1, 0);
 }
 
-static inline void tcg_out_ld32_r(TCGContext *s, int cond, TCGReg rt,
-  TCGReg rn, TCGReg rm)
+static void tcg_out_ld32_r(TCGContext *s, int cond, TCGReg rt,
+   TCGReg rn, TCGReg rm)
 {
 tcg_out_memop_r(s, cond, INSN_LDR_REG, rt, rn, rm, 1, 1, 0);
 }
 
-static inline void tcg_out_st32_r(TCGContext *s, int cond, TCGReg rt,
-  TCGReg rn, TCGReg rm)
+static void tcg_out_st32_r(TCGContext *s, int cond, TCGReg rt,
+   TCGReg rn, TCGReg rm)
 {
 tcg_out_memop_r(s, cond, INSN_STR_REG, rt, rn, rm, 1, 1, 0);
 }
 
-static inline void tcg_out_ldrd_8(TCGContext *s, int cond, TCGReg rt,
-   TCGReg rn, int imm8)
+static void tcg_out_ldrd_8(TCGContext *s, int cond, TCGReg rt,
+   TCGReg rn, int imm8)
 {
 tcg_out_memop_8(s, cond, INSN_LDRD_IMM, rt, rn, imm8, 1, 0);
 }
 
-static inline void tcg_out_ldrd_r(TCGContext *s, int cond, TCGReg rt,
-  TCGReg rn, TCGReg rm)
+static void tcg_out_ldrd_r(TCGContext *s, int cond, TCGReg rt,
+   TCGReg rn, TCGReg rm)
 {
 tcg_out_memop_r(s, cond, INSN_LDRD_REG, rt, rn, rm, 1, 1, 0);
 }
 
-static inline void tcg_out_ldrd_rwb(TCGContext *s, int cond, TCGReg rt,
-TCGReg rn, TCGReg rm)
+static void __attribute__((unused))
+tcg_out_ldrd_rwb(TCGContext *s, int cond, TCGReg rt, TCGReg

[PATCH v3 00/14] tcg/arm: Unaligned access and other cleanup

2021-08-18 Thread Richard Henderson

Based-on: <20210818191920.390759-1-richard.hender...@linaro.org>
("[PATCH v3 00/66] Unaligned access for user-only")

Important points:
  * Support unaligned accesses.
  * Add environment variable for testing older architecture revs.
  * More use of enum types.


r~


Richard Henderson (14):
  tcg/arm: Remove fallback definition of __ARM_ARCH
  tcg/arm: Standardize on tcg_out_<branch>_{reg,imm}
  tcg/arm: Simplify use_armv5t_instructions
  tcg/arm: Support armv4t in tcg_out_goto and tcg_out_call
  tcg/arm: Examine QEMU_TCG_DEBUG environment variable
  tcg/arm: Support unaligned access for softmmu
  tcg/arm: Split out tcg_out_ldstm
  tcg/arm: Simplify usage of encode_imm
  tcg/arm: Drop inline markers
  tcg/arm: Give enum arm_cond_code_e a typedef and use it
  tcg/arm: More use of the ARMInsn enum
  tcg/arm: More use of the TCGReg enum
  tcg/arm: Reserve a register for guest_base
  tcg/arm: Support raising sigbus for user-only

 tcg/arm/tcg-target.h |   35 +-
 tcg/arm/tcg-target.c.inc | 1010 +++---
 2 files changed, 724 insertions(+), 321 deletions(-)

-- 
2.25.1

[PATCH v3 06/14] tcg/arm: Support unaligned access for softmmu

2021-08-18 Thread Richard Henderson

From armv6, the architecture supports unaligned accesses.
All we need to do is perform the correct alignment check
in tcg_out_tlb_read and not use LDRD/STRD when the access
is not aligned.

Signed-off-by: Richard Henderson 
---
 tcg/arm/tcg-target.c.inc | 69 ++--
 1 file changed, 38 insertions(+), 31 deletions(-)

diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
index 0c7e4f8411..c55167cc84 100644
--- a/tcg/arm/tcg-target.c.inc
+++ b/tcg/arm/tcg-target.c.inc
@@ -34,13 +34,6 @@ bool use_idiv_instructions;
 bool use_neon_instructions;
 #endif
 
-/* ??? Ought to think about changing CONFIG_SOFTMMU to always defined.  */
-#ifdef CONFIG_SOFTMMU
-# define USING_SOFTMMU 1
-#else
-# define USING_SOFTMMU 0
-#endif
-
 #ifdef CONFIG_DEBUG_TCG
 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
 "%r0",  "%r1",  "%r2",  "%r3",  "%r4",  "%r5",  "%r6",  "%r7",
@@ -1526,15 +1519,20 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg 
addrlo, TCGReg addrhi,
 int fast_off = TLB_MASK_TABLE_OFS(mem_index);
 int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
 int table_off = fast_off + offsetof(CPUTLBDescFast, table);
-unsigned s_bits = opc & MO_SIZE;
-unsigned a_bits = get_alignment_bits(opc);
+unsigned s_mask = (1 << (opc & MO_SIZE)) - 1;
+unsigned a_mask = (1 << get_alignment_bits(opc)) - 1;
+TCGReg t_addr;
 
 /*
- * We don't support inline unaligned acceses, but we can easily
- * support overalignment checks.
+ * For v7, support for unaligned accesses is mandatory.
+ * For v6, support for unaligned accesses is enabled by SCTLR.U,
+ * which is enabled by (at least) Linux and FreeBSD.
+ * For v4 and v5, unaligned accesses are... complicated, and
+ * unhelped by Linux having a global not per-process flag
+ * for unaligned handling.
  */
-if (a_bits < s_bits) {
-a_bits = s_bits;
+if (!use_armv6_instructions && a_mask < s_mask) {
+a_mask = s_mask;
 }
 
 /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {r0,r1}.  */
@@ -1578,27 +1576,32 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg 
addrlo, TCGReg addrhi,
 
 /*
  * Check alignment, check comparators.
- * Do this in no more than 3 insns.  Use MOVW for v7, if possible,
+ * Do this in 2-4 insns.  Use MOVW for v7, if possible,
  * to reduce the number of sequential conditional instructions.
  * Almost all guests have at least 4k pages, which means that we need
  * to clear at least 9 bits even for an 8-byte memory, which means it
  * isn't worth checking for an immediate operand for BIC.
  */
+/* For unaligned accesses, test the page of the last byte. */
+t_addr = addrlo;
+if (a_mask < s_mask) {
+t_addr = TCG_REG_R0;
+tcg_out_dat_imm(s, COND_AL, ARITH_ADD, t_addr,
+addrlo, s_mask - a_mask);
+}
 if (use_armv7_instructions && TARGET_PAGE_BITS <= 16) {
-tcg_target_ulong mask = ~(TARGET_PAGE_MASK | ((1 << a_bits) - 1));
-
-tcg_out_movi32(s, COND_AL, TCG_REG_TMP, mask);
+tcg_out_movi32(s, COND_AL, TCG_REG_TMP, ~(TARGET_PAGE_MASK | a_mask));
 tcg_out_dat_reg(s, COND_AL, ARITH_BIC, TCG_REG_TMP,
-addrlo, TCG_REG_TMP, 0);
+t_addr, TCG_REG_TMP, 0);
 tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, TCG_REG_R2, TCG_REG_TMP, 0);
 } else {
-if (a_bits) {
-tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo,
-(1 << a_bits) - 1);
+if (a_mask) {
+tcg_debug_assert(a_mask <= 0xff);
+tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, a_mask);
 }
-tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP, 0, addrlo,
+tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP, 0, t_addr,
 SHIFT_IMM_LSR(TARGET_PAGE_BITS));
-tcg_out_dat_reg(s, (a_bits ? COND_EQ : COND_AL), ARITH_CMP,
+tcg_out_dat_reg(s, (a_mask ? COND_EQ : COND_AL), ARITH_CMP,
 0, TCG_REG_R2, TCG_REG_TMP,
 SHIFT_IMM_LSL(TARGET_PAGE_BITS));
 }
@@ -1763,8 +1766,9 @@ static inline void tcg_out_qemu_ld_index(TCGContext *s, 
MemOp opc,
 tcg_out_ld32_r(s, COND_AL, datalo, addrlo, addend);
 break;
 case MO_Q:
-/* Avoid ldrd for user-only emulation, to handle unaligned.  */
-if (USING_SOFTMMU && use_armv6_instructions
+/* LDRD requires alignment; double-check that. */
+if (use_armv6_instructions
+&& get_alignment_bits(opc) >= MO_64
 && (datalo & 1) == 0 && datahi == datalo + 1) {
 tcg_out_ldrd_r(s, COND_AL, datalo, addrlo, addend);
 } else if (datalo != addend) {
@@ -1806,8 +1810,9 @@ static inline void tcg_out_qemu_ld_direct(TCGContext *s, 
MemOp opc,
 tcg_out_ld32_12(s, COND_AL, da

[PATCH v3 03/14] tcg/arm: Simplify use_armv5t_instructions

2021-08-18 Thread Richard Henderson

According to the Arm ARM DDI 0406C, section A1.3, the valid variants
are ARMv5T, ARMv5TE, ARMv5TEJ -- there is no ARMv5 without Thumb.
Therefore simplify the test from preprocessor ifdefs to base
architecture revision.  Retain the "t" in the name to minimize churn.

Signed-off-by: Richard Henderson 
---
 tcg/arm/tcg-target.h | 8 +---
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
index 18bb16c784..f41b809554 100644
--- a/tcg/arm/tcg-target.h
+++ b/tcg/arm/tcg-target.h
@@ -28,13 +28,7 @@
 
 extern int arm_arch;
 
-#if defined(__ARM_ARCH_5T__) \
-|| defined(__ARM_ARCH_5TE__) || defined(__ARM_ARCH_5TEJ__)
-# define use_armv5t_instructions 1
-#else
-# define use_armv5t_instructions use_armv6_instructions
-#endif
-
+#define use_armv5t_instructions (__ARM_ARCH >= 5 || arm_arch >= 5)
 #define use_armv6_instructions  (__ARM_ARCH >= 6 || arm_arch >= 6)
 #define use_armv7_instructions  (__ARM_ARCH >= 7 || arm_arch >= 7)
 
-- 
2.25.1

[PATCH v3 13/14] tcg/arm: Reserve a register for guest_base

2021-08-18 Thread Richard Henderson

Reserve a register for the guest_base using aarch64 for reference.
By doing so, we do not have to recompute it for every memory load.

Signed-off-by: Richard Henderson 
---
 tcg/arm/tcg-target.c.inc | 39 ---
 1 file changed, 28 insertions(+), 11 deletions(-)

diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
index 35bd4c68d6..2728035177 100644
--- a/tcg/arm/tcg-target.c.inc
+++ b/tcg/arm/tcg-target.c.inc
@@ -84,6 +84,9 @@ static const int tcg_target_call_oarg_regs[2] = {
 
 #define TCG_REG_TMP  TCG_REG_R12
 #define TCG_VEC_TMP  TCG_REG_Q15
+#ifndef CONFIG_SOFTMMU
+#define TCG_REG_GUEST_BASE  TCG_REG_R11
+#endif
 
 typedef enum {
 COND_EQ = 0x0,
@@ -1763,7 +1766,8 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, 
TCGLabelQemuLdst *lb)
 
 static void tcg_out_qemu_ld_index(TCGContext *s, MemOp opc,
   TCGReg datalo, TCGReg datahi,
-  TCGReg addrlo, TCGReg addend)
+  TCGReg addrlo, TCGReg addend,
+  bool scratch_addend)
 {
 /* Byte swapping is left to middle-end expansion. */
 tcg_debug_assert((opc & MO_BSWAP) == 0);
@@ -1790,7 +1794,7 @@ static void tcg_out_qemu_ld_index(TCGContext *s, MemOp 
opc,
 && get_alignment_bits(opc) >= MO_64
 && (datalo & 1) == 0 && datahi == datalo + 1) {
 tcg_out_ldrd_r(s, COND_AL, datalo, addrlo, addend);
-} else if (datalo != addend) {
+} else if (scratch_addend) {
 tcg_out_ld32_rwb(s, COND_AL, datalo, addend, addrlo);
 tcg_out_ld32_12(s, COND_AL, datahi, addend, 4);
 } else {
@@ -1875,14 +1879,14 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg 
*args, bool is64)
 label_ptr = s->code_ptr;
 tcg_out_bl_imm(s, COND_NE, 0);
 
-tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, addend);
+tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, addend, true);
 
 add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi,
 s->code_ptr, label_ptr);
 #else /* !CONFIG_SOFTMMU */
 if (guest_base) {
-tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, guest_base);
-tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, TCG_REG_TMP);
+tcg_out_qemu_ld_index(s, opc, datalo, datahi,
+  addrlo, TCG_REG_GUEST_BASE, false);
 } else {
 tcg_out_qemu_ld_direct(s, opc, datalo, datahi, addrlo);
 }
@@ -1891,7 +1895,8 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
 
 static void tcg_out_qemu_st_index(TCGContext *s, ARMCond cond, MemOp opc,
   TCGReg datalo, TCGReg datahi,
-  TCGReg addrlo, TCGReg addend)
+  TCGReg addrlo, TCGReg addend,
+  bool scratch_addend)
 {
 /* Byte swapping is left to middle-end expansion. */
 tcg_debug_assert((opc & MO_BSWAP) == 0);
@@ -1912,9 +1917,14 @@ static void tcg_out_qemu_st_index(TCGContext *s, ARMCond cond, MemOp opc,
 && get_alignment_bits(opc) >= MO_64
 && (datalo & 1) == 0 && datahi == datalo + 1) {
 tcg_out_strd_r(s, cond, datalo, addrlo, addend);
-} else {
+} else if (scratch_addend) {
 tcg_out_st32_rwb(s, cond, datalo, addend, addrlo);
 tcg_out_st32_12(s, cond, datahi, addend, 4);
+} else {
+tcg_out_dat_reg(s, cond, ARITH_ADD, TCG_REG_TMP,
+addend, addrlo, SHIFT_IMM_LSL(0));
+tcg_out_st32_12(s, cond, datalo, TCG_REG_TMP, 0);
+tcg_out_st32_12(s, cond, datahi, TCG_REG_TMP, 4);
 }
 break;
 default:
@@ -1978,7 +1988,8 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
 mem_index = get_mmuidx(oi);
 addend = tcg_out_tlb_read(s, addrlo, addrhi, opc, mem_index, 0);
 
-tcg_out_qemu_st_index(s, COND_EQ, opc, datalo, datahi, addrlo, addend);
+tcg_out_qemu_st_index(s, COND_EQ, opc, datalo, datahi,
+  addrlo, addend, true);
 
 /* The conditional call must come last, as we're going to return here.  */
 label_ptr = s->code_ptr;
@@ -1988,9 +1999,8 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
 s->code_ptr, label_ptr);
 #else /* !CONFIG_SOFTMMU */
 if (guest_base) {
-tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, guest_base);
-tcg_out_qemu_st_index(s, COND_AL, opc, datalo,
-  datahi, addrlo, TCG_REG_TMP);
+tcg_out_qemu_st_index(s, COND_AL, opc, datalo, datahi,
+  addrlo, TCG_REG_GUEST_BASE, false);
 } else {
 tcg_out_qemu_st_direct(s, opc, datalo, datahi, addrlo);
 }
@@ -3153,6 +3163,13 @@ static void tcg_target_qemu_prologue(TCGCont

[PATCH v3 01/14] tcg/arm: Remove fallback definition of __ARM_ARCH

2021-08-18 Thread Richard Henderson

GCC since 4.8 provides the definition and we now require 7.5.

Signed-off-by: Richard Henderson 
---
 tcg/arm/tcg-target.h | 19 -------------------
 1 file changed, 19 deletions(-)

diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
index d113b7f8db..18bb16c784 100644
--- a/tcg/arm/tcg-target.h
+++ b/tcg/arm/tcg-target.h
@@ -26,25 +26,6 @@
 #ifndef ARM_TCG_TARGET_H
 #define ARM_TCG_TARGET_H
 
-/* The __ARM_ARCH define is provided by gcc 4.8.  Construct it otherwise.  */
-#ifndef __ARM_ARCH
-# if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \
- || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \
- || defined(__ARM_ARCH_7EM__)
-#  define __ARM_ARCH 7
-# elif defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \
-   || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) \
-   || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6T2__)
-#  define __ARM_ARCH 6
-# elif defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5E__) \
-   || defined(__ARM_ARCH_5T__) || defined(__ARM_ARCH_5TE__) \
-   || defined(__ARM_ARCH_5TEJ__)
-#  define __ARM_ARCH 5
-# else
-#  define __ARM_ARCH 4
-# endif
-#endif
-
 extern int arm_arch;
 
 #if defined(__ARM_ARCH_5T__) \
-- 
2.25.1

Re: [PATCH v3 64/66] tcg: Canonicalize alignment flags in MemOp

2021-08-18 Thread Philippe Mathieu-Daudé

On 8/18/21 9:19 PM, Richard Henderson wrote:
> Having observed e.g. al8+leq in dumps, canonicalize to al+leq.
> 
> Signed-off-by: Richard Henderson 
> ---
>  tcg/tcg-op.c | 7 ++++++-
>  1 file changed, 6 insertions(+), 1 deletion(-)

Nice.

Reviewed-by: Philippe Mathieu-Daudé

Re: [PATCH v3 58/66] include/exec: Move cpu_signal_handler declaration

2021-08-18 Thread Philippe Mathieu-Daudé

On 8/18/21 9:19 PM, Richard Henderson wrote:
> There is nothing target specific about this.  The implementation
> is host specific, but the declaration is 100% common.
> 
> Signed-off-by: Richard Henderson 
> ---
>  include/exec/exec-all.h | 13 +
>  target/alpha/cpu.h  |  6 --
>  target/arm/cpu.h|  7 ---
>  target/avr/cpu.h|  2 --
>  target/cris/cpu.h   |  8 
>  target/hexagon/cpu.h|  3 ---
>  target/hppa/cpu.h   |  3 ---
>  target/i386/cpu.h   |  7 ---
>  target/m68k/cpu.h   |  8 
>  target/microblaze/cpu.h |  7 ---
>  target/mips/cpu.h   |  3 ---
>  target/mips/internal.h  |  2 --
>  target/nios2/cpu.h  |  2 --
>  target/openrisc/cpu.h   |  2 --
>  target/ppc/cpu.h|  7 ---
>  target/riscv/cpu.h  |  2 --
>  target/rx/cpu.h |  4 
>  target/s390x/cpu.h  |  7 ---
>  target/sh4/cpu.h|  3 ---
>  target/sparc/cpu.h  |  2 --
>  target/tricore/cpu.h|  2 --
>  target/xtensa/cpu.h |  2 --
>  22 files changed, 13 insertions(+), 89 deletions(-)

Reviewed-by: Philippe Mathieu-Daudé

1 2 3 4 >

1 - 100 of 319 matches

Mail list logo