date:20240527

Re: [PATCH] targer/riscv: Implement Zabha extension

2024-05-27 Thread LIU Zhiwei


Hi Alexandre,

I have sent the patch set about Zabha before last week.

https://lore.kernel.org/all/fed99165-58da-458c-b68f-a9717fc15...@linux.alibaba.com/T/

You are welcome to review it and give comments.

Thanks,
Zhiwei

On 2024/5/28 13:45, Alexandre Ghiti wrote:

From: Gianluca Guida 

Add Zabha implementation.

Signed-off-by: Gianluca Guida 
Signed-off-by: Alexandre Ghiti 
---
  target/riscv/cpu.c  |   2 +
  target/riscv/cpu_cfg.h  |   1 +
  target/riscv/insn32.decode  |  22 +++
  target/riscv/insn_trans/trans_rvzabha.c.inc | 149 
  target/riscv/tcg/tcg-cpu.c  |   5 +
  target/riscv/translate.c|   1 +
  6 files changed, 180 insertions(+)
  create mode 100644 target/riscv/insn_trans/trans_rvzabha.c.inc

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index 70d1a527a1..b01f82002b 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -116,6 +116,7 @@ const RISCVIsaExtData isa_edata_arr[] = {
  ISA_EXT_DATA_ENTRY(zmmul, PRIV_VERSION_1_12_0, ext_zmmul),
  ISA_EXT_DATA_ENTRY(za64rs, PRIV_VERSION_1_12_0, has_priv_1_11),
  ISA_EXT_DATA_ENTRY(zaamo, PRIV_VERSION_1_12_0, ext_zaamo),
+ISA_EXT_DATA_ENTRY(zabha, PRIV_VERSION_1_12_0, ext_zabha),
  ISA_EXT_DATA_ENTRY(zacas, PRIV_VERSION_1_12_0, ext_zacas),
  ISA_EXT_DATA_ENTRY(zalrsc, PRIV_VERSION_1_12_0, ext_zalrsc),
  ISA_EXT_DATA_ENTRY(zawrs, PRIV_VERSION_1_12_0, ext_zawrs),
@@ -1464,6 +1465,7 @@ const RISCVCPUMultiExtConfig riscv_cpu_extensions[] = {
  MULTI_EXT_CFG_BOOL("zicsr", ext_zicsr, true),
  MULTI_EXT_CFG_BOOL("zihintntl", ext_zihintntl, true),
  MULTI_EXT_CFG_BOOL("zihintpause", ext_zihintpause, true),
+MULTI_EXT_CFG_BOOL("zabha", ext_zabha, false),
  MULTI_EXT_CFG_BOOL("zacas", ext_zacas, false),
  MULTI_EXT_CFG_BOOL("zaamo", ext_zaamo, false),
  MULTI_EXT_CFG_BOOL("zalrsc", ext_zalrsc, false),
diff --git a/target/riscv/cpu_cfg.h b/target/riscv/cpu_cfg.h
index d36c416ef0..7f614da4e2 100644
--- a/target/riscv/cpu_cfg.h
+++ b/target/riscv/cpu_cfg.h
@@ -81,6 +81,7 @@ struct RISCVCPUConfig {
  bool ext_svvptc;
  bool ext_zdinx;
  bool ext_zaamo;
+bool ext_zabha;
  bool ext_zacas;
  bool ext_zalrsc;
  bool ext_zawrs;
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
index f22df04cfd..6d7726120f 100644
--- a/target/riscv/insn32.decode
+++ b/target/riscv/insn32.decode
@@ -1010,3 +1010,25 @@ amocas_w    00101 . . ..... ..... 010 ..... 0101111 @atom_st
  amocas_d    00101 . . ..... ..... 011 ..... 0101111 @atom_st
  # *** RV64 Zacas Standard Extension ***
  amocas_q    00101 . . ..... ..... 100 ..... 0101111 @atom_st
+
+# *** Zabha Standard Extension ***
+amoswap_b  00001 . . ..... ..... 000 ..... 0101111 @atom_st
+amoadd_b   00000 . . ..... ..... 000 ..... 0101111 @atom_st
+amoxor_b   00100 . . ..... ..... 000 ..... 0101111 @atom_st
+amoand_b   01100 . . ..... ..... 000 ..... 0101111 @atom_st
+amoor_b    01000 . . ..... ..... 000 ..... 0101111 @atom_st
+amomin_b   10000 . . ..... ..... 000 ..... 0101111 @atom_st
+amomax_b   10100 . . ..... ..... 000 ..... 0101111 @atom_st
+amominu_b  11000 . . ..... ..... 000 ..... 0101111 @atom_st
+amomaxu_b  11100 . . ..... ..... 000 ..... 0101111 @atom_st
+amocas_b   00101 . . ..... ..... 000 ..... 0101111 @atom_st
+amoswap_h  00001 . . ..... ..... 001 ..... 0101111 @atom_st
+amoadd_h   00000 . . ..... ..... 001 ..... 0101111 @atom_st
+amoxor_h   00100 . . ..... ..... 001 ..... 0101111 @atom_st
+amoand_h   01100 . . ..... ..... 001 ..... 0101111 @atom_st
+amoor_h    01000 . . ..... ..... 001 ..... 0101111 @atom_st
+amomin_h   10000 . . ..... ..... 001 ..... 0101111 @atom_st
+amomax_h   10100 . . ..... ..... 001 ..... 0101111 @atom_st
+amominu_h  11000 . . ..... ..... 001 ..... 0101111 @atom_st
+amomaxu_h  11100 . . ..... ..... 001 ..... 0101111 @atom_st
+amocas_h   00101 . . ..... ..... 001 ..... 0101111 @atom_st
diff --git a/target/riscv/insn_trans/trans_rvzabha.c.inc 
b/target/riscv/insn_trans/trans_rvzabha.c.inc
new file mode 100644
index 0000000000..74f43bb95a
--- /dev/null
+++ b/target/riscv/insn_trans/trans_rvzabha.c.inc
@@ -0,0 +1,149 @@
+/*
+ * RISC-V translation routines for the Zabha Standard Extension.
+ *
+ * Copyright (c) 2023 Rivos Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#define REQUIRE_ZABHA(ctx) do {

Re: [SPAM] Re: [PATCH v4 08/16] aspeed/smc: support 64 bits dma dram address

2024-05-27 Thread Cédric Le Goater


On 5/27/24 18:06, Philippe Mathieu-Daudé wrote:

Hi Jamin,

On 27/5/24 10:02, Jamin Lin wrote:

AST2700 support the maximum dram size is 8GiB
and has a "DMA DRAM Side Address High Part(0x7C)"
register to support 64 bits dma dram address.
Add helper routines functions to compute the dma dram
address, new features and update trace-event
to support 64 bits dram address.

Signed-off-by: Troy Lee 
Signed-off-by: Jamin Lin 
---
  hw/ssi/aspeed_smc.c | 52 +++--
  hw/ssi/trace-events |  2 +-
  include/hw/ssi/aspeed_smc.h |  1 +
  3 files changed, 46 insertions(+), 9 deletions(-)




+static uint64_t aspeed_smc_dma_dram_addr(AspeedSMCState *s)
+{
+    return s->regs[R_DMA_DRAM_ADDR] |
+    ((uint64_t) s->regs[R_DMA_DRAM_ADDR_HIGH] << 32);
+}
+
  static uint32_t aspeed_smc_dma_len(AspeedSMCState *s)
  {
  AspeedSMCClass *asc = ASPEED_SMC_GET_CLASS(s);
@@ -903,24 +921,34 @@ static void aspeed_smc_dma_checksum(AspeedSMCState *s)
  static void aspeed_smc_dma_rw(AspeedSMCState *s)
  {
+    AspeedSMCClass *asc = ASPEED_SMC_GET_CLASS(s);
+    uint64_t dma_dram_offset;
+    uint64_t dma_dram_addr;
  MemTxResult result;
  uint32_t dma_len;
  uint32_t data;
  dma_len = aspeed_smc_dma_len(s);
+    dma_dram_addr = aspeed_smc_dma_dram_addr(s);
+
+    if (aspeed_smc_has_dma64(asc)) {
+    dma_dram_offset = dma_dram_addr - s->dram_base;
+    } else {
+    dma_dram_offset = dma_dram_addr;


Here s->dram_base is 0x0. Do we really need to check
aspeed_smc_has_dma64?


You are right, it could be done as in your proposal below. However,
we should add a comment regarding some values:

R_DMA_DRAM_ADDR_HIGH and s->dram_base are only set on the AST2700
SoC and zero on other Aspeed SoCs.


+    }


Maybe simplify by improving aspeed_smc_dma_dram_addr() as:

   static uint64_t aspeed_smc_dma_dram_addr(AspeedSMCState *s)
   {
   return (s->regs[R_DMA_DRAM_ADDR]
   | ((uint64_t) s->regs[R_DMA_DRAM_ADDR_HIGH] << 32))
   - s->dram_base;
   }

Then no need for dma_dram_offset, dma_dram_addr is enough.


we need both, dma_dram_offset for the transaction and dma_dram_addr
to update the R_DMA_DRAM_ADDR_HIGH reg. A bit cumbersome, I agree.

Thanks,

C.





  trace_aspeed_smc_dma_rw(s->regs[R_DMA_CTRL] & DMA_CTRL_WRITE ?
  "write" : "read",
  s->regs[R_DMA_FLASH_ADDR],
-    s->regs[R_DMA_DRAM_ADDR],
+    dma_dram_offset,
  dma_len);
  while (dma_len) {
  if (s->regs[R_DMA_CTRL] & DMA_CTRL_WRITE) {
-    data = address_space_ldl_le(&s->dram_as, s->regs[R_DMA_DRAM_ADDR],
+    data = address_space_ldl_le(&s->dram_as, dma_dram_offset,
  MEMTXATTRS_UNSPECIFIED, &result);
  if (result != MEMTX_OK) {
-    aspeed_smc_error("DRAM read failed @%08x",
- s->regs[R_DMA_DRAM_ADDR]);
+    aspeed_smc_error("DRAM read failed @%" PRIx64,
+ dma_dram_offset);
  return;
  }
@@ -940,11 +968,11 @@ static void aspeed_smc_dma_rw(AspeedSMCState *s)
  return;
  }
-    address_space_stl_le(&s->dram_as, s->regs[R_DMA_DRAM_ADDR],
+    address_space_stl_le(&s->dram_as, dma_dram_offset,
   data, MEMTXATTRS_UNSPECIFIED, &result);
  if (result != MEMTX_OK) {
-    aspeed_smc_error("DRAM write failed @%08x",
- s->regs[R_DMA_DRAM_ADDR]);
+    aspeed_smc_error("DRAM write failed @%" PRIx64,
+ dma_dram_offset);
  return;
  }
  }
@@ -953,8 +981,12 @@ static void aspeed_smc_dma_rw(AspeedSMCState *s)
   * When the DMA is on-going, the DMA registers are updated
   * with the current working addresses and length.
   */
+    dma_dram_offset += 4;
+    dma_dram_addr += 4;
+
+    s->regs[R_DMA_DRAM_ADDR_HIGH] = dma_dram_addr >> 32;
+    s->regs[R_DMA_DRAM_ADDR] = dma_dram_addr & 0xffffffff;
  s->regs[R_DMA_FLASH_ADDR] += 4;
-    s->regs[R_DMA_DRAM_ADDR] += 4;
  dma_len -= 4;
  s->regs[R_DMA_LEN] = dma_len;
  s->regs[R_DMA_CHECKSUM] += data;
@@ -1107,6 +1139,9 @@ static void aspeed_smc_write(void *opaque, hwaddr addr, 
uint64_t data,
  } else if (aspeed_smc_has_dma(asc) && addr == R_DMA_LEN &&
 aspeed_smc_dma_granted(s)) {
  s->regs[addr] = DMA_LENGTH(value);
+    } else if (aspeed_smc_has_dma(asc) && aspeed_smc_has_dma64(asc) &&
+   addr == R_DMA_DRAM_ADDR_HIGH) {
+    s->regs[addr] = DMA_DRAM_ADDR_HIGH(value);
  } else {
  qemu_log_mask(LOG_UNIMP, "%s: not implemented: 0x%" HWADDR_PRIx "\n",
    __func__, addr)

Re: [RFC PATCH 03/10] target/ppc: Improve SPR indirect registers

2024-05-27 Thread Harsh Prateek Bora




Hi Nick,

On 5/26/24 17:56, Nicholas Piggin wrote:

SPRC/SPRD were recently added to all BookS CPUs supported, but
they are only tested on POWER9 and POWER10, so restrict them to
those CPUs.



Do you mean to restrict them to P9/P10 for both spapr and pnv, or just pnv?


SPR indirect scratch registers presently replicated per-CPU like
SMT SPRs, but the PnvCore is a better place for them since they
are restricted to P9/P10.

Also add SPR indirect read access to core thread state for POWER9
since skiboot accesses that when booting to check for big-core
mode.

Signed-off-by: Nicholas Piggin 
---
  include/hw/ppc/pnv_core.h |  1 +
  target/ppc/cpu.h  |  3 --
  target/ppc/cpu_init.c | 21 ++--
  target/ppc/misc_helper.c  | 67 ---
  4 files changed, 46 insertions(+), 46 deletions(-)

diff --git a/include/hw/ppc/pnv_core.h b/include/hw/ppc/pnv_core.h
index f434c71547..21297262c1 100644
--- a/include/hw/ppc/pnv_core.h
+++ b/include/hw/ppc/pnv_core.h
@@ -53,6 +53,7 @@ struct PnvCore {
  uint32_t hwid;
  uint64_t hrmor;
  
+target_ulong scratch[8]; /* SCRATCH registers */

  struct pnv_tod_tbst pnv_tod_tbst;
  
  PnvChip *chip;

diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index 1e86658da6..dac13d4dac 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -1253,9 +1253,6 @@ struct CPUArchState {
  ppc_slb_t slb[MAX_SLB_ENTRIES]; /* PowerPC 64 SLB area */
  struct CPUBreakpoint *ciabr_breakpoint;
  struct CPUWatchpoint *dawr0_watchpoint;
-
-/* POWER CPU regs/state */
-target_ulong scratch[8]; /* SCRATCH registers (shared across core) */
  #endif
  target_ulong sr[32];   /* segment registers */
  uint32_t nb_BATs;  /* number of BATs */
diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
index 01e358a4a5..ae483e20c4 100644
--- a/target/ppc/cpu_init.c
+++ b/target/ppc/cpu_init.c
@@ -5759,16 +5759,6 @@ static void register_power_common_book4_sprs(CPUPPCState 
*env)
   SPR_NOACCESS, SPR_NOACCESS,
   &spr_read_generic, &spr_core_write_generic,
   0x00000000);
-spr_register_hv(env, SPR_POWER_SPRC, "SPRC",
- SPR_NOACCESS, SPR_NOACCESS,
- SPR_NOACCESS, SPR_NOACCESS,
- &spr_read_generic, &spr_write_sprc,
- 0x00000000);
-spr_register_hv(env, SPR_POWER_SPRD, "SPRD",
- SPR_NOACCESS, SPR_NOACCESS,
- SPR_NOACCESS, SPR_NOACCESS,
- &spr_read_sprd, &spr_write_sprd,
- 0x00000000);
  #endif
  }
  
@@ -5781,6 +5771,17 @@ static void register_power9_book4_sprs(CPUPPCState *env)

   SPR_NOACCESS, SPR_NOACCESS,
   &spr_read_generic, &spr_write_generic,
   KVM_REG_PPC_WORT, 0);
+/* SPRC/SPRD exist in earlier CPUs but only tested on POWER9/10 */
+spr_register_hv(env, SPR_POWER_SPRC, "SPRC",
+ SPR_NOACCESS, SPR_NOACCESS,
+ SPR_NOACCESS, SPR_NOACCESS,
+ &spr_read_generic, &spr_write_sprc,
+ 0x00000000);
+spr_register_hv(env, SPR_POWER_SPRD, "SPRD",
+ SPR_NOACCESS, SPR_NOACCESS,
+ SPR_NOACCESS, SPR_NOACCESS,
+ &spr_read_sprd, &spr_write_sprd,
+ 0x00000000);
  #endif
  }
  
diff --git a/target/ppc/misc_helper.c b/target/ppc/misc_helper.c

index fa47be2298..46ba3a5584 100644
--- a/target/ppc/misc_helper.c
+++ b/target/ppc/misc_helper.c
@@ -26,6 +26,7 @@
  #include "qemu/main-loop.h"
  #include "mmu-book3s-v3.h"
  #include "hw/ppc/ppc.h"
+#include "hw/ppc/pnv_core.h"
  
  #include "helper_regs.h"
  
@@ -321,11 +322,25 @@ void helper_store_sprc(CPUPPCState *env, target_ulong val)
  
  target_ulong helper_load_sprd(CPUPPCState *env)

  {
+PowerPCCPU *cpu = env_archcpu(env);
+PnvCore *pc = pnv_cpu_state(cpu)->core;


We may want to avoid creating the local variable cpu here as well, as in the 
previous patches.


However, is this helper meant to be accessible for spapr as well ?


  target_ulong sprc = env->spr[SPR_POWER_SPRC];
  
-switch (sprc & 0x3c0) {

-case 0: /* SCRATCH0-7 */
-return env->scratch[(sprc >> 3) & 0x7];
+switch (sprc & 0x3e0) {
+case 0: /* SCRATCH0-3 */
+case 1: /* SCRATCH4-7 */
+return pc->scratch[(sprc >> 3) & 0x7];


If so, will pc be uninitialized in case of spapr ?


+case 0x1e0: /* core thread state */
+if (env->excp_model == POWERPC_EXCP_POWER9) {
+/*
+ * Only implement for POWER9 because skiboot uses it to check
+ * big-core mode. Other bits are unimplemented so we would
+ * prefer to get unimplemented message on POWER10 if it were
+ * used.
+ */
+return 0;
+}
+/* fallthru */
  default:
  qemu_log_mask(LOG_UNIMP, "mfSPRD: Unimplemented SPRC:0x"

Re: [PATCH 3/4] tests/vm: update centos.aarch64 image to 9

2024-05-27 Thread Thomas Huth


On 21/05/2024 14.53, Alex Bennée wrote:

As Centos Stream 8 goes out of support we need to update. To do this
powertools is replaced by crb and we don't over specify the python3 we
want.

Signed-off-by: Alex Bennée 
---
  tests/vm/centos.aarch64 | 10 +-
  1 file changed, 5 insertions(+), 5 deletions(-)


Reviewed-by: Thomas Huth

Re: [PATCH 2/4] docs/devel: update references to centos to later version

2024-05-27 Thread Thomas Huth


On 21/05/2024 14.53, Alex Bennée wrote:

 From the website:

"After May 31, 2024, CentOS Stream 8 will be archived and no further
updates will be provided."

We have updated a few bits but there are still references that need
fixing.

Signed-off-by: Alex Bennée 
---
  docs/devel/testing.rst | 8 
  1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/docs/devel/testing.rst b/docs/devel/testing.rst
index fa28e3ecb2..c312465fa7 100644
--- a/docs/devel/testing.rst
+++ b/docs/devel/testing.rst
@@ -387,9 +387,9 @@ make target):
  
  .. code::
  
-  make docker-test-build@centos8

+  make docker-test-build@centos9


Basically ACK, but I wonder whether we should maybe switch the documentation 
to another container here that does not have a version in its name, so we 
don't have to change it again and again each time a new version is released.

For example, what about using @fedora instead of @centos9 ?

 Thomas

Re: [PATCH v3 02/13] hw/riscv: add riscv-iommu-bits.h

2024-05-27 Thread Eric Cheng


On 5/24/2024 1:39 AM, Daniel Henrique Barboza wrote:
...

+/* 5.4 Features control register (32bits) */
+#define RISCV_IOMMU_REG_FCTL    0x0008


It looks like RISCV_IOMMU_FCTL_BE is not supported?
If so, shouldn't it be implemented as read-only, along with the other 2 bits?

IIUC,

diff --git a/hw/riscv/riscv-iommu.c b/hw/riscv/riscv-iommu.c
index 1b34d226f9..6a6bf1db98 100644
--- a/hw/riscv/riscv-iommu.c
+++ b/hw/riscv/riscv-iommu.c
@@ -2035,6 +2035,7 @@ static void riscv_iommu_realize(DeviceState *dev, Error 
**errp)

 /* Set power-on register state */
 stq_le_p(&s->regs_rw[RISCV_IOMMU_REG_CAP], s->cap);
 stq_le_p(&s->regs_rw[RISCV_IOMMU_REG_FCTL], 0);
+stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_FCTL], ~0);
 stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_DDTP],
 ~(RISCV_IOMMU_DDTP_PPN | RISCV_IOMMU_DDTP_MODE));
 stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQB],



+#define RISCV_IOMMU_FCTL_WSI    BIT(1)
+

...

Re: [PATCH v2 4/6] tests/qtest/migration-test: Quieten ppc64 QEMU warnigns

2024-05-27 Thread Thomas Huth




I just noticed that there is a typo in the subject:

s/warnigns/warnings/

On 28/05/2024 02.42, Nicholas Piggin wrote:

Reviewed-by: Thomas Huth 
Signed-off-by: Nicholas Piggin 
---
  tests/qtest/migration-test.c | 4 +++-
  1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
index 8247ed98f2..7d64696f7a 100644
--- a/tests/qtest/migration-test.c
+++ b/tests/qtest/migration-test.c
@@ -21,6 +21,7 @@
  #include "chardev/char.h"
  #include "crypto/tlscredspsk.h"
  #include "qapi/qmp/qlist.h"
+#include "ppc-util.h"
  
  #include "migration-helpers.h"

  #include "tests/migration/migration-test.h"
@@ -750,7 +751,8 @@ static int test_migrate_start(QTestState **from, QTestState 
**to,
"until'", end_address, start_address);
  machine_alias = "pseries";
  machine_opts = "vsmt=8";
-arch_opts = g_strdup("-nodefaults");
+arch_opts = g_strdup("-nodefaults "
+ "-machine " PSERIES_DEFAULT_CAPABILITIES);
  } else if (strcmp(arch, "aarch64") == 0) {
  memory_size = "150M";
  machine_alias = "virt";

Re: [PATCH v2 3/6] tests/qtest: Move common define from libqos-spapr.h to new ppc-util.h

2024-05-27 Thread Thomas Huth


On 28/05/2024 02.42, Nicholas Piggin wrote:

The spapr QEMU machine defaults is useful outside libqos, so create a
new header for ppc specific qtests and move it there.

Signed-off-by: Nicholas Piggin 
---
  tests/qtest/libqos/libqos-spapr.h |  7 ---
  tests/qtest/ppc-util.h| 19 +++
  tests/qtest/boot-serial-test.c|  2 +-
  tests/qtest/prom-env-test.c   |  2 +-
  tests/qtest/pxe-test.c|  2 +-
  5 files changed, 22 insertions(+), 10 deletions(-)
  create mode 100644 tests/qtest/ppc-util.h


Reviewed-by: Thomas Huth

Re: [PATCH v4 08/16] aspeed/smc: support 64 bits dma dram address

2024-05-27 Thread Cédric Le Goater


On 5/28/24 03:34, Jamin Lin wrote:

Hi Cedric,


On 5/27/24 10:02, Jamin Lin wrote:

AST2700 support the maximum dram size is 8GiB and has a "DMA DRAM

Side

Address High Part(0x7C)"
register to support 64 bits dma dram address.
Add helper routines functions to compute the dma dram address, new
features and update trace-event to support 64 bits dram address.

Signed-off-by: Troy Lee 
Signed-off-by: Jamin Lin 


I will move the addition of the "dram-base" property to another patch. See :

https://patchew.org/QEMU/20240527124315.35356-1-...@redhat.com/

(Please review)

Review done.
If I need to resend v5 patch series, I will remove "dram-base property" from 
this patch.


ok. Wait a bit before resending though. We are not done with v4 yet !

Thanks,

C.



Thanks for your help, Jamin





Else,

Reviewed-by: Cédric Le Goater 

Thanks,

C.



---
   hw/ssi/aspeed_smc.c | 52

+++--

   hw/ssi/trace-events |  2 +-
   include/hw/ssi/aspeed_smc.h |  1 +
   3 files changed, 46 insertions(+), 9 deletions(-)

diff --git a/hw/ssi/aspeed_smc.c b/hw/ssi/aspeed_smc.c index
ffb13a12e8..df0c63469c 100644
--- a/hw/ssi/aspeed_smc.c
+++ b/hw/ssi/aspeed_smc.c
@@ -132,6 +132,9 @@
   #define   FMC_WDT2_CTRL_BOOT_SOURCE  BIT(4) /* O: primary

1: alternate */

   #define   FMC_WDT2_CTRL_EN   BIT(0)

+/* DMA DRAM Side Address High Part (AST2700) */
+#define R_DMA_DRAM_ADDR_HIGH   (0x7c / 4)
+
   /* DMA Control/Status Register */
   #define R_DMA_CTRL(0x80 / 4)
   #define   DMA_CTRL_REQUEST  (1 << 31)
@@ -187,6 +190,7 @@
*   0x1FFFFFF: 32M bytes
*/
   #define DMA_DRAM_ADDR(asc, val)   ((val) & (asc)->dma_dram_mask)
+#define DMA_DRAM_ADDR_HIGH(val)   ((val) & 0xf)
   #define DMA_FLASH_ADDR(asc, val)  ((val) & (asc)->dma_flash_mask)
   #define DMA_LENGTH(val) ((val) & 0x01FFFFFF)

@@ -207,6 +211,7 @@ static const AspeedSegments

aspeed_2500_spi2_segments[];

   #define ASPEED_SMC_FEATURE_DMA   0x1
   #define ASPEED_SMC_FEATURE_DMA_GRANT 0x2
   #define ASPEED_SMC_FEATURE_WDT_CONTROL 0x4
+#define ASPEED_SMC_FEATURE_DMA_DRAM_ADDR_HIGH 0x08

   static inline bool aspeed_smc_has_dma(const AspeedSMCClass *asc)
   {
@@ -218,6 +223,11 @@ static inline bool

aspeed_smc_has_wdt_control(const AspeedSMCClass *asc)

   return !!(asc->features & ASPEED_SMC_FEATURE_WDT_CONTROL);
   }

+static inline bool aspeed_smc_has_dma64(const AspeedSMCClass *asc) {
+return !!(asc->features &

ASPEED_SMC_FEATURE_DMA_DRAM_ADDR_HIGH);

+}
+
   #define aspeed_smc_error(fmt, ...)

\

   qemu_log_mask(LOG_GUEST_ERROR, "%s: " fmt "\n", __func__, ##
__VA_ARGS__)

@@ -747,6 +757,8 @@ static uint64_t aspeed_smc_read(void *opaque,

hwaddr addr, unsigned int size)

   (aspeed_smc_has_dma(asc) && addr == R_DMA_CTRL) ||
   (aspeed_smc_has_dma(asc) && addr == R_DMA_FLASH_ADDR)

||

   (aspeed_smc_has_dma(asc) && addr == R_DMA_DRAM_ADDR)

||

+(aspeed_smc_has_dma(asc) && aspeed_smc_has_dma64(asc)

&&

+ addr == R_DMA_DRAM_ADDR_HIGH) ||
   (aspeed_smc_has_dma(asc) && addr == R_DMA_LEN) ||
   (aspeed_smc_has_dma(asc) && addr == R_DMA_CHECKSUM)

||

   (addr >= R_SEG_ADDR0 &&
@@ -847,6 +859,12 @@ static bool

aspeed_smc_inject_read_failure(AspeedSMCState *s)

   }
   }

+static uint64_t aspeed_smc_dma_dram_addr(AspeedSMCState *s) {
+return s->regs[R_DMA_DRAM_ADDR] |
+((uint64_t) s->regs[R_DMA_DRAM_ADDR_HIGH] << 32); }
+
   static uint32_t aspeed_smc_dma_len(AspeedSMCState *s)
   {
   AspeedSMCClass *asc = ASPEED_SMC_GET_CLASS(s); @@ -903,24
+921,34 @@ static void aspeed_smc_dma_checksum(AspeedSMCState *s)

   static void aspeed_smc_dma_rw(AspeedSMCState *s)
   {
+AspeedSMCClass *asc = ASPEED_SMC_GET_CLASS(s);
+uint64_t dma_dram_offset;
+uint64_t dma_dram_addr;
   MemTxResult result;
   uint32_t dma_len;
   uint32_t data;

   dma_len = aspeed_smc_dma_len(s);
+dma_dram_addr = aspeed_smc_dma_dram_addr(s);
+
+if (aspeed_smc_has_dma64(asc)) {
+dma_dram_offset = dma_dram_addr - s->dram_base;
+} else {
+dma_dram_offset = dma_dram_addr;
+}

   trace_aspeed_smc_dma_rw(s->regs[R_DMA_CTRL] &

DMA_CTRL_WRITE ?

   "write" : "read",
   s->regs[R_DMA_FLASH_ADDR],
-s->regs[R_DMA_DRAM_ADDR],
+dma_dram_offset,
   dma_len);
   while (dma_len) {
   if (s->regs[R_DMA_CTRL] & DMA_CTRL_WRITE) {
-data = address_space_ldl_le(&s->dram_as,

s->regs[R_DMA_DRAM_ADDR],

+data = address_space_ldl_le(&s->dram_as,

dma_dram_offset,



MEMTXATTRS_UNSPECIFIED, &result);

   if (result != MEMTX_OK) {
-aspeed_smc_error("DRAM read failed @%08x",
- s->regs[R_DMA_DRAM_ADDR]);
+aspeed_smc_error("DRAM read failed @

Re: [PATCH v2 1/6] tests/qtest/migration: Run test_mode_reboot outside gitlab CI

2024-05-27 Thread Thomas Huth


On 28/05/2024 02.42, Nicholas Piggin wrote:

As Fabiano points out, this test isn't flaky it just can't run under
gitlab CI since runners have a very small shm size.

Suggested-by: Fabiano Rosas 
Signed-off-by: Nicholas Piggin 
---
  tests/qtest/migration-test.c | 18 +-
  1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
index b7e3406471..04bf1c0092 100644
--- a/tests/qtest/migration-test.c
+++ b/tests/qtest/migration-test.c
@@ -706,6 +706,14 @@ static int test_migrate_start(QTestState **from, 
QTestState **to,
  g_test_skip("/dev/shm is not supported");
  return -1;
  }
+if (getenv("GITLAB_CI")) {
+/*
+ * Gitlab runners are limited to 64MB shm size. See:
+ * https://lore.kernel.org/all/87ttq5fvh7@suse.de/
+ */
+g_test_skip("/dev/shm is not supported in Gitlab CI environment");
+return -1;
+}
  }


Note that there is more than gitlab-CI: We can also run tests on Travis 
(well, hardly anybody besides me is still doing that) or in other constrained 
container environments ... so it might be better to check whether enough 
space is available than to tie this to an environment variable (or even 
better, move away from /dev/shm if possible like you did in your other 
patch - but I don't have a clue whether that's ok or not for these tests)


 Thomas

Re: [PATCH v4 05/16] aspeed/sdmc: Add AST2700 support

2024-05-27 Thread Cédric Le Goater


On 5/28/24 03:26, Jamin Lin wrote:

Hi Philippe, Cedric


On 27/5/24 13:18, Cédric Le Goater wrote:

On 5/27/24 12:24, Philippe Mathieu-Daudé wrote:

Hi Jamin,

On 27/5/24 10:02, Jamin Lin wrote:

The SDRAM memory controller(DRAMC) controls the access to external
DDR4 and DDR5 SDRAM and power up to DDR4 and DDR5 PHY.

The DRAM memory controller of AST2700 is not backward compatible to
previous chips such AST2600, AST2500 and AST2400.

Max memory is now 8GiB on the AST2700. Introduce new
aspeed_2700_sdmc and class with read/write operation and reset
handlers.

Define DRAMC necessary protected registers and unprotected registers
for AST2700 and increase the register set to 0x1000.

Add unlocked property to change controller protected status.

Signed-off-by: Troy Lee 
Signed-off-by: Jamin Lin 
Reviewed-by: Cédric Le Goater 
---
   hw/misc/aspeed_sdmc.c | 190
+-
   include/hw/misc/aspeed_sdmc.h |   5 +-
   2 files changed, 193 insertions(+), 2 deletions(-)




diff --git a/include/hw/misc/aspeed_sdmc.h
b/include/hw/misc/aspeed_sdmc.h index ec2d59a14f..61c979583a 100644
--- a/include/hw/misc/aspeed_sdmc.h
+++ b/include/hw/misc/aspeed_sdmc.h
@@ -17,6 +17,7 @@ OBJECT_DECLARE_TYPE(AspeedSDMCState,
AspeedSDMCClass, ASPEED_SDMC)
   #define TYPE_ASPEED_2400_SDMC TYPE_ASPEED_SDMC "-ast2400"
   #define TYPE_ASPEED_2500_SDMC TYPE_ASPEED_SDMC "-ast2500"
   #define TYPE_ASPEED_2600_SDMC TYPE_ASPEED_SDMC "-ast2600"
+#define TYPE_ASPEED_2700_SDMC TYPE_ASPEED_SDMC "-ast2700"
   /*
    * SDMC has 174 documented registers. In addition the u-boot
device tree @@ -29,7 +30,7 @@

OBJECT_DECLARE_TYPE(AspeedSDMCState,

AspeedSDMCClass, ASPEED_SDMC)
    * time, and the other is in the DDR-PHY IP which is used during
DDR-PHY
    * training.
    */
-#define ASPEED_SDMC_NR_REGS (0x500 >> 2)
+#define ASPEED_SDMC_NR_REGS (0x1000 >> 2)


This change breaks the migration stream.


Do you mean migration compat ? We never cared much about that for the
Aspeed machines.


So let's just remove the VMSTATE to reduce code burden?

Otherwise incrementing the vmstate.version is enough.

Regards,

Phil.

If you are both okay with it, I will remove it.
Do I need to create a new patch or just update in this patch?


I don't think it is necessary to do so now. Possibly, increase the
version number in the vmstate when resending a v5.

Also, all Aspeed models should be addressed and that's beyond the scope
of this series.


Thanks,

C.

Re: [RFC 0/6] scripts: Rewrite simpletrace printer in Rust

2024-05-27 Thread Zhao Liu

Hi Stefan,

On Mon, May 27, 2024 at 03:59:44PM -0400, Stefan Hajnoczi wrote:
> Date: Mon, 27 May 2024 15:59:44 -0400
> From: Stefan Hajnoczi 
> Subject: Re: [RFC 0/6] scripts: Rewrite simpletrace printer in Rust
> 
> On Mon, May 27, 2024 at 04:14:15PM +0800, Zhao Liu wrote:
> > Hi maintainers and list,
> > 
> > This RFC series attempts to re-implement simpletrace.py with Rust, which
> > is the 1st task of Paolo's GSoC 2024 proposal.
> > 
> > There are two motivations for this work:
> > 1. This is an open chance to discuss how to integrate Rust into QEMU.
> > 2. Rust delivers faster parsing.
> > 
> > 
> > Introduction
> > 
> > 
> > Code framework
> > --
> > 
> > I choose "cargo" to organize the code, because the current
> > implementation depends on external crates (Rust's library), such as
> > "backtrace" for getting frameinfo, "clap" for parsing the cli, "rex" for
> > regular matching, and so on. (Meson's support for external crates is
> > still incomplete. [2])
> > 
> > The simpletrace-rust created in this series is not yet integrated into
> > the QEMU compilation chain, so it can only be compiled independently, e.g.
> > under ./scripts/simpletrace/, compile it be:
> > 
> > cargo build --release
> 
> Please make sure it's built by .gitlab-ci.d/ so that the continuous
> integration system prevents bitrot. You can add a job that runs the
> cargo build.

Thanks! I'll do this.

> > 
> > The code tree for the entire simpletrace-rust is as follows:
> > 
> > $ script/simpletrace-rust .
> > .
> > ├── Cargo.toml
> > └── src
> > └── main.rs   // The simpletrace logic (similar to simpletrace.py).
> > └── trace.rs  // The Argument and Event abstraction (refer to
> >   // tracetool/__init__.py).
> > 
> > My question about meson v.s. cargo, I put it at the end of the cover
> > letter (the section "Opens on Rust Support").
> > 
> > The following two sections are lessons I've learned from this Rust
> > practice.
> > 
> > 
> > Performance
> > ---
> > 
> > I did the performance comparison using the rust-simpletrace prototype with
> > the python one:
> > 
> > * On the i7-10700 CPU @ 2.90GHz machine, parsing and outputting a 35M
> > trace binary file for 10 times on each item:
> > 
> >   AVE (ms)   Rust v.s. Python
> > Rust   (stdout)   12687.16114.46%
> > Python (stdout)   14521.85
> > 
> > Rust   (file)  1422.44264.99%
> > Python (file)  3769.37
> > 
> > - The "stdout" lines represent output to the screen.
> > - The "file" lines represent output to a file (via "> file").
> > 
> > This Rust version contains some optimizations (including print, regular
> > matching, etc.), but there should be plenty of room for optimization.
> > 
> > The current performance bottleneck is the reading binary trace file,
> > since I am parsing headers and event payloads one after the other, so
> > that the IO read overhead accounts for 33%, which can be further
> > optimized in the future.
> 
> Performance will become more important when large amounts of TCG data is
> captured, as described in the project idea:
> https://wiki.qemu.org/Internships/ProjectIdeas/TCGBinaryTracing
> 
> While I can't think of a time in the past where simpletrace.py's
> performance bothered me, improving performance is still welcome. Just
> don't spend too much time on performance (and making the code more
> complex) unless there is a real need.

Yes, I agree that it shouldn't be over-optimized.

The logic in the current Rust version is pretty much a carbon copy of
the Python version, without additional complex logic introduced, but the
2.6x improvement was obtained by optimizing IO:

* reading the event configuration file, where I called the buffered
  interface,
* and the output formatted trace log, which I output all via std_out.lock()
  followed by write_all().

So, just a simple tweak of the interfaces brings a lot of benefit. :-)

> > Security
> > 
> > 
> > This is an example.
> > 
> > Rust is very strict about type-checking, and it found timestamp reversal
> > issue in simpletrace-rust [3] (sorry, haven't gotten around to digging
> > deeper with more time)...in this RFC, I workingaround it by allowing
> > negative values. And the python version, just silently covered this
> > issue up.
> >
> > Opens on Rust Support
> > =
> > 
> > Meson v.s. Cargo
> > 
> > 
> > The first question is whether all Rust code (including under scripts)
> > must be integrated into meson?
> > 
> > If so, because of [2] then I have to discard the external crates and
> > build some more Rust wheels of my own to replace the previous external
> > crates.
> > 
> > For the main part of the QEMU code, I think the answer must be Yes, but
> > for the tools in the scripts directory, would it be possible to allow
> > the use of cargo to build small tools/program for flexibility and
> > migrate to meson later (as meson's

Re: [RFC PATCH 02/10] ppc/pnv: Move timebase state into PnvCore

2024-05-27 Thread Harsh Prateek Bora





On 5/26/24 17:56, Nicholas Piggin wrote:

The timebase state machine is per-core state and can be driven
by any thread in the core. It is currently implemented as a hack
where the state is in a CPU structure and only thread 0's state is
accessed by the chiptod, which limits programming the timebase
side of the state machine to thread 0 of a core.

Move the state out into PnvCore and share it among all threads.

Signed-off-by: Nicholas Piggin 
---
  include/hw/ppc/pnv_core.h| 17 
  target/ppc/cpu.h | 20 --
  hw/ppc/pnv_chiptod.c |  6 ++--
  target/ppc/timebase_helper.c | 53 
  4 files changed, 49 insertions(+), 47 deletions(-)

diff --git a/include/hw/ppc/pnv_core.h b/include/hw/ppc/pnv_core.h
index 30c1e5b1a3..f434c71547 100644
--- a/include/hw/ppc/pnv_core.h
+++ b/include/hw/ppc/pnv_core.h
@@ -25,6 +25,20 @@
  #include "hw/ppc/pnv.h"
  #include "qom/object.h"
  
+/* ChipTOD and TimeBase State Machine */

+struct pnv_tod_tbst {
+int tb_ready_for_tod; /* core TB ready to receive TOD from chiptod */
+int tod_sent_to_tb;   /* chiptod sent TOD to the core TB */
+
+/*
+ * "Timers" for async TBST events are simulated by mfTFAC because TFAC
+ * is polled for such events. These are just used to ensure firmware
+ * performs the polling at least a few times.
+ */
+int tb_state_timer;
+int tb_sync_pulse_timer;
+};
+
  #define TYPE_PNV_CORE "powernv-cpu-core"
  OBJECT_DECLARE_TYPE(PnvCore, PnvCoreClass,
  PNV_CORE)
@@ -38,6 +52,9 @@ struct PnvCore {
  uint32_t pir;
  uint32_t hwid;
  uint64_t hrmor;
+
+struct pnv_tod_tbst pnv_tod_tbst;
+


Now that it is part of struct PnvCore itself, we can drop the pnv_ prefix
and just call the member variable tod_tbst?


  PnvChip *chip;
  
  MemoryRegion xscom_regs;

diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index 2015e603d4..1e86658da6 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -1196,21 +1196,6 @@ DEXCR_ASPECT(SRAPD, 4)
  DEXCR_ASPECT(NPHIE, 5)
  DEXCR_ASPECT(PHIE, 6)
  
-/*/

-/* PowerNV ChipTOD and TimeBase State Machine */
-struct pnv_tod_tbst {
-int tb_ready_for_tod; /* core TB ready to receive TOD from chiptod */
-int tod_sent_to_tb;   /* chiptod sent TOD to the core TB */
-
-/*
- * "Timers" for async TBST events are simulated by mfTFAC because TFAC
- * is polled for such events. These are just used to ensure firmware
- * performs the polling at least a few times.
- */
-int tb_state_timer;
-int tb_sync_pulse_timer;
-};
-
  
/*/
  /* The whole PowerPC CPU context */
  
@@ -1292,11 +1277,6 @@ struct CPUArchState {

  #define TLB_NEED_LOCAL_FLUSH   0x1
  #define TLB_NEED_GLOBAL_FLUSH  0x2
  
-#if defined(TARGET_PPC64)

-/* PowerNV chiptod / timebase facility state. */
-/* Would be nice to put these into PnvCore */
-struct pnv_tod_tbst pnv_tod_tbst;
-#endif
  #endif
  
  /* Other registers */

diff --git a/hw/ppc/pnv_chiptod.c b/hw/ppc/pnv_chiptod.c
index 3831a72101..3eaddd66f0 100644
--- a/hw/ppc/pnv_chiptod.c
+++ b/hw/ppc/pnv_chiptod.c
@@ -365,7 +365,7 @@ static void pnv_chiptod_xscom_write(void *opaque, hwaddr 
addr,
" TOD_MOVE_TOD_TO_TB_REG with no slave target\n");
  } else {
  PowerPCCPU *cpu = chiptod->slave_pc_target->threads[0];
-CPUPPCState *env = &cpu->env;
+PnvCore *pc = pnv_cpu_state(cpu)->core;
  
  /*

   * Moving TOD to TB will set the TB of all threads in a
@@ -377,8 +377,8 @@ static void pnv_chiptod_xscom_write(void *opaque, hwaddr 
addr,
   * thread 0.
   */
  
-if (env->pnv_tod_tbst.tb_ready_for_tod) {

-env->pnv_tod_tbst.tod_sent_to_tb = 1;
+if (pc->pnv_tod_tbst.tb_ready_for_tod) {
+pc->pnv_tod_tbst.tod_sent_to_tb = 1;
  } else {
  qemu_log_mask(LOG_GUEST_ERROR, "pnv_chiptod: xscom write reg"
" TOD_MOVE_TOD_TO_TB_REG with TB not ready to"
diff --git a/target/ppc/timebase_helper.c b/target/ppc/timebase_helper.c
index 39d397416e..788c498d63 100644
--- a/target/ppc/timebase_helper.c
+++ b/target/ppc/timebase_helper.c
@@ -19,6 +19,7 @@
  #include "qemu/osdep.h"
  #include "cpu.h"
  #include "hw/ppc/ppc.h"
+#include "hw/ppc/pnv_core.h"
  #include "exec/helper-proto.h"
  #include "exec/exec-all.h"
  #include "qemu/log.h"
@@ -298,8 +299,17 @@ static void write_tfmr(CPUPPCState *env, target_ulong val)
  }
  }
  
+static struct pnv_tod_tbst *cpu_get_tbst(PowerPCCPU *cpu)

+{
+PnvCore *pc = pnv_cpu_state(cpu)->core;
+
+return &pc->pnv_tod_tbst;
+}
+
  static void tb_state_machine_step(CPUPPCState *env)
  {
+PowerPCCPU *cpu = env_arc

[PATCH 1/2] ppc/pnv: Fix loss of LPC SERIRQ interrupts

2024-05-27 Thread Nicholas Piggin

From: Glenn Miles 

The LPC HC irq status register bits are set when an LPC IRQSER input is
asserted. These irq status bits drive the PSI irq to the CPU interrupt
controller. The LPC HC irq status bits are cleared by software writing
to the register with 1's for the bits to clear.

Existing register write was clearing the irq status bits even when the
input was asserted, this results in interrupts being lost.

This fix changes the behavior to keep track of the device IRQ status
in internal state that is separate from the irq status register, and
only allowing the irq status bits to be cleared if the associated
input is not asserted.

[np: rebased before P9 PSI SERIRQ patch, adjust changelog/comments]
Signed-off-by: Glenn Miles 
Signed-off-by: Nicholas Piggin 
---
 include/hw/ppc/pnv_lpc.h |  3 +++
 hw/ppc/pnv_lpc.c | 22 +++---
 2 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/include/hw/ppc/pnv_lpc.h b/include/hw/ppc/pnv_lpc.h
index 5d22c45570..97c6872c3f 100644
--- a/include/hw/ppc/pnv_lpc.h
+++ b/include/hw/ppc/pnv_lpc.h
@@ -73,6 +73,9 @@ struct PnvLpcController {
 uint32_t opb_irq_pol;
 uint32_t opb_irq_input;
 
+/* LPC device IRQ state */
+uint32_t lpc_hc_irq_inputs;
+
 /* LPC HC registers */
 uint32_t lpc_hc_fw_seg_idsel;
 uint32_t lpc_hc_fw_rd_acc_size;
diff --git a/hw/ppc/pnv_lpc.c b/hw/ppc/pnv_lpc.c
index d692858bee..252690dcaa 100644
--- a/hw/ppc/pnv_lpc.c
+++ b/hw/ppc/pnv_lpc.c
@@ -505,7 +505,14 @@ static void lpc_hc_write(void *opaque, hwaddr addr, 
uint64_t val,
 pnv_lpc_eval_irqs(lpc);
 break;
 case LPC_HC_IRQSTAT:
-lpc->lpc_hc_irqstat &= ~val;
+/*
+ * This register is write-to-clear for the IRQSER (LPC device IRQ)
+ * status. However if the device has not de-asserted its interrupt
+ * that will just raise this IRQ status bit again. Model this by
+ * keeping track of the inputs and only clearing if the inputs are
+ * deasserted.
+ */
+lpc->lpc_hc_irqstat &= ~(val & ~lpc->lpc_hc_irq_inputs);
 pnv_lpc_eval_irqs(lpc);
 break;
 case LPC_HC_ERROR_ADDRESS:
@@ -803,11 +810,20 @@ static void pnv_lpc_isa_irq_handler_cpld(void *opaque, 
int n, int level)
 static void pnv_lpc_isa_irq_handler(void *opaque, int n, int level)
 {
 PnvLpcController *lpc = PNV_LPC(opaque);
+uint32_t irq_bit = LPC_HC_IRQ_SERIRQ0 >> n;
 
-/* The Naples HW latches the 1 levels, clearing is done by SW */
 if (level) {
-lpc->lpc_hc_irqstat |= LPC_HC_IRQ_SERIRQ0 >> n;
+lpc->lpc_hc_irq_inputs |= irq_bit;
+
+/*
+* The LPC HC in Naples and later latches LPC IRQ into a bit field in
+* the IRQSTAT register, and that drives the PSI IRQ to the IC.
+* Software clears this bit manually (see LPC_HC_IRQSTAT handler).
+ */
+lpc->lpc_hc_irqstat |= irq_bit;
 pnv_lpc_eval_irqs(lpc);
+} else {
+lpc->lpc_hc_irq_inputs &= ~irq_bit;
 }
 }
 
-- 
2.43.0

[PATCH 2/2] ppc/pnv: Implement POWER9 LPC PSI serirq outputs and auto-clear function

2024-05-27 Thread Nicholas Piggin

The POWER8 LPC ISA device irqs all get combined and reported to the line
connected to the PSI LPCHC irq. POWER9 changed this so only internal LPC
host controller irqs use that line, and the device irqs get routed to
4 new lines connected to PSI SERIRQ0-3.

POWER9 also introduced a new feature that automatically clears the irq
status in the LPC host controller when EOI'ed, so software does not have
to.

The powernv OPAL (skiboot) firmware managed to work because the LPCHC
irq handler scanned all LPC irqs and handled those including clearing
status even on POWER9 systems. So LPC irqs worked despite OPAL thinking
it was running in POWER9 mode. After this change, UART interrupts show
up on serirq1 which is where OPAL routes them to:

 cat /proc/interrupts
 ...
 20:  0  XIVE-IRQ 1048563 Level opal-psi#0:lpchc
 ...
 25: 34  XIVE-IRQ 1048568 Level opal-psi#0:lpc_serirq_mux1

Whereas they previously turned up on lpchc.

Signed-off-by: Nicholas Piggin 
---
Since v1:
- Fix and test power8
- Rebase onto Glenn's fix
- Move irq_to_serirq_route from global into PnvLpcController
- Don't have SERIRQ irqs latch the OPB irq status register, docs don't
  suggest they do and skiboot does not clear that bit for SERIRQ path.
- Have the SERIRQ path use the LPCHC IRQ mask (missed in previous
  patch).

 include/hw/ppc/pnv_lpc.h |  14 -
 hw/ppc/pnv.c |  36 +--
 hw/ppc/pnv_lpc.c | 128 ---
 3 files changed, 148 insertions(+), 30 deletions(-)

diff --git a/include/hw/ppc/pnv_lpc.h b/include/hw/ppc/pnv_lpc.h
index 97c6872c3f..e0fd5e4130 100644
--- a/include/hw/ppc/pnv_lpc.h
+++ b/include/hw/ppc/pnv_lpc.h
@@ -23,6 +23,7 @@
 #include "exec/memory.h"
 #include "hw/ppc/pnv.h"
 #include "hw/qdev-core.h"
+#include "hw/isa/isa.h" /* For ISA_NUM_IRQS */
 
 #define TYPE_PNV_LPC "pnv-lpc"
 typedef struct PnvLpcClass PnvLpcClass;
@@ -87,8 +88,19 @@ struct PnvLpcController {
 /* XSCOM registers */
 MemoryRegion xscom_regs;
 
+/*
+ * In P8, ISA irqs are combined with internal sources to drive the
+ * LPCHC interrupt output. P9 ISA irqs raise one of 4 lines that
+ * drive PSI SERIRQ irqs, routing according to OPB routing registers.
+ */
+bool psi_has_serirq;
+
 /* PSI to generate interrupts */
-qemu_irq psi_irq;
+qemu_irq psi_irq_lpchc;
+
+/* P9 serirq lines and irq routing table */
+qemu_irq psi_irq_serirq[4];
+int irq_to_serirq_route[ISA_NUM_IRQS];
 };
 
 struct PnvLpcClass {
diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
index 6e3a5ccdec..f6c3e91b3a 100644
--- a/hw/ppc/pnv.c
+++ b/hw/ppc/pnv.c
@@ -728,7 +728,8 @@ static ISABus *pnv_chip_power8_isa_create(PnvChip *chip, 
Error **errp)
 Pnv8Chip *chip8 = PNV8_CHIP(chip);
 qemu_irq irq = qdev_get_gpio_in(DEVICE(&chip8->psi), PSIHB_IRQ_EXTERNAL);
 
-qdev_connect_gpio_out(DEVICE(&chip8->lpc), 0, irq);
+qdev_connect_gpio_out_named(DEVICE(&chip8->lpc), "LPCHC", 0, irq);
+
 return pnv_lpc_isa_create(&chip8->lpc, true, errp);
 }
 
@@ -737,25 +738,48 @@ static ISABus *pnv_chip_power8nvl_isa_create(PnvChip 
*chip, Error **errp)
 Pnv8Chip *chip8 = PNV8_CHIP(chip);
 qemu_irq irq = qdev_get_gpio_in(DEVICE(&chip8->psi), PSIHB_IRQ_LPC_I2C);
 
-qdev_connect_gpio_out(DEVICE(&chip8->lpc), 0, irq);
+qdev_connect_gpio_out_named(DEVICE(&chip8->lpc), "LPCHC", 0, irq);
+
 return pnv_lpc_isa_create(&chip8->lpc, false, errp);
 }
 
 static ISABus *pnv_chip_power9_isa_create(PnvChip *chip, Error **errp)
 {
 Pnv9Chip *chip9 = PNV9_CHIP(chip);
-qemu_irq irq = qdev_get_gpio_in(DEVICE(&chip9->psi), PSIHB9_IRQ_LPCHC);
+qemu_irq irq;
+
+irq = qdev_get_gpio_in(DEVICE(&chip9->psi), PSIHB9_IRQ_LPCHC);
+qdev_connect_gpio_out_named(DEVICE(&chip9->lpc), "LPCHC", 0, irq);
+
+irq = qdev_get_gpio_in(DEVICE(&chip9->psi), PSIHB9_IRQ_LPC_SIRQ0);
+qdev_connect_gpio_out_named(DEVICE(&chip9->lpc), "SERIRQ", 0, irq);
+irq = qdev_get_gpio_in(DEVICE(&chip9->psi), PSIHB9_IRQ_LPC_SIRQ1);
+qdev_connect_gpio_out_named(DEVICE(&chip9->lpc), "SERIRQ", 1, irq);
+irq = qdev_get_gpio_in(DEVICE(&chip9->psi), PSIHB9_IRQ_LPC_SIRQ2);
+qdev_connect_gpio_out_named(DEVICE(&chip9->lpc), "SERIRQ", 2, irq);
+irq = qdev_get_gpio_in(DEVICE(&chip9->psi), PSIHB9_IRQ_LPC_SIRQ3);
+qdev_connect_gpio_out_named(DEVICE(&chip9->lpc), "SERIRQ", 3, irq);
 
-qdev_connect_gpio_out(DEVICE(&chip9->lpc), 0, irq);
 return pnv_lpc_isa_create(&chip9->lpc, false, errp);
 }
 
 static ISABus *pnv_chip_power10_isa_create(PnvChip *chip, Error **errp)
 {
 Pnv10Chip *chip10 = PNV10_CHIP(chip);
-qemu_irq irq = qdev_get_gpio_in(DEVICE(&chip10->psi), PSIHB9_IRQ_LPCHC);
+qemu_irq irq;
+
+irq = qdev_get_gpio_in(DEVICE(&chip10->psi), PSIHB9_IRQ_LPCHC);
+qdev_connect_gpio_out_named(DEVICE(&chip10->lpc), "LPCHC", 0, irq);
+
+irq = qdev_get_gpio_in(DEVICE(&chip10->psi), PSIHB9_IRQ_LPC_SIRQ0);
+qdev_connect_gpio_out_named(DEVICE(&chip10->lpc),

Re: [RFC PATCH 01/10] ppc/pnv: Add pointer from PnvCPUState to PnvCore

2024-05-27 Thread Harsh Prateek Bora





On 5/26/24 17:56, Nicholas Piggin wrote:

This helps move core state from CPU to core structures.

Signed-off-by: Nicholas Piggin 
---
  include/hw/ppc/pnv_core.h | 1 +
  hw/ppc/pnv_core.c | 3 +++
  2 files changed, 4 insertions(+)

diff --git a/include/hw/ppc/pnv_core.h b/include/hw/ppc/pnv_core.h
index c6d62fd145..30c1e5b1a3 100644
--- a/include/hw/ppc/pnv_core.h
+++ b/include/hw/ppc/pnv_core.h
@@ -54,6 +54,7 @@ struct PnvCoreClass {
  #define PNV_CORE_TYPE_NAME(cpu_model) cpu_model PNV_CORE_TYPE_SUFFIX
  
  typedef struct PnvCPUState {

+PnvCore *core;


Naming it *pc might be more intuitive given most of its usage,
although I see a few usages as "pnv_core" as well.


Reviewed-by: Harsh Prateek Bora 


  Object *intc;
  } PnvCPUState;
  
diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c

index f40ab721d6..7b0ea7812b 100644
--- a/hw/ppc/pnv_core.c
+++ b/hw/ppc/pnv_core.c
@@ -225,6 +225,7 @@ static const MemoryRegionOps pnv_core_power10_xscom_ops = {
  static void pnv_core_cpu_realize(PnvCore *pc, PowerPCCPU *cpu, Error **errp,
   int thread_index)
  {
+PnvCPUState *pnv_cpu = pnv_cpu_state(cpu);
  CPUPPCState *env = &cpu->env;
  int core_hwid;
  ppc_spr_t *pir = &env->spr_cb[SPR_PIR];
@@ -232,6 +233,8 @@ static void pnv_core_cpu_realize(PnvCore *pc, PowerPCCPU 
*cpu, Error **errp,
  Error *local_err = NULL;
  PnvChipClass *pcc = PNV_CHIP_GET_CLASS(pc->chip);
  
+pnv_cpu->core = pc;

+
  if (!qdev_realize(DEVICE(cpu), NULL, errp)) {
  return;
  }

[PATCH 0/2] ppc/pnv: LPC interrupt fixes

2024-05-27 Thread Nicholas Piggin

Here is v2 of the POWER9 PSI serirq patch with changes suggested by
Cedric and some other things. But also in front of that we have a fix
from Glenn for a lost interrupt problem.

I rebased Glenn's patch and also changed some comments and changelog
a bit so any bugs or silly comments are probably my fault, but debug
and fix is credit to him.

Thanks,
Nick

Glenn Miles (1):
  ppc/pnv: Fix loss of LPC SERIRQ interrupts

Nicholas Piggin (1):
  ppc/pnv: Implement POWER9 LPC PSI serirq outputs and auto-clear
function

 include/hw/ppc/pnv_lpc.h |  17 -
 hw/ppc/pnv.c |  36 --
 hw/ppc/pnv_lpc.c | 150 ---
 3 files changed, 170 insertions(+), 33 deletions(-)

-- 
2.43.0

[PULL 2/2] hw/ufs: Add support MCQ of UFSHCI 4.0

2024-05-27 Thread Jeuk Kim

From: Minwoo Im 

This patch adds support for MCQ defined in UFSHCI 4.0.  This patch
utilized the legacy I/O codes as much as possible to support MCQ.

MCQ operation & runtime register is placed at 0x1000 offset of UFSHCI
register statically with no spare space among four registers (48B):

UfsMcqSqReg, UfsMcqSqIntReg, UfsMcqCqReg, UfsMcqCqIntReg

The maximum number of queues is 32 as per spec, and the default
MAC (Multiple Active Commands) is 32 in the device.

Example:
-device ufs,serial=foo,id=ufs0,mcq=true,mcq-maxq=8

Signed-off-by: Minwoo Im 
Reviewed-by: Jeuk Kim 
Message-Id: <20240528023106.856777-3-minwoo...@samsung.com>
Signed-off-by: Jeuk Kim 
---
 hw/ufs/trace-events |  17 ++
 hw/ufs/ufs.c| 475 ++--
 hw/ufs/ufs.h|  98 -
 include/block/ufs.h |  23 ++-
 4 files changed, 593 insertions(+), 20 deletions(-)

diff --git a/hw/ufs/trace-events b/hw/ufs/trace-events
index 665e1a942b..dda7f8a2e5 100644
--- a/hw/ufs/trace-events
+++ b/hw/ufs/trace-events
@@ -11,13 +11,18 @@ ufs_exec_nop_cmd(uint32_t slot) "UTRLDBR slot %"PRIu32""
 ufs_exec_scsi_cmd(uint32_t slot, uint8_t lun, uint8_t opcode) "slot %"PRIu32", 
lun 0x%"PRIx8", opcode 0x%"PRIx8""
 ufs_exec_query_cmd(uint32_t slot, uint8_t opcode) "slot %"PRIu32", opcode 
0x%"PRIx8""
 ufs_process_uiccmd(uint32_t uiccmd, uint32_t ucmdarg1, uint32_t ucmdarg2, 
uint32_t ucmdarg3) "uiccmd 0x%"PRIx32", ucmdarg1 0x%"PRIx32", ucmdarg2 
0x%"PRIx32", ucmdarg3 0x%"PRIx32""
+ufs_mcq_complete_req(uint8_t qid) "sqid %"PRIu8""
+ufs_mcq_create_sq(uint8_t sqid, uint8_t cqid, uint64_t addr, uint16_t size) 
"mcq create sq sqid %"PRIu8", cqid %"PRIu8", addr 0x%"PRIx64", size %"PRIu16""
+ufs_mcq_create_cq(uint8_t cqid, uint64_t addr, uint16_t size) "mcq create cq 
cqid %"PRIu8", addr 0x%"PRIx64", size %"PRIu16""
 
 # error condition
 ufs_err_dma_read_utrd(uint32_t slot, uint64_t addr) "failed to read utrd. 
UTRLDBR slot %"PRIu32", UTRD dma addr %"PRIu64""
 ufs_err_dma_read_req_upiu(uint32_t slot, uint64_t addr) "failed to read req 
upiu. UTRLDBR slot %"PRIu32", request upiu addr %"PRIu64""
 ufs_err_dma_read_prdt(uint32_t slot, uint64_t addr) "failed to read prdt. 
UTRLDBR slot %"PRIu32", prdt addr %"PRIu64""
+ufs_err_dma_read_sq(uint8_t qid, uint64_t addr) "failed to read sqe. SQ qid 
%"PRIu8", sqe addr %"PRIu64""
 ufs_err_dma_write_utrd(uint32_t slot, uint64_t addr) "failed to write utrd. 
UTRLDBR slot %"PRIu32", UTRD dma addr %"PRIu64""
 ufs_err_dma_write_rsp_upiu(uint32_t slot, uint64_t addr) "failed to write rsp 
upiu. UTRLDBR slot %"PRIu32", response upiu addr %"PRIu64""
+ufs_err_dma_write_cq(uint32_t cqid, uint64_t addr) "failed to write cq entry. 
cqid %"PRIu8", hwaddr %"PRIu64""
 ufs_err_utrl_slot_error(uint32_t slot) "UTRLDBR slot %"PRIu32" is in error"
 ufs_err_utrl_slot_busy(uint32_t slot) "UTRLDBR slot %"PRIu32" is busy"
 ufs_err_unsupport_register_offset(uint32_t offset) "Register offset 
0x%"PRIx32" is not yet supported"
@@ -31,3 +36,15 @@ ufs_err_query_invalid_opcode(uint8_t opcode) "query request 
has invalid opcode.
 ufs_err_query_invalid_idn(uint8_t opcode, uint8_t idn) "query request has 
invalid idn. opcode: 0x%"PRIx8", idn 0x%"PRIx8""
 ufs_err_query_invalid_index(uint8_t opcode, uint8_t index) "query request has 
invalid index. opcode: 0x%"PRIx8", index 0x%"PRIx8""
 ufs_err_invalid_trans_code(uint32_t slot, uint8_t trans_code) "request upiu 
has invalid transaction code. slot: %"PRIu32", trans_code: 0x%"PRIx8""
+ufs_err_mcq_db_wr_invalid_sqid(uint8_t qid) "invalid mcq sqid %"PRIu8""
+ufs_err_mcq_db_wr_invalid_db(uint8_t qid, uint32_t db) "invalid mcq doorbell 
sqid %"PRIu8", db %"PRIu32""
+ufs_err_mcq_create_sq_invalid_sqid(uint8_t qid) "invalid mcq sqid %"PRIu8""
+ufs_err_mcq_create_sq_invalid_cqid(uint8_t qid) "invalid mcq cqid %"PRIu8""
+ufs_err_mcq_create_sq_already_exists(uint8_t qid) "mcq sqid %"PRIu8 "already 
exists"
+ufs_err_mcq_delete_sq_invalid_sqid(uint8_t qid) "invalid mcq sqid %"PRIu8""
+ufs_err_mcq_delete_sq_not_exists(uint8_t qid) "mcq sqid %"PRIu8 "not exists"
+ufs_err_mcq_create_cq_invalid_cqid(uint8_t qid) "invalid mcq cqid %"PRIu8""
+ufs_err_mcq_create_cq_already_exists(uint8_t qid) "mcq cqid %"PRIu8 "already 
exists"
+ufs_err_mcq_delete_cq_invalid_cqid(uint8_t qid) "invalid mcq cqid %"PRIu8""
+ufs_err_mcq_delete_cq_not_exists(uint8_t qid) "mcq cqid %"PRIu8 "not exists"
+ufs_err_mcq_delete_cq_sq_not_deleted(uint8_t sqid, uint8_t cqid) "mcq sq 
%"PRIu8" still has cq %"PRIu8""
diff --git a/hw/ufs/ufs.c b/hw/ufs/ufs.c
index bac78a32bb..71a88d221c 100644
--- a/hw/ufs/ufs.c
+++ b/hw/ufs/ufs.c
@@ -9,7 +9,7 @@
  */
 
 /**
- * Reference Specs: https://www.jedec.org/, 3.1
+ * Reference Specs: https://www.jedec.org/, 4.0
  *
  * Usage
  * -
@@ -28,10 +28,45 @@
 #include "trace.h"
 #include "ufs.h"
 
-/* The QEMU-UFS device follows spec version 3.1 */
-#define UFS_SPEC_VER 0x0310
+/* The QEMU-UFS device follows spec version 4.0 */
+#define UFS_SPEC_VER 0x0400
 #define U

[PULL 1/2] hw/ufs: Update MCQ-related fields to block/ufs.h

2024-05-27 Thread Jeuk Kim

From: Minwoo Im 

This patch is a prep patch for the following MCQ support patch for
hw/ufs.  This patch updated minimal mandatory fields to support MCQ
based on UFSHCI 4.0.

Signed-off-by: Minwoo Im 
Reviewed-by: Jeuk Kim 
Message-Id: <20240528023106.856777-2-minwoo...@samsung.com>
Signed-off-by: Jeuk Kim 
---
 include/block/ufs.h | 108 +++-
 1 file changed, 106 insertions(+), 2 deletions(-)

diff --git a/include/block/ufs.h b/include/block/ufs.h
index d61598b8f3..3513b6e772 100644
--- a/include/block/ufs.h
+++ b/include/block/ufs.h
@@ -7,7 +7,7 @@
 
 typedef struct QEMU_PACKED UfsReg {
 uint32_t cap;
-uint32_t rsvd0;
+uint32_t mcqcap;
 uint32_t ver;
 uint32_t rsvd1;
 uint32_t hcpid;
@@ -46,6 +46,13 @@ typedef struct QEMU_PACKED UfsReg {
 uint32_t rsvd7[4];
 uint32_t rsvd8[16];
 uint32_t ccap;
+uint32_t rsvd9[127];
+uint32_t config;
+uint32_t rsvd10[3];
+uint32_t rsvd11[28];
+uint32_t mcqconfig;
+uint32_t esilba;
+uint32_t esiuba;
 } UfsReg;
 
 REG32(CAP, offsetof(UfsReg, cap))
@@ -57,6 +64,15 @@ REG32(CAP, offsetof(UfsReg, cap))
 FIELD(CAP, OODDS, 25, 1)
 FIELD(CAP, UICDMETMS, 26, 1)
 FIELD(CAP, CS, 28, 1)
+FIELD(CAP, LSDBS, 29, 1)
+FIELD(CAP, MCQS, 30, 1)
+REG32(MCQCAP, offsetof(UfsReg, mcqcap))
+FIELD(MCQCAP, MAXQ, 0, 8)
+FIELD(MCQCAP, SP, 8, 1)
+FIELD(MCQCAP, RRP, 9, 1)
+FIELD(MCQCAP, EIS, 10, 1)
+FIELD(MCQCAP, QCFGPTR, 16, 8)
+FIELD(MCQCAP, MIAG, 24, 8)
 REG32(VER, offsetof(UfsReg, ver))
 REG32(HCPID, offsetof(UfsReg, hcpid))
 REG32(HCMID, offsetof(UfsReg, hcmid))
@@ -78,6 +94,7 @@ REG32(IS, offsetof(UfsReg, is))
 FIELD(IS, HCFES, 16, 1)
 FIELD(IS, SBFES, 17, 1)
 FIELD(IS, CEFES, 18, 1)
+FIELD(IS, CQES, 20, 1)
 REG32(IE, offsetof(UfsReg, ie))
 FIELD(IE, UTRCE, 0, 1)
 FIELD(IE, UDEPRIE, 1, 1)
@@ -95,6 +112,7 @@ REG32(IE, offsetof(UfsReg, ie))
 FIELD(IE, HCFEE, 16, 1)
 FIELD(IE, SBFEE, 17, 1)
 FIELD(IE, CEFEE, 18, 1)
+FIELD(IE, CQEE, 20, 1)
 REG32(HCS, offsetof(UfsReg, hcs))
 FIELD(HCS, DP, 0, 1)
 FIELD(HCS, UTRLRDY, 1, 1)
@@ -128,6 +146,10 @@ REG32(UCMDARG1, offsetof(UfsReg, ucmdarg1))
 REG32(UCMDARG2, offsetof(UfsReg, ucmdarg2))
 REG32(UCMDARG3, offsetof(UfsReg, ucmdarg3))
 REG32(CCAP, offsetof(UfsReg, ccap))
+REG32(CONFIG, offsetof(UfsReg, config))
+FIELD(CONFIG, QT, 0, 1)
+REG32(MCQCONFIG, offsetof(UfsReg, mcqconfig))
+FIELD(MCQCONFIG, MAC, 8, 8)
 
 #define UFS_INTR_MASK\
 ((1 << R_IS_CEFES_SHIFT) | (1 << R_IS_SBFES_SHIFT) | \
@@ -157,6 +179,69 @@ REG32(CCAP, offsetof(UfsReg, ccap))
 ((be32_to_cpu(dword2) >> UFS_UPIU_HEADER_DATA_SEGMENT_LENGTH_SHIFT) & \
  UFS_UPIU_HEADER_DATA_SEGMENT_LENGTH_MASK)
 
+typedef struct QEMU_PACKED UfsMcqReg {
+uint32_t sqattr;
+uint32_t sqlba;
+uint32_t squba;
+uint32_t sqdao;
+uint32_t sqisao;
+uint32_t sqcfg;
+uint32_t rsvd0[2];
+uint32_t cqattr;
+uint32_t cqlba;
+uint32_t cquba;
+uint32_t cqdao;
+uint32_t cqisao;
+uint32_t cqcfg;
+uint32_t rsvd1[2];
+} UfsMcqReg;
+
+REG32(SQATTR, offsetof(UfsMcqReg, sqattr))
+FIELD(SQATTR, SIZE, 0, 16)
+FIELD(SQATTR, CQID, 16, 8)
+FIELD(SQATTR, SQPL, 28, 3)
+FIELD(SQATTR, SQEN, 31, 1)
+REG32(SQLBA, offsetof(UfsMcqReg, sqlba))
+REG32(SQUBA, offsetof(UfsMcqReg, squba))
+REG32(SQDAO, offsetof(UfsMcqReg, sqdao))
+REG32(SQISAO, offsetof(UfsMcqReg, sqisao))
+REG32(SQCFG, offsetof(UfsMcqReg, sqcfg))
+REG32(CQATTR, offsetof(UfsMcqReg, cqattr))
+FIELD(CQATTR, SIZE, 0, 16)
+FIELD(CQATTR, CQEN, 31, 1)
+REG32(CQLBA, offsetof(UfsMcqReg, cqlba))
+REG32(CQUBA, offsetof(UfsMcqReg, cquba))
+REG32(CQDAO, offsetof(UfsMcqReg, cqdao))
+REG32(CQISAO, offsetof(UfsMcqReg, cqisao))
+REG32(CQCFG, offsetof(UfsMcqReg, cqcfg))
+
+typedef struct QEMU_PACKED UfsMcqSqReg {
+uint32_t hp;
+uint32_t tp;
+uint32_t rtc;
+uint32_t cti;
+uint32_t rts;
+} UfsMcqSqReg;
+
+typedef struct QEMU_PACKED UfsMcqCqReg {
+uint32_t hp;
+uint32_t tp;
+} UfsMcqCqReg;
+
+typedef struct QEMU_PACKED UfsMcqSqIntReg {
+uint32_t is;
+uint32_t ie;
+} UfsMcqSqIntReg;
+
+typedef struct QEMU_PACKED UfsMcqCqIntReg {
+uint32_t is;
+uint32_t ie;
+uint32_t iacr;
+} UfsMcqCqIntReg;
+
+REG32(CQIS, offsetof(UfsMcqCqIntReg, is))
+FIELD(CQIS, TEPS, 0, 1)
+
 typedef struct QEMU_PACKED DeviceDescriptor {
 uint8_t length;
 uint8_t descriptor_idn;
@@ -1064,9 +1149,26 @@ typedef struct QEMU_PACKED UtpUpiuRsp {
 };
 } UtpUpiuRsp;
 
+/*
+ * MCQ Completion Queue Entry
+ */
+typedef UtpTransferReqDesc UfsSqEntry;
+typedef struct QEMU_PACKED UfsCqEntry {
+uint64_t utp_addr;
+uint16_t resp_len;
+uint16_t resp_off;
+uint16_t prdt_len;
+uint16_t prdt_off;
+uint8_t status;
+uint8_t error;
+uint16_t rsvd1;
+uint32_t rsvd2[3];
+} UfsCqEntry;
+
 static inline void _ufs_check_size(void)
 {
-QE

[PULL 0/2] ufs queue

2024-05-27 Thread Jeuk Kim

From: Jeuk Kim 

The following changes since commit ad10b4badc1dd5b28305f9b9f1168cf0aa3ae946:

  Merge tag 'pull-error-2024-05-27' of https://repo.or.cz/qemu/armbru into 
staging (2024-05-27 06:40:42 -0700)

are available in the Git repository at:

  https://gitlab.com/jeuk20.kim/qemu.git tags/pull-ufs-20240528

for you to fetch changes up to 71a82d3f0555e65c98df129ce0e38b2aa5681ec0:

  hw/ufs: Add support MCQ of UFSHCI 4.0 (2024-05-28 14:42:32 +0900)


Add support MCQ of UFSHCI 4.0


Minwoo Im (2):
  hw/ufs: Update MCQ-related fields to block/ufs.h
  hw/ufs: Add support MCQ of UFSHCI 4.0

 hw/ufs/trace-events |  17 ++
 hw/ufs/ufs.c| 475 ++--
 hw/ufs/ufs.h|  98 ++-
 include/block/ufs.h | 131 ++-
 4 files changed, 699 insertions(+), 22 deletions(-)

Re: [PATCH v3 05/11] ppc/pnv: Add a Power11 Pnv11Chip, and a Power11 Machine

2024-05-27 Thread Aditya Gupta

On Mon, May 27, 2024 at 05:15:05PM GMT, Cédric Le Goater wrote:
> On 5/27/24 09:10, Aditya Gupta wrote:
> > Power11 core is same as Power10, use the existing functionalities to
> > introduce a Power11 chip and machine, with Power10 chip as parent of
> > Power11 chip, thus going through similar class_init paths
> > 
> > Cc: Cédric Le Goater 
> > Cc: Frédéric Barrat 
> > Cc: Mahesh J Salgaonkar 
> > Cc: Madhavan Srinivasan 
> > Cc: Nicholas Piggin 
> > Signed-off-by: Aditya Gupta 
> > ---
> >   docs/system/ppc/powernv.rst |   9 +--
> >   hw/ppc/pnv.c| 119 ++--
> >   hw/ppc/pnv_core.c   |  11 
> >   include/hw/ppc/pnv.h|   5 ++
> >   include/hw/ppc/pnv_chip.h   |   7 +++
> >   include/hw/ppc/pnv_core.h   |   1 +
> >   6 files changed, 144 insertions(+), 8 deletions(-)
> > 
> > diff --git a/docs/system/ppc/powernv.rst b/docs/system/ppc/powernv.rst
> > index 09f39658587d..65606aa767aa 100644
> > --- a/docs/system/ppc/powernv.rst
> > +++ b/docs/system/ppc/powernv.rst
> > @@ -1,5 +1,5 @@
> > -PowerNV family boards (``powernv8``, ``powernv9``, ``powernv10``)
> > -==
> > +PowerNV family boards (``powernv8``, ``powernv9``, ``powernv10``, 
> > ``powernv11``)
> > +
> >   PowerNV (as Non-Virtualized) is the "bare metal" platform using the
> >   OPAL firmware. It runs Linux on IBM and OpenPOWER systems and it can
> > @@ -15,11 +15,12 @@ beyond the scope of what QEMU addresses today.
> >   Supported devices
> >   -
> > - * Multi processor support for POWER8, POWER8NVL and POWER9.
> > + * Multi processor support for POWER8, POWER8NVL, POWER9, Power10 and 
> > Power11.
> >* XSCOM, serial communication sideband bus to configure chiplets.
> >* Simple LPC Controller.
> >* Processor Service Interface (PSI) Controller.
> > - * Interrupt Controller, XICS (POWER8) and XIVE (POWER9) and XIVE2 
> > (Power10).
> > + * Interrupt Controller, XICS (POWER8) and XIVE (POWER9) and XIVE2 
> > (Power10 &
> > +   Power11).
> >* POWER8 PHB3 PCIe Host bridge and POWER9 PHB4 PCIe Host bridge.
> >* Simple OCC is an on-chip micro-controller used for power management 
> > tasks.
> >* iBT device to handle BMC communication, with the internal BMC simulator
> > diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
> > index 6e3a5ccdec76..939163f91784 100644
> > --- a/hw/ppc/pnv.c
> > +++ b/hw/ppc/pnv.c
> > @@ -456,6 +456,33 @@ static void pnv_chip_power10_dt_populate(PnvChip 
> > *chip, void *fdt)
> >   pnv_dt_lpc(chip, fdt, 0, PNV10_LPCM_BASE(chip), PNV10_LPCM_SIZE);
> >   }
> > +static void pnv_chip_power11_dt_populate(PnvChip *chip, void *fdt)
> > +{
> > +static const char compat[] = "ibm,power11-xscom\0ibm,xscom";
> > +int i;
> > +
> > +pnv_dt_xscom(chip, fdt, 0,
> > + cpu_to_be64(PNV10_XSCOM_BASE(chip)),
> > + cpu_to_be64(PNV10_XSCOM_SIZE),
> > + compat, sizeof(compat));
> > +
> > +for (i = 0; i < chip->nr_cores; i++) {
> > +PnvCore *pnv_core = chip->cores[i];
> > +int offset;
> > +
> > +offset = pnv_dt_core(chip, pnv_core, fdt);
> > +
> > +_FDT((fdt_setprop(fdt, offset, "ibm,pa-features",
> > +   pa_features_31, sizeof(pa_features_31;
> > +}
> > +
> > +if (chip->ram_size) {
> > +pnv_dt_memory(fdt, chip->chip_id, chip->ram_start, chip->ram_size);
> > +}
> > +
> > +pnv_dt_lpc(chip, fdt, 0, PNV10_LPCM_BASE(chip), PNV10_LPCM_SIZE);
> > +}
> > +
> >   static void pnv_dt_rtc(ISADevice *d, void *fdt, int lpc_off)
> >   {
> >   uint32_t io_base = d->ioport_id;
> > @@ -1288,6 +1315,8 @@ static void pnv_chip_power10_intc_print_info(PnvChip 
> > *chip, PowerPCCPU *cpu,
> >   #define POWER10_CORE_MASK  (0xffull)
> > +#define POWER11_CORE_MASK  (0xffull)
> > +
> >   static void pnv_chip_power8_instance_init(Object *obj)
> >   {
> >   Pnv8Chip *chip8 = PNV8_CHIP(obj);
> > @@ -1831,7 +1860,8 @@ static void pnv_chip_power10_instance_init(Object 
> > *obj)
> >   }
> >   }
> > -static void pnv_chip_power10_quad_realize(Pnv10Chip *chip10, Error **errp)
> > +static void pnv_chip_power10_quad_realize(Pnv10Chip *chip10, Error **errp,
> > +const char *cpu_model)
> >   {
> >   PnvChip *chip = PNV_CHIP(chip10);
> >   int i;
> > @@ -1843,7 +1873,7 @@ static void pnv_chip_power10_quad_realize(Pnv10Chip 
> > *chip10, Error **errp)
> >   PnvQuad *eq = &chip10->quads[i];
> >   pnv_chip_quad_realize_one(chip, eq, chip->cores[i * 4],
> > -  PNV_QUAD_TYPE_NAME("power10"));
> > +  PNV_QUAD_TYPE_NAME_DYN(cpu_model));
> >   pnv_xscom_add_subregion(chip, PNV10_XSCOM_EQ_BASE(eq->quad_id),
> >   &eq->xscom_regs);
> > @@ -1881,7 +1

Re: [RFC 0/6] scripts: Rewrite simpletrace printer in Rust

2024-05-27 Thread Zhao Liu

Hi Mads,

On Mon, May 27, 2024 at 12:49:06PM +0200, Mads Ynddal wrote:
> Date: Mon, 27 May 2024 12:49:06 +0200
> From: Mads Ynddal 
> Subject: Re: [RFC 0/6] scripts: Rewrite simpletrace printer in Rust
> X-Mailer: Apple Mail (2.3774.600.62)
> 
> Hi,
> 
> Interesting work. I don't have any particular comments for the code, but I
> wanted to address a few of the points here.
> 
> > 2. Rust delivers faster parsing.
> 
> For me, the point of simpletrace.py is not to be the fastest at parsing, but
> rather to open the door for using Python libraries like numpy, matplotlib, 
> etc.
> for analysis.
> 
> There might be room for improvement in the Python version, especially in
> minimizing memory usage, when parsing large traces.

Thanks for pointing this out, the Python version is also very extensible
and easy to develop.

Perhaps ease of scalability vs. performance could be the difference that
the two versions focus on?

> > Security
> > 
> > 
> > This is an example.
> > 
> > Rust is very strict about type-checking, and it found timestamp reversal
> > issue in simpletrace-rust [3] (sorry, haven't gotten around to digging
> > deeper with more time)...in this RFC, I worked around it by allowing
> > negative values. And the python version, just silently covered this
> > issue up.
> 
> I'm not particularly worried about the security of the Python version. We're 
> not
> doing anything obviously exploitable.

I agree with this, this tool is mainly for parsing. I think one of the
starting points for providing a Rust version was also to explore whether
this could be an opportunity to integrate Rust into QEMU.

Thanks,
Zhao

[PATCH] targer/riscv: Implement Zabha extension

2024-05-27 Thread Alexandre Ghiti

From: Gianluca Guida 

Add Zabha implementation.

Signed-off-by: Gianluca Guida 
Signed-off-by: Alexandre Ghiti 
---
 target/riscv/cpu.c  |   2 +
 target/riscv/cpu_cfg.h  |   1 +
 target/riscv/insn32.decode  |  22 +++
 target/riscv/insn_trans/trans_rvzabha.c.inc | 149 
 target/riscv/tcg/tcg-cpu.c  |   5 +
 target/riscv/translate.c|   1 +
 6 files changed, 180 insertions(+)
 create mode 100644 target/riscv/insn_trans/trans_rvzabha.c.inc

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index 70d1a527a1..b01f82002b 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -116,6 +116,7 @@ const RISCVIsaExtData isa_edata_arr[] = {
 ISA_EXT_DATA_ENTRY(zmmul, PRIV_VERSION_1_12_0, ext_zmmul),
 ISA_EXT_DATA_ENTRY(za64rs, PRIV_VERSION_1_12_0, has_priv_1_11),
 ISA_EXT_DATA_ENTRY(zaamo, PRIV_VERSION_1_12_0, ext_zaamo),
+ISA_EXT_DATA_ENTRY(zabha, PRIV_VERSION_1_12_0, ext_zabha),
 ISA_EXT_DATA_ENTRY(zacas, PRIV_VERSION_1_12_0, ext_zacas),
 ISA_EXT_DATA_ENTRY(zalrsc, PRIV_VERSION_1_12_0, ext_zalrsc),
 ISA_EXT_DATA_ENTRY(zawrs, PRIV_VERSION_1_12_0, ext_zawrs),
@@ -1464,6 +1465,7 @@ const RISCVCPUMultiExtConfig riscv_cpu_extensions[] = {
 MULTI_EXT_CFG_BOOL("zicsr", ext_zicsr, true),
 MULTI_EXT_CFG_BOOL("zihintntl", ext_zihintntl, true),
 MULTI_EXT_CFG_BOOL("zihintpause", ext_zihintpause, true),
+MULTI_EXT_CFG_BOOL("zabha", ext_zabha, false),
 MULTI_EXT_CFG_BOOL("zacas", ext_zacas, false),
 MULTI_EXT_CFG_BOOL("zaamo", ext_zaamo, false),
 MULTI_EXT_CFG_BOOL("zalrsc", ext_zalrsc, false),
diff --git a/target/riscv/cpu_cfg.h b/target/riscv/cpu_cfg.h
index d36c416ef0..7f614da4e2 100644
--- a/target/riscv/cpu_cfg.h
+++ b/target/riscv/cpu_cfg.h
@@ -81,6 +81,7 @@ struct RISCVCPUConfig {
 bool ext_svvptc;
 bool ext_zdinx;
 bool ext_zaamo;
+bool ext_zabha;
 bool ext_zacas;
 bool ext_zalrsc;
 bool ext_zawrs;
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
index f22df04cfd..6d7726120f 100644
--- a/target/riscv/insn32.decode
+++ b/target/riscv/insn32.decode
@@ -1010,3 +1010,25 @@ amocas_w00101 . . . . 010 . 010 
@atom_st
 amocas_d00101 . . . . 011 . 010 @atom_st
 # *** RV64 Zacas Standard Extension ***
 amocas_q00101 . . . . 100 . 010 @atom_st
+
+# *** Zabha Standard Extension ***
+amoswap_b  1 . . . . 000 . 010 @atom_st
+amoadd_b   0 . . . . 000 . 010 @atom_st
+amoxor_b   00100 . . . . 000 . 010 @atom_st
+amoand_b   01100 . . . . 000 . 010 @atom_st
+amoor_b01000 . . . . 000 . 010 @atom_st
+amomin_b   1 . . . . 000 . 010 @atom_st
+amomax_b   10100 . . . . 000 . 010 @atom_st
+amominu_b  11000 . . . . 000 . 010 @atom_st
+amomaxu_b  11100 . . . . 000 . 010 @atom_st
+amocas_b   00101 . . . . 000 . 010 @atom_st
+amoswap_h  1 . . . . 001 . 010 @atom_st
+amoadd_h   0 . . . . 001 . 010 @atom_st
+amoxor_h   00100 . . . . 001 . 010 @atom_st
+amoand_h   01100 . . . . 001 . 010 @atom_st
+amoor_h01000 . . . . 001 . 010 @atom_st
+amomin_h   1 . . . . 001 . 010 @atom_st
+amomax_h   10100 . . . . 001 . 010 @atom_st
+amominu_h  11000 . . . . 001 . 010 @atom_st
+amomaxu_h  11100 . . . . 001 . 010 @atom_st
+amocas_h   00101 . . . . 001 . 010 @atom_st
diff --git a/target/riscv/insn_trans/trans_rvzabha.c.inc 
b/target/riscv/insn_trans/trans_rvzabha.c.inc
new file mode 100644
index 00..74f43bb95a
--- /dev/null
+++ b/target/riscv/insn_trans/trans_rvzabha.c.inc
@@ -0,0 +1,149 @@
+/*
+ * RISC-V translation routines for the Zabha Standard Extension.
+ *
+ * Copyright (c) 2023 Rivos Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#define REQUIRE_ZABHA(ctx) do { \
+if (!ctx->cfg_ptr->ext_zabha) { \
+return false;   \
+}   \
+} while (0)
+
+#define REQUIRE_ZABHA_AND_ZACAS(ctx) do {   \
+if (!ctx->cfg_ptr->ext_za

[PATCH] tests/qtest/migrate-test: Use regular file for shared-memory tests

2024-05-27 Thread Nicholas Piggin

There is no need to use /dev/shm for file-backed memory devices, and
it is too small to be usable in gitlab CI. Switch to using a regular
file in /tmp/ which will usually have more space available.

Signed-off-by: Nicholas Piggin 
---
Am I missing something? AFAIKS there is not even any point using
/dev/shm aka tmpfs anyway, there is not much special about it as a
filesystem. This applies on top of the series just sent, and passes
gitlab CI qtests including aarch64.

Thanks,
Nick

 tests/qtest/migration-test.c | 41 
 1 file changed, 13 insertions(+), 28 deletions(-)

diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
index 45830eb213..86eace354e 100644
--- a/tests/qtest/migration-test.c
+++ b/tests/qtest/migration-test.c
@@ -552,7 +552,7 @@ typedef struct {
  * unconditionally, because it means the user would like to be verbose.
  */
 bool hide_stderr;
-bool use_shmem;
+bool use_memfile;
 /* only launch the target process */
 bool only_target;
 /* Use dirty ring if true; dirty logging otherwise */
@@ -672,29 +672,14 @@ static int test_migrate_start(QTestState **from, 
QTestState **to,
 g_autofree gchar *cmd_source = NULL;
 g_autofree gchar *cmd_target = NULL;
 const gchar *ignore_stderr;
-g_autofree char *shmem_opts = NULL;
-g_autofree char *shmem_path = NULL;
+g_autofree char *memfile_opts = NULL;
+g_autofree char *memfile_path = NULL;
 const char *kvm_opts = NULL;
 const char *arch = qtest_get_arch();
 const char *memory_size;
 const char *machine_alias, *machine_opts = "";
 g_autofree char *machine = NULL;
 
-if (args->use_shmem) {
-if (!g_file_test("/dev/shm", G_FILE_TEST_IS_DIR)) {
-g_test_skip("/dev/shm is not supported");
-return -1;
-}
-if (getenv("GITLAB_CI")) {
-/*
- * Gitlab runners are limited to 64MB shm size. See:
- * https://lore.kernel.org/all/87ttq5fvh7@suse.de/
- */
-g_test_skip("/dev/shm is not supported in Gitlab CI environment");
-return -1;
-}
-}
-
 dst_state = (QTestMigrationState) { };
 src_state = (QTestMigrationState) { };
 bootfile_create(tmpfs, args->suspend_me);
@@ -754,12 +739,12 @@ static int test_migrate_start(QTestState **from, 
QTestState **to,
 ignore_stderr = "";
 }
 
-if (args->use_shmem) {
-shmem_path = g_strdup_printf("/dev/shm/qemu-%d", getpid());
-shmem_opts = g_strdup_printf(
+if (args->use_memfile) {
+memfile_path = g_strdup_printf("/%s/qemu-%d", tmpfs, getpid());
+memfile_opts = g_strdup_printf(
 "-object memory-backend-file,id=mem0,size=%s"
 ",mem-path=%s,share=on -numa node,memdev=mem0",
-memory_size, shmem_path);
+memory_size, memfile_path);
 }
 
 if (args->use_dirty_ring) {
@@ -788,7 +773,7 @@ static int test_migrate_start(QTestState **from, QTestState 
**to,
  memory_size, tmpfs,
  arch_opts ? arch_opts : "",
  arch_source ? arch_source : "",
- shmem_opts ? shmem_opts : "",
+ memfile_opts ? memfile_opts : "",
  args->opts_source ? args->opts_source : "",
  ignore_stderr);
 if (!args->only_target) {
@@ -810,7 +795,7 @@ static int test_migrate_start(QTestState **from, QTestState 
**to,
  memory_size, tmpfs, uri,
  arch_opts ? arch_opts : "",
  arch_target ? arch_target : "",
- shmem_opts ? shmem_opts : "",
+ memfile_opts ? memfile_opts : "",
  args->opts_target ? args->opts_target : "",
  ignore_stderr);
 *to = qtest_init_with_env(QEMU_ENV_DST, cmd_target);
@@ -822,8 +807,8 @@ static int test_migrate_start(QTestState **from, QTestState 
**to,
  * Remove shmem file immediately to avoid memory leak in test failed case.
  * It's valid because QEMU has already opened this file
  */
-if (args->use_shmem) {
-unlink(shmem_path);
+if (args->use_memfile) {
+unlink(memfile_path);
 }
 
 return 0;
@@ -1875,7 +1860,7 @@ static void test_ignore_shared(void)
 g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs);
 QTestState *from, *to;
 MigrateStart args = {
-.use_shmem = true,
+.use_memfile = true,
 };
 
 if (test_migrate_start(&from, &to, uri, &args)) {
@@ -2033,7 +2018,7 @@ static void test_mode_reboot(void)
 g_autofree char *uri = g_strdup_printf("file:%s/%s", tmpfs,
FILE_TEST_FILENAME);
 M

Re: [PATCH] x86: cpu: fixup number of addressable IDs for processor cores in the physical package

2024-05-27 Thread Chuang Xu


Hi Zhao,

On 2024/5/28 上午10:31, Zhao Liu wrote:

Hi Chuang,

On Mon, May 27, 2024 at 11:13:33AM +0800, Chuang Xu wrote:

Date: Mon, 27 May 2024 11:13:33 +0800
From: Chuang Xu 
Subject: [PATCH] x86: cpu: fixup number of addressable IDs for processor
  cores in the physical package

According to the usual practice of QEMU commits, people tend to use
"i386/cpu" as the subject prefix, which indicates the code path.


X-Mailer: git-send-email 2.24.3 (Apple Git-128)

When QEMU is started with:
-cpu host,host-cache-info=on,l3-cache=off \

Just a discussion, "l3-cache=off" doesn't work in host cache passthrough
case, do you have a specific need that you don't want to see l3 cache?


No specific need, just generated by libvirt.

-smp 2,sockets=1,dies=1,cores=1,threads=2
Guest can't acquire maximum number of addressable IDs for processor cores in
the physical package from CPUID[04H].

This bug was introduced in commit d7caf13b5fcf742e5680c1d3448ba070fc811644.
Fix it by changing the judgement condition to a >= 1.

Pls add a "Fixes" tag like:

Fixes: d7caf13b5fcf ("x86: cpu: fixup number of addressable IDs for logical 
processors sharing cache")

Since this is a historical issue that deserves to be ported to the
stable branch, you can cc stable list by:

Cc: qemu-sta...@nongnu.org


Signed-off-by: Chuang Xu 

As the patch sender, it's better to put your signature on the last line.
;-)


Signed-off-by: Guixiong Wei 
Signed-off-by: Yipeng Yin 
---
  target/i386/cpu.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index cd16cb893d..0369c01153 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -6097,7 +6097,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, 
uint32_t count,
  if (*eax & 31) {
  int host_vcpus_per_cache = 1 + ((*eax & 0x3FFC000) >> 14);
  int vcpus_per_socket = cs->nr_cores * cs->nr_threads;
-if (cs->nr_cores > 1) {
+if (cs->nr_cores >= 1) {

Like Igor suggested, this condition could be removed since cs->nr_cores can't
be 0.


  *eax &= ~0xFC00;
  *eax |= (pow2ceil(cs->nr_cores) - 1) << 26;
  }

...the code is outdated, pls rebase on the latest master branch.

My fault, sorry for forgetting to pull the latest code..

Regards,
Zhao


Thanks for all your suggestions!

Chuang

Re: TCG change broke MorphOS boot on sam460ex

2024-05-27 Thread Nicholas Piggin

On Tue May 28, 2024 at 8:23 AM AEST, BALATON Zoltan wrote:
> On Wed, 3 Apr 2024, Nicholas Piggin wrote:
> > On Tue Apr 2, 2024 at 9:32 PM AEST, BALATON Zoltan wrote:
> >> On Thu, 21 Mar 2024, BALATON Zoltan wrote:
> >>> On 27/2/24 17:47, BALATON Zoltan wrote:
>  Hello,
> 
>  Commit 18a536f1f8 (accel/tcg: Always require can_do_io) broke booting
>  MorphOS on sam460ex (this was before 8.2.0 and I thought I've verified it
>  before that release but apparently missed it back then). It can be
>  reproduced with https://www.morphos-team.net/morphos-3.18.iso and 
>  following
>  command:
> 
>  qemu-system-ppc -M sam460ex -serial stdio -d unimp,guest_errors \
>     -drive if=none,id=cd,format=raw,file=morphos-3.18.iso \
>     -device ide-cd,drive=cd,bus=ide.1
> >>
> >> Any idea on this one? While MorphOS boots on other machines and other OSes
> >> seem to boot on this machine it may still suggest there's some problem
> >> somewhere as this worked before. So it may worth investigating it to make
> >> sure there's no bug that could affect other OSes too even if they boot. I
> >> don't know how to debug this so some help would be needed.
> >
> > In the bad case it crashes after running this TB:
> >
> > 
> > IN:
> > 0x00c01354:  38c00040  li   r6, 0x40
> > 0x00c01358:  38e10204  addi r7, r1, 0x204
> > 0x00c0135c:  39010104  addi r8, r1, 0x104
> > 0x00c01360:  39410004  addi r10, r1, 4
> > 0x00c01364:  3920  li   r9, 0
> > 0x00c01368:  7cc903a6  mtctrr6
> > 0x00c0136c:  84c70004  lwzu r6, 4(r7)
> > 0x00c01370:  7cc907a4  tlbwehi  r6, r9
> > 0x00c01374:  84c80004  lwzu r6, 4(r8)
> > 0x00c01378:  7cc90fa4  tlbwelo  r6, r9
> > 0x00c0137c:  84ca0004  lwzu r6, 4(r10)
> > 0x00c01380:  7cc917a4  tlbwehi  r6, r9
> > 0x00c01384:  39290001  addi r9, r9, 1
> > 0x00c01388:  4200ffe4  bdnz 0xc0136c
> > 
> > IN:
> > 0x00c01374: unable to read memory
> > 
> >
> > "unable to read memory" is the tracer, it does actually translate
> > the address, but it points to a wayward real address which returns
> > 0 to TCG, which is an invalid instruction.
> >
> > The good case instead doesn't exit the TB after 0x00c01370 but after
> > the complete loop at the bdnz. That look like this after the same
> > first TB:
> >
> > 
> > IN:
> > 0x00c0136c:  84c70004  lwzu r6, 4(r7)
> > 0x00c01370:  7cc907a4  tlbwehi  r6, r9
> > 0x00c01374:  84c80004  lwzu r6, 4(r8)
> > 0x00c01378:  7cc90fa4  tlbwelo  r6, r9
> > 0x00c0137c:  84ca0004  lwzu r6, 4(r10)
> > 0x00c01380:  7cc917a4  tlbwehi  r6, r9
> > 0x00c01384:  39290001  addi r9, r9, 1
> > 0x00c01388:  4200ffe4  bdnz 0xc0136c
> > 
> > IN:
> > 0x00c0138c:  4c00012c  isync
> >
> > All the tlbwe are executed in the same TB. MMU tracing shows the
> > first tlbwehi creates a new valid(!) TLB for 0x-0x1
> > that has a garbage RPN because the tlbwelo did not run yet.
> >
> > What's happening in the bad case is that the translator breaks
> > and "re-fetches" instructions in the middle of that sequence, and
> > that's where the bogus translation causes 0 to be returned. The
> > good case the whole block is executed in the same fetch which
> > creates correct translations.
> >
> > So it looks like a morphos bug, the can-do-io change just happens
> > to cause it to re-fetch in that place, but that could happen for
> > a number of reasons, so you can't rely on TLB *only* changing or
> > ifetch *only* re-fetching at a sync point like isync.
> >
> > I would expect code like this to write an invalid entry with tlbwehi,
> > then tlbwelo to set the correct RPN, then make the entry valid with
> > the second tlbwehi. It would probably fix the bug if you just did the
> > first tlbwehi with r6=0 (or at least without the 0x200 bit set).
>
> Revisiting this, I've found in the docs that PPC440 has shadow TLBs so 
> this code can rely upon the TLB not being invalidated until isync and 
> works on real machine but breaks on QEMU.

I never programmed for 440 but it's unclear to me from the docs how
much you can rely on this programmatically (you would have to ensure
no page crossings, disable interrupts, hope for no machine check,
etc).

But it does break real software so whether or not it is following
exact letter of the law, it would be good to fix.

> We would either need to make 
> sure the TB runs until the sync or somehow emulate the shadow TLB. I've 
> experimented with the latter but I could not make it work (and 
> unexpectedly keeping a cache of the most recently used entries is slower 
> than always searching through all TLB entries as done now so I've 
> abandoned that idea). The problem is that an entry is modified by multiple 
> tlbwe instructions but these can come in any order (and sometimes only one 
> of them is done like invalidating an entry seems to only do one write) so 
> I don't know when to copy the new

Re: [PATCH 1/1] vhost-vsock: add VIRTIO_F_RING_PACKED to feature_bits

2024-05-27 Thread Jason Wang

On Mon, May 27, 2024 at 7:27 PM Halil Pasic  wrote:
>
> On Thu, 16 May 2024 10:39:42 +0200
> Stefano Garzarella  wrote:
>
> [..]
> > >---
> > >
> > >This is a minimal fix, that follows the current patterns in the
> > >codebase, and not necessarily the best one.
> >
> > Yeah, I did something similar with commit 562a7d23bf ("vhost: mask
> > VIRTIO_F_RING_RESET for vhost and vhost-user devices") so I think for
> > now is the right approach.
> >
> > I suggest to check also other devices like we did in that commit (e.g.
> > hw/scsi/vhost-scsi.c, hw/scsi/vhost-user-scsi.c, etc. )
>
> Hi Stefano!
>
> Thank you for chiming in, and sorry for the late response. I was hoping
> that Michael is going to chime in and that I can base my reply on his
> take. Anyway here I  go.
>
> A very valid observation! I do agree that we need this for
> basically every vhost device, and since:
> * net/vhost-vdpa.c
> * hw/net/vhost_net.c
> * hw/virtio/vhost-user-fs.c
> already have it, that translates to shotgun it to the rest. Which
> isn't nice in my opinion, which is why I am hoping for a discussion
> on this topic, and a better solution (even if it turns out to be
> something like a common macro).
> [..]
> > >
> > >The documentation however does kind of state, that feature_bits is
> > >supposed to contain the supported features. And under the assumption
> > >that feature bit not in feature_bits implies that the corresponding bit
> > >must not be set in the 3rd argument (features), then even with the
> > >current implementation we do end up with the intersection of the three
> > >as stated. And then vsock would be at fault for violating that
> > >assumption, and my fix would be the best thing to do -- I guess.
> > >
> > >Is the implementation the way it is for a good reason, I can't judge
> > >that with certainty for myself.
> >
> > Yes, I think we should fix the documentation, and after a few years of
> > not looking at it I'm confused again about what it does.
> >
>
> I would prefer to fix the algorithm and make whole thing less fragile.
>
> > But re-reading my commit for VIRTIO_F_RING_RESET, it seems that I had
> > interpreted `feature_bits` (2nd argument) as a list of features that
> > QEMU doesn't know how to emulate and therefore are required by the
> > backend (vhost/vhost-user/vdpa). Because the problem is that `features`
> > (3rd argument) is a set of features required by the driver that can be
> > provided by both QEMU and the backend.
>
> Hm. I would say, this does sound like the sanest explanation, that might
> justify the current code, but I will argue that for me, it isn't sane
> enough.
>
> Here comes my argument.
>
> 1) The user is explicitly asking for a vhost device and giving the user
> a non vhost device is not an option.
>
> 2) The whole purpose of vhost is that at least the data plane is
> implemented outside of QEMU (I am maybe a little sloppy here with
> dataplane). That means a rather substantial portion of the device
> implementation is not in QEMU, while QEMU remains in charge of the
> setup.
>
> 3) Thus I would argue, that all the "transport feature bits" from 24 to
> 40 should have a corresponding vhost feature because the vhost part needs
> some sort of a support.
>
> What do we have there in bits from 24 to 40 according to the spec?
> * VIRTIO_F_INDIRECT_DESC
> * VIRTIO_F_EVENT_IDX
> * VIRTIO_F_VERSION_1
> * VIRTIO_F_ACCESS_PLATFORM
> * VIRTIO_F_RING_PACKED
> * VIRTIO_F_IN_ORDER
> * VIRTIO_F_ORDER_PLATFORM
> * VIRTIO_F_SR_IOV
> * VIRTIO_F_NOTIFICATION_DATA
> * VIRTIO_F_NOTIF_CONFIG_DATA
> * VIRTIO_F_RING_RESET
> and for transitional:
> * VIRTIO_F_NOTIFY_ON_EMPTY
> * VIRTIO_F_ANY_LAYOUT
> * UNUSED
>
> I would say, from these only VIRTIO_F_SR_IOV and
> VIRTIO_F_NOTIF_CONFIG_DATA look iffy in a sense things may work out
> for vhost devices without the vhost part doing something for it. And
> even there, I don't think it would hurt to make vhost part of the
> negotiation (I don't think those are supported by QEMU at this point).
>
> I would very much prefer having a consolidated and safe handling for
> these.
>
> 4) I would also argue that a bunch of the device specific feature bits
> should have vhost feature bits as well for the same reason:
> features are also such that for a vhost device, the vhost part needs
> some sort of a support.
>
> Looking through all of these would require a lot of time, so instead
> of that, let me use SCSI as an example. The features are:
> * VIRTIO_SCSI_F_INOUT
> * VIRTIO_SCSI_F_HOTPLUG
> * VIRTIO_SCSI_F_CHANGE
> * VIRTIO_SCSI_F_T10_PI
>
> Then in the Linux kernel we have
> VHOST_SCSI_FEATURES = VHOST_FEATURES | (1ULL << 
> VIRTIO_SCSI_F_HOTPLUG) |
>(1ULL << VIRTIO_SCSI_F_T10_PI)
> but in QEMU kernel_feature_bits does not have
> VIRTIO_SCSI_F_T10_PI which together does not make much sense to me. And I 
> would
> also expect VIRTIO_SCSI_F_INOUT to be a part of the negotiation, because
> to me that the side that is process

Re: [PATCH] intel_iommu: Use the latest fault reasons defined by spec

2024-05-27 Thread Jason Wang

On Mon, May 27, 2024 at 2:50 PM Michael S. Tsirkin  wrote:
>
> On Mon, May 27, 2024 at 06:44:58AM +, Duan, Zhenzhong wrote:
> > Hi Jason,
> >
> > >-Original Message-
> > >From: Duan, Zhenzhong
> > >Subject: RE: [PATCH] intel_iommu: Use the latest fault reasons defined by
> > >spec
> > >
> > >
> > >
> > >>-Original Message-
> > >>From: Jason Wang 
> > >>Subject: Re: [PATCH] intel_iommu: Use the latest fault reasons defined by
> > >>spec
> > >>
> > >>On Fri, May 24, 2024 at 4:41 PM Duan, Zhenzhong
> > >> wrote:
> > >>>
> > >>>
> > >>>
> > >>> >-Original Message-
> > >>> >From: Jason Wang 
> > >>> >Subject: Re: [PATCH] intel_iommu: Use the latest fault reasons defined
> > >by
> > >>> >spec
> > >>> >
> > >>> >On Tue, May 21, 2024 at 6:25 PM Duan, Zhenzhong
> > >>> > wrote:
> > >>> >>
> > >>> >>
> > >>> >>
> > >>> >> >-Original Message-
> > >>> >> >From: Jason Wang 
> > >>> >> >Subject: Re: [PATCH] intel_iommu: Use the latest fault reasons
> > >defined
> > >>by
> > >>> >> >spec
> > >>> >> >
> > >>> >> >On Mon, May 20, 2024 at 12:15 PM Liu, Yi L 
> > >>wrote:
> > >>> >> >>
> > >>> >> >> > From: Duan, Zhenzhong 
> > >>> >> >> > Sent: Monday, May 20, 2024 11:41 AM
> > >>> >> >> >
> > >>> >> >> >
> > >>> >> >> >
> > >>> >> >> > >-Original Message-
> > >>> >> >> > >From: Jason Wang 
> > >>> >> >> > >Sent: Monday, May 20, 2024 8:44 AM
> > >>> >> >> > >To: Duan, Zhenzhong 
> > >>> >> >> > >Cc: qemu-devel@nongnu.org; Liu, Yi L ; 
> > >>> >> >> > >Peng,
> > >>> >Chao
> > >>> >> >P
> > >>> >> >> > >; Yu Zhang
> > >>;
> > >>> >> >Michael
> > >>> >> >> > >S. Tsirkin ; Paolo Bonzini
> > >>> >;
> > >>> >> >> > >Richard Henderson ; Eduardo
> > >>> >Habkost
> > >>> >> >> > >; Marcel Apfelbaum
> > >>> >> >
> > >>> >> >> > >Subject: Re: [PATCH] intel_iommu: Use the latest fault reasons
> > >>> >defined
> > >>> >> >by
> > >>> >> >> > >spec
> > >>> >> >> > >
> > >>> >> >> > >On Fri, May 17, 2024 at 6:26 PM Zhenzhong Duan
> > >>> >> >> > > wrote:
> > >>> >> >> > >>
> > >>> >> >> > >> From: Yu Zhang 
> > >>> >> >> > >>
> > >>> >> >> > >> Currently we use only VTD_FR_PASID_TABLE_INV as fault
> > >>reason.
> > >>> >> >> > >> Update with more detailed fault reasons listed in VT-d spec
> > >>7.2.3.
> > >>> >> >> > >>
> > >>> >> >> > >> Signed-off-by: Yu Zhang 
> > >>> >> >> > >> Signed-off-by: Zhenzhong Duan 
> > >>> >> >> > >> ---
> > >>> >> >> > >
> > >>> >> >> > >I wonder if this could be noticed by the guest or not. If yes
> > >should
> > >>> >> >> > >we consider starting to add thing like version to vtd emulation
> > >>code?
> > >>> >> >> >
> > >>> >> >> > Kernel only dumps the reason like below:
> > >>> >> >> >
> > >>> >> >> > DMAR: [DMA Write NO_PASID] Request device [20:00.0] fault
> > >addr
> > >>> >> >0x123460
> > >>> >> >> > [fault reason 0x71] SM: Present bit in first-level paging entry 
> > >>> >> >> > is
> > >>clear
> > >>> >> >>
> > >>> >> >> Yes, guest kernel would notice it as the fault would be injected 
> > >>> >> >> to
> > >vm.
> > >>> >> >>
> > >>> >> >> > Maybe bump 1.0 -> 1.1?
> > >>> >> >> > My understanding version number is only informational and is
> > >far
> > >>> >from
> > >>> >> >> > accurate to mark if a feature supported. Driver should check
> > >>cap/ecap
> > >>> >> >> > bits instead.
> > >>> >> >>
> > >>> >> >> Should the version ID here be aligned with VT-d spec?
> > >>> >> >
> > >>> >> >Probably, this might be something that could be noticed by the
> > >>> >> >management to migration compatibility.
> > >>> >>
> > >>> >> Could you elaborate what we need to do for migration compatibility?
> > >>> >> I see version is already exported so libvirt can query it, see:
> > >>> >>
> > >>> >> DEFINE_PROP_UINT32("version", IntelIOMMUState, version, 0),
> > >>> >
> > >>> >It is the Qemu command line parameters not the version of the vmstate.
> > >>> >
> > >>> >For example -device intel-iommu,version=3.0
> > >>> >
> > >>> >Qemu then knows it should behave as 3.0.
> > >>>
> > >>> So you want to bump vtd_vmstate.version?
> > >>
> > >>Well, as I said, it's not a direct bumping.
> > >>
> > >>>
> > >>> In fact, this series change intel_iommu property from x-scalable-
> > >>mode=["on"|"off"]"
> > >>> to x-scalable-mode=["legacy"|"modern"|"off"]".
> > >>>
> > >>> My understanding management app should use same qemu cmdline
> > >>> in source and destination, so compatibility is already guaranteed even 
> > >>> if
> > >>> we don't bump vtd_vmstate.version.
> > >>
> > >>Exactly, so the point is to
> > >>
> > >>vtd=3.0, the device works exactly as vtd spec 3.0.
> > >>vtd=3.3, the device works exactly as vtd spec 3.3.
> >
> > Yi just found version ID stored in VT-d VER_REG is not aligned with the 
> > VT-d spec version.
> > For example, we see a local hw with vtd version 6.0 which is beyond VT-d 
> > spec version.
> > We are asking VTD arch, will get back soon.
> >
> > Or will you plan qemu vVT-d having its own version policy?
> >
> > Thanks
> > Zhenzhong
>
> Not unless there

Re: [PATCH v2 0/2] hw/ufs: Add support MCQ

2024-05-27 Thread Jeuk Kim




On 5/28/2024 11:31 AM, Minwoo Im wrote:

UFSHCI 4.0 spec introduced MCQ(Multi-Circular Queue) to support multiple
command queues for UFS controller.  To test ufs-mcq path of kernel, MCQ
emulated device would be a good choice to go with.

The first patch added newly introduced fields in UFSHCI 4.0 to support
MCQ.  The other one made the actual changes for MCQ.

v2:
   It fixed printing error event trace even in normal shutdown cases for
SQ/CQ tear-down by checking whether each SQ/CQ is valid or not.  The
default value of mcq-maxq was updated to 2 from 1 to prevent the kernel
from allocating a single queue as a poll_queue by default and to ensure
that io_queues exist to handle device commands.

Please review.

Thanks,

Minwoo Im (2):
   hw/ufs: Update MCQ-related fields to block/ufs.h
   hw/ufs: Add support MCQ of UFSHCI 4.0

  hw/ufs/trace-events |  17 ++
  hw/ufs/ufs.c| 478 ++--
  hw/ufs/ufs.h|  98 -
  include/block/ufs.h | 131 +++-
  4 files changed, 702 insertions(+), 22 deletions(-)


Thank you for the patch.


Reviewed-by: Jeuk Kim

RE: [PATCH v5 19/19] intel_iommu: Check compatibility with host IOMMU capabilities

2024-05-27 Thread Duan, Zhenzhong



>-Original Message-
>From: Cédric Le Goater 
>Subject: Re: [PATCH v5 19/19] intel_iommu: Check compatibility with host
>IOMMU capabilities
>
>On 5/8/24 11:03, Zhenzhong Duan wrote:
>> If check fails, host device (either VFIO or VDPA device) is not
>> compatible with current vIOMMU config and should not be passed to
>> guest.
>>
>> Only aw_bits is checked for now, we don't care other capabilities
>> before scalable modern mode is introduced.
>>
>> Signed-off-by: Yi Liu 
>> Signed-off-by: Zhenzhong Duan 
>> ---
>>   hw/i386/intel_iommu.c | 27 +++
>>   1 file changed, 27 insertions(+)
>>
>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>> index 747c988bc4..07bfd4f99e 100644
>> --- a/hw/i386/intel_iommu.c
>> +++ b/hw/i386/intel_iommu.c
>> @@ -20,6 +20,7 @@
>>*/
>>
>>   #include "qemu/osdep.h"
>> +#include CONFIG_DEVICES /* CONFIG_HOST_IOMMU_DEVICE */
>>   #include "qemu/error-report.h"
>>   #include "qemu/main-loop.h"
>>   #include "qapi/error.h"
>> @@ -3819,6 +3820,26 @@ VTDAddressSpace
>*vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus,
>>   return vtd_dev_as;
>>   }
>>
>> +static bool vtd_check_hdev(IntelIOMMUState *s, VTDHostIOMMUDevice
>*vtd_hdev,
>> +   Error **errp)
>> +{
>> +#ifdef CONFIG_HOST_IOMMU_DEVICE
>> +HostIOMMUDevice *hiod = vtd_hdev->dev;
>> +int ret;
>> +
>> +/* Common checks */
>> +ret = host_iommu_device_get_cap(hiod,
>HOST_IOMMU_DEVICE_CAP_AW_BITS, errp);
>
>To avoid CONFIG_HOST_IOMMU_DEVICE, host_iommu_device_get_cap()
>could be
>open coded.

Thanks for suggesting, it works for build on both windows and linux.

>
>> +if (ret < 0) {
>> +return false;
>> +}
>> +if (s->aw_bits > ret) {
>> +error_setg(errp, "aw-bits %d > host aw-bits %d", s->aw_bits, ret);
>> +return false;
>> +}
>> +#endif
>> +return true;
>> +}
>> +
>>   static bool vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int
>devfn,
>>HostIOMMUDevice *hiod, Error **errp)
>>   {
>> @@ -3848,6 +3869,12 @@ static bool
>vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int devfn,
>>   vtd_hdev->iommu_state = s;
>>   vtd_hdev->dev = hiod;
>>
>> +if (!vtd_check_hdev(s, vtd_hdev, errp)) {
>> +g_free(vtd_hdev);
>> +vtd_iommu_unlock(s);
>> +return false;
>> +}
>
>This check could be first done before allocating vtd_hdev.

OK, will do.
I made it that way to facilitate this patch:
https://github.com/yiliu1765/qemu/commit/d589a470002ccf607b5743b2951612f7b4790833

Thanks
Zhenzhong

[PULL 26/28] riscv, gdbstub.c: fix reg_width in ricsv_gen_dynamic_vector_feature()

2024-05-27 Thread Alistair Francis

From: Daniel Henrique Barboza 

Commit 33a24910ae changed 'reg_width' to use 'vlenb', i.e. vector length
in bytes, when in this context we want 'reg_width' as the length in
bits.

Fix 'reg_width' back to the value in bits like 7cb59921c05a
("target/riscv/gdbstub.c: use 'vlenb' instead of shifting 'vlen'") set
beforehand.

While we're at it, rename 'reg_width' to 'bitsize' to provide a bit more
clarity about what the variable represents. 'bitsize' is also used in
riscv_gen_dynamic_csr_feature() with the same purpose, i.e. as an input to
gdb_feature_builder_append_reg().

Cc: Akihiko Odaki 
Cc: Alex Bennée 
Reported-by: Robin Dapp 
Fixes: 33a24910ae ("target/riscv: Use GDBFeature for dynamic XML")
Signed-off-by: Daniel Henrique Barboza 
Reviewed-by: LIU Zhiwei 
Acked-by: Alex Bennée 
Reviewed-by: Akihiko Odaki 
Reviewed-by: Alistair Francis 
Cc: qemu-stable 
Message-ID: <20240517203054.880861-2-dbarb...@ventanamicro.com>
Signed-off-by: Alistair Francis 
---
 target/riscv/gdbstub.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/target/riscv/gdbstub.c b/target/riscv/gdbstub.c
index d0cc5762c2..c07df972f1 100644
--- a/target/riscv/gdbstub.c
+++ b/target/riscv/gdbstub.c
@@ -288,7 +288,7 @@ static GDBFeature *riscv_gen_dynamic_csr_feature(CPUState 
*cs, int base_reg)
 static GDBFeature *ricsv_gen_dynamic_vector_feature(CPUState *cs, int base_reg)
 {
 RISCVCPU *cpu = RISCV_CPU(cs);
-int reg_width = cpu->cfg.vlenb;
+int bitsize = cpu->cfg.vlenb << 3;
 GDBFeatureBuilder builder;
 int i;
 
@@ -298,7 +298,7 @@ static GDBFeature 
*ricsv_gen_dynamic_vector_feature(CPUState *cs, int base_reg)
 
 /* First define types and totals in a whole VL */
 for (i = 0; i < ARRAY_SIZE(vec_lanes); i++) {
-int count = reg_width / vec_lanes[i].size;
+int count = bitsize / vec_lanes[i].size;
 gdb_feature_builder_append_tag(
 &builder, "",
 vec_lanes[i].id, vec_lanes[i].gdb_type, count);
@@ -316,7 +316,7 @@ static GDBFeature 
*ricsv_gen_dynamic_vector_feature(CPUState *cs, int base_reg)
 /* Define vector registers */
 for (i = 0; i < 32; i++) {
 gdb_feature_builder_append_reg(&builder, g_strdup_printf("v%d", i),
-   reg_width, i, "riscv_vector", "vector");
+   bitsize, i, "riscv_vector", "vector");
 }
 
 gdb_feature_builder_end(&builder);
-- 
2.45.1

[PULL 22/28] target/riscv: do not set mtval2 for non guest-page faults

2024-05-27 Thread Alistair Francis

From: Alexei Filippov 

The previous patch fixed the PMP priority in raise_mmu_exception() but we're
still setting mtval2 incorrectly. In riscv_cpu_tlb_fill(), after the PMP check
in the 2-stage translation part, mtval2 will be set in case of a successful
2-stage translation but a failed PMP check.

In this case we are going to set mtval2 via env->guest_phys_fault_addr in the
context of riscv_cpu_tlb_fill() as if this was a guest-page fault, but it was
not, and mtval2 should be zero, according to the RISC-V privileged spec sect. 9.4.4: When a guest
page-fault is taken into M-mode, mtval2 is written with either zero or guest
physical address that faulted, shifted by 2 bits. *For other traps, mtval2
is set to zero...*

Signed-off-by: Alexei Filippov 
Reviewed-by: Daniel Henrique Barboza 
Reviewed-by: Alistair Francis 
Message-ID: <20240503103052.6819-1-alexei.filip...@syntacore.com>
Cc: qemu-stable 
Signed-off-by: Alistair Francis 
---
 target/riscv/cpu_helper.c | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
index 574886a694..a02497d778 100644
--- a/target/riscv/cpu_helper.c
+++ b/target/riscv/cpu_helper.c
@@ -1376,17 +1376,17 @@ bool riscv_cpu_tlb_fill(CPUState *cs, vaddr address, 
int size,
   __func__, pa, ret, prot_pmp, tlb_size);
 
 prot &= prot_pmp;
-}
-
-if (ret != TRANSLATE_SUCCESS) {
+} else {
 /*
  * Guest physical address translation failed, this is a HS
  * level exception
  */
 first_stage_error = false;
-env->guest_phys_fault_addr = (im_address |
-  (address &
-   (TARGET_PAGE_SIZE - 1))) >> 2;
+if (ret != TRANSLATE_PMP_FAIL) {
+env->guest_phys_fault_addr = (im_address |
+  (address &
+   (TARGET_PAGE_SIZE - 1))) >> 
2;
+}
 }
 }
 } else {
-- 
2.45.1

[PULL 12/28] target/riscv: Relax vector register check in RISCV gdbstub

2024-05-27 Thread Alistair Francis

From: Jason Chien 

In current implementation, the gdbstub allows reading vector registers
only if V extension is supported. However, all vector extensions and
vector crypto extensions have the vector registers and they all depend
on Zve32x. The gdbstub should check for Zve32x instead.

Signed-off-by: Jason Chien 
Reviewed-by: Frank Chang 
Reviewed-by: Max Chou 
Message-ID: <20240328022343.6871-4-jason.ch...@sifive.com>
Signed-off-by: Alistair Francis 
---
 target/riscv/gdbstub.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target/riscv/gdbstub.c b/target/riscv/gdbstub.c
index be7a02cd90..d0cc5762c2 100644
--- a/target/riscv/gdbstub.c
+++ b/target/riscv/gdbstub.c
@@ -338,7 +338,7 @@ void riscv_cpu_register_gdb_regs_for_features(CPUState *cs)
  
gdb_find_static_feature("riscv-32bit-fpu.xml"),
  0);
 }
-if (env->misa_ext & RVV) {
+if (cpu->cfg.ext_zve32x) {
 gdb_register_coprocessor(cs, riscv_gdb_get_vector,
  riscv_gdb_set_vector,
  ricsv_gen_dynamic_vector_feature(cs, 
cs->gdb_num_regs),
-- 
2.45.1

[PULL 10/28] target/riscv: Add support for Zve32x extension

2024-05-27 Thread Alistair Francis

From: Jason Chien 

Add support for Zve32x extension and replace some checks for Zve32f with
Zve32x, since Zve32f depends on Zve32x.

Signed-off-by: Jason Chien 
Reviewed-by: Frank Chang 
Reviewed-by: Max Chou 
Reviewed-by: Daniel Henrique Barboza 
Message-ID: <20240328022343.6871-2-jason.ch...@sifive.com>
Signed-off-by: Alistair Francis 
---
 target/riscv/cpu_cfg.h  |  1 +
 target/riscv/cpu.c  |  2 ++
 target/riscv/cpu_helper.c   |  2 +-
 target/riscv/csr.c  |  2 +-
 target/riscv/tcg/tcg-cpu.c  | 16 
 target/riscv/insn_trans/trans_rvv.c.inc |  4 ++--
 6 files changed, 15 insertions(+), 12 deletions(-)

diff --git a/target/riscv/cpu_cfg.h b/target/riscv/cpu_cfg.h
index cb750154bd..dce49050c0 100644
--- a/target/riscv/cpu_cfg.h
+++ b/target/riscv/cpu_cfg.h
@@ -91,6 +91,7 @@ struct RISCVCPUConfig {
 bool ext_zhinx;
 bool ext_zhinxmin;
 bool ext_zve32f;
+bool ext_zve32x;
 bool ext_zve64f;
 bool ext_zve64d;
 bool ext_zvbb;
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index eb1a2e7d6d..d744594cc4 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -153,6 +153,7 @@ const RISCVIsaExtData isa_edata_arr[] = {
 ISA_EXT_DATA_ENTRY(zvbb, PRIV_VERSION_1_12_0, ext_zvbb),
 ISA_EXT_DATA_ENTRY(zvbc, PRIV_VERSION_1_12_0, ext_zvbc),
 ISA_EXT_DATA_ENTRY(zve32f, PRIV_VERSION_1_10_0, ext_zve32f),
+ISA_EXT_DATA_ENTRY(zve32x, PRIV_VERSION_1_10_0, ext_zve32x),
 ISA_EXT_DATA_ENTRY(zve64f, PRIV_VERSION_1_10_0, ext_zve64f),
 ISA_EXT_DATA_ENTRY(zve64d, PRIV_VERSION_1_10_0, ext_zve64d),
 ISA_EXT_DATA_ENTRY(zvfbfmin, PRIV_VERSION_1_12_0, ext_zvfbfmin),
@@ -1472,6 +1473,7 @@ const RISCVCPUMultiExtConfig riscv_cpu_extensions[] = {
 MULTI_EXT_CFG_BOOL("zfh", ext_zfh, false),
 MULTI_EXT_CFG_BOOL("zfhmin", ext_zfhmin, false),
 MULTI_EXT_CFG_BOOL("zve32f", ext_zve32f, false),
+MULTI_EXT_CFG_BOOL("zve32x", ext_zve32x, false),
 MULTI_EXT_CFG_BOOL("zve64f", ext_zve64f, false),
 MULTI_EXT_CFG_BOOL("zve64d", ext_zve64d, false),
 MULTI_EXT_CFG_BOOL("zvfbfmin", ext_zvfbfmin, false),
diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
index 179cf3d1a1..d71245a8cb 100644
--- a/target/riscv/cpu_helper.c
+++ b/target/riscv/cpu_helper.c
@@ -73,7 +73,7 @@ void cpu_get_tb_cpu_state(CPURISCVState *env, vaddr *pc,
 *pc = env->xl == MXL_RV32 ? env->pc & UINT32_MAX : env->pc;
 *cs_base = 0;
 
-if (cpu->cfg.ext_zve32f) {
+if (cpu->cfg.ext_zve32x) {
 /*
  * If env->vl equals to VLMAX, we can use generic vector operation
  * expanders (GVEC) to accerlate the vector operations.
diff --git a/target/riscv/csr.c b/target/riscv/csr.c
index 829d8346ed..58ef7079dc 100644
--- a/target/riscv/csr.c
+++ b/target/riscv/csr.c
@@ -93,7 +93,7 @@ static RISCVException fs(CPURISCVState *env, int csrno)
 
 static RISCVException vs(CPURISCVState *env, int csrno)
 {
-if (riscv_cpu_cfg(env)->ext_zve32f) {
+if (riscv_cpu_cfg(env)->ext_zve32x) {
 #if !defined(CONFIG_USER_ONLY)
 if (!env->debugger && !riscv_cpu_vector_enabled(env)) {
 return RISCV_EXCP_ILLEGAL_INST;
diff --git a/target/riscv/tcg/tcg-cpu.c b/target/riscv/tcg/tcg-cpu.c
index 40054a391a..e2cf5f429d 100644
--- a/target/riscv/tcg/tcg-cpu.c
+++ b/target/riscv/tcg/tcg-cpu.c
@@ -511,9 +511,13 @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, 
Error **errp)
 return;
 }
 
-if (cpu->cfg.ext_zve32f && !riscv_has_ext(env, RVF)) {
-error_setg(errp, "Zve32f/Zve64f extensions require F extension");
-return;
+/* The Zve32f extension depends on the Zve32x extension */
+if (cpu->cfg.ext_zve32f) {
+if (!riscv_has_ext(env, RVF)) {
+error_setg(errp, "Zve32f/Zve64f extensions require F extension");
+return;
+}
+cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zve32x), true);
 }
 
 if (cpu->cfg.ext_zvfh) {
@@ -658,13 +662,9 @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, 
Error **errp)
 cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zvbc), true);
 }
 
-/*
- * In principle Zve*x would also suffice here, were they supported
- * in qemu
- */
 if ((cpu->cfg.ext_zvbb || cpu->cfg.ext_zvkb || cpu->cfg.ext_zvkg ||
  cpu->cfg.ext_zvkned || cpu->cfg.ext_zvknha || cpu->cfg.ext_zvksed ||
- cpu->cfg.ext_zvksh) && !cpu->cfg.ext_zve32f) {
+ cpu->cfg.ext_zvksh) && !cpu->cfg.ext_zve32x) {
 error_setg(errp,
"Vector crypto extensions require V or Zve* extensions");
 return;
diff --git a/target/riscv/insn_trans/trans_rvv.c.inc 
b/target/riscv/insn_trans/trans_rvv.c.inc
index 7d84e7d812..eec2939e23 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -149,7 +149,7 @@ static bool do_vsetvl(DisasContext *s, int rd, int rs1, 
TCGv s2)
 {
 TC

Re: [PATCH v1 1/4] target/riscv/kvm: add software breakpoints support

2024-05-27 Thread Chao Du

On 2024-05-27 23:41, Andrew Jones  wrote:
> On Mon, May 27, 2024 at 02:19:13AM GMT, Chao Du wrote:
> > This patch implements insert/remove software breakpoint process:
> > 
> > Add an input parameter for kvm_arch_insert_sw_breakpoint() and
> > kvm_arch_remove_sw_breakpoint() to pass the length information,
> > which helps us to know whether it is a RVC instruction.
> > For some remove cases, we do not have the length info, so we need
> > to judge by ourselves.
> > 
> > For RISC-V, GDB treats single-step similarly to breakpoint: add a
> > breakpoint at the next step address, then continue. So this also
> > works for single-step debugging.
> > 
> > Add some stubs which are necessary for building, and will be
> > implemented later.
> > 
> > Signed-off-by: Chao Du 
> > ---
> >  accel/kvm/kvm-all.c|  8 ++--
> >  include/sysemu/kvm.h   |  6 ++-
> >  target/arm/kvm.c   |  6 ++-
> >  target/i386/kvm/kvm.c  |  6 ++-
> >  target/mips/kvm.c  |  6 ++-
> >  target/ppc/kvm.c   |  6 ++-
> >  target/riscv/kvm/kvm-cpu.c | 79 ++
> >  target/s390x/kvm/kvm.c |  6 ++-
> >  8 files changed, 107 insertions(+), 16 deletions(-)
> > 
> > diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
> > index c0be9f5eed..d27e77dbb2 100644
> > --- a/accel/kvm/kvm-all.c
> > +++ b/accel/kvm/kvm-all.c
> > @@ -3357,7 +3357,7 @@ int kvm_insert_breakpoint(CPUState *cpu, int type, 
> > vaddr addr, vaddr len)
> >  bp = g_new(struct kvm_sw_breakpoint, 1);
> >  bp->pc = addr;
> >  bp->use_count = 1;
> > -err = kvm_arch_insert_sw_breakpoint(cpu, bp);
> > +err = kvm_arch_insert_sw_breakpoint(cpu, bp, len);
> >  if (err) {
> >  g_free(bp);
> >  return err;
> > @@ -3396,7 +3396,7 @@ int kvm_remove_breakpoint(CPUState *cpu, int type, 
> > vaddr addr, vaddr len)
> >  return 0;
> >  }
> >  
> > -err = kvm_arch_remove_sw_breakpoint(cpu, bp);
> > +err = kvm_arch_remove_sw_breakpoint(cpu, bp, len);
> >  if (err) {
> >  return err;
> >  }
> > @@ -3426,10 +3426,10 @@ void kvm_remove_all_breakpoints(CPUState *cpu)
> >  CPUState *tmpcpu;
> >  
> >  QTAILQ_FOREACH_SAFE(bp, &s->kvm_sw_breakpoints, entry, next) {
> > -if (kvm_arch_remove_sw_breakpoint(cpu, bp) != 0) {
> > +if (kvm_arch_remove_sw_breakpoint(cpu, bp, 0) != 0) {
> >  /* Try harder to find a CPU that currently sees the 
> > breakpoint. */
> >  CPU_FOREACH(tmpcpu) {
> > -if (kvm_arch_remove_sw_breakpoint(tmpcpu, bp) == 0) {
> > +if (kvm_arch_remove_sw_breakpoint(tmpcpu, bp, 0) == 0) {
> 
> It's not nice to need to add 'len' to all arch insert/remove sw breakpoint
> implementations, and the fact we have to pass zero sometimes implies it's
> not the right approach.

Actually, I've considered checking the instruction length from the tail two
bits, as you suggested. But it may bring one additional memory read.
So I turned to use the length information from gdb. In most cases, we can
get the exact length and read/write accordingly. But the side effect is we
need to add an input parameter and hence all arch implementations need to
be adapted.
I chose the second way after a 'balance'.

Now I think the first one may be a 'cleaner' solution.
Will send the V2 series later.

Thanks.
Chao

> 
> >  break;
> >  }
> >  }
> > diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
> > index c31d9c7356..340e094ffb 100644
> > --- a/include/sysemu/kvm.h
> > +++ b/include/sysemu/kvm.h
> > @@ -391,9 +391,11 @@ struct kvm_sw_breakpoint 
> > *kvm_find_sw_breakpoint(CPUState *cpu,
> >  int kvm_sw_breakpoints_active(CPUState *cpu);
> >  
> >  int kvm_arch_insert_sw_breakpoint(CPUState *cpu,
> > -  struct kvm_sw_breakpoint *bp);
> > +  struct kvm_sw_breakpoint *bp,
> > +  vaddr len);
> >  int kvm_arch_remove_sw_breakpoint(CPUState *cpu,
> > -  struct kvm_sw_breakpoint *bp);
> > +  struct kvm_sw_breakpoint *bp,
> > +  vaddr len);
> >  int kvm_arch_insert_hw_breakpoint(vaddr addr, vaddr len, int type);
> >  int kvm_arch_remove_hw_breakpoint(vaddr addr, vaddr len, int type);
> >  void kvm_arch_remove_all_hw_breakpoints(void);
> > diff --git a/target/arm/kvm.c b/target/arm/kvm.c
> > index 7cf5cf31de..84593db544 100644
> > --- a/target/arm/kvm.c
> > +++ b/target/arm/kvm.c
> > @@ -2402,7 +2402,8 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, 
> > void *addr)
> >  /* C6.6.29 BRK instruction */
> >  static const uint32_t brk_insn = 0xd420;
> >  
> > -int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint 
> > *bp)
> > +int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_br

[PULL 14/28] target/riscv/cpu.c: fix Zvkb extension config

2024-05-27 Thread Alistair Francis

From: Yangyu Chen 

This code has a typo that maps the "zvkb" property to ext_zvkg, so users
cannot enable zvkb through the config. This patch fixes it.

Signed-off-by: Yangyu Chen 
Fixes: ea61ef7097d0 ("target/riscv: Move vector crypto extensions to 
riscv_cpu_extensions")
Reviewed-by: LIU Zhiwei 
Reviewed-by: Alistair Francis 
Reviewed-by: Max Chou 
Reviewed-by:  Weiwei Li 
Message-ID: 
Cc: qemu-stable 
Signed-off-by: Alistair Francis 
---
 target/riscv/cpu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index a74f0eb29c..0d6fb9b4ba 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -1539,7 +1539,7 @@ const RISCVCPUMultiExtConfig riscv_cpu_extensions[] = {
 /* Vector cryptography extensions */
 MULTI_EXT_CFG_BOOL("zvbb", ext_zvbb, false),
 MULTI_EXT_CFG_BOOL("zvbc", ext_zvbc, false),
-MULTI_EXT_CFG_BOOL("zvkb", ext_zvkg, false),
+MULTI_EXT_CFG_BOOL("zvkb", ext_zvkb, false),
 MULTI_EXT_CFG_BOOL("zvkg", ext_zvkg, false),
 MULTI_EXT_CFG_BOOL("zvkned", ext_zvkned, false),
 MULTI_EXT_CFG_BOOL("zvknha", ext_zvknha, false),
-- 
2.45.1

[PULL 06/28] target/riscv: change RISCV_EXCP_SEMIHOST exception number to 63

2024-05-27 Thread Alistair Francis

From: Clément Léger 

The current semihost exception number (16) is a reserved number (range
[16-17]). The upcoming double trap specification uses that number for
the double trap exception. Since the privileged spec (Table 22) defines
ranges for custom uses, change the semihosting exception number to 63,
which belongs to the range [48-63], in order to avoid any future
collisions with reserved exceptions.

Signed-off-by: Clément Léger 

Reviewed-by: Alistair Francis 
Message-ID: <20240422135840.1959967-1-cle...@rivosinc.com>
Signed-off-by: Alistair Francis 
---
 target/riscv/cpu_bits.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h
index fc2068ee4d..74318a925c 100644
--- a/target/riscv/cpu_bits.h
+++ b/target/riscv/cpu_bits.h
@@ -670,11 +670,11 @@ typedef enum RISCVException {
 RISCV_EXCP_INST_PAGE_FAULT = 0xc, /* since: priv-1.10.0 */
 RISCV_EXCP_LOAD_PAGE_FAULT = 0xd, /* since: priv-1.10.0 */
 RISCV_EXCP_STORE_PAGE_FAULT = 0xf, /* since: priv-1.10.0 */
-RISCV_EXCP_SEMIHOST = 0x10,
 RISCV_EXCP_INST_GUEST_PAGE_FAULT = 0x14,
 RISCV_EXCP_LOAD_GUEST_ACCESS_FAULT = 0x15,
 RISCV_EXCP_VIRT_INSTRUCTION_FAULT = 0x16,
 RISCV_EXCP_STORE_GUEST_AMO_ACCESS_FAULT = 0x17,
+RISCV_EXCP_SEMIHOST = 0x3f,
 } RISCVException;
 
 #define RISCV_EXCP_INT_FLAG                0x80000000
-- 
2.45.1

[PULL 04/28] target/riscv/kvm: implement SBI debug console (DBCN) calls

2024-05-27 Thread Alistair Francis

From: Daniel Henrique Barboza 

SBI defines a Debug Console extension "DBCN" that will, in time, replace
the legacy console putchar and getchar SBI extensions.

The appeal of the DBCN extension is that it allows multiple bytes to be
read/written in the SBI console in a single SBI call.

As far as KVM goes, the DBCN calls are forwarded by an in-kernel KVM
module to userspace. But this will only happen if the KVM module
actually supports this SBI extension and we activate it.

We'll check for DBCN support during init time, checking if get-reg-list
is advertising KVM_RISCV_SBI_EXT_DBCN. In that case, we'll enable it via
kvm_set_one_reg() during kvm_arch_init_vcpu().

Finally, change kvm_riscv_handle_sbi() to handle the incoming calls for
SBI_EXT_DBCN, reading and writing as required.

A simple KVM guest with 'earlycon=sbi', running in an emulated RISC-V
host, takes around 20 seconds to boot without using DBCN. With this
patch we're taking around 14 seconds to boot due to the speed-up in the
terminal output.  There's no change in boot time if the guest isn't
using earlycon.

Signed-off-by: Daniel Henrique Barboza 
Reviewed-by: Andrew Jones 
Message-ID: <20240425155012.581366-1-dbarb...@ventanamicro.com>
Signed-off-by: Alistair Francis 
---
 target/riscv/sbi_ecall_interface.h |  17 +
 target/riscv/kvm/kvm-cpu.c | 111 +
 2 files changed, 128 insertions(+)

diff --git a/target/riscv/sbi_ecall_interface.h 
b/target/riscv/sbi_ecall_interface.h
index 43899d08f6..7dfe5f72c6 100644
--- a/target/riscv/sbi_ecall_interface.h
+++ b/target/riscv/sbi_ecall_interface.h
@@ -12,6 +12,17 @@
 
 /* clang-format off */
 
+#define SBI_SUCCESS  0
+#define SBI_ERR_FAILED  -1
+#define SBI_ERR_NOT_SUPPORTED   -2
+#define SBI_ERR_INVALID_PARAM   -3
+#define SBI_ERR_DENIED  -4
+#define SBI_ERR_INVALID_ADDRESS -5
+#define SBI_ERR_ALREADY_AVAILABLE   -6
+#define SBI_ERR_ALREADY_STARTED -7
+#define SBI_ERR_ALREADY_STOPPED -8
+#define SBI_ERR_NO_SHMEM-9
+
 /* SBI Extension IDs */
 #define SBI_EXT_0_1_SET_TIMER   0x0
 #define SBI_EXT_0_1_CONSOLE_PUTCHAR 0x1
@@ -27,6 +38,7 @@
 #define SBI_EXT_IPI 0x735049
 #define SBI_EXT_RFENCE  0x52464E43
 #define SBI_EXT_HSM 0x48534D
+#define SBI_EXT_DBCN0x4442434E
 
 /* SBI function IDs for BASE extension */
 #define SBI_EXT_BASE_GET_SPEC_VERSION   0x0
@@ -57,6 +69,11 @@
 #define SBI_EXT_HSM_HART_STOP   0x1
 #define SBI_EXT_HSM_HART_GET_STATUS 0x2
 
+/* SBI function IDs for DBCN extension */
+#define SBI_EXT_DBCN_CONSOLE_WRITE  0x0
+#define SBI_EXT_DBCN_CONSOLE_READ   0x1
+#define SBI_EXT_DBCN_CONSOLE_WRITE_BYTE 0x2
+
 #define SBI_HSM_HART_STATUS_STARTED 0x0
 #define SBI_HSM_HART_STATUS_STOPPED 0x1
 #define SBI_HSM_HART_STATUS_START_PENDING   0x2
diff --git a/target/riscv/kvm/kvm-cpu.c b/target/riscv/kvm/kvm-cpu.c
index b8136c7ef8..d2491d84e2 100644
--- a/target/riscv/kvm/kvm-cpu.c
+++ b/target/riscv/kvm/kvm-cpu.c
@@ -409,6 +409,12 @@ static KVMCPUConfig kvm_v_vlenb = {
KVM_REG_RISCV_VECTOR_CSR_REG(vlenb)
 };
 
+static KVMCPUConfig kvm_sbi_dbcn = {
+.name = "sbi_dbcn",
+.kvm_reg_id = KVM_REG_RISCV | KVM_REG_SIZE_U64 |
+  KVM_REG_RISCV_SBI_EXT | KVM_RISCV_SBI_EXT_DBCN
+};
+
 static void kvm_riscv_update_cpu_cfg_isa_ext(RISCVCPU *cpu, CPUState *cs)
 {
 CPURISCVState *env = &cpu->env;
@@ -1037,6 +1043,20 @@ static int uint64_cmp(const void *a, const void *b)
 return 0;
 }
 
+static void kvm_riscv_check_sbi_dbcn_support(RISCVCPU *cpu,
+ KVMScratchCPU *kvmcpu,
+ struct kvm_reg_list *reglist)
+{
+struct kvm_reg_list *reg_search;
+
+reg_search = bsearch(&kvm_sbi_dbcn.kvm_reg_id, reglist->reg, reglist->n,
+ sizeof(uint64_t), uint64_cmp);
+
+if (reg_search) {
+kvm_sbi_dbcn.supported = true;
+}
+}
+
 static void kvm_riscv_read_vlenb(RISCVCPU *cpu, KVMScratchCPU *kvmcpu,
  struct kvm_reg_list *reglist)
 {
@@ -1142,6 +1162,8 @@ static void kvm_riscv_init_multiext_cfg(RISCVCPU *cpu, 
KVMScratchCPU *kvmcpu)
 if (riscv_has_ext(&cpu->env, RVV)) {
 kvm_riscv_read_vlenb(cpu, kvmcpu, reglist);
 }
+
+kvm_riscv_check_sbi_dbcn_support(cpu, kvmcpu, reglist);
 }
 
 static void riscv_init_kvm_registers(Object *cpu_obj)
@@ -1316,6 +1338,17 @@ static int kvm_vcpu_set_machine_ids(RISCVCPU *cpu, 
CPUState *cs)
 return ret;
 }
 
+static int kvm_vcpu_enable_sbi_dbcn(RISCVCPU *cpu, CPUState *cs)
+{
+target_ulong reg = 1;
+
+if (!kvm_sbi_dbcn.supported) {
+return 0;
+}
+
+return kvm_set_one_reg(cs, kvm_sbi_dbcn.kvm_reg_id, &reg);
+}
+
 int kvm_arch_init_vcpu(CPUState *cs)
 {
 int ret = 0;
@@ -1333,6 +1366,8 @@

[PULL 23/28] target/riscv: Remove experimental prefix from "B" extension

2024-05-27 Thread Alistair Francis

From: Rob Bradford 

This extension has now been ratified:
https://jira.riscv.org/browse/RVS-2006 so the "x-" prefix can be
removed.

Since this is now a ratified extension add it to the list of extensions
included in the "max" CPU variant.

Signed-off-by: Rob Bradford 
Reviewed-by: Andrew Jones 
Reviewed-by: Alistair Francis 
Reviewed-by: Daniel Henrique Barboza 
Reviewed-by: LIU Zhiwei 
Message-ID: <20240514110217.22516-1-rbradf...@rivosinc.com>
Signed-off-by: Alistair Francis 
---
 target/riscv/cpu.c | 2 +-
 target/riscv/tcg/tcg-cpu.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index 2946ac298a..cee6fc4a9a 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -1400,7 +1400,7 @@ static const MISAExtInfo misa_ext_info_arr[] = {
 MISA_EXT_INFO(RVJ, "x-j", "Dynamic translated languages"),
 MISA_EXT_INFO(RVV, "v", "Vector operations"),
 MISA_EXT_INFO(RVG, "g", "General purpose (IMAFD_Zicsr_Zifencei)"),
-MISA_EXT_INFO(RVB, "x-b", "Bit manipulation (Zba_Zbb_Zbs)")
+MISA_EXT_INFO(RVB, "b", "Bit manipulation (Zba_Zbb_Zbs)")
 };
 
 static void riscv_cpu_validate_misa_mxl(RISCVCPUClass *mcc)
diff --git a/target/riscv/tcg/tcg-cpu.c b/target/riscv/tcg/tcg-cpu.c
index f59b5d7f2d..683f604d9f 100644
--- a/target/riscv/tcg/tcg-cpu.c
+++ b/target/riscv/tcg/tcg-cpu.c
@@ -1301,7 +1301,7 @@ static void riscv_init_max_cpu_extensions(Object *obj)
 const RISCVCPUMultiExtConfig *prop;
 
 /* Enable RVG, RVJ and RVV that are disabled by default */
-riscv_cpu_set_misa_ext(env, env->misa_ext | RVG | RVJ | RVV);
+riscv_cpu_set_misa_ext(env, env->misa_ext | RVB | RVG | RVJ | RVV);
 
 for (prop = riscv_cpu_extensions; prop && prop->name; prop++) {
 isa_ext_update_enabled(cpu, prop->offset, true);
-- 
2.45.1

[PULL 25/28] target/riscv/kvm.c: Fix the hart bit setting of AIA

2024-05-27 Thread Alistair Francis

From: Yong-Xuan Wang 

In AIA spec, each hart (or each hart within a group) has a unique hart
number to locate the memory pages of interrupt files in the address
space. The number of bits required to represent any hart number is equal
to ceil(log2(hmax + 1)), where hmax is the largest hart number among
groups.

However, if the largest hart number among groups is a power of 2, QEMU
will pass an inaccurate hart-index-bit setting to Linux. For example, when
the guest OS has 4 harts, only ceil(log2(3 + 1)) = 2 bits are sufficient
to represent 4 harts, but we pass 3 to Linux. The code needs to be
updated to ensure accurate hart-index-bit settings.

Additionally, a Linux patch[1] is necessary to correctly recover the hart
index when the guest OS has only 1 hart, where the hart-index-bit is 0.

[1] 
https://lore.kernel.org/lkml/20240415064905.25184-1-yongxuan.w...@sifive.com/t/

Signed-off-by: Yong-Xuan Wang 
Reviewed-by: Andrew Jones 
Cc: qemu-stable 
Message-ID: <20240515091129.28116-1-yongxuan.w...@sifive.com>
Signed-off-by: Alistair Francis 
---
 target/riscv/kvm/kvm-cpu.c | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/target/riscv/kvm/kvm-cpu.c b/target/riscv/kvm/kvm-cpu.c
index 473416649f..235e2cdaca 100644
--- a/target/riscv/kvm/kvm-cpu.c
+++ b/target/riscv/kvm/kvm-cpu.c
@@ -1777,7 +1777,14 @@ void kvm_riscv_aia_create(MachineState *machine, 
uint64_t group_shift,
 }
 }
 
-hart_bits = find_last_bit(&max_hart_per_socket, BITS_PER_LONG) + 1;
+
+if (max_hart_per_socket > 1) {
+max_hart_per_socket--;
+hart_bits = find_last_bit(&max_hart_per_socket, BITS_PER_LONG) + 1;
+} else {
+hart_bits = 0;
+}
+
 ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CONFIG,
 KVM_DEV_RISCV_AIA_CONFIG_HART_BITS,
 &hart_bits, true, NULL);
-- 
2.45.1

[PULL 27/28] disas/riscv: Decode all of the pmpcfg and pmpaddr CSRs

2024-05-27 Thread Alistair Francis

From: Alistair Francis 

Previously we only listed a single pmpcfg CSR and the first 16 pmpaddr
CSRs. This patch fixes this to list all 16 pmpcfg and all 64 pmpaddr
CSRs as part of the disassembly.

Reported-by: Eric DeVolder 
Signed-off-by: Alistair Francis 
Fixes: ea10325917 ("RISC-V Disassembler")
Reviewed-by: Daniel Henrique Barboza 
Cc: qemu-stable 
Message-ID: <20240514051615.330979-1-alistair.fran...@wdc.com>
Signed-off-by: Alistair Francis 
---
 disas/riscv.c | 65 ++-
 1 file changed, 64 insertions(+), 1 deletion(-)

diff --git a/disas/riscv.c b/disas/riscv.c
index e236c8b5b7..297cfa2f63 100644
--- a/disas/riscv.c
+++ b/disas/riscv.c
@@ -2190,7 +2190,22 @@ static const char *csr_name(int csrno)
 case 0x0383: return "mibound";
 case 0x0384: return "mdbase";
 case 0x0385: return "mdbound";
-case 0x03a0: return "pmpcfg3";
+case 0x03a0: return "pmpcfg0";
+case 0x03a1: return "pmpcfg1";
+case 0x03a2: return "pmpcfg2";
+case 0x03a3: return "pmpcfg3";
+case 0x03a4: return "pmpcfg4";
+case 0x03a5: return "pmpcfg5";
+case 0x03a6: return "pmpcfg6";
+case 0x03a7: return "pmpcfg7";
+case 0x03a8: return "pmpcfg8";
+case 0x03a9: return "pmpcfg9";
+case 0x03aa: return "pmpcfg10";
+case 0x03ab: return "pmpcfg11";
+case 0x03ac: return "pmpcfg12";
+case 0x03ad: return "pmpcfg13";
+case 0x03ae: return "pmpcfg14";
+case 0x03af: return "pmpcfg15";
 case 0x03b0: return "pmpaddr0";
 case 0x03b1: return "pmpaddr1";
 case 0x03b2: return "pmpaddr2";
@@ -2207,6 +2222,54 @@ static const char *csr_name(int csrno)
 case 0x03bd: return "pmpaddr13";
 case 0x03be: return "pmpaddr14";
 case 0x03bf: return "pmpaddr15";
+case 0x03c0: return "pmpaddr16";
+case 0x03c1: return "pmpaddr17";
+case 0x03c2: return "pmpaddr18";
+case 0x03c3: return "pmpaddr19";
+case 0x03c4: return "pmpaddr20";
+case 0x03c5: return "pmpaddr21";
+case 0x03c6: return "pmpaddr22";
+case 0x03c7: return "pmpaddr23";
+case 0x03c8: return "pmpaddr24";
+case 0x03c9: return "pmpaddr25";
+case 0x03ca: return "pmpaddr26";
+case 0x03cb: return "pmpaddr27";
+case 0x03cc: return "pmpaddr28";
+case 0x03cd: return "pmpaddr29";
+case 0x03ce: return "pmpaddr30";
+case 0x03cf: return "pmpaddr31";
+case 0x03d0: return "pmpaddr32";
+case 0x03d1: return "pmpaddr33";
+case 0x03d2: return "pmpaddr34";
+case 0x03d3: return "pmpaddr35";
+case 0x03d4: return "pmpaddr36";
+case 0x03d5: return "pmpaddr37";
+case 0x03d6: return "pmpaddr38";
+case 0x03d7: return "pmpaddr39";
+case 0x03d8: return "pmpaddr40";
+case 0x03d9: return "pmpaddr41";
+case 0x03da: return "pmpaddr42";
+case 0x03db: return "pmpaddr43";
+case 0x03dc: return "pmpaddr44";
+case 0x03dd: return "pmpaddr45";
+case 0x03de: return "pmpaddr46";
+case 0x03df: return "pmpaddr47";
+case 0x03e0: return "pmpaddr48";
+case 0x03e1: return "pmpaddr49";
+case 0x03e2: return "pmpaddr50";
+case 0x03e3: return "pmpaddr51";
+case 0x03e4: return "pmpaddr52";
+case 0x03e5: return "pmpaddr53";
+case 0x03e6: return "pmpaddr54";
+case 0x03e7: return "pmpaddr55";
+case 0x03e8: return "pmpaddr56";
+case 0x03e9: return "pmpaddr57";
+case 0x03ea: return "pmpaddr58";
+case 0x03eb: return "pmpaddr59";
+case 0x03ec: return "pmpaddr60";
+case 0x03ed: return "pmpaddr61";
+case 0x03ee: return "pmpaddr62";
+case 0x03ef: return "pmpaddr63";
 case 0x0780: return "mtohost";
 case 0x0781: return "mfromhost";
 case 0x0782: return "mreset";
-- 
2.45.1

[PULL 17/28] target/riscv: rvv: Fix Zvfhmin checking for vfwcvt.f.f.v and vfncvt.f.f.w instructions

2024-05-27 Thread Alistair Francis

From: Max Chou 

According to the V spec, section 18.4, only the vfwcvt.f.f.v and
vfncvt.f.f.w instructions are affected by the Zvfhmin extension.
And the vfwcvt.f.f.v and vfncvt.f.f.w instructions only support the
conversions of

* From 1*SEW(16/32) to 2*SEW(32/64)
* From 2*SEW(32/64) to 1*SEW(16/32)

Signed-off-by: Max Chou 
Reviewed-by: Daniel Henrique Barboza 
Cc: qemu-stable 
Message-ID: <20240322092600.1198921-2-max.c...@sifive.com>
Signed-off-by: Alistair Francis 
---
 target/riscv/insn_trans/trans_rvv.c.inc | 20 ++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/target/riscv/insn_trans/trans_rvv.c.inc 
b/target/riscv/insn_trans/trans_rvv.c.inc
index eec2939e23..678b34b759 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -50,6 +50,22 @@ static bool require_rvf(DisasContext *s)
 }
 }
 
+static bool require_rvfmin(DisasContext *s)
+{
+if (s->mstatus_fs == EXT_STATUS_DISABLED) {
+return false;
+}
+
+switch (s->sew) {
+case MO_16:
+return s->cfg_ptr->ext_zvfhmin;
+case MO_32:
+return s->cfg_ptr->ext_zve32f;
+default:
+return false;
+}
+}
+
 static bool require_scale_rvf(DisasContext *s)
 {
 if (s->mstatus_fs == EXT_STATUS_DISABLED) {
@@ -75,8 +91,6 @@ static bool require_scale_rvfmin(DisasContext *s)
 }
 
 switch (s->sew) {
-case MO_8:
-return s->cfg_ptr->ext_zvfhmin;
 case MO_16:
 return s->cfg_ptr->ext_zve32f;
 case MO_32:
@@ -2685,6 +2699,7 @@ static bool opxfv_widen_check(DisasContext *s, arg_rmr *a)
 static bool opffv_widen_check(DisasContext *s, arg_rmr *a)
 {
 return opfv_widen_check(s, a) &&
+   require_rvfmin(s) &&
require_scale_rvfmin(s) &&
(s->sew != MO_8);
 }
@@ -2790,6 +2805,7 @@ static bool opfxv_narrow_check(DisasContext *s, arg_rmr 
*a)
 static bool opffv_narrow_check(DisasContext *s, arg_rmr *a)
 {
 return opfv_narrow_check(s, a) &&
+   require_rvfmin(s) &&
require_scale_rvfmin(s) &&
(s->sew != MO_8);
 }
-- 
2.45.1

[PULL 15/28] target/riscv: Implement dynamic establishment of custom decoder

2024-05-27 Thread Alistair Francis

From: Huang Tao 

In this patch, we modify the decoder to be a freely composable data
structure instead of a hardcoded one. It can be dynamically built up
according to the extensions.
This approach has several benefits:
1. Provides support for heterogeneous cpu architectures. As we add decoder in
   RISCVCPU, each cpu can have their own decoder, and the decoders can be
   different due to cpu's features.
2. Improve the decoding efficiency. We run the guard_func to see if the decoder
   can be added to the dynamic_decoder when building up the decoder. Therefore,
   there is no need to run the guard_func when decoding each instruction. It can
   improve the decoding efficiency.
3. For vendor or dynamic cpus, it allows them to customize their own decoder
   functions to improve decoding efficiency, especially when vendor-defined
   instruction sets increase. Because the decoder is built up dynamically,
   it can skip the other decoder guard functions when decoding.
4. It is a preparatory patch that allows adding a vendor decoder before
   decode_insn32() with minimal overhead for users that don't need this
   particular vendor decoder.

Signed-off-by: Huang Tao 
Suggested-by: Christoph Muellner 
Co-authored-by: LIU Zhiwei 
Reviewed-by: Richard Henderson 
Reviewed-by: Alistair Francis 
Message-ID: <20240506023607.29544-1-eric.hu...@linux.alibaba.com>
Signed-off-by: Alistair Francis 
---
 target/riscv/cpu.h |  1 +
 target/riscv/tcg/tcg-cpu.h | 15 +++
 target/riscv/cpu.c |  1 +
 target/riscv/tcg/tcg-cpu.c | 15 +++
 target/riscv/translate.c   | 31 +++
 5 files changed, 47 insertions(+), 16 deletions(-)

diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index 746efd099a..04ab0f153a 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -455,6 +455,7 @@ struct ArchCPU {
 uint32_t pmu_avail_ctrs;
 /* Mapping of events to counters */
 GHashTable *pmu_event_ctr_map;
+const GPtrArray *decoders;
 };
 
 /**
diff --git a/target/riscv/tcg/tcg-cpu.h b/target/riscv/tcg/tcg-cpu.h
index f7b32417f8..ce94253fe4 100644
--- a/target/riscv/tcg/tcg-cpu.h
+++ b/target/riscv/tcg/tcg-cpu.h
@@ -26,4 +26,19 @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error 
**errp);
 void riscv_tcg_cpu_finalize_features(RISCVCPU *cpu, Error **errp);
 bool riscv_cpu_tcg_compatible(RISCVCPU *cpu);
 
+struct DisasContext;
+struct RISCVCPUConfig;
+typedef struct RISCVDecoder {
+bool (*guard_func)(const struct RISCVCPUConfig *);
+bool (*riscv_cpu_decode_fn)(struct DisasContext *, uint32_t);
+} RISCVDecoder;
+
+typedef bool (*riscv_cpu_decode_fn)(struct DisasContext *, uint32_t);
+
+extern const size_t decoder_table_size;
+
+extern const RISCVDecoder decoder_table[];
+
+void riscv_tcg_cpu_finalize_dynamic_decoder(RISCVCPU *cpu);
+
 #endif
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index 0d6fb9b4ba..abeb50369c 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -1134,6 +1134,7 @@ void riscv_cpu_finalize_features(RISCVCPU *cpu, Error 
**errp)
 error_propagate(errp, local_err);
 return;
 }
+riscv_tcg_cpu_finalize_dynamic_decoder(cpu);
 } else if (kvm_enabled()) {
 riscv_kvm_cpu_finalize_features(cpu, &local_err);
 if (local_err != NULL) {
diff --git a/target/riscv/tcg/tcg-cpu.c b/target/riscv/tcg/tcg-cpu.c
index fedc035313..f59b5d7f2d 100644
--- a/target/riscv/tcg/tcg-cpu.c
+++ b/target/riscv/tcg/tcg-cpu.c
@@ -863,6 +863,21 @@ void riscv_tcg_cpu_finalize_features(RISCVCPU *cpu, Error 
**errp)
 }
 }
 
+void riscv_tcg_cpu_finalize_dynamic_decoder(RISCVCPU *cpu)
+{
+GPtrArray *dynamic_decoders;
+dynamic_decoders = g_ptr_array_sized_new(decoder_table_size);
+for (size_t i = 0; i < decoder_table_size; ++i) {
+if (decoder_table[i].guard_func &&
+decoder_table[i].guard_func(&cpu->cfg)) {
+g_ptr_array_add(dynamic_decoders,
+(gpointer)decoder_table[i].riscv_cpu_decode_fn);
+}
+}
+
+cpu->decoders = dynamic_decoders;
+}
+
 bool riscv_cpu_tcg_compatible(RISCVCPU *cpu)
 {
 return object_dynamic_cast(OBJECT(cpu), TYPE_RISCV_CPU_HOST) == NULL;
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index 2c27fd4ce1..4cd6480558 100644
--- a/target/riscv/translate.c
+++ b/target/riscv/translate.c
@@ -35,6 +35,8 @@
 #include "exec/helper-info.c.inc"
 #undef  HELPER_H
 
+#include "tcg/tcg-cpu.h"
+
 /* global register indices */
 static TCGv cpu_gpr[32], cpu_gprh[32], cpu_pc, cpu_vl, cpu_vstart;
 static TCGv_i64 cpu_fpr[32]; /* assume F and D extensions */
@@ -114,6 +116,7 @@ typedef struct DisasContext {
 /* FRM is known to contain a valid value. */
 bool frm_valid;
 bool insn_start_updated;
+const GPtrArray *decoders;
 } DisasContext;
 
 static inline bool has_ext(DisasContext *ctx, uint32_t ext)
@@ -1123,21 +1126,16 @@ static inline int insn_len(uint16_t first_word)
 return (first_word & 3) == 3 ?

[PULL 24/28] target/riscv: rvzicbo: Fixup CBO extension register calculation

2024-05-27 Thread Alistair Francis

From: Alistair Francis 

When running the instruction

```
cbo.flush 0(x0)
```

QEMU would segfault.

The issue was in cpu_gpr[a->rs1] as QEMU does not have cpu_gpr[0]
allocated.

In order to fix this let's use the existing get_address()
helper. This also has the benefit of performing pointer mask
calculations on the address specified in rs1.

The pointer masking specification specifically states:

"""
Cache Management Operations: All instructions in Zicbom, Zicbop and Zicboz
"""

So this is the correct behaviour and we previously have been incorrectly
not masking the address.

Signed-off-by: Alistair Francis 
Reported-by: Fabian Thomas 
Fixes: e05da09b7cfd ("target/riscv: implement Zicbom extension")
Reviewed-by: Richard Henderson 
Cc: qemu-stable 
Message-ID: <20240514023910.301766-1-alistair.fran...@wdc.com>
Signed-off-by: Alistair Francis 
---
 target/riscv/insn_trans/trans_rvzicbo.c.inc | 16 
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/target/riscv/insn_trans/trans_rvzicbo.c.inc 
b/target/riscv/insn_trans/trans_rvzicbo.c.inc
index d5d7095903..15711c3140 100644
--- a/target/riscv/insn_trans/trans_rvzicbo.c.inc
+++ b/target/riscv/insn_trans/trans_rvzicbo.c.inc
@@ -31,27 +31,35 @@
 static bool trans_cbo_clean(DisasContext *ctx, arg_cbo_clean *a)
 {
 REQUIRE_ZICBOM(ctx);
-gen_helper_cbo_clean_flush(tcg_env, cpu_gpr[a->rs1]);
+TCGv src = get_address(ctx, a->rs1, 0);
+
+gen_helper_cbo_clean_flush(tcg_env, src);
 return true;
 }
 
 static bool trans_cbo_flush(DisasContext *ctx, arg_cbo_flush *a)
 {
 REQUIRE_ZICBOM(ctx);
-gen_helper_cbo_clean_flush(tcg_env, cpu_gpr[a->rs1]);
+TCGv src = get_address(ctx, a->rs1, 0);
+
+gen_helper_cbo_clean_flush(tcg_env, src);
 return true;
 }
 
 static bool trans_cbo_inval(DisasContext *ctx, arg_cbo_inval *a)
 {
 REQUIRE_ZICBOM(ctx);
-gen_helper_cbo_inval(tcg_env, cpu_gpr[a->rs1]);
+TCGv src = get_address(ctx, a->rs1, 0);
+
+gen_helper_cbo_inval(tcg_env, src);
 return true;
 }
 
 static bool trans_cbo_zero(DisasContext *ctx, arg_cbo_zero *a)
 {
 REQUIRE_ZICBOZ(ctx);
-gen_helper_cbo_zero(tcg_env, cpu_gpr[a->rs1]);
+TCGv src = get_address(ctx, a->rs1, 0);
+
+gen_helper_cbo_zero(tcg_env, src);
 return true;
 }
-- 
2.45.1

[PULL 21/28] target/riscv: prioritize pmp errors in raise_mmu_exception()

2024-05-27 Thread Alistair Francis

From: Daniel Henrique Barboza 

raise_mmu_exception(), as is today, is prioritizing guest page faults by
checking first if virt_enabled && !first_stage, and then considering the
regular inst/load/store faults.

There's no mention in the spec about guest page fault being a higher
priority than PMP faults. In fact, privileged spec section 3.7.1 says:

"Attempting to fetch an instruction from a PMP region that does not have
execute permissions raises an instruction access-fault exception.
Attempting to execute a load or load-reserved instruction which accesses
a physical address within a PMP region without read permissions raises a
load access-fault exception. Attempting to execute a store,
store-conditional, or AMO instruction which accesses a physical address
within a PMP region without write permissions raises a store
access-fault exception."

So, in fact, we're doing it wrong - PMP faults should always be thrown,
regardless of also being a first or second stage fault.

The way riscv_cpu_tlb_fill() and get_physical_address() work is
adequate: a TRANSLATE_PMP_FAIL error is immediately reported and
reflected in the 'pmp_violation' flag. What we need is to change
raise_mmu_exception() to prioritize it.

Reported-by: Joseph Chan 
Fixes: 82d53adfbb ("target/riscv/cpu_helper.c: Invalid exception on MMU 
translation stage")
Signed-off-by: Daniel Henrique Barboza 
Reviewed-by: Alistair Francis 
Message-ID: <20240413105929.7030-1-alexei.filip...@syntacore.com>
Cc: qemu-stable 
Signed-off-by: Alistair Francis 
---
 target/riscv/cpu_helper.c | 22 --
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
index d71245a8cb..574886a694 100644
--- a/target/riscv/cpu_helper.c
+++ b/target/riscv/cpu_helper.c
@@ -1177,28 +1177,30 @@ static void raise_mmu_exception(CPURISCVState *env, 
target_ulong address,
 
 switch (access_type) {
 case MMU_INST_FETCH:
-if (env->virt_enabled && !first_stage) {
+if (pmp_violation) {
+cs->exception_index = RISCV_EXCP_INST_ACCESS_FAULT;
+} else if (env->virt_enabled && !first_stage) {
 cs->exception_index = RISCV_EXCP_INST_GUEST_PAGE_FAULT;
 } else {
-cs->exception_index = pmp_violation ?
-RISCV_EXCP_INST_ACCESS_FAULT : RISCV_EXCP_INST_PAGE_FAULT;
+cs->exception_index = RISCV_EXCP_INST_PAGE_FAULT;
 }
 break;
 case MMU_DATA_LOAD:
-if (two_stage && !first_stage) {
+if (pmp_violation) {
+cs->exception_index = RISCV_EXCP_LOAD_ACCESS_FAULT;
+} else if (two_stage && !first_stage) {
 cs->exception_index = RISCV_EXCP_LOAD_GUEST_ACCESS_FAULT;
 } else {
-cs->exception_index = pmp_violation ?
-RISCV_EXCP_LOAD_ACCESS_FAULT : RISCV_EXCP_LOAD_PAGE_FAULT;
+cs->exception_index = RISCV_EXCP_LOAD_PAGE_FAULT;
 }
 break;
 case MMU_DATA_STORE:
-if (two_stage && !first_stage) {
+if (pmp_violation) {
+cs->exception_index = RISCV_EXCP_STORE_AMO_ACCESS_FAULT;
+} else if (two_stage && !first_stage) {
 cs->exception_index = RISCV_EXCP_STORE_GUEST_AMO_ACCESS_FAULT;
 } else {
-cs->exception_index = pmp_violation ?
-RISCV_EXCP_STORE_AMO_ACCESS_FAULT :
-RISCV_EXCP_STORE_PAGE_FAULT;
+cs->exception_index = RISCV_EXCP_STORE_PAGE_FAULT;
 }
 break;
 default:
-- 
2.45.1

[PULL 28/28] target/riscv: raise an exception when CSRRS/CSRRC writes a read-only CSR

2024-05-27 Thread Alistair Francis

From: Yu-Ming Chang 

Both CSRRS and CSRRC always read the addressed CSR and cause any read side
effects regardless of rs1 and rd fields. Note that if rs1 specifies a register
holding a zero value other than x0, the instruction will still attempt to write
the unmodified value back to the CSR and will cause any attendant side effects.

So if CSRRS or CSRRC tries to write a read-only CSR with rs1 which specifies
a register holding a zero value, an illegal instruction exception should be
raised.

Signed-off-by: Yu-Ming Chang 
Reviewed-by: Alistair Francis 
Message-ID: <20240403070823.80897-1-yumin...@andestech.com>
Signed-off-by: Alistair Francis 
---
 target/riscv/cpu.h   |  4 
 target/riscv/csr.c   | 51 
 target/riscv/op_helper.c |  6 ++---
 3 files changed, 53 insertions(+), 8 deletions(-)

diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index 12d8b5344a..1501868008 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -709,6 +709,8 @@ void cpu_get_tb_cpu_state(CPURISCVState *env, vaddr *pc,
 void riscv_cpu_update_mask(CPURISCVState *env);
 bool riscv_cpu_is_32bit(RISCVCPU *cpu);
 
+RISCVException riscv_csrr(CPURISCVState *env, int csrno,
+  target_ulong *ret_value);
 RISCVException riscv_csrrw(CPURISCVState *env, int csrno,
target_ulong *ret_value,
target_ulong new_value, target_ulong write_mask);
@@ -741,6 +743,8 @@ typedef RISCVException (*riscv_csr_op_fn)(CPURISCVState 
*env, int csrno,
   target_ulong new_value,
   target_ulong write_mask);
 
+RISCVException riscv_csrr_i128(CPURISCVState *env, int csrno,
+   Int128 *ret_value);
 RISCVException riscv_csrrw_i128(CPURISCVState *env, int csrno,
 Int128 *ret_value,
 Int128 new_value, Int128 write_mask);
diff --git a/target/riscv/csr.c b/target/riscv/csr.c
index 58ef7079dc..57f831fedc 100644
--- a/target/riscv/csr.c
+++ b/target/riscv/csr.c
@@ -4322,7 +4322,7 @@ static RISCVException rmw_seed(CPURISCVState *env, int 
csrno,
 
 static inline RISCVException riscv_csrrw_check(CPURISCVState *env,
int csrno,
-   bool write_mask)
+   bool write)
 {
 /* check privileges and return RISCV_EXCP_ILLEGAL_INST if check fails */
 bool read_only = get_field(csrno, 0xC00) == 3;
@@ -4344,7 +4344,7 @@ static inline RISCVException 
riscv_csrrw_check(CPURISCVState *env,
 }
 
 /* read / write check */
-if (write_mask && read_only) {
+if (write && read_only) {
 return RISCV_EXCP_ILLEGAL_INST;
 }
 
@@ -4431,11 +4431,22 @@ static RISCVException riscv_csrrw_do64(CPURISCVState 
*env, int csrno,
 return RISCV_EXCP_NONE;
 }
 
+RISCVException riscv_csrr(CPURISCVState *env, int csrno,
+   target_ulong *ret_value)
+{
+RISCVException ret = riscv_csrrw_check(env, csrno, false);
+if (ret != RISCV_EXCP_NONE) {
+return ret;
+}
+
+return riscv_csrrw_do64(env, csrno, ret_value, 0, 0);
+}
+
 RISCVException riscv_csrrw(CPURISCVState *env, int csrno,
target_ulong *ret_value,
target_ulong new_value, target_ulong write_mask)
 {
-RISCVException ret = riscv_csrrw_check(env, csrno, write_mask);
+RISCVException ret = riscv_csrrw_check(env, csrno, true);
 if (ret != RISCV_EXCP_NONE) {
 return ret;
 }
@@ -4483,13 +4494,45 @@ static RISCVException riscv_csrrw_do128(CPURISCVState 
*env, int csrno,
 return RISCV_EXCP_NONE;
 }
 
+RISCVException riscv_csrr_i128(CPURISCVState *env, int csrno,
+   Int128 *ret_value)
+{
+RISCVException ret;
+
+ret = riscv_csrrw_check(env, csrno, false);
+if (ret != RISCV_EXCP_NONE) {
+return ret;
+}
+
+if (csr_ops[csrno].read128) {
+return riscv_csrrw_do128(env, csrno, ret_value,
+ int128_zero(), int128_zero());
+}
+
+/*
+ * Fall back to 64-bit version for now, if the 128-bit alternative isn't
+ * at all defined.
+ * Note, some CSRs don't need to extend to MXLEN (64 upper bits non
+ * significant), for those, this fallback is correctly handling the
+ * accesses
+ */
+target_ulong old_value;
+ret = riscv_csrrw_do64(env, csrno, &old_value,
+   (target_ulong)0,
+   (target_ulong)0);
+if (ret == RISCV_EXCP_NONE && ret_value) {
+*ret_value = int128_make64(old_value);
+}
+return ret;
+}
+
 RISCVException riscv_csrrw_i128(CPURISCVState *env, int csrno,
 Int128 *ret_value,
 Int128 new_value, Int128 write_mask)
 {

[PULL 20/28] target/riscv: rvv: Remove redundant SEW checking for vector fp narrow/widen instructions

2024-05-27 Thread Alistair Francis

From: Max Chou 

If the checking functions check both the single and double width
operators at the same time, then the single width operator checking
functions (require_rvf[min]) will check whether the SEW is 8.

Signed-off-by: Max Chou 
Reviewed-by: Daniel Henrique Barboza 
Cc: qemu-stable 
Message-ID: <20240322092600.1198921-5-max.c...@sifive.com>
Signed-off-by: Alistair Francis 
---
 target/riscv/insn_trans/trans_rvv.c.inc | 16 
 1 file changed, 4 insertions(+), 12 deletions(-)

diff --git a/target/riscv/insn_trans/trans_rvv.c.inc 
b/target/riscv/insn_trans/trans_rvv.c.inc
index c3af38af80..3a3896ba06 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -2333,7 +2333,6 @@ static bool opfvv_widen_check(DisasContext *s, arg_rmrr 
*a)
 return require_rvv(s) &&
require_rvf(s) &&
require_scale_rvf(s) &&
-   (s->sew != MO_8) &&
vext_check_isa_ill(s) &&
vext_check_dss(s, a->rd, a->rs1, a->rs2, a->vm);
 }
@@ -2373,7 +2372,6 @@ static bool opfvf_widen_check(DisasContext *s, arg_rmrr 
*a)
 return require_rvv(s) &&
require_rvf(s) &&
require_scale_rvf(s) &&
-   (s->sew != MO_8) &&
vext_check_isa_ill(s) &&
vext_check_ds(s, a->rd, a->rs2, a->vm);
 }
@@ -2406,7 +2404,6 @@ static bool opfwv_widen_check(DisasContext *s, arg_rmrr 
*a)
 return require_rvv(s) &&
require_rvf(s) &&
require_scale_rvf(s) &&
-   (s->sew != MO_8) &&
vext_check_isa_ill(s) &&
vext_check_dds(s, a->rd, a->rs1, a->rs2, a->vm);
 }
@@ -2446,7 +2443,6 @@ static bool opfwf_widen_check(DisasContext *s, arg_rmrr 
*a)
 return require_rvv(s) &&
require_rvf(s) &&
require_scale_rvf(s) &&
-   (s->sew != MO_8) &&
vext_check_isa_ill(s) &&
vext_check_dd(s, a->rd, a->rs2, a->vm);
 }
@@ -2704,8 +2700,7 @@ static bool opffv_widen_check(DisasContext *s, arg_rmr *a)
 {
 return opfv_widen_check(s, a) &&
require_rvfmin(s) &&
-   require_scale_rvfmin(s) &&
-   (s->sew != MO_8);
+   require_scale_rvfmin(s);
 }
 
 #define GEN_OPFV_WIDEN_TRANS(NAME, CHECK, HELPER, FRM) \
@@ -2810,16 +2805,14 @@ static bool opffv_narrow_check(DisasContext *s, arg_rmr 
*a)
 {
 return opfv_narrow_check(s, a) &&
require_rvfmin(s) &&
-   require_scale_rvfmin(s) &&
-   (s->sew != MO_8);
+   require_scale_rvfmin(s);
 }
 
 static bool opffv_rod_narrow_check(DisasContext *s, arg_rmr *a)
 {
 return opfv_narrow_check(s, a) &&
require_rvf(s) &&
-   require_scale_rvf(s) &&
-   (s->sew != MO_8);
+   require_scale_rvf(s);
 }
 
 #define GEN_OPFV_NARROW_TRANS(NAME, CHECK, HELPER, FRM)\
@@ -2947,8 +2940,7 @@ static bool freduction_widen_check(DisasContext *s, 
arg_rmrr *a)
 {
 return reduction_widen_check(s, a) &&
require_rvf(s) &&
-   require_scale_rvf(s) &&
-   (s->sew != MO_8);
+   require_scale_rvf(s);
 }
 
 GEN_OPFVV_WIDEN_TRANS(vfwredusum_vs, freduction_widen_check)
-- 
2.45.1

[PULL 13/28] target/riscv: Fix the element agnostic function problem

2024-05-27 Thread Alistair Francis

From: Huang Tao 

In RVV and vcrypto instructions, the masked and tail elements are set to 1s
using vext_set_elems_1s function if the vma/vta bit is set. It is the element
agnostic policy.

However, this function can't deal with the big-endian situation. This patch
fixes the problem by adding handling for that case.

Signed-off-by: Huang Tao 
Suggested-by: Richard Henderson 
Reviewed-by: LIU Zhiwei 
Cc: qemu-stable 
Message-ID: <20240325021654.6594-1-eric.hu...@linux.alibaba.com>
Signed-off-by: Alistair Francis 
---
 target/riscv/vector_internals.c | 22 ++
 1 file changed, 22 insertions(+)

diff --git a/target/riscv/vector_internals.c b/target/riscv/vector_internals.c
index 996c21eb31..05b2d01e58 100644
--- a/target/riscv/vector_internals.c
+++ b/target/riscv/vector_internals.c
@@ -30,6 +30,28 @@ void vext_set_elems_1s(void *base, uint32_t is_agnostic, 
uint32_t cnt,
 if (tot - cnt == 0) {
 return ;
 }
+
+if (HOST_BIG_ENDIAN) {
+/*
+ * Deal with the situation when the elements are inside
+ * only one uint64 block including setting the
+ * masked-off element.
+ */
+if (((tot - 1) ^ cnt) < 8) {
+memset(base + H1(tot - 1), -1, tot - cnt);
+return;
+}
+/*
+ * Otherwise, at least cross two uint64_t blocks.
+ * Set first unaligned block.
+ */
+if (cnt % 8 != 0) {
+uint32_t j = ROUND_UP(cnt, 8);
+memset(base + H1(j - 1), -1, j - cnt);
+cnt = j;
+}
+/* Set other 64bit aligned blocks */
+}
 memset(base + cnt, -1, tot - cnt);
 }
 
-- 
2.45.1

[PULL 19/28] target/riscv: rvv: Check single width operator for vfncvt.rod.f.f.w

2024-05-27 Thread Alistair Francis

From: Max Chou 

The opfv_narrow_check needs to check the single width float operator by
require_rvf.

Signed-off-by: Max Chou 
Reviewed-by: Daniel Henrique Barboza 
Cc: qemu-stable 
Message-ID: <20240322092600.1198921-4-max.c...@sifive.com>
Signed-off-by: Alistair Francis 
---
 target/riscv/insn_trans/trans_rvv.c.inc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/target/riscv/insn_trans/trans_rvv.c.inc 
b/target/riscv/insn_trans/trans_rvv.c.inc
index a7217aed4e..c3af38af80 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -2817,6 +2817,7 @@ static bool opffv_narrow_check(DisasContext *s, arg_rmr 
*a)
 static bool opffv_rod_narrow_check(DisasContext *s, arg_rmr *a)
 {
 return opfv_narrow_check(s, a) &&
+   require_rvf(s) &&
require_scale_rvf(s) &&
(s->sew != MO_8);
 }
-- 
2.45.1

[PULL 05/28] hw/riscv/boot.c: Support 64-bit address for initrd

2024-05-27 Thread Alistair Francis

From: Cheng Yang 

Use qemu_fdt_setprop_u64() instead of qemu_fdt_setprop_cell()
to set the address of initrd in FDT to support 64-bit address.

Signed-off-by: Cheng Yang 
Reviewed-by: Alistair Francis 
Message-ID: 
Signed-off-by: Alistair Francis 
---
 hw/riscv/boot.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hw/riscv/boot.c b/hw/riscv/boot.c
index 09878e722c..47281ca853 100644
--- a/hw/riscv/boot.c
+++ b/hw/riscv/boot.c
@@ -209,8 +209,8 @@ static void riscv_load_initrd(MachineState *machine, 
uint64_t kernel_entry)
 /* Some RISC-V machines (e.g. opentitan) don't have a fdt. */
 if (fdt) {
 end = start + size;
-qemu_fdt_setprop_cell(fdt, "/chosen", "linux,initrd-start", start);
-qemu_fdt_setprop_cell(fdt, "/chosen", "linux,initrd-end", end);
+qemu_fdt_setprop_u64(fdt, "/chosen", "linux,initrd-start", start);
+qemu_fdt_setprop_u64(fdt, "/chosen", "linux,initrd-end", end);
 }
 }
 
-- 
2.45.1

[PULL 11/28] target/riscv: Add support for Zve64x extension

2024-05-27 Thread Alistair Francis

From: Jason Chien 

Add support for Zve64x extension. Enabling Zve64f enables Zve64x and
enabling Zve64x enables Zve32x according to their dependency.

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2107
Signed-off-by: Jason Chien 
Reviewed-by: Frank Chang 
Reviewed-by: Max Chou 
Reviewed-by: Daniel Henrique Barboza 
Message-ID: <20240328022343.6871-3-jason.ch...@sifive.com>
Signed-off-by: Alistair Francis 
---
 target/riscv/cpu_cfg.h |  1 +
 target/riscv/cpu.c |  2 ++
 target/riscv/tcg/tcg-cpu.c | 17 +++--
 3 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/target/riscv/cpu_cfg.h b/target/riscv/cpu_cfg.h
index dce49050c0..e1e4f32698 100644
--- a/target/riscv/cpu_cfg.h
+++ b/target/riscv/cpu_cfg.h
@@ -94,6 +94,7 @@ struct RISCVCPUConfig {
 bool ext_zve32x;
 bool ext_zve64f;
 bool ext_zve64d;
+bool ext_zve64x;
 bool ext_zvbb;
 bool ext_zvbc;
 bool ext_zvkb;
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index d744594cc4..a74f0eb29c 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -156,6 +156,7 @@ const RISCVIsaExtData isa_edata_arr[] = {
 ISA_EXT_DATA_ENTRY(zve32x, PRIV_VERSION_1_10_0, ext_zve32x),
 ISA_EXT_DATA_ENTRY(zve64f, PRIV_VERSION_1_10_0, ext_zve64f),
 ISA_EXT_DATA_ENTRY(zve64d, PRIV_VERSION_1_10_0, ext_zve64d),
+ISA_EXT_DATA_ENTRY(zve64x, PRIV_VERSION_1_10_0, ext_zve64x),
 ISA_EXT_DATA_ENTRY(zvfbfmin, PRIV_VERSION_1_12_0, ext_zvfbfmin),
 ISA_EXT_DATA_ENTRY(zvfbfwma, PRIV_VERSION_1_12_0, ext_zvfbfwma),
 ISA_EXT_DATA_ENTRY(zvfh, PRIV_VERSION_1_12_0, ext_zvfh),
@@ -1476,6 +1477,7 @@ const RISCVCPUMultiExtConfig riscv_cpu_extensions[] = {
 MULTI_EXT_CFG_BOOL("zve32x", ext_zve32x, false),
 MULTI_EXT_CFG_BOOL("zve64f", ext_zve64f, false),
 MULTI_EXT_CFG_BOOL("zve64d", ext_zve64d, false),
+MULTI_EXT_CFG_BOOL("zve64x", ext_zve64x, false),
 MULTI_EXT_CFG_BOOL("zvfbfmin", ext_zvfbfmin, false),
 MULTI_EXT_CFG_BOOL("zvfbfwma", ext_zvfbfwma, false),
 MULTI_EXT_CFG_BOOL("zvfh", ext_zvfh, false),
diff --git a/target/riscv/tcg/tcg-cpu.c b/target/riscv/tcg/tcg-cpu.c
index e2cf5f429d..fedc035313 100644
--- a/target/riscv/tcg/tcg-cpu.c
+++ b/target/riscv/tcg/tcg-cpu.c
@@ -498,17 +498,22 @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, 
Error **errp)
 
 /* The Zve64d extension depends on the Zve64f extension */
 if (cpu->cfg.ext_zve64d) {
+if (!riscv_has_ext(env, RVD)) {
+error_setg(errp, "Zve64d/V extensions require D extension");
+return;
+}
 cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zve64f), true);
 }
 
-/* The Zve64f extension depends on the Zve32f extension */
+/* The Zve64f extension depends on the Zve64x and Zve32f extensions */
 if (cpu->cfg.ext_zve64f) {
+cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zve64x), true);
 cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zve32f), true);
 }
 
-if (cpu->cfg.ext_zve64d && !riscv_has_ext(env, RVD)) {
-error_setg(errp, "Zve64d/V extensions require D extension");
-return;
+/* The Zve64x extension depends on the Zve32x extension */
+if (cpu->cfg.ext_zve64x) {
+cpu_cfg_ext_auto_update(cpu, CPU_CFG_OFFSET(ext_zve32x), true);
 }
 
 /* The Zve32f extension depends on the Zve32x extension */
@@ -670,10 +675,10 @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, 
Error **errp)
 return;
 }
 
-if ((cpu->cfg.ext_zvbc || cpu->cfg.ext_zvknhb) && !cpu->cfg.ext_zve64f) {
+if ((cpu->cfg.ext_zvbc || cpu->cfg.ext_zvknhb) && !cpu->cfg.ext_zve64x) {
 error_setg(
 errp,
-"Zvbc and Zvknhb extensions require V or Zve64{f,d} extensions");
+"Zvbc and Zvknhb extensions require V or Zve64x extensions");
 return;
 }
 
-- 
2.45.1

[PULL 07/28] target/riscv/kvm: tolerate KVM disable ext errors

2024-05-27 Thread Alistair Francis

From: Daniel Henrique Barboza 

Running a KVM guest using a 6.9-rc3 kernel, in a 6.8 host that has zkr
enabled, will fail with a kernel oops SIGILL right at the start. The
reason is that we can't expose zkr without implementing the SEED CSR.
Disabling zkr in the guest would be a workaround, but if the KVM doesn't
allow it we'll error out and never boot.

In hindsight this is too strict. If we keep proceeding, despite not
disabling the extension in the KVM vcpu, we'll not add the extension in
the riscv,isa. The guest kernel will be unaware of the extension, i.e.
it doesn't matter if the KVM vcpu has it enabled underneath or not. So
it's ok to keep booting in this case.

Change our current logic to not error out if we fail to disable an
extension in kvm_set_one_reg(), but show a warning and keep booting. It
is important to throw a warning because we must make the user aware that
the extension is still available in the vcpu, meaning that an
ill-behaved guest can ignore the riscv,isa settings and use the
extension.

The case we're handling happens with an EINVAL error code. If we fail to
disable the extension in KVM for any other reason, error out.

We'll also keep erroring out when we fail to enable an extension in KVM,
since adding the extension in riscv,isa at this point will cause a guest
malfunction because the extension isn't enabled in the vcpu.

Suggested-by: Andrew Jones 
Signed-off-by: Daniel Henrique Barboza 
Reviewed-by: Andrew Jones 
Cc: qemu-stable 
Message-ID: <20240422171425.333037-2-dbarb...@ventanamicro.com>
Signed-off-by: Alistair Francis 
---
 target/riscv/kvm/kvm-cpu.c | 12 
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/target/riscv/kvm/kvm-cpu.c b/target/riscv/kvm/kvm-cpu.c
index d2491d84e2..473416649f 100644
--- a/target/riscv/kvm/kvm-cpu.c
+++ b/target/riscv/kvm/kvm-cpu.c
@@ -433,10 +433,14 @@ static void kvm_riscv_update_cpu_cfg_isa_ext(RISCVCPU 
*cpu, CPUState *cs)
 reg = kvm_cpu_cfg_get(cpu, multi_ext_cfg);
 ret = kvm_set_one_reg(cs, id, &reg);
 if (ret != 0) {
-error_report("Unable to %s extension %s in KVM, error %d",
- reg ? "enable" : "disable",
- multi_ext_cfg->name, ret);
-exit(EXIT_FAILURE);
+if (!reg && ret == -EINVAL) {
+warn_report("KVM cannot disable extension %s",
+multi_ext_cfg->name);
+} else {
+error_report("Unable to enable extension %s in KVM, error %d",
+ multi_ext_cfg->name, ret);
+exit(EXIT_FAILURE);
+}
 }
 }
 }
-- 
2.45.1

[PULL 16/28] riscv: thead: Add th.sxstatus CSR emulation

2024-05-27 Thread Alistair Francis

From: Christoph Müllner 

The th.sxstatus CSR can be used to identify available custom extensions
on T-Head CPUs. The CSR is documented here:
  
https://github.com/T-head-Semi/thead-extension-spec/blob/master/xtheadsxstatus.adoc

An important property of this patch is that the th.sxstatus MAEE field
is not set (indicating that XTheadMae is not available).
XTheadMae is a memory attribute extension (similar to Svpbmt) which is
implemented in many T-Head CPUs (C906, C910, etc.) and utilizes bits
in PTEs that are marked as reserved. QEMU maintainers prefer to not
implement XTheadMae, so we need to give kernels a mechanism to identify
if XTheadMae is available in a system or not. And this patch introduces
this mechanism in QEMU in a way that's compatible with real HW
(i.e., probing the th.sxstatus.MAEE bit).

Further context can be found on the list:
https://lists.gnu.org/archive/html/qemu-devel/2024-02/msg00775.html

Reviewed-by: LIU Zhiwei 
Reviewed-by: Alistair Francis 
Signed-off-by: Christoph Müllner 
Message-ID: <20240429073656.2486732-1-christoph.muell...@vrull.eu>
Signed-off-by: Alistair Francis 
---
 MAINTAINERS  |  1 +
 target/riscv/cpu.h   |  3 ++
 target/riscv/cpu.c   |  1 +
 target/riscv/th_csr.c| 79 
 target/riscv/meson.build |  1 +
 5 files changed, 85 insertions(+)
 create mode 100644 target/riscv/th_csr.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 448dc951c5..e9d861e8ef 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -343,6 +343,7 @@ L: qemu-ri...@nongnu.org
 S: Supported
 F: target/riscv/insn_trans/trans_xthead.c.inc
 F: target/riscv/xthead*.decode
+F: target/riscv/th_*
 F: disas/riscv-xthead*
 
 RISC-V XVentanaCondOps extension
diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index 04ab0f153a..12d8b5344a 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -826,4 +826,7 @@ target_ulong riscv_new_csr_seed(target_ulong new_value,
 uint8_t satp_mode_max_from_map(uint32_t map);
 const char *satp_mode_str(uint8_t satp_mode, bool is_32_bit);
 
+/* Implemented in th_csr.c */
+void th_register_custom_csrs(RISCVCPU *cpu);
+
 #endif /* RISCV_CPU_H */
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index abeb50369c..2946ac298a 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -547,6 +547,7 @@ static void rv64_thead_c906_cpu_init(Object *obj)
 cpu->cfg.mvendorid = THEAD_VENDOR_ID;
 #ifndef CONFIG_USER_ONLY
 set_satp_mode_max_supported(cpu, VM_1_10_SV39);
+th_register_custom_csrs(cpu);
 #endif
 
 /* inherited from parent obj via riscv_cpu_init() */
diff --git a/target/riscv/th_csr.c b/target/riscv/th_csr.c
new file mode 100644
index 00..6c970d4e81
--- /dev/null
+++ b/target/riscv/th_csr.c
@@ -0,0 +1,79 @@
+/*
+ * T-Head-specific CSRs.
+ *
+ * Copyright (c) 2024 VRULL GmbH
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "cpu.h"
+#include "cpu_vendorid.h"
+
+#define CSR_TH_SXSTATUS 0x5c0
+
+/* TH_SXSTATUS bits */
+#define TH_SXSTATUS_UCME        BIT(16)
+#define TH_SXSTATUS_MAEE        BIT(21)
+#define TH_SXSTATUS_THEADISAEE  BIT(22)
+
+typedef struct {
+int csrno;
+int (*insertion_test)(RISCVCPU *cpu);
+riscv_csr_operations csr_ops;
+} riscv_csr;
+
+static RISCVException smode(CPURISCVState *env, int csrno)
+{
+if (riscv_has_ext(env, RVS)) {
+return RISCV_EXCP_NONE;
+}
+
+return RISCV_EXCP_ILLEGAL_INST;
+}
+
+static int test_thead_mvendorid(RISCVCPU *cpu)
+{
+if (cpu->cfg.mvendorid != THEAD_VENDOR_ID) {
+return -1;
+}
+
+return 0;
+}
+
+static RISCVException read_th_sxstatus(CPURISCVState *env, int csrno,
+   target_ulong *val)
+{
+/* We don't set MAEE here, because QEMU does not implement MAEE. */
+*val = TH_SXSTATUS_UCME | TH_SXSTATUS_THEADISAEE;
+return RISCV_EXCP_NONE;
+}
+
+static riscv_csr th_csr_list[] = {
+{
+.csrno = CSR_TH_SXSTATUS,
+.insertion_test = test_thead_mvendorid,
+.csr_ops = { "th.sxstatus", smode, read_th_sxstatus }
+}
+};
+
+void th_register_custom_csrs(RISCVCPU *cpu)
+{
+for (size_t i = 0; i < ARRAY_SIZE(th_csr_list); i++) {
+int csrno = th_csr_list[i].csrno;
+riscv_csr_operations *csr_ops = &th_csr_list[i].csr_ops;
+if (!th_csr_list[i].insertion_test(cpu)) {
+riscv_set_csr_ops(csrno, csr_ops);
+}

[PULL 18/28] target/riscv: rvv: Check single width operator for vector fp widen instructions

2024-05-27 Thread Alistair Francis

From: Max Chou 

The require_scale_rvf function only checks the double width operator for
the vector floating point widen instructions, so most of the widen
checking functions need to add require_rvf for single width operator.

The vfwcvt.f.x.v and vfwcvt.f.xu.v instructions convert single width
integer to double width float, so the opfxv_widen_check function doesn’t
need require_rvf for the single width operator(integer).

Signed-off-by: Max Chou 
Reviewed-by: Daniel Henrique Barboza 
Cc: qemu-stable 
Message-ID: <20240322092600.1198921-3-max.c...@sifive.com>
Signed-off-by: Alistair Francis 
---
 target/riscv/insn_trans/trans_rvv.c.inc | 5 +
 1 file changed, 5 insertions(+)

diff --git a/target/riscv/insn_trans/trans_rvv.c.inc 
b/target/riscv/insn_trans/trans_rvv.c.inc
index 678b34b759..a7217aed4e 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -2331,6 +2331,7 @@ GEN_OPFVF_TRANS(vfrsub_vf,  opfvf_check)
 static bool opfvv_widen_check(DisasContext *s, arg_rmrr *a)
 {
 return require_rvv(s) &&
+   require_rvf(s) &&
require_scale_rvf(s) &&
(s->sew != MO_8) &&
vext_check_isa_ill(s) &&
@@ -2370,6 +2371,7 @@ GEN_OPFVV_WIDEN_TRANS(vfwsub_vv, opfvv_widen_check)
 static bool opfvf_widen_check(DisasContext *s, arg_rmrr *a)
 {
 return require_rvv(s) &&
+   require_rvf(s) &&
require_scale_rvf(s) &&
(s->sew != MO_8) &&
vext_check_isa_ill(s) &&
@@ -2402,6 +2404,7 @@ GEN_OPFVF_WIDEN_TRANS(vfwsub_vf)
 static bool opfwv_widen_check(DisasContext *s, arg_rmrr *a)
 {
 return require_rvv(s) &&
+   require_rvf(s) &&
require_scale_rvf(s) &&
(s->sew != MO_8) &&
vext_check_isa_ill(s) &&
@@ -2441,6 +2444,7 @@ GEN_OPFWV_WIDEN_TRANS(vfwsub_wv)
 static bool opfwf_widen_check(DisasContext *s, arg_rmrr *a)
 {
 return require_rvv(s) &&
+   require_rvf(s) &&
require_scale_rvf(s) &&
(s->sew != MO_8) &&
vext_check_isa_ill(s) &&
@@ -2941,6 +2945,7 @@ GEN_OPFVV_TRANS(vfredmin_vs, freduction_check)
 static bool freduction_widen_check(DisasContext *s, arg_rmrr *a)
 {
 return reduction_widen_check(s, a) &&
+   require_rvf(s) &&
require_scale_rvf(s) &&
(s->sew != MO_8);
 }
-- 
2.45.1

[PULL 08/28] target/riscv/debug: set tval=pc in breakpoint exceptions

2024-05-27 Thread Alistair Francis

From: Daniel Henrique Barboza 

We're not setting (s/m)tval when triggering breakpoints of type 2
(mcontrol) and 6 (mcontrol6). According to the debug spec section
5.7.12, "Match Control Type 6":

"The Privileged Spec says that breakpoint exceptions that occur on
instruction fetches, loads, or stores update the tval CSR with either
zero or the faulting virtual address. The faulting virtual address for
an mcontrol6 trigger with action = 0 is the address being accessed and
which caused that trigger to fire."

A similar text is also found in the Debug spec section 5.7.11 w.r.t.
mcontrol.

Note that what we're doing ATM is not violating the spec, but it's
simple enough to set mtval/stval and it makes life easier for any
software that relies on this info.

Given that we always use action = 0, save the faulting address for the
mcontrol and mcontrol6 trigger breakpoints into env->badaddr, which is
used as a scratch area for traps with address information. 'tval' is
then set during riscv_cpu_do_interrupt().

Signed-off-by: Daniel Henrique Barboza 
Reviewed-by: Alistair Francis 
Reviewed-by: LIU Zhiwei 
Message-ID: <20240416230437.1869024-2-dbarb...@ventanamicro.com>
Signed-off-by: Alistair Francis 
---
 target/riscv/cpu_helper.c | 1 +
 target/riscv/debug.c  | 3 +++
 2 files changed, 4 insertions(+)

diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
index 8ad546a45a..179cf3d1a1 100644
--- a/target/riscv/cpu_helper.c
+++ b/target/riscv/cpu_helper.c
@@ -1718,6 +1718,7 @@ void riscv_cpu_do_interrupt(CPUState *cs)
 tval = env->bins;
 break;
 case RISCV_EXCP_BREAKPOINT:
+tval = env->badaddr;
 if (cs->watchpoint_hit) {
 tval = cs->watchpoint_hit->hitaddr;
 cs->watchpoint_hit = NULL;
diff --git a/target/riscv/debug.c b/target/riscv/debug.c
index e30d99cc2f..b110370ea6 100644
--- a/target/riscv/debug.c
+++ b/target/riscv/debug.c
@@ -798,6 +798,7 @@ bool riscv_cpu_debug_check_breakpoint(CPUState *cs)
 if ((ctrl & TYPE2_EXEC) && (bp->pc == pc)) {
 /* check U/S/M bit against current privilege level */
 if ((ctrl >> 3) & BIT(env->priv)) {
+env->badaddr = pc;
 return true;
 }
 }
@@ -810,11 +811,13 @@ bool riscv_cpu_debug_check_breakpoint(CPUState *cs)
 if (env->virt_enabled) {
 /* check VU/VS bit against current privilege level */
 if ((ctrl >> 23) & BIT(env->priv)) {
+env->badaddr = pc;
 return true;
 }
 } else {
 /* check U/S/M bit against current privilege level */
 if ((ctrl >> 3) & BIT(env->priv)) {
+env->badaddr = pc;
 return true;
 }
 }
-- 
2.45.1

[PULL 09/28] trans_privileged.c.inc: set (m|s)tval on ebreak breakpoint

2024-05-27 Thread Alistair Francis

From: Daniel Henrique Barboza 

Privileged spec section 4.1.9 mentions:

"When a trap is taken into S-mode, stval is written with
exception-specific information to assist software in handling the trap.
(...)

If stval is written with a nonzero value when a breakpoint,
address-misaligned, access-fault, or page-fault exception occurs on an
instruction fetch, load, or store, then stval will contain the faulting
virtual address."

A similar text is found for mtval in section 3.1.16.

Setting mtval/stval in this scenario is optional, but some software reads
these regs when handling ebreaks.

Write 'badaddr' in all ebreak breakpoints to write the appropriate
'tval' during riscv_cpu_do_interrupt().

Signed-off-by: Daniel Henrique Barboza 
Reviewed-by: Alistair Francis 
Reviewed-by: LIU Zhiwei 
Reviewed-by: Richard Henderson 
Message-ID: <20240416230437.1869024-3-dbarb...@ventanamicro.com>
Signed-off-by: Alistair Francis 
---
 target/riscv/insn_trans/trans_privileged.c.inc | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/target/riscv/insn_trans/trans_privileged.c.inc 
b/target/riscv/insn_trans/trans_privileged.c.inc
index 620ab54eb0..bc5263a4e0 100644
--- a/target/riscv/insn_trans/trans_privileged.c.inc
+++ b/target/riscv/insn_trans/trans_privileged.c.inc
@@ -62,6 +62,8 @@ static bool trans_ebreak(DisasContext *ctx, arg_ebreak *a)
 if (pre == 0x01f01013 && ebreak == 0x00100073 && post == 0x40705013) {
 generate_exception(ctx, RISCV_EXCP_SEMIHOST);
 } else {
+tcg_gen_st_tl(tcg_constant_tl(ebreak_addr), tcg_env,
+  offsetof(CPURISCVState, badaddr));
 generate_exception(ctx, RISCV_EXCP_BREAKPOINT);
 }
 return true;
-- 
2.45.1

[PULL 02/28] target/riscv/kvm: Fix exposure of Zkr

2024-05-27 Thread Alistair Francis

From: Andrew Jones 

The Zkr extension may only be exposed to KVM guests if the VMM
implements the SEED CSR. Use the same implementation as TCG.

Without this patch, running with a KVM which does not forward the
SEED CSR access to QEMU will result in an ILL exception being
injected into the guest (this results in Linux guests crashing on
boot). And, when running with a KVM which does forward the access,
QEMU will crash, since QEMU doesn't know what to do with the exit.

Fixes: 3108e2f1c69d ("target/riscv/kvm: update KVM exts to Linux 6.8")
Signed-off-by: Andrew Jones 
Reviewed-by: Daniel Henrique Barboza 
Cc: qemu-stable 
Message-ID: <20240422134605.534207-2-ajo...@ventanamicro.com>
Signed-off-by: Alistair Francis 
---
 target/riscv/cpu.h |  3 +++
 target/riscv/csr.c | 18 ++
 target/riscv/kvm/kvm-cpu.c | 25 +
 3 files changed, 42 insertions(+), 4 deletions(-)

diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index 2d0c02c35b..746efd099a 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -819,6 +819,9 @@ void riscv_set_csr_ops(int csrno, riscv_csr_operations 
*ops);
 
 void riscv_cpu_register_gdb_regs_for_features(CPUState *cs);
 
+target_ulong riscv_new_csr_seed(target_ulong new_value,
+target_ulong write_mask);
+
 uint8_t satp_mode_max_from_map(uint32_t map);
 const char *satp_mode_str(uint8_t satp_mode, bool is_32_bit);
 
diff --git a/target/riscv/csr.c b/target/riscv/csr.c
index 726096444f..829d8346ed 100644
--- a/target/riscv/csr.c
+++ b/target/riscv/csr.c
@@ -4267,10 +4267,8 @@ static RISCVException write_upmbase(CPURISCVState *env, 
int csrno,
 #endif
 
 /* Crypto Extension */
-static RISCVException rmw_seed(CPURISCVState *env, int csrno,
-   target_ulong *ret_value,
-   target_ulong new_value,
-   target_ulong write_mask)
+target_ulong riscv_new_csr_seed(target_ulong new_value,
+target_ulong write_mask)
 {
 uint16_t random_v;
 Error *random_e = NULL;
@@ -4294,6 +4292,18 @@ static RISCVException rmw_seed(CPURISCVState *env, int 
csrno,
 rval = random_v | SEED_OPST_ES16;
 }
 
+return rval;
+}
+
+static RISCVException rmw_seed(CPURISCVState *env, int csrno,
+   target_ulong *ret_value,
+   target_ulong new_value,
+   target_ulong write_mask)
+{
+target_ulong rval;
+
+rval = riscv_new_csr_seed(new_value, write_mask);
+
 if (ret_value) {
 *ret_value = rval;
 }
diff --git a/target/riscv/kvm/kvm-cpu.c b/target/riscv/kvm/kvm-cpu.c
index eaa36121c7..b8136c7ef8 100644
--- a/target/riscv/kvm/kvm-cpu.c
+++ b/target/riscv/kvm/kvm-cpu.c
@@ -1418,6 +1418,28 @@ static int kvm_riscv_handle_sbi(CPUState *cs, struct 
kvm_run *run)
 return ret;
 }
 
+static int kvm_riscv_handle_csr(CPUState *cs, struct kvm_run *run)
+{
+target_ulong csr_num = run->riscv_csr.csr_num;
+target_ulong new_value = run->riscv_csr.new_value;
+target_ulong write_mask = run->riscv_csr.write_mask;
+int ret = 0;
+
+switch (csr_num) {
+case CSR_SEED:
+run->riscv_csr.ret_value = riscv_new_csr_seed(new_value, write_mask);
+break;
+default:
+qemu_log_mask(LOG_UNIMP,
+  "%s: un-handled CSR EXIT for CSR %lx\n",
+  __func__, csr_num);
+ret = -1;
+break;
+}
+
+return ret;
+}
+
 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
 {
 int ret = 0;
@@ -1425,6 +1447,9 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run 
*run)
 case KVM_EXIT_RISCV_SBI:
 ret = kvm_riscv_handle_sbi(cs, run);
 break;
+case KVM_EXIT_RISCV_CSR:
+ret = kvm_riscv_handle_csr(cs, run);
+break;
 default:
 qemu_log_mask(LOG_UNIMP, "%s: un-handled exit reason %d\n",
   __func__, run->exit_reason);
-- 
2.45.1

[PULL 03/28] target/riscv: Raise exceptions on wrs.nto

2024-05-27 Thread Alistair Francis

From: Andrew Jones 

Implementing wrs.nto to always just return is consistent with the
specification, as the instruction is permitted to terminate the
stall for any reason, but it's not useful for virtualization, where
we'd like the guest to trap to the hypervisor in order to allow
scheduling of the lock holding VCPU. Change to always immediately
raise exceptions when the appropriate conditions are present,
otherwise continue to just return. Note, immediately raising
exceptions is also consistent with the specification since the
time limit that should expire prior to the exception is
implementation-specific.

Signed-off-by: Andrew Jones 
Reviewed-by: Christoph Müllner 
Reviewed-by: Daniel Henrique Barboza 
Reviewed-by: Alistair Francis 
Message-ID: <20240424142808.62936-2-ajo...@ventanamicro.com>
Signed-off-by: Alistair Francis 
---
 target/riscv/helper.h   |  1 +
 target/riscv/op_helper.c| 11 
 target/riscv/insn_trans/trans_rvzawrs.c.inc | 29 ++---
 3 files changed, 32 insertions(+), 9 deletions(-)

diff --git a/target/riscv/helper.h b/target/riscv/helper.h
index 8a63523851..451261ce5a 100644
--- a/target/riscv/helper.h
+++ b/target/riscv/helper.h
@@ -132,6 +132,7 @@ DEF_HELPER_6(csrrw_i128, tl, env, int, tl, tl, tl, tl)
 DEF_HELPER_1(sret, tl, env)
 DEF_HELPER_1(mret, tl, env)
 DEF_HELPER_1(wfi, void, env)
+DEF_HELPER_1(wrs_nto, void, env)
 DEF_HELPER_1(tlb_flush, void, env)
 DEF_HELPER_1(tlb_flush_all, void, env)
 /* Native Debug */
diff --git a/target/riscv/op_helper.c b/target/riscv/op_helper.c
index f414aaebdb..2baf5bc3ca 100644
--- a/target/riscv/op_helper.c
+++ b/target/riscv/op_helper.c
@@ -380,6 +380,17 @@ void helper_wfi(CPURISCVState *env)
 }
 }
 
+void helper_wrs_nto(CPURISCVState *env)
+{
+if (env->virt_enabled && (env->priv == PRV_S || env->priv == PRV_U) &&
+get_field(env->hstatus, HSTATUS_VTW) &&
+!get_field(env->mstatus, MSTATUS_TW)) {
+riscv_raise_exception(env, RISCV_EXCP_VIRT_INSTRUCTION_FAULT, GETPC());
+} else if (env->priv != PRV_M && get_field(env->mstatus, MSTATUS_TW)) {
+riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC());
+}
+}
+
 void helper_tlb_flush(CPURISCVState *env)
 {
 CPUState *cs = env_cpu(env);
diff --git a/target/riscv/insn_trans/trans_rvzawrs.c.inc 
b/target/riscv/insn_trans/trans_rvzawrs.c.inc
index 32efbff4d5..0eef033838 100644
--- a/target/riscv/insn_trans/trans_rvzawrs.c.inc
+++ b/target/riscv/insn_trans/trans_rvzawrs.c.inc
@@ -16,7 +16,7 @@
  * this program.  If not, see .
  */
 
-static bool trans_wrs(DisasContext *ctx)
+static bool trans_wrs_sto(DisasContext *ctx, arg_wrs_sto *a)
 {
 if (!ctx->cfg_ptr->ext_zawrs) {
 return false;
@@ -40,12 +40,23 @@ static bool trans_wrs(DisasContext *ctx)
 return true;
 }
 
-#define GEN_TRANS_WRS(insn) \
-static bool trans_ ## insn(DisasContext *ctx, arg_ ## insn *a)  \
-{   \
-(void)a;\
-return trans_wrs(ctx);  \
-}
+static bool trans_wrs_nto(DisasContext *ctx, arg_wrs_nto *a)
+{
+if (!ctx->cfg_ptr->ext_zawrs) {
+return false;
+}
 
-GEN_TRANS_WRS(wrs_nto)
-GEN_TRANS_WRS(wrs_sto)
+/*
+ * Depending on the mode of execution, mstatus.TW and hstatus.VTW, wrs.nto
+ * should raise an exception when the implementation-specific bounded time
+ * limit has expired. Our time limit is zero, so we either return
+ * immediately, as does our implementation of wrs.sto, or raise an
+ * exception, as handled by the wrs.nto helper.
+ */
+#ifndef CONFIG_USER_ONLY
+gen_helper_wrs_nto(tcg_env);
+#endif
+
+/* We only get here when helper_wrs_nto() doesn't raise an exception. */
+return trans_wrs_sto(ctx, NULL);
+}
-- 
2.45.1

[PULL 01/28] hw/intc/riscv_aplic: APLICs should add child earlier than realize

2024-05-27 Thread Alistair Francis

From: "yang.zhang" 

Since only root APLICs can have hw IRQ lines, aplic->parent should
be initialized first.

Fixes: e8f79343cf ("hw/intc: Add RISC-V AIA APLIC device emulation")
Reviewed-by: Daniel Henrique Barboza 
Signed-off-by: yang.zhang 
Cc: qemu-stable 
Message-ID: <20240409014445.278-1-gaoshanliu...@163.com>
Signed-off-by: Alistair Francis 
---
 hw/intc/riscv_aplic.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/hw/intc/riscv_aplic.c b/hw/intc/riscv_aplic.c
index fc5df0d598..32edd6d07b 100644
--- a/hw/intc/riscv_aplic.c
+++ b/hw/intc/riscv_aplic.c
@@ -1000,16 +1000,16 @@ DeviceState *riscv_aplic_create(hwaddr addr, hwaddr 
size,
 qdev_prop_set_bit(dev, "msimode", msimode);
 qdev_prop_set_bit(dev, "mmode", mmode);
 
+if (parent) {
+riscv_aplic_add_child(parent, dev);
+}
+
 sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
 
 if (!is_kvm_aia(msimode)) {
 sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, addr);
 }
 
-if (parent) {
-riscv_aplic_add_child(parent, dev);
-}
-
 if (!msimode) {
 for (i = 0; i < num_harts; i++) {
 CPUState *cpu = cpu_by_arch_id(hartid_base + i);
-- 
2.45.1

[PULL 00/28] riscv-to-apply queue

2024-05-27 Thread Alistair Francis

The following changes since commit ad10b4badc1dd5b28305f9b9f1168cf0aa3ae946:

  Merge tag 'pull-error-2024-05-27' of https://repo.or.cz/qemu/armbru into 
staging (2024-05-27 06:40:42 -0700)

are available in the Git repository at:

  https://github.com/alistair23/qemu.git tags/pull-riscv-to-apply-20240528

for you to fetch changes up to 1806da76cb81088ea026ca3441551782b850e393:

  target/riscv: raise an exception when CSRRS/CSRRC writes a read-only CSR 
(2024-05-28 12:20:27 +1000)


RISC-V PR for 9.1

* APLICs add child earlier than realize
* Fix exposure of Zkr
* Raise exceptions on wrs.nto
* Implement SBI debug console (DBCN) calls for KVM
* Support 64-bit addresses for initrd
* Change RISCV_EXCP_SEMIHOST exception number to 63
* Tolerate KVM disable ext errors
* Set tval in breakpoints
* Add support for Zve32x extension
* Add support for Zve64x extension
* Relax vector register check in RISCV gdbstub
* Fix the element agnostic Vector function problem
* Fix Zvkb extension config
* Implement dynamic establishment of custom decoder
* Add th.sxstatus CSR emulation
* Fix Zvfhmin checking for vfwcvt.f.f.v and vfncvt.f.f.w instructions
* Check single width operator for vector fp widen instructions
* Check single width operator for vfncvt.rod.f.f.w
* Remove redundant SEW checking for vector fp narrow/widen instructions
* Prioritize pmp errors in raise_mmu_exception()
* Do not set mtval2 for non guest-page faults
* Remove experimental prefix from "B" extension
* Fixup CBO extension register calculation
* Fix the hart bit setting of AIA
* Fix reg_width in ricsv_gen_dynamic_vector_feature()
* Decode all of the pmpcfg and pmpaddr CSRs
* Raise an exception when CSRRS/CSRRC writes a read-only CSR


Alexei Filippov (1):
  target/riscv: do not set mtval2 for non guest-page faults

Alistair Francis (2):
  target/riscv: rvzicbo: Fixup CBO extension register calculation
  disas/riscv: Decode all of the pmpcfg and pmpaddr CSRs

Andrew Jones (2):
  target/riscv/kvm: Fix exposure of Zkr
  target/riscv: Raise exceptions on wrs.nto

Cheng Yang (1):
  hw/riscv/boot.c: Support 64-bit address for initrd

Christoph Müllner (1):
  riscv: thead: Add th.sxstatus CSR emulation

Clément Léger (1):
  target/riscv: change RISCV_EXCP_SEMIHOST exception number to 63

Daniel Henrique Barboza (6):
  target/riscv/kvm: implement SBI debug console (DBCN) calls
  target/riscv/kvm: tolerate KVM disable ext errors
  target/riscv/debug: set tval=pc in breakpoint exceptions
  trans_privileged.c.inc: set (m|s)tval on ebreak breakpoint
  target/riscv: prioritize pmp errors in raise_mmu_exception()
  riscv, gdbstub.c: fix reg_width in ricsv_gen_dynamic_vector_feature()

Huang Tao (2):
  target/riscv: Fix the element agnostic function problem
  target/riscv: Implement dynamic establishment of custom decoder

Jason Chien (3):
  target/riscv: Add support for Zve32x extension
  target/riscv: Add support for Zve64x extension
  target/riscv: Relax vector register check in RISCV gdbstub

Max Chou (4):
  target/riscv: rvv: Fix Zvfhmin checking for vfwcvt.f.f.v and vfncvt.f.f.w 
instructions
  target/riscv: rvv: Check single width operator for vector fp widen 
instructions
  target/riscv: rvv: Check single width operator for vfncvt.rod.f.f.w
  target/riscv: rvv: Remove redundant SEW checking for vector fp 
narrow/widen instructions

Rob Bradford (1):
  target/riscv: Remove experimental prefix from "B" extension

Yangyu Chen (1):
  target/riscv/cpu.c: fix Zvkb extension config

Yong-Xuan Wang (1):
  target/riscv/kvm.c: Fix the hart bit setting of AIA

Yu-Ming Chang (1):
  target/riscv: raise an exception when CSRRS/CSRRC writes a read-only CSR

yang.zhang (1):
  hw/intc/riscv_aplic: APLICs should add child earlier than realize

 MAINTAINERS|   1 +
 target/riscv/cpu.h |  11 ++
 target/riscv/cpu_bits.h|   2 +-
 target/riscv/cpu_cfg.h |   2 +
 target/riscv/helper.h  |   1 +
 target/riscv/sbi_ecall_interface.h |  17 +++
 target/riscv/tcg/tcg-cpu.h |  15 +++
 disas/riscv.c  |  65 +-
 hw/intc/riscv_aplic.c  |   8 +-
 hw/riscv/boot.c|   4 +-
 target/riscv/cpu.c |  10 +-
 target/riscv/cpu_helper.c  |  37 +++---
 target/riscv/csr.c |  71 +--
 target/riscv/debug.c   |   3 +
 target/riscv/gdbstub.c |   8 +-
 target/riscv/kvm/kvm-cpu.c | 157 -
 target/riscv/op_helper.c   |  17 ++-
 target/

[PATCH v2 2/2] hw/ufs: Add support MCQ of UFSHCI 4.0

2024-05-27 Thread Minwoo Im

This patch adds support for MCQ defined in UFSHCI 4.0.  This patch
utilized the legacy I/O codes as much as possible to support MCQ.

The MCQ operation & runtime registers are placed statically at offset 0x1000
of the UFSHCI register space, with no spare space among the four registers (48B):

UfsMcqSqReg, UfsMcqSqIntReg, UfsMcqCqReg, UfsMcqCqIntReg

The maximum number of queues is 32 as per the spec, and the default
MAC (Multiple Active Commands) is 32 in the device.

Example:
-device ufs,serial=foo,id=ufs0,mcq=true,mcq-maxq=8

Signed-off-by: Minwoo Im 
---
 hw/ufs/trace-events |  17 ++
 hw/ufs/ufs.c| 478 ++--
 hw/ufs/ufs.h|  98 -
 include/block/ufs.h |  23 ++-
 4 files changed, 596 insertions(+), 20 deletions(-)

diff --git a/hw/ufs/trace-events b/hw/ufs/trace-events
index 665e1a942b..dda7f8a2e5 100644
--- a/hw/ufs/trace-events
+++ b/hw/ufs/trace-events
@@ -11,13 +11,18 @@ ufs_exec_nop_cmd(uint32_t slot) "UTRLDBR slot %"PRIu32""
 ufs_exec_scsi_cmd(uint32_t slot, uint8_t lun, uint8_t opcode) "slot %"PRIu32", 
lun 0x%"PRIx8", opcode 0x%"PRIx8""
 ufs_exec_query_cmd(uint32_t slot, uint8_t opcode) "slot %"PRIu32", opcode 
0x%"PRIx8""
 ufs_process_uiccmd(uint32_t uiccmd, uint32_t ucmdarg1, uint32_t ucmdarg2, 
uint32_t ucmdarg3) "uiccmd 0x%"PRIx32", ucmdarg1 0x%"PRIx32", ucmdarg2 
0x%"PRIx32", ucmdarg3 0x%"PRIx32""
+ufs_mcq_complete_req(uint8_t qid) "sqid %"PRIu8""
+ufs_mcq_create_sq(uint8_t sqid, uint8_t cqid, uint64_t addr, uint16_t size) 
"mcq create sq sqid %"PRIu8", cqid %"PRIu8", addr 0x%"PRIx64", size %"PRIu16""
+ufs_mcq_create_cq(uint8_t cqid, uint64_t addr, uint16_t size) "mcq create cq 
cqid %"PRIu8", addr 0x%"PRIx64", size %"PRIu16""
 
 # error condition
 ufs_err_dma_read_utrd(uint32_t slot, uint64_t addr) "failed to read utrd. 
UTRLDBR slot %"PRIu32", UTRD dma addr %"PRIu64""
 ufs_err_dma_read_req_upiu(uint32_t slot, uint64_t addr) "failed to read req 
upiu. UTRLDBR slot %"PRIu32", request upiu addr %"PRIu64""
 ufs_err_dma_read_prdt(uint32_t slot, uint64_t addr) "failed to read prdt. 
UTRLDBR slot %"PRIu32", prdt addr %"PRIu64""
+ufs_err_dma_read_sq(uint8_t qid, uint64_t addr) "failed to read sqe. SQ qid 
%"PRIu8", sqe addr %"PRIu64""
 ufs_err_dma_write_utrd(uint32_t slot, uint64_t addr) "failed to write utrd. 
UTRLDBR slot %"PRIu32", UTRD dma addr %"PRIu64""
 ufs_err_dma_write_rsp_upiu(uint32_t slot, uint64_t addr) "failed to write rsp 
upiu. UTRLDBR slot %"PRIu32", response upiu addr %"PRIu64""
+ufs_err_dma_write_cq(uint32_t cqid, uint64_t addr) "failed to write cq entry. 
cqid %"PRIu8", hwaddr %"PRIu64""
 ufs_err_utrl_slot_error(uint32_t slot) "UTRLDBR slot %"PRIu32" is in error"
 ufs_err_utrl_slot_busy(uint32_t slot) "UTRLDBR slot %"PRIu32" is busy"
 ufs_err_unsupport_register_offset(uint32_t offset) "Register offset 
0x%"PRIx32" is not yet supported"
@@ -31,3 +36,15 @@ ufs_err_query_invalid_opcode(uint8_t opcode) "query request 
has invalid opcode.
 ufs_err_query_invalid_idn(uint8_t opcode, uint8_t idn) "query request has 
invalid idn. opcode: 0x%"PRIx8", idn 0x%"PRIx8""
 ufs_err_query_invalid_index(uint8_t opcode, uint8_t index) "query request has 
invalid index. opcode: 0x%"PRIx8", index 0x%"PRIx8""
 ufs_err_invalid_trans_code(uint32_t slot, uint8_t trans_code) "request upiu 
has invalid transaction code. slot: %"PRIu32", trans_code: 0x%"PRIx8""
+ufs_err_mcq_db_wr_invalid_sqid(uint8_t qid) "invalid mcq sqid %"PRIu8""
+ufs_err_mcq_db_wr_invalid_db(uint8_t qid, uint32_t db) "invalid mcq doorbell 
sqid %"PRIu8", db %"PRIu32""
+ufs_err_mcq_create_sq_invalid_sqid(uint8_t qid) "invalid mcq sqid %"PRIu8""
+ufs_err_mcq_create_sq_invalid_cqid(uint8_t qid) "invalid mcq cqid %"PRIu8""
+ufs_err_mcq_create_sq_already_exists(uint8_t qid) "mcq sqid %"PRIu8 "already 
exists"
+ufs_err_mcq_delete_sq_invalid_sqid(uint8_t qid) "invalid mcq sqid %"PRIu8""
+ufs_err_mcq_delete_sq_not_exists(uint8_t qid) "mcq sqid %"PRIu8 "not exists"
+ufs_err_mcq_create_cq_invalid_cqid(uint8_t qid) "invalid mcq cqid %"PRIu8""
+ufs_err_mcq_create_cq_already_exists(uint8_t qid) "mcq cqid %"PRIu8 "already 
exists"
+ufs_err_mcq_delete_cq_invalid_cqid(uint8_t qid) "invalid mcq cqid %"PRIu8""
+ufs_err_mcq_delete_cq_not_exists(uint8_t qid) "mcq cqid %"PRIu8 "not exists"
+ufs_err_mcq_delete_cq_sq_not_deleted(uint8_t sqid, uint8_t cqid) "mcq sq 
%"PRIu8" still has cq %"PRIu8""
diff --git a/hw/ufs/ufs.c b/hw/ufs/ufs.c
index bac78a32bb..45700ca30b 100644
--- a/hw/ufs/ufs.c
+++ b/hw/ufs/ufs.c
@@ -9,7 +9,7 @@
  */
 
 /**
- * Reference Specs: https://www.jedec.org/, 3.1
+ * Reference Specs: https://www.jedec.org/, 4.0
  *
  * Usage
  * -
@@ -28,10 +28,45 @@
 #include "trace.h"
 #include "ufs.h"
 
-/* The QEMU-UFS device follows spec version 3.1 */
-#define UFS_SPEC_VER 0x0310
+/* The QEMU-UFS device follows spec version 4.0 */
+#define UFS_SPEC_VER 0x0400
 #define UFS_MAX_NUTRS 32
 #define UFS_MAX_NUTMRS 8
+#define UFS_MCQ_QCFGPTR 2
+
+static void ufs_exec_req(UfsRequest *req);
+static voi

[PATCH v2 1/2] hw/ufs: Update MCQ-related fields to block/ufs.h

2024-05-27 Thread Minwoo Im

This patch is a prep patch for the following MCQ support patch for
hw/ufs.  This patch updated minimal mandatory fields to support MCQ
based on UFSHCI 4.0.

Signed-off-by: Minwoo Im 
---
 include/block/ufs.h | 108 +++-
 1 file changed, 106 insertions(+), 2 deletions(-)

diff --git a/include/block/ufs.h b/include/block/ufs.h
index d61598b8f3..3513b6e772 100644
--- a/include/block/ufs.h
+++ b/include/block/ufs.h
@@ -7,7 +7,7 @@
 
 typedef struct QEMU_PACKED UfsReg {
 uint32_t cap;
-uint32_t rsvd0;
+uint32_t mcqcap;
 uint32_t ver;
 uint32_t rsvd1;
 uint32_t hcpid;
@@ -46,6 +46,13 @@ typedef struct QEMU_PACKED UfsReg {
 uint32_t rsvd7[4];
 uint32_t rsvd8[16];
 uint32_t ccap;
+uint32_t rsvd9[127];
+uint32_t config;
+uint32_t rsvd10[3];
+uint32_t rsvd11[28];
+uint32_t mcqconfig;
+uint32_t esilba;
+uint32_t esiuba;
 } UfsReg;
 
 REG32(CAP, offsetof(UfsReg, cap))
@@ -57,6 +64,15 @@ REG32(CAP, offsetof(UfsReg, cap))
 FIELD(CAP, OODDS, 25, 1)
 FIELD(CAP, UICDMETMS, 26, 1)
 FIELD(CAP, CS, 28, 1)
+FIELD(CAP, LSDBS, 29, 1)
+FIELD(CAP, MCQS, 30, 1)
+REG32(MCQCAP, offsetof(UfsReg, mcqcap))
+FIELD(MCQCAP, MAXQ, 0, 8)
+FIELD(MCQCAP, SP, 8, 1)
+FIELD(MCQCAP, RRP, 9, 1)
+FIELD(MCQCAP, EIS, 10, 1)
+FIELD(MCQCAP, QCFGPTR, 16, 8)
+FIELD(MCQCAP, MIAG, 24, 8)
 REG32(VER, offsetof(UfsReg, ver))
 REG32(HCPID, offsetof(UfsReg, hcpid))
 REG32(HCMID, offsetof(UfsReg, hcmid))
@@ -78,6 +94,7 @@ REG32(IS, offsetof(UfsReg, is))
 FIELD(IS, HCFES, 16, 1)
 FIELD(IS, SBFES, 17, 1)
 FIELD(IS, CEFES, 18, 1)
+FIELD(IS, CQES, 20, 1)
 REG32(IE, offsetof(UfsReg, ie))
 FIELD(IE, UTRCE, 0, 1)
 FIELD(IE, UDEPRIE, 1, 1)
@@ -95,6 +112,7 @@ REG32(IE, offsetof(UfsReg, ie))
 FIELD(IE, HCFEE, 16, 1)
 FIELD(IE, SBFEE, 17, 1)
 FIELD(IE, CEFEE, 18, 1)
+FIELD(IE, CQEE, 20, 1)
 REG32(HCS, offsetof(UfsReg, hcs))
 FIELD(HCS, DP, 0, 1)
 FIELD(HCS, UTRLRDY, 1, 1)
@@ -128,6 +146,10 @@ REG32(UCMDARG1, offsetof(UfsReg, ucmdarg1))
 REG32(UCMDARG2, offsetof(UfsReg, ucmdarg2))
 REG32(UCMDARG3, offsetof(UfsReg, ucmdarg3))
 REG32(CCAP, offsetof(UfsReg, ccap))
+REG32(CONFIG, offsetof(UfsReg, config))
+FIELD(CONFIG, QT, 0, 1)
+REG32(MCQCONFIG, offsetof(UfsReg, mcqconfig))
+FIELD(MCQCONFIG, MAC, 8, 8)
 
 #define UFS_INTR_MASK\
 ((1 << R_IS_CEFES_SHIFT) | (1 << R_IS_SBFES_SHIFT) | \
@@ -157,6 +179,69 @@ REG32(CCAP, offsetof(UfsReg, ccap))
 ((be32_to_cpu(dword2) >> UFS_UPIU_HEADER_DATA_SEGMENT_LENGTH_SHIFT) & \
  UFS_UPIU_HEADER_DATA_SEGMENT_LENGTH_MASK)
 
+typedef struct QEMU_PACKED UfsMcqReg {
+uint32_t sqattr;
+uint32_t sqlba;
+uint32_t squba;
+uint32_t sqdao;
+uint32_t sqisao;
+uint32_t sqcfg;
+uint32_t rsvd0[2];
+uint32_t cqattr;
+uint32_t cqlba;
+uint32_t cquba;
+uint32_t cqdao;
+uint32_t cqisao;
+uint32_t cqcfg;
+uint32_t rsvd1[2];
+} UfsMcqReg;
+
+REG32(SQATTR, offsetof(UfsMcqReg, sqattr))
+FIELD(SQATTR, SIZE, 0, 16)
+FIELD(SQATTR, CQID, 16, 8)
+FIELD(SQATTR, SQPL, 28, 3)
+FIELD(SQATTR, SQEN, 31, 1)
+REG32(SQLBA, offsetof(UfsMcqReg, sqlba))
+REG32(SQUBA, offsetof(UfsMcqReg, squba))
+REG32(SQDAO, offsetof(UfsMcqReg, sqdao))
+REG32(SQISAO, offsetof(UfsMcqReg, sqisao))
+REG32(SQCFG, offsetof(UfsMcqReg, sqcfg))
+REG32(CQATTR, offsetof(UfsMcqReg, cqattr))
+FIELD(CQATTR, SIZE, 0, 16)
+FIELD(CQATTR, CQEN, 31, 1)
+REG32(CQLBA, offsetof(UfsMcqReg, cqlba))
+REG32(CQUBA, offsetof(UfsMcqReg, cquba))
+REG32(CQDAO, offsetof(UfsMcqReg, cqdao))
+REG32(CQISAO, offsetof(UfsMcqReg, cqisao))
+REG32(CQCFG, offsetof(UfsMcqReg, cqcfg))
+
+typedef struct QEMU_PACKED UfsMcqSqReg {
+uint32_t hp;
+uint32_t tp;
+uint32_t rtc;
+uint32_t cti;
+uint32_t rts;
+} UfsMcqSqReg;
+
+typedef struct QEMU_PACKED UfsMcqCqReg {
+uint32_t hp;
+uint32_t tp;
+} UfsMcqCqReg;
+
+typedef struct QEMU_PACKED UfsMcqSqIntReg {
+uint32_t is;
+uint32_t ie;
+} UfsMcqSqIntReg;
+
+typedef struct QEMU_PACKED UfsMcqCqIntReg {
+uint32_t is;
+uint32_t ie;
+uint32_t iacr;
+} UfsMcqCqIntReg;
+
+REG32(CQIS, offsetof(UfsMcqCqIntReg, is))
+FIELD(CQIS, TEPS, 0, 1)
+
 typedef struct QEMU_PACKED DeviceDescriptor {
 uint8_t length;
 uint8_t descriptor_idn;
@@ -1064,9 +1149,26 @@ typedef struct QEMU_PACKED UtpUpiuRsp {
 };
 } UtpUpiuRsp;
 
+/*
+ * MCQ Completion Queue Entry
+ */
+typedef UtpTransferReqDesc UfsSqEntry;
+typedef struct QEMU_PACKED UfsCqEntry {
+uint64_t utp_addr;
+uint16_t resp_len;
+uint16_t resp_off;
+uint16_t prdt_len;
+uint16_t prdt_off;
+uint8_t status;
+uint8_t error;
+uint16_t rsvd1;
+uint32_t rsvd2[3];
+} UfsCqEntry;
+
 static inline void _ufs_check_size(void)
 {
-QEMU_BUILD_BUG_ON(sizeof(UfsReg) != 0x104);
+QEMU_BUILD_BUG_ON(sizeof(UfsReg) != 0x38C);
+QEMU_BUILD_BUG_ON(sizeof(UfsMc

[PATCH v2 0/2] hw/ufs: Add support MCQ

2024-05-27 Thread Minwoo Im

UFSHCI 4.0 spec introduced MCQ(Multi-Circular Queue) to support multiple
command queues for UFS controller.  To test ufs-mcq path of kernel, MCQ
emulated device would be a good choice to go with.

The first patch added newly introduced fields in UFSHCI 4.0 to support
MCQ.  The other one made the actual changes for MCQ.

v2:
  It fixed printing error event trace even in normal shutdown cases for
SQ/CQ tear-down by checking whether each SQ/CQ is valid or not.  The
default value of mcq-maxq was updated to 2 from 1 to prevent the kernel
from allocating a single queue as a poll_queue by default and to ensure
that io_queues exist to handle device commands.

Please review.

Thanks,

Minwoo Im (2):
  hw/ufs: Update MCQ-related fields to block/ufs.h
  hw/ufs: Add support MCQ of UFSHCI 4.0

 hw/ufs/trace-events |  17 ++
 hw/ufs/ufs.c| 478 ++--
 hw/ufs/ufs.h|  98 -
 include/block/ufs.h | 131 +++-
 4 files changed, 702 insertions(+), 22 deletions(-)

-- 
2.34.1

Re: [PATCH] x86: cpu: fixup number of addressable IDs for processor cores in the physical package

2024-05-27 Thread Zhao Liu

Hi Chuang,

On Mon, May 27, 2024 at 11:13:33AM +0800, Chuang Xu wrote:
> Date: Mon, 27 May 2024 11:13:33 +0800
> From: Chuang Xu 
> Subject: [PATCH] x86: cpu: fixup number of addressable IDs for processor
>  cores in the physical package

According to the usual practice of QEMU commits, people tend to use
"i386/cpu" as the subject prefix, which indicates the code path.

> X-Mailer: git-send-email 2.24.3 (Apple Git-128)
> 
> When QEMU is started with:
> -cpu host,host-cache-info=on,l3-cache=off \

Just for discussion: "l3-cache=off" doesn't work in the host cache passthrough
case. Do you have a specific need to hide the L3 cache from the guest?

> -smp 2,sockets=1,dies=1,cores=1,threads=2
> Guest can't acquire maximum number of addressable IDs for processor cores in
> the physical package from CPUID[04H].
> 
> This bug was introduced in commit d7caf13b5fcf742e5680c1d3448ba070fc811644.
> Fix it by changing the judgement condition to a >= 1.

Pls add a "Fixes" tag like:

Fixes: d7caf13b5fcf ("x86: cpu: fixup number of addressable IDs for logical 
processors sharing cache")

Since this is a historical issue that deserves to be ported to the
stable branch, you can cc stable list by:

Cc: qemu-sta...@nongnu.org

> Signed-off-by: Chuang Xu 

As the patch sender, it's better to put your signature on the last line.
;-)

> Signed-off-by: Guixiong Wei 
> Signed-off-by: Yipeng Yin 
> ---
>  target/i386/cpu.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/target/i386/cpu.c b/target/i386/cpu.c
> index cd16cb893d..0369c01153 100644
> --- a/target/i386/cpu.c
> +++ b/target/i386/cpu.c
> @@ -6097,7 +6097,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, 
> uint32_t count,
>  if (*eax & 31) {
>  int host_vcpus_per_cache = 1 + ((*eax & 0x3FFC000) >> 14);
>  int vcpus_per_socket = cs->nr_cores * cs->nr_threads;
> -if (cs->nr_cores > 1) {
> +if (cs->nr_cores >= 1) {

Like Igor suggested, this condition could be removed since cs->nr_cores can't
be 0.

>  *eax &= ~0xFC00;
>  *eax |= (pow2ceil(cs->nr_cores) - 1) << 26;
>  }

...the code is outdated, pls rebase on the latest master branch.

Regards,
Zhao

Re: [PATCH 2/2] hw/ufs: Add support MCQ of UFSHCI 4.0

2024-05-27 Thread Minwoo Im

On 24-05-28 10:00:35, Jeuk Kim wrote:
> Thanks for your contribution!
> 
> There are only two minor comments.

Thanks for your review.

> 
> Please check it and send patch v2.
> 
> 
> Thank you!
> 
> On 5/21/2024 8:05 PM, Minwoo Im wrote:
> > @@ -1288,12 +1717,21 @@ static void ufs_exit(PCIDevice *pci_dev)
> >   ufs_clear_req(&u->req_list[i]);
> >   }
> >   g_free(u->req_list);
> > +
> > +for (int i = 0; i < ARRAY_SIZE(u->sq); i++) {
> > +ufs_mcq_delete_sq(u, i);
> 
> Isn't it possible that trace_ufs_err_mcq_delete_cq_not_exists is printed
> even in a normal shutdown situation?
> 
> If true, please fix it so that the ufs_err log is not printed in normal
> situation.

I will make sure that the normal shut-down case will not print out the error
event trace.

> 
> > +}
> > +for (int i = 0; i < ARRAY_SIZE(u->cq); i++) {
> > +ufs_mcq_delete_cq(u, i);
> > +}
> >   }
> >   static Property ufs_props[] = {
> >   DEFINE_PROP_STRING("serial", UfsHc, params.serial),
> >   DEFINE_PROP_UINT8("nutrs", UfsHc, params.nutrs, 32),
> >   DEFINE_PROP_UINT8("nutmrs", UfsHc, params.nutmrs, 8),
> > +DEFINE_PROP_BOOL("mcq", UfsHc, params.mcq, false),
> > +DEFINE_PROP_UINT8("mcq-maxq", UfsHc, params.mcq_maxq, 1),
> Please change this value to a value greater than or equal to 2.

Oh yeah, it should be to support device command handling in MCQ mode.  I will
update it in v2.

RE: [PATCH v4 08/16] aspeed/smc: support 64 bits dma dram address

2024-05-27 Thread Jamin Lin

Hi Philippe,

> Hi Jamin,
> 
> On 27/5/24 10:02, Jamin Lin wrote:
> > AST2700 support the maximum dram size is 8GiB and has a "DMA DRAM
> Side
> > Address High Part(0x7C)"
> > register to support 64 bits dma dram address.
> > Add helper routines functions to compute the dma dram address, new
> > features and update trace-event to support 64 bits dram address.
> >
> > Signed-off-by: Troy Lee 
> > Signed-off-by: Jamin Lin 
> > ---
> >   hw/ssi/aspeed_smc.c | 52
> +++--
> >   hw/ssi/trace-events |  2 +-
> >   include/hw/ssi/aspeed_smc.h |  1 +
> >   3 files changed, 46 insertions(+), 9 deletions(-)
> 
> 
> > +static uint64_t aspeed_smc_dma_dram_addr(AspeedSMCState *s) {
> > +return s->regs[R_DMA_DRAM_ADDR] |
> > +((uint64_t) s->regs[R_DMA_DRAM_ADDR_HIGH] << 32); }
> > +
> >   static uint32_t aspeed_smc_dma_len(AspeedSMCState *s)
> >   {
> >   AspeedSMCClass *asc = ASPEED_SMC_GET_CLASS(s); @@ -903,24
> > +921,34 @@ static void aspeed_smc_dma_checksum(AspeedSMCState *s)
> >
> >   static void aspeed_smc_dma_rw(AspeedSMCState *s)
> >   {
> > +AspeedSMCClass *asc = ASPEED_SMC_GET_CLASS(s);
> > +uint64_t dma_dram_offset;
> > +uint64_t dma_dram_addr;
> >   MemTxResult result;
> >   uint32_t dma_len;
> >   uint32_t data;
> >
> >   dma_len = aspeed_smc_dma_len(s);
> > +dma_dram_addr = aspeed_smc_dma_dram_addr(s);
> > +
> > +if (aspeed_smc_has_dma64(asc)) {
> > +dma_dram_offset = dma_dram_addr - s->dram_base;
> > +} else {
> > +dma_dram_offset = dma_dram_addr;
> 
> Here s->dram_base is 0x0. Do we really need to check
> aspeed_smc_has_dma64?
> 

Yes, it is required to check aspeed_smc_has_dma64 to support 64-bit DRAM addresses.
s->dram_base has been changed to "0x400000000".
Thanks-Jamin

> > +}
> 
> Maybe simplify improving aspeed_smc_dma_dram_addr() as:
> 
>static uint64_t aspeed_smc_dma_dram_addr(AspeedSMCState *s)
>{
>return (s->regs[R_DMA_DRAM_ADDR]
>| ((uint64_t) s->regs[R_DMA_DRAM_ADDR_HIGH] << 32))
>- s->dram_base;
>}
> 
> Then no need for dma_dram_offset, dma_dram_addr is enough.
> 
> >
> >   trace_aspeed_smc_dma_rw(s->regs[R_DMA_CTRL] &
> DMA_CTRL_WRITE ?
> >   "write" : "read",
> >   s->regs[R_DMA_FLASH_ADDR],
> > -s->regs[R_DMA_DRAM_ADDR],
> > +dma_dram_offset,
> >   dma_len);
> >   while (dma_len) {
> >   if (s->regs[R_DMA_CTRL] & DMA_CTRL_WRITE) {
> > -data = address_space_ldl_le(&s->dram_as,
> s->regs[R_DMA_DRAM_ADDR],
> > +data = address_space_ldl_le(&s->dram_as,
> dma_dram_offset,
> >
> MEMTXATTRS_UNSPECIFIED, &result);
> >   if (result != MEMTX_OK) {
> > -aspeed_smc_error("DRAM read failed @%08x",
> > - s->regs[R_DMA_DRAM_ADDR]);
> > +aspeed_smc_error("DRAM read failed @%" PRIx64,
> > + dma_dram_offset);
> >   return;
> >   }
> >
> > @@ -940,11 +968,11 @@ static void aspeed_smc_dma_rw(AspeedSMCState
> *s)
> >   return;
> >   }
> >
> > -address_space_stl_le(&s->dram_as,
> s->regs[R_DMA_DRAM_ADDR],
> > +address_space_stl_le(&s->dram_as, dma_dram_offset,
> >data,
> MEMTXATTRS_UNSPECIFIED, &result);
> >   if (result != MEMTX_OK) {
> > -aspeed_smc_error("DRAM write failed @%08x",
> > - s->regs[R_DMA_DRAM_ADDR]);
> > +aspeed_smc_error("DRAM write failed @%" PRIx64,
> > + dma_dram_offset);
> >   return;
> >   }
> >   }
> > @@ -953,8 +981,12 @@ static void aspeed_smc_dma_rw(AspeedSMCState
> *s)
> >* When the DMA is on-going, the DMA registers are updated
> >* with the current working addresses and length.
> >*/
> > +dma_dram_offset += 4;
> > +dma_dram_addr += 4;
> > +
> > +s->regs[R_DMA_DRAM_ADDR_HIGH] = dma_dram_addr >> 32;
> > +s->regs[R_DMA_DRAM_ADDR] = dma_dram_addr & 0x;
> >   s->regs[R_DMA_FLASH_ADDR] += 4;
> > -s->regs[R_DMA_DRAM_ADDR] += 4;
> >   dma_len -= 4;
> >   s->regs[R_DMA_LEN] = dma_len;
> >   s->regs[R_DMA_CHECKSUM] += data; @@ -1107,6 +1139,9
> @@
> > static void aspeed_smc_write(void *opaque, hwaddr addr, uint64_t data,
> >   } else if (aspeed_smc_has_dma(asc) && addr == R_DMA_LEN &&
> >  aspeed_smc_dma_granted(s)) {
> >   s->regs[addr] = DMA_LENGTH(value);
> > +} else if (aspeed_smc_has_dma(asc) && aspeed_smc_has_dma64(asc)
> &&
> > +   addr == R_DMA_DRAM_ADDR_HIGH) {
> > +s->regs[addr] = DMA_DRAM_ADDR_HIGH(value);
>

RE: [PATCH v4 08/16] aspeed/smc: support 64 bits dma dram address

2024-05-27 Thread Jamin Lin

Hi Cedric,

> On 5/27/24 10:02, Jamin Lin wrote:
> > AST2700 support the maximum dram size is 8GiB and has a "DMA DRAM
> Side
> > Address High Part(0x7C)"
> > register to support 64 bits dma dram address.
> > Add helper routines functions to compute the dma dram address, new
> > features and update trace-event to support 64 bits dram address.
> >
> > Signed-off-by: Troy Lee 
> > Signed-off-by: Jamin Lin 
> 
> I will move the addition of the "dram-base" property to another patch. See :
> 
>https://patchew.org/QEMU/20240527124315.35356-1-...@redhat.com/
> 
> (Please review)
Review done.
If I need to resend v5 patch series, I will remove "dram-base property" from 
this patch.
Thanks for your help, Jamin
> 
> Else,
> 
> Reviewed-by: Cédric Le Goater 
> 
> Thanks,
> 
> C.
> 
> 
> > ---
> >   hw/ssi/aspeed_smc.c | 52
> +++--
> >   hw/ssi/trace-events |  2 +-
> >   include/hw/ssi/aspeed_smc.h |  1 +
> >   3 files changed, 46 insertions(+), 9 deletions(-)
> >
> > diff --git a/hw/ssi/aspeed_smc.c b/hw/ssi/aspeed_smc.c index
> > ffb13a12e8..df0c63469c 100644
> > --- a/hw/ssi/aspeed_smc.c
> > +++ b/hw/ssi/aspeed_smc.c
> > @@ -132,6 +132,9 @@
> >   #define   FMC_WDT2_CTRL_BOOT_SOURCE  BIT(4) /* O: primary
> 1: alternate */
> >   #define   FMC_WDT2_CTRL_EN   BIT(0)
> >
> > +/* DMA DRAM Side Address High Part (AST2700) */
> > +#define R_DMA_DRAM_ADDR_HIGH   (0x7c / 4)
> > +
> >   /* DMA Control/Status Register */
> >   #define R_DMA_CTRL(0x80 / 4)
> >   #define   DMA_CTRL_REQUEST  (1 << 31)
> > @@ -187,6 +190,7 @@
> >    *   0x1FFFFFF: 32M bytes
> >*/
> >   #define DMA_DRAM_ADDR(asc, val)   ((val) & (asc)->dma_dram_mask)
> > +#define DMA_DRAM_ADDR_HIGH(val)   ((val) & 0xf)
> >   #define DMA_FLASH_ADDR(asc, val)  ((val) & (asc)->dma_flash_mask)
> >   #define DMA_LENGTH(val) ((val) & 0x01FFFFFF)
> >
> > @@ -207,6 +211,7 @@ static const AspeedSegments
> aspeed_2500_spi2_segments[];
> >   #define ASPEED_SMC_FEATURE_DMA   0x1
> >   #define ASPEED_SMC_FEATURE_DMA_GRANT 0x2
> >   #define ASPEED_SMC_FEATURE_WDT_CONTROL 0x4
> > +#define ASPEED_SMC_FEATURE_DMA_DRAM_ADDR_HIGH 0x08
> >
> >   static inline bool aspeed_smc_has_dma(const AspeedSMCClass *asc)
> >   {
> > @@ -218,6 +223,11 @@ static inline bool
> aspeed_smc_has_wdt_control(const AspeedSMCClass *asc)
> >   return !!(asc->features & ASPEED_SMC_FEATURE_WDT_CONTROL);
> >   }
> >
> > +static inline bool aspeed_smc_has_dma64(const AspeedSMCClass *asc) {
> > +return !!(asc->features &
> ASPEED_SMC_FEATURE_DMA_DRAM_ADDR_HIGH);
> > +}
> > +
> >   #define aspeed_smc_error(fmt, ...)
> \
> >   qemu_log_mask(LOG_GUEST_ERROR, "%s: " fmt "\n", __func__, ##
> > __VA_ARGS__)
> >
> > @@ -747,6 +757,8 @@ static uint64_t aspeed_smc_read(void *opaque,
> hwaddr addr, unsigned int size)
> >   (aspeed_smc_has_dma(asc) && addr == R_DMA_CTRL) ||
> >   (aspeed_smc_has_dma(asc) && addr == R_DMA_FLASH_ADDR)
> ||
> >   (aspeed_smc_has_dma(asc) && addr == R_DMA_DRAM_ADDR)
> ||
> > +(aspeed_smc_has_dma(asc) && aspeed_smc_has_dma64(asc)
> &&
> > + addr == R_DMA_DRAM_ADDR_HIGH) ||
> >   (aspeed_smc_has_dma(asc) && addr == R_DMA_LEN) ||
> >   (aspeed_smc_has_dma(asc) && addr == R_DMA_CHECKSUM)
> ||
> >   (addr >= R_SEG_ADDR0 &&
> > @@ -847,6 +859,12 @@ static bool
> aspeed_smc_inject_read_failure(AspeedSMCState *s)
> >   }
> >   }
> >
> > +static uint64_t aspeed_smc_dma_dram_addr(AspeedSMCState *s) {
> > +return s->regs[R_DMA_DRAM_ADDR] |
> > +((uint64_t) s->regs[R_DMA_DRAM_ADDR_HIGH] << 32); }
> > +
> >   static uint32_t aspeed_smc_dma_len(AspeedSMCState *s)
> >   {
> >   AspeedSMCClass *asc = ASPEED_SMC_GET_CLASS(s); @@ -903,24
> > +921,34 @@ static void aspeed_smc_dma_checksum(AspeedSMCState *s)
> >
> >   static void aspeed_smc_dma_rw(AspeedSMCState *s)
> >   {
> > +AspeedSMCClass *asc = ASPEED_SMC_GET_CLASS(s);
> > +uint64_t dma_dram_offset;
> > +uint64_t dma_dram_addr;
> >   MemTxResult result;
> >   uint32_t dma_len;
> >   uint32_t data;
> >
> >   dma_len = aspeed_smc_dma_len(s);
> > +dma_dram_addr = aspeed_smc_dma_dram_addr(s);
> > +
> > +if (aspeed_smc_has_dma64(asc)) {
> > +dma_dram_offset = dma_dram_addr - s->dram_base;
> > +} else {
> > +dma_dram_offset = dma_dram_addr;
> > +}
> >
> >   trace_aspeed_smc_dma_rw(s->regs[R_DMA_CTRL] &
> DMA_CTRL_WRITE ?
> >   "write" : "read",
> >   s->regs[R_DMA_FLASH_ADDR],
> > -s->regs[R_DMA_DRAM_ADDR],
> > +dma_dram_offset,
> >   dma_len);
> >   while (dma_len) {
> >   if (s->regs[R_DMA_CTRL] & DMA_CTRL_WRITE) {
> > -data = address_space_ldl_le(&s->dram_as,
> s->regs[R_DMA_DRAM_ADDR],
> > +data = address_space_ldl_le(&s->dram_as,
> dma

RE: [PATCH] aspeed/smc: Reintroduce "dram-base" property for AST2700

2024-05-27 Thread Jamin Lin

> 
> The Aspeed SMC device model used to have a 'sdram_base' property. It was
> removed by commit d177892d4a48 ("aspeed/smc: Remove unused
> "sdram-base" property") because previous changes simplified the DMA
> transaction model to use an offset in RAM and not the physical address.
> 
> The AST2700 SoC has larger address space (64-bit) and a new register DMA
> DRAM Side Address High Part (0x7C) is introduced to deal with the high bits of
> the DMA address. To be able to compute the offset of the DMA transaction, as
> done on the other SoCs, we will need to know where the DRAM is mapped in
> the address space. Re-introduce a "dram-base"
> property to hold this value.
> 
> Signed-off-by: Cédric Le Goater 
> ---
>  include/hw/ssi/aspeed_smc.h | 1 +
>  hw/ssi/aspeed_smc.c | 1 +
>  2 files changed, 2 insertions(+)
> 
> diff --git a/include/hw/ssi/aspeed_smc.h b/include/hw/ssi/aspeed_smc.h index
> 8e1dda556b91..8791cc0ecb11 100644
> --- a/include/hw/ssi/aspeed_smc.h
> +++ b/include/hw/ssi/aspeed_smc.h
> @@ -76,6 +76,7 @@ struct AspeedSMCState {
>  AddressSpace flash_as;
>  MemoryRegion *dram_mr;
>  AddressSpace dram_as;
> +uint64_t dram_base;
> 
>  AspeedSMCFlash flashes[ASPEED_SMC_CS_MAX];
> 
> diff --git a/hw/ssi/aspeed_smc.c b/hw/ssi/aspeed_smc.c index
> 6e1a84c19713..7075bc9d61b0 100644
> --- a/hw/ssi/aspeed_smc.c
> +++ b/hw/ssi/aspeed_smc.c
> @@ -1220,6 +1220,7 @@ static const VMStateDescription
> vmstate_aspeed_smc = {
> 
>  static Property aspeed_smc_properties[] = {
>  DEFINE_PROP_BOOL("inject-failure", AspeedSMCState, inject_failure,
> false),
> +DEFINE_PROP_UINT64("dram-base", AspeedSMCState, dram_base, 0),
>  DEFINE_PROP_LINK("dram", AspeedSMCState, dram_mr,
>   TYPE_MEMORY_REGION, MemoryRegion *),
>  DEFINE_PROP_END_OF_LIST(),
> --
> 2.45.1

Reviewed-by: Jamin Lin 

Thanks-Jamin

RE: [PATCH v4 05/16] aspeed/sdmc: Add AST2700 support

2024-05-27 Thread Jamin Lin

Hi Philippe, Cedric

> On 27/5/24 13:18, Cédric Le Goater wrote:
> > On 5/27/24 12:24, Philippe Mathieu-Daudé wrote:
> >> Hi Jamin,
> >>
> >> On 27/5/24 10:02, Jamin Lin wrote:
> >>> The SDRAM memory controller(DRAMC) controls the access to external
> >>> DDR4 and DDR5 SDRAM and power up to DDR4 and DDR5 PHY.
> >>>
> >>> The DRAM memory controller of AST2700 is not backward compatible to
> >>> previous chips such AST2600, AST2500 and AST2400.
> >>>
> >>> Max memory is now 8GiB on the AST2700. Introduce new
> >>> aspeed_2700_sdmc and class with read/write operation and reset
> >>> handlers.
> >>>
> >>> Define DRAMC necessary protected registers and unprotected registers
> >>> for AST2700 and increase the register set to 0x1000.
> >>>
> >>> Add unlocked property to change controller protected status.
> >>>
> >>> Signed-off-by: Troy Lee 
> >>> Signed-off-by: Jamin Lin 
> >>> Reviewed-by: Cédric Le Goater 
> >>> ---
> >>>   hw/misc/aspeed_sdmc.c | 190
> >>> +-
> >>>   include/hw/misc/aspeed_sdmc.h |   5 +-
> >>>   2 files changed, 193 insertions(+), 2 deletions(-)
> >>
> >>
> >>> diff --git a/include/hw/misc/aspeed_sdmc.h
> >>> b/include/hw/misc/aspeed_sdmc.h index ec2d59a14f..61c979583a 100644
> >>> --- a/include/hw/misc/aspeed_sdmc.h
> >>> +++ b/include/hw/misc/aspeed_sdmc.h
> >>> @@ -17,6 +17,7 @@ OBJECT_DECLARE_TYPE(AspeedSDMCState,
> >>> AspeedSDMCClass, ASPEED_SDMC)
> >>>   #define TYPE_ASPEED_2400_SDMC TYPE_ASPEED_SDMC "-ast2400"
> >>>   #define TYPE_ASPEED_2500_SDMC TYPE_ASPEED_SDMC "-ast2500"
> >>>   #define TYPE_ASPEED_2600_SDMC TYPE_ASPEED_SDMC "-ast2600"
> >>> +#define TYPE_ASPEED_2700_SDMC TYPE_ASPEED_SDMC "-ast2700"
> >>>   /*
> >>>    * SDMC has 174 documented registers. In addition the u-boot
> >>> device tree @@ -29,7 +30,7 @@
> OBJECT_DECLARE_TYPE(AspeedSDMCState,
> >>> AspeedSDMCClass, ASPEED_SDMC)
> >>>    * time, and the other is in the DDR-PHY IP which is used during
> >>> DDR-PHY
> >>>    * training.
> >>>    */
> >>> -#define ASPEED_SDMC_NR_REGS (0x500 >> 2)
> >>> +#define ASPEED_SDMC_NR_REGS (0x1000 >> 2)
> >>
> >> This change breaks the migration stream.
> >
> > Do you mean migration compat ? We never cared much about that for the
> > Aspeed machines.
> 
> So let's just remove the VMSTATE to reduce code burden?
> 
> Otherwise incrementing the vmstate.version is enough.
> 
> Regards,
> 
> Phil.
If you are both okay with that, I will remove it.
Do I need to create a new patch or just update in this patch?
Thanks-Jamin

Re: [RFC v2 0/2] target/loongarch: Add loongson binary translation feature

2024-05-27 Thread maobibo





On 2024/5/27 下午6:39, Philippe Mathieu-Daudé wrote:

Hi Bibo,

On 27/5/24 10:34, Bibo Mao wrote:

Loongson Binary Translation (LBT) is used to accelerate binary
translation. LBT feature is added in kvm mode, not supported in TCG
mode since it is not emulated. And only LBT feature is added here, LBT
registers saving and restoring is not supported since it depends on LBT
feature implemented in KVM kernel


How do you test?

There is a test application using LBT instructions, as follows.

If LBT is not enabled, it reports illegal instruction. And it does not 
report error during VM migration.


Regards
Bibo Mao

--
#include <stdio.h>
#include <sched.h>
/*
 * Endless LBT smoke test.  Each loop iteration performs two checks using
 * hand-encoded instructions (named by the trailing comments on each .word):
 *   1. a value is passed through mtflag/mfflag and must read back unchanged;
 *   2. after settm / mttop / inctop, the value read by mfftop must be 6.
 * On the first mismatch the offending values are printed and 1 is returned.
 * There is no other exit from the loop, so the final "return 0" is never
 * reached.
 */
int main()
{
int a = 0, b = 0;
for (;;)
{
/*
 * Load a constant into $t0, run the mtflag/mfflag/mtflag sequence, then copy
 * $t0 into 'a'.
 * NOTE(review): $t0 is written by this asm but the clobber list is empty;
 * confirm the compiler cannot be keeping a live value in $t0.
 */
asm(
"li.d $t0, 0xff  \n\t"
".word ((0x17<<18)|(0x3f<<10)|(1<<5)|0xc) \n\t" // mtflag
".word ((0x17<<18)|(0x3f<<10)|(0<<5)|0xc) \n\t" // mfflag
".word ((0x17<<18)|(0x3f<<10)|(1<<5)|0xc) \n\t" // mtflag
"move %0, $t0 \n\t"
: "=r"(a) : : );
/*
 * Yield the CPU before reading the flags back -- presumably so the LBT state
 * has to survive a context switch; TODO confirm intent.
 */
sched_yield();
/*
 * Read the flags again and copy $t0 into 'b'; the encoded mfflag is expected
 * to leave its result in $t0 (same unlisted-clobber caveat as above).
 */
asm(
".word ((0x17<<18)|(0x3f<<10)|(0<<5)|0xc) \n\t" // mfflag
"move %0, $t0 \n\t"
: "=r"(b) : :);

/* The value read after the yield must equal the one captured before it. */
if (a != b)
{
printf("in: 0x%x <=> out 0x%x \n", a, b);
return 1;
}

sched_yield();
int top = 0;
/*
 * Set up the top value and increment it once.
 * NOTE(review): the comment below says "mttop 1", but the encoded operand is
 * (5 << 5) and the check further down expects 6 after inctop -- this looks
 * like "mttop 5"; confirm against the instruction encoding.
 */
asm(
".word (0x8008) \n\t" // settm
".word ((0x70 << 8) | (5 << 5)) \n\t" // mttop 1
".word (0x8009) \n\t" // inctop
: : :);
/* Yield again between writing the top value and reading it back. */
sched_yield();
/* Read the top value back via $t0 into 'top'. */
asm(
".word ((0x3a0 << 5) | (0xc)) \n\t" // mfftop
"move %0, $t0 \n\t"
: "=r"(top) : : );

/* Expected value after the settm/mttop/inctop sequence above. */
if (top != 6)
{
printf("top: %d \n", top);
return 1;
}
}
return 0;
}



Thanks,

Phil.

Re: [RFC v2 1/2] target/loongarch: Add loongson binary translation feature

2024-05-27 Thread maobibo


Hi Philippe,

Thanks for reviewing my patch.
I reply inline.

On 2024/5/27 下午6:37, Philippe Mathieu-Daudé wrote:

Hi Bibo,

On 27/5/24 10:35, Bibo Mao wrote:

Loongson Binary Translation (LBT) is used to accelerate binary
translation, which contains 4 scratch registers (scr0 to scr3), x86/ARM
eflags (eflags) and x87 fpu stack pointer (ftop).

Now LBT feature is added in kvm mode, not supported in TCG mode since
it is not emulated. There are two feature flags such as forced_features
and default_features for each vcpu, the real feature is still in cpucfg.
Flag forced_features is parsed from command line, default_features is
parsed from cpu type.

Flag forced_features has higher priority than flag default_features,
default_features will be used if there is no command line option for LBT
feature. If the feature is not supported with KVM host, it reports error
and exits if forced_features is set, else it disables feature and 
continues

if default_features is set.

Signed-off-by: Bibo Mao 
---
  target/loongarch/cpu.c    | 69 +++
  target/loongarch/cpu.h    | 12 +
  target/loongarch/kvm/kvm.c    | 26 ++
  target/loongarch/kvm/kvm_loongarch.h  | 16 +++
  target/loongarch/loongarch-qmp-cmds.c |  2 +-
  5 files changed, 124 insertions(+), 1 deletion(-)




+static void loongarch_set_lbt(Object *obj, bool value, Error **errp)
+{
+    LoongArchCPU *cpu = LOONGARCH_CPU(obj);
+
+    if (!kvm_enabled()) {


Either set errp, ...


+    return;
+    }
+
+    if (value) {
+    /* Enable binary translation for all architectures */
+    cpu->env.forced_features |= BIT_ULL(LOONGARCH_FEATURE_LBT);
+    } else {
+    /* Disable default features also */
+    cpu->env.default_features &= ~BIT_ULL(LOONGARCH_FEATURE_LBT);
+    }
+}
+
  void loongarch_cpu_post_init(Object *obj)
  {
  object_property_add_bool(obj, "lsx", loongarch_get_lsx,
   loongarch_set_lsx);
  object_property_add_bool(obj, "lasx", loongarch_get_lasx,
   loongarch_set_lasx);


... or only add the property if KVM is enabled:

    if (kvm_enabled()) {

Sure, will do. I think this method is better.

By the way, the bitmap method (forced_features/default_features) is a variant
of the OnOffAuto method. The bitmap method uses two bits, while the OnOffAuto
method uses a separate feature variable. We do not know which method is
better or which is the future trend.


Regards
Bibo Mao



+    object_property_add_bool(obj, "lbt", loongarch_get_lbt,
+ loongarch_set_lbt);
  }

Re: [PATCH 2/2] hw/ufs: Add support MCQ of UFSHCI 4.0

2024-05-27 Thread Jeuk Kim


Thanks for your contribution!

There are only two minor comments.

Please check it and send patch v2.


Thank you!

On 5/21/2024 8:05 PM, Minwoo Im wrote:

@@ -1288,12 +1717,21 @@ static void ufs_exit(PCIDevice *pci_dev)
  ufs_clear_req(&u->req_list[i]);
  }
  g_free(u->req_list);
+
+for (int i = 0; i < ARRAY_SIZE(u->sq); i++) {
+ufs_mcq_delete_sq(u, i);


Isn't it possible that trace_ufs_err_mcq_delete_cq_not_exists is printed 
even in a normal shutdown situation?


If true, please fix it so that the ufs_err log is not printed in normal 
situation.



+}
+for (int i = 0; i < ARRAY_SIZE(u->cq); i++) {
+ufs_mcq_delete_cq(u, i);
+}
  }
  
  static Property ufs_props[] = {

  DEFINE_PROP_STRING("serial", UfsHc, params.serial),
  DEFINE_PROP_UINT8("nutrs", UfsHc, params.nutrs, 32),
  DEFINE_PROP_UINT8("nutmrs", UfsHc, params.nutmrs, 8),
+DEFINE_PROP_BOOL("mcq", UfsHc, params.mcq, false),
+DEFINE_PROP_UINT8("mcq-maxq", UfsHc, params.mcq_maxq, 1),

Please change this value to a value greater than or equal to 2.

[PATCH v2 6/6] tests/qtest/migration-test: Use custom asm bios for ppc64

2024-05-27 Thread Nicholas Piggin

Similar to other archs, build a custom bios memory updater. Running the
test with OF code is a cool trick, but SLOF takes a long time to boot.
This reduces test time by around 3x (150s to 50s).

Reviewed-by: Fabiano Rosas 
Signed-off-by: Nicholas Piggin 
---
 tests/migration/migration-test.h   |  1 +
 tests/migration/ppc64/a-b-kernel.h | 42 +++
 tests/qtest/migration-test.c   | 37 +++--
 tests/migration/Makefile   |  2 +-
 tests/migration/ppc64/Makefile | 15 +++
 tests/migration/ppc64/a-b-kernel.S | 66 ++
 6 files changed, 131 insertions(+), 32 deletions(-)
 create mode 100644 tests/migration/ppc64/a-b-kernel.h
 create mode 100644 tests/migration/ppc64/Makefile
 create mode 100644 tests/migration/ppc64/a-b-kernel.S

diff --git a/tests/migration/migration-test.h b/tests/migration/migration-test.h
index 68512c0b1b..194df7df6f 100644
--- a/tests/migration/migration-test.h
+++ b/tests/migration/migration-test.h
@@ -22,6 +22,7 @@
 /* PPC */
 #define PPC_TEST_MEM_START (1 * 1024 * 1024)
 #define PPC_TEST_MEM_END   (100 * 1024 * 1024)
+#define PPC_H_PUT_TERM_CHAR 0x58
 
 /* ARM */
 #define ARM_TEST_MEM_START (0x4000 + 1 * 1024 * 1024)
diff --git a/tests/migration/ppc64/a-b-kernel.h 
b/tests/migration/ppc64/a-b-kernel.h
new file mode 100644
index 00..673317efdb
--- /dev/null
+++ b/tests/migration/ppc64/a-b-kernel.h
@@ -0,0 +1,42 @@
+/* This file is automatically generated from the assembly file in
+ * tests/migration/ppc64. Edit that file and then run "make all"
+ * inside tests/migration to update, and then remember to send both
+ * the header and the assembler differences in your patch submission.
+ */
+unsigned char ppc64_kernel[] = {
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x7d, 0x20, 0x00, 0xa6, 0x39, 0x40, 0xff, 0xff,
+  0x79, 0x49, 0xf8, 0x0e, 0x7d, 0x20, 0x01, 0x64, 0x3e, 0x80, 0x00, 0x10,
+  0x62, 0x94, 0x00, 0x00, 0x3d, 0x20, 0x06, 0x40, 0x61, 0x29, 0x00, 0x00,
+  0x7e, 0xb4, 0x48, 0x50, 0x39, 0x40, 0x10, 0x00, 0x7e, 0xb5, 0x53, 0xd2,
+  0x38, 0x60, 0x00, 0x58, 0x38, 0x80, 0x00, 0x00, 0x38, 0xa0, 0x00, 0x01,
+  0x38, 0xc0, 0x00, 0x41, 0x78, 0xc6, 0xc1, 0xc6, 0x44, 0x00, 0x00, 0x22,
+  0x38, 0x60, 0x00, 0x00, 0x7e, 0x89, 0xa3, 0x78, 0x7e, 0xa9, 0x03, 0xa6,
+  0x98, 0x69, 0x00, 0x00, 0x39, 0x29, 0x10, 0x00, 0x42, 0x00, 0xff, 0xf8,
+  0x7e, 0x89, 0xa3, 0x78, 0x7e, 0xa9, 0x03, 0xa6, 0x88, 0x69, 0x00, 0x00,
+  0x38, 0x63, 0x00, 0x01, 0x98, 0x69, 0x00, 0x00, 0x39, 0x29, 0x10, 0x00,
+  0x42, 0x00, 0xff, 0xf0, 0x38, 0x60, 0x00, 0x58, 0x38, 0x80, 0x00, 0x00,
+  0x38, 0xa0, 0x00, 0x01, 0x38, 0xc0, 0x00, 0x42, 0x78, 0xc6, 0xc1, 0xc6,
+  0x44, 0x00, 0x00, 0x22, 0x4b, 0xff, 0xff, 0xcc
+};
+
diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
index 87fa733d60..45830eb213 100644
--- a/tests/qtest/migration-test.c
+++ b/tests/qtest/migration-test.c
@@ -128,6 +128,7 @@ static char *bootpath;
  */
 #include "tests/migration/i386/a-b-bootblock.h"
 #include "tests/migration/aarch64/a-b-kernel.h"
+#include "tests/migration/ppc64/a-b-kernel.h"
 #include "tests/migration/s390x/a-b-bios.h"
 
 static void bootfile_create(char *dir, bool suspend_me)
@@ -147,10 +148,8 @@ static void bootfile_create(char *dir, bool suspend_me)
 content = s390x_elf;
 len = sizeof(s390x_elf);
 } else if (strcmp(arch, "ppc64") == 0) {
-/*
- * sane architectures can be programme

[PATCH v2 5/6] tests/qtest/migration-test: Enable on ppc64 TCG

2024-05-27 Thread Nicholas Piggin

ppc64 with TCG seems to no longer be failing this test, perhaps since
commit 03bfc2188f061 ("physmem: Fix migration dirty bitmap coherency
with TCG memory access") which is not ppc specific but was seen to hit
ppc64 quite easily.

Let's enable it again.

The s390x problem has been identified so mention it while we are
adjusting the comment.

Reviewed-by: Thomas Huth 
Signed-off-by: Nicholas Piggin 
---
 tests/qtest/migration-test.c | 16 +++-
 1 file changed, 3 insertions(+), 13 deletions(-)

diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
index 7d64696f7a..87fa733d60 100644
--- a/tests/qtest/migration-test.c
+++ b/tests/qtest/migration-test.c
@@ -3463,19 +3463,9 @@ int main(int argc, char **argv)
 #endif
 
 /*
- * On ppc64, the test only works with kvm-hv, but not with kvm-pr and TCG
- * is touchy due to race conditions on dirty bits (especially on PPC for
- * some reason)
- */
-if (g_str_equal(arch, "ppc64") &&
-(!has_kvm || access("/sys/module/kvm_hv", F_OK))) {
-g_test_message("Skipping tests: kvm_hv not available");
-goto test_add_done;
-}
-
-/*
- * Similar to ppc64, s390x seems to be touchy with TCG, so disable it
- * there until the problems are resolved
+ * On s390x with TCG, migration is observed to hang due to the 'pending'
+ * state of the flic interrupt controller not being migrated or
+ * reconstructed post-migration. Disable it until the problem is resolved.
  */
 if (g_str_equal(arch, "s390x") && !has_kvm) {
 g_test_message("Skipping tests: s390x host with KVM is required");
-- 
2.43.0

[PATCH v2 4/6] tests/qtest/migration-test: Quieten ppc64 QEMU warnings

2024-05-27 Thread Nicholas Piggin

Reviewed-by: Thomas Huth 
Signed-off-by: Nicholas Piggin 
---
 tests/qtest/migration-test.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
index 8247ed98f2..7d64696f7a 100644
--- a/tests/qtest/migration-test.c
+++ b/tests/qtest/migration-test.c
@@ -21,6 +21,7 @@
 #include "chardev/char.h"
 #include "crypto/tlscredspsk.h"
 #include "qapi/qmp/qlist.h"
+#include "ppc-util.h"
 
 #include "migration-helpers.h"
 #include "tests/migration/migration-test.h"
@@ -750,7 +751,8 @@ static int test_migrate_start(QTestState **from, QTestState 
**to,
   "until'", end_address, start_address);
 machine_alias = "pseries";
 machine_opts = "vsmt=8";
-arch_opts = g_strdup("-nodefaults");
+arch_opts = g_strdup("-nodefaults "
+ "-machine " PSERIES_DEFAULT_CAPABILITIES);
 } else if (strcmp(arch, "aarch64") == 0) {
 memory_size = "150M";
 machine_alias = "virt";
-- 
2.43.0

[PATCH v2 3/6] tests/qtest: Move common define from libqos-spapr.h to new ppc-util.h

2024-05-27 Thread Nicholas Piggin

The spapr QEMU machine defaults is useful outside libqos, so create a
new header for ppc specific qtests and move it there.

Signed-off-by: Nicholas Piggin 
---
 tests/qtest/libqos/libqos-spapr.h |  7 ---
 tests/qtest/ppc-util.h| 19 +++
 tests/qtest/boot-serial-test.c|  2 +-
 tests/qtest/prom-env-test.c   |  2 +-
 tests/qtest/pxe-test.c|  2 +-
 5 files changed, 22 insertions(+), 10 deletions(-)
 create mode 100644 tests/qtest/ppc-util.h

diff --git a/tests/qtest/libqos/libqos-spapr.h 
b/tests/qtest/libqos/libqos-spapr.h
index e4483c14f8..a446276416 100644
--- a/tests/qtest/libqos/libqos-spapr.h
+++ b/tests/qtest/libqos/libqos-spapr.h
@@ -9,11 +9,4 @@ QOSState *qtest_spapr_boot(const char *cmdline_fmt, ...)
 G_GNUC_PRINTF(1, 2);
 void qtest_spapr_shutdown(QOSState *qs);
 
-/* List of capabilities needed to silence warnings with TCG */
-#define PSERIES_DEFAULT_CAPABILITIES \
-"cap-cfpc=broken,"   \
-"cap-sbbc=broken,"   \
-"cap-ibs=broken,"\
-"cap-ccf-assist=off,"
-
 #endif
diff --git a/tests/qtest/ppc-util.h b/tests/qtest/ppc-util.h
new file mode 100644
index 00..f68ee93520
--- /dev/null
+++ b/tests/qtest/ppc-util.h
@@ -0,0 +1,19 @@
+/*
+ * PowerPC misc useful things
+ *
+ * Copyright (c) 2024, IBM Corporation.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef PPC_UTIL_H
+#define PPC_UTIL_H
+
+/* List of capabilities needed to silence warnings with TCG */
+#define PSERIES_DEFAULT_CAPABILITIES \
+"cap-cfpc=broken,"   \
+"cap-sbbc=broken,"   \
+"cap-ibs=broken,"\
+"cap-ccf-assist=off,"
+
+#endif /* PPC_UTIL_H */
diff --git a/tests/qtest/boot-serial-test.c b/tests/qtest/boot-serial-test.c
index df389adeeb..3b92fa5d50 100644
--- a/tests/qtest/boot-serial-test.c
+++ b/tests/qtest/boot-serial-test.c
@@ -15,7 +15,7 @@
 
 #include "qemu/osdep.h"
 #include "libqtest.h"
-#include "libqos/libqos-spapr.h"
+#include "ppc-util.h"
 
 static const uint8_t bios_avr[] = {
 0x88, 0xe0, /* ldi r24, 0x08   */
diff --git a/tests/qtest/prom-env-test.c b/tests/qtest/prom-env-test.c
index 39ccb59797..14705105ad 100644
--- a/tests/qtest/prom-env-test.c
+++ b/tests/qtest/prom-env-test.c
@@ -21,7 +21,7 @@
 
 #include "qemu/osdep.h"
 #include "libqtest.h"
-#include "libqos/libqos-spapr.h"
+#include "ppc-util.h"
 
 #define MAGIC   0xcafec0de
 #define ADDRESS 0x4000
diff --git a/tests/qtest/pxe-test.c b/tests/qtest/pxe-test.c
index e4b48225a5..a3f900fbea 100644
--- a/tests/qtest/pxe-test.c
+++ b/tests/qtest/pxe-test.c
@@ -16,7 +16,7 @@
 #include 
 #include "libqtest.h"
 #include "boot-sector.h"
-#include "libqos/libqos-spapr.h"
+#include "ppc-util.h"
 
 #define NETNAME "net0"
 
-- 
2.43.0

[PATCH v2 1/6] tests/qtest/migration: Run test_mode_reboot outside gitlab CI

2024-05-27 Thread Nicholas Piggin

As Fabiano points out, this test isn't flaky; it just can't run under
gitlab CI since runners have a very small shm size.

Suggested-by: Fabiano Rosas 
Signed-off-by: Nicholas Piggin 
---
 tests/qtest/migration-test.c | 18 +-
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
index b7e3406471..04bf1c0092 100644
--- a/tests/qtest/migration-test.c
+++ b/tests/qtest/migration-test.c
@@ -706,6 +706,14 @@ static int test_migrate_start(QTestState **from, 
QTestState **to,
 g_test_skip("/dev/shm is not supported");
 return -1;
 }
+if (getenv("GITLAB_CI")) {
+/*
+ * Gitlab runners are limited to 64MB shm size. See:
+ * https://lore.kernel.org/all/87ttq5fvh7@suse.de/
+ */
+g_test_skip("/dev/shm is not supported in Gitlab CI environment");
+return -1;
+}
 }
 
 dst_state = (QTestMigrationState) { };
@@ -3506,15 +3514,7 @@ int main(int argc, char **argv)
test_precopy_file_offset);
 migration_test_add("/migration/precopy/file/offset/bad",
test_precopy_file_offset_bad);
-
-/*
- * Our CI system has problems with shared memory.
- * Don't run this test until we find a workaround.
- */
-if (getenv("QEMU_TEST_FLAKY_TESTS")) {
-migration_test_add("/migration/mode/reboot", test_mode_reboot);
-}
-
+migration_test_add("/migration/mode/reboot", test_mode_reboot);
 migration_test_add("/migration/precopy/file/mapped-ram",
test_precopy_file_mapped_ram);
 migration_test_add("/migration/precopy/file/mapped-ram/live",
-- 
2.43.0

[PATCH v2 2/6] tests/qtest/migration-test: Fix and enable test_ignore_shared

2024-05-27 Thread Nicholas Piggin

This test is already starting to bitrot, so first remove it from ifdef
and fix compile issues. ppc64 transfers about 2MB, so bump the size
threshold too.

It was said to be broken on aarch64 but it may have been the limited shm
size under gitlab CI. The test is now excluded from running on CI so it
shouldn't cause too much annoyance.

So let's try enabling it.

Cc: Yury Kotov 
Cc: Dr. David Alan Gilbert 
Signed-off-by: Nicholas Piggin 
---
 tests/qtest/migration-test.c | 14 --
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
index 04bf1c0092..8247ed98f2 100644
--- a/tests/qtest/migration-test.c
+++ b/tests/qtest/migration-test.c
@@ -1893,14 +1893,15 @@ static void 
test_precopy_unix_tls_x509_override_host(void)
 #endif /* CONFIG_TASN1 */
 #endif /* CONFIG_GNUTLS */
 
-#if 0
-/* Currently upset on aarch64 TCG */
 static void test_ignore_shared(void)
 {
 g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs);
 QTestState *from, *to;
+MigrateStart args = {
+.use_shmem = true,
+};
 
-if (test_migrate_start(&from, &to, uri, false, true, NULL, NULL)) {
+if (test_migrate_start(&from, &to, uri, &args)) {
 return;
 }
 
@@ -1925,11 +1926,11 @@ static void test_ignore_shared(void)
 wait_for_migration_complete(from);
 
 /* Check whether shared RAM has been really skipped */
-g_assert_cmpint(read_ram_property_int(from, "transferred"), <, 1024 * 
1024);
+g_assert_cmpint(read_ram_property_int(from, "transferred"), <,
+   4 * 1024 * 1024);
 
 test_migrate_end(from, to, true);
 }
-#endif
 
 static void *
 test_migrate_xbzrle_start(QTestState *from,
@@ -3580,7 +3581,8 @@ int main(int argc, char **argv)
 #endif /* CONFIG_TASN1 */
 #endif /* CONFIG_GNUTLS */
 
-/* migration_test_add("/migration/ignore_shared", test_ignore_shared); */
+migration_test_add("/migration/ignore_shared", test_ignore_shared);
+
 #ifndef _WIN32
 migration_test_add("/migration/precopy/fd/tcp",
test_migrate_precopy_fd_socket);
-- 
2.43.0

[PATCH v2 0/6] tests/qtest/migration-test: Improve and enable on ppc64

2024-05-27 Thread Nicholas Piggin

Since v1:
- Added "TCG" in subject since it is enabling for TCG
- Enable test_mode_reboot with checking GITLAB_CI env that Fabiano
  suggested.
- Move test_ignore_shared patch out of the s390 fix series to here
  and use GITLAB_CI for it too.
- Move ppc64 pseries machine options out of libqos-spapr.h to a
  new general qtest ppc header.
- Adjust remaining s390x comment to explain the problem.

Thanks,
Nick

Nicholas Piggin (6):
  tests/qtest/migration: Run test_mode_reboot outside gitlab CI
  tests/qtest/migration-test: Fix and enable test_ignore_shared
  tests/qtest: Move common define from libqos-spapr.h to new ppc-util.h
  tests/qtest/migration-test: Quieten ppc64 QEMU warnings
  tests/qtest/migration-test: Enable on ppc64 TCG
  tests/qtest/migration-test: Use custom asm bios for ppc64

 tests/migration/migration-test.h   |  1 +
 tests/migration/ppc64/a-b-kernel.h | 42 +++
 tests/qtest/libqos/libqos-spapr.h  |  7 ---
 tests/qtest/ppc-util.h | 19 +++
 tests/qtest/boot-serial-test.c |  2 +-
 tests/qtest/migration-test.c   | 85 ++
 tests/qtest/prom-env-test.c|  2 +-
 tests/qtest/pxe-test.c |  2 +-
 tests/migration/Makefile   |  2 +-
 tests/migration/ppc64/Makefile | 15 ++
 tests/migration/ppc64/a-b-kernel.S | 66 +++
 11 files changed, 174 insertions(+), 69 deletions(-)
 create mode 100644 tests/migration/ppc64/a-b-kernel.h
 create mode 100644 tests/qtest/ppc-util.h
 create mode 100644 tests/migration/ppc64/Makefile
 create mode 100644 tests/migration/ppc64/a-b-kernel.S

-- 
2.43.0

Re: [RFC PATCH 4/4] ci: Add the new migration device tests

2024-05-27 Thread Fabiano Rosas

Peter Xu  writes:

> On Thu, May 23, 2024 at 05:19:22PM -0300, Fabiano Rosas wrote:
>> We have two new migration tests that check cross version
>> compatibility. One uses the vmstate-static-checker.py script to
>> compare the vmstate structures from two different QEMU versions. The
>> other runs a simple migration with a few devices present in the VM, to
>> catch obvious breakages.
>> 
>> Add both tests to the migration-compat-common job.
>> 
>> Signed-off-by: Fabiano Rosas 
>> ---
>>  .gitlab-ci.d/buildtest.yml | 43 +++---
>>  1 file changed, 36 insertions(+), 7 deletions(-)
>> 
>> diff --git a/.gitlab-ci.d/buildtest.yml b/.gitlab-ci.d/buildtest.yml
>> index 91c57efded..bc7ac35983 100644
>> --- a/.gitlab-ci.d/buildtest.yml
>> +++ b/.gitlab-ci.d/buildtest.yml
>> @@ -202,18 +202,47 @@ build-previous-qemu:
>>needs:
>>  - job: build-previous-qemu
>>  - job: build-system-opensuse
>> -  # The old QEMU could have bugs unrelated to migration that are
>> -  # already fixed in the current development branch, so this test
>> -  # might fail.
>> +  # This test is allowed to fail because:
>> +  #
>> +  # - The old QEMU could have bugs unrelated to migration that are
>> +  #   already fixed in the current development branch.
>
> Did you ever hit a real failure with this?  I'm wondering whether we can
> remove this allow_failure thing.
>

I haven't. But when it fails we'll go through an entire release cycle
with this thing showing red for every person that runs the CI. Remember,
this is a CI failure to which there's no fix aside from waiting for the
release to happen. Even if we're quick to react and disable the job, I
feel it might create some confusion already.

>> +  #
>> +  # - The vmstate-static-checker script trips on renames and other
>> +  #   backward-compatible changes to the vmstate structs.
>
> I think I keep my preference per last time we talked on this. :)

Sorry, I'm not trying to force this in any way, I just wrote these to
use in the pull-request and thought I'd put it out there. At the very
least we can have your concerns documented. =)

> I still think it's too early to involve a test that can report false
> negative.

(1)
Well, we haven't seen any false negatives, we've seen fields being
renamed. If that happens, then we'll ask the person to update the
script. Is that not acceptable to you? Or are you thinking about other
sorts of issues?

> I'd still keep running this before soft-freeze like I used to
> do, throw issues to others and urge them to fix before release.

Having hidden procedures that maintainers run before a release is bad
IMHO, it just delays the catching of bugs and frustrates
contributors. Imagine working on a series, everything goes well with
reviews, CI passes, patch gets queued and merged and a month later you
get a ping about something you should have done to avoid breaking
migration. Right during freeze.

> Per my
> previous experience that doesn't consume me a lot of time, and it's not
> common to see issues either.
>
> So I want people to really pay attention when someone sees a migration CI
> test failed, rather than we help people form the habit in "oh migration CI
> failed again?  I think that's fine, it allows failing anyway".

That's a good point. I don't think it applies here though. See my point
in (1).

> So far I still don't see as much benefit to adding this if we need to pay
> for the other false negative issue.  I'll fully support it if e.g. we can
> fix the tool to avoid reporting false negatives, but that may take effort
> that I didn't check.
>

Re: [PATCH] target/riscv: fix instructions count handling in icount mode

2024-05-27 Thread Alistair Francis

On Thu, Apr 11, 2024 at 9:34 PM Clément Léger  wrote:
>
> When icount is enabled, rather than returning the virtual CPU time, we
> should return the instruction count itself. Add an instructions bool
> parameter to get_ticks() to correctly return icount_get_raw() when
> icount_enabled() == 1 and instruction count is queried. This will modify
> the existing behavior which was returning an instructions count close to
> the number of cycles (CPI ~= 1).
>
> Signed-off-by: Clément Léger 

This patch fails checkpatch

Have a look at
https://www.qemu.org/docs/master/devel/submitting-a-patch.html#id32
for details on what tests to run before submitting patches.

Alistair

>
> ---
>  target/riscv/csr.c | 29 -
>  1 file changed, 16 insertions(+), 13 deletions(-)
>
> diff --git a/target/riscv/csr.c b/target/riscv/csr.c
> index 726096444f..5f1dcee102 100644
> --- a/target/riscv/csr.c
> +++ b/target/riscv/csr.c
> @@ -762,14 +762,17 @@ static RISCVException write_vcsr(CPURISCVState *env, 
> int csrno,
>  }
>
>  /* User Timers and Counters */
> -static target_ulong get_ticks(bool shift)
> +static target_ulong get_ticks(bool shift, bool instructions)
>  {
>  int64_t val;
>  target_ulong result;
>
>  #if !defined(CONFIG_USER_ONLY)
>  if (icount_enabled()) {
> -val = icount_get();
> +if (instructions)
> +val = icount_get_raw();
> +else
> +val = icount_get();
>  } else {
>  val = cpu_get_host_ticks();
>  }
> @@ -804,14 +807,14 @@ static RISCVException read_timeh(CPURISCVState *env, 
> int csrno,
>  static RISCVException read_hpmcounter(CPURISCVState *env, int csrno,
>target_ulong *val)
>  {
> -*val = get_ticks(false);
> +*val = get_ticks(false, (csrno == CSR_INSTRET));
>  return RISCV_EXCP_NONE;
>  }
>
>  static RISCVException read_hpmcounterh(CPURISCVState *env, int csrno,
> target_ulong *val)
>  {
> -*val = get_ticks(true);
> +*val = get_ticks(true, (csrno == CSR_INSTRETH));
>  return RISCV_EXCP_NONE;
>  }
>
> @@ -875,11 +878,11 @@ static RISCVException write_mhpmcounter(CPURISCVState 
> *env, int csrno,
>  int ctr_idx = csrno - CSR_MCYCLE;
>  PMUCTRState *counter = &env->pmu_ctrs[ctr_idx];
>  uint64_t mhpmctr_val = val;
> +bool instr = riscv_pmu_ctr_monitor_instructions(env, ctr_idx);
>
>  counter->mhpmcounter_val = val;
> -if (riscv_pmu_ctr_monitor_cycles(env, ctr_idx) ||
> -riscv_pmu_ctr_monitor_instructions(env, ctr_idx)) {
> -counter->mhpmcounter_prev = get_ticks(false);
> +if (riscv_pmu_ctr_monitor_cycles(env, ctr_idx) || instr) {
> +counter->mhpmcounter_prev = get_ticks(false, instr);
>  if (ctr_idx > 2) {
>  if (riscv_cpu_mxl(env) == MXL_RV32) {
>  mhpmctr_val = mhpmctr_val |
> @@ -902,12 +905,12 @@ static RISCVException write_mhpmcounterh(CPURISCVState 
> *env, int csrno,
>  PMUCTRState *counter = &env->pmu_ctrs[ctr_idx];
>  uint64_t mhpmctr_val = counter->mhpmcounter_val;
>  uint64_t mhpmctrh_val = val;
> +bool instr = riscv_pmu_ctr_monitor_instructions(env, ctr_idx);
>
>  counter->mhpmcounterh_val = val;
>  mhpmctr_val = mhpmctr_val | (mhpmctrh_val << 32);
> -if (riscv_pmu_ctr_monitor_cycles(env, ctr_idx) ||
> -riscv_pmu_ctr_monitor_instructions(env, ctr_idx)) {
> -counter->mhpmcounterh_prev = get_ticks(true);
> +if (riscv_pmu_ctr_monitor_cycles(env, ctr_idx) || instr) {
> +counter->mhpmcounterh_prev = get_ticks(true, instr);
>  if (ctr_idx > 2) {
>  riscv_pmu_setup_timer(env, mhpmctr_val, ctr_idx);
>  }
> @@ -926,6 +929,7 @@ static RISCVException riscv_pmu_read_ctr(CPURISCVState 
> *env, target_ulong *val,
>   counter->mhpmcounter_prev;
>  target_ulong ctr_val = upper_half ? counter->mhpmcounterh_val :
>  counter->mhpmcounter_val;
> +bool instr = riscv_pmu_ctr_monitor_instructions(env, ctr_idx);
>
>  if (get_field(env->mcountinhibit, BIT(ctr_idx))) {
>  /*
> @@ -946,9 +950,8 @@ static RISCVException riscv_pmu_read_ctr(CPURISCVState 
> *env, target_ulong *val,
>   * The kernel computes the perf delta by subtracting the current value 
> from
>   * the value it initialized previously (ctr_val).
>   */
> -if (riscv_pmu_ctr_monitor_cycles(env, ctr_idx) ||
> -riscv_pmu_ctr_monitor_instructions(env, ctr_idx)) {
> -*val = get_ticks(upper_half) - ctr_prev + ctr_val;
> +if (riscv_pmu_ctr_monitor_cycles(env, ctr_idx) || instr) {
> +*val = get_ticks(upper_half, instr) - ctr_prev + ctr_val;
>  } else {
>  *val = ctr_val;
>  }
> --
> 2.43.0
>
>

Re: [RFC PATCH 3/4] tests/qtest/migration: Add support for simple device tests

2024-05-27 Thread Fabiano Rosas

Peter Xu  writes:

> On Thu, May 23, 2024 at 05:19:21PM -0300, Fabiano Rosas wrote:
>> The current migration-tests are almost entirely focused on catching
>> bugs on the migration code itself, not on the device migration
>> infrastructure (vmstate). That means we miss catching some low hanging
>> fruits that would show up immediately if only we had the device in
>> question present in the VM.
>> 
>> Add a list of devices to include by default in the migration-tests,
>> starting with one that recently had issues, virtio-gpu. Also add an
>> environment variable QTEST_DEVICE_OPTS to allow test users to
>> experiment with different devices or device options.
>> 
>> Do not run every migration test with the devices because that would
>> increase the complexity of the command lines and, as mentioned, the
>> migration-tests are mostly used to test the core migration code, not
>> the device migration. Add a special value QTEST_DEVICE_OPTS=all that
>> enables testing with devices.
>> 
>> Notes on usage:
>> 
>> For this new testing mode, it's not useful to run all the migration
>> tests, a single test would probably suffice to catch any issues, so
>> provide the -p option to migration-test and the test of your choice.
>> 
>> Like with the cross-version compatibility tests in CI and the recently
>> introduced vmstate-static-checker test, to be of any use, a test with
>> devices needs to be run against a different QEMU version, like so:
>> 
>> $ cd build
>> $ QTEST_DEVICE_OPTS=all \
>>  QTEST_QEMU_BINARY=./qemu-system-x86_64 \
>>  QTEST_QEMU_BINARY_DST=../build-previous/qemu-system-x86_64 \
>>  ./tests/qtest/migration-test -p /x86_64/migration/precopy/tcp/plain
>> 
>> $ cd build
>> $ QTEST_DEVICE_OPTS='-device virtio-net' \
>>  QTEST_QEMU_BINARY=./qemu-system-x86_64 \
>>  QTEST_QEMU_BINARY_DST=../build-previous/qemu-system-x86_64 \
>>  ./tests/qtest/migration-test -p /x86_64/migration/precopy/tcp/plain
>> 
>> Signed-off-by: Fabiano Rosas 
>> ---
>>  tests/qtest/migration-test.c | 19 +--
>>  1 file changed, 17 insertions(+), 2 deletions(-)
>> 
>> diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
>> index 2253e0fc5b..35bb224d18 100644
>> --- a/tests/qtest/migration-test.c
>> +++ b/tests/qtest/migration-test.c
>> @@ -71,6 +71,13 @@ static QTestMigrationState dst_state;
>>  #define QEMU_ENV_SRC "QTEST_QEMU_BINARY_SRC"
>>  #define QEMU_ENV_DST "QTEST_QEMU_BINARY_DST"
>>  
>> +/*
>> + * The tests using DEFAULT_DEVICES need a special invocation and
>> + * cannot be reached from make check, so don't bother with the
>> + * --without-default-devices build.
>
> What's this "--without-default-devices"?

A configure option. It removes from the build any devices that are
marked as default. It's an endless source of bugs because it is supposed
to be paired with a config file that adds back some of the removed
devices, but there's nothing enforcing that so we always run it as is
and generate a broken QEMU binary.

So anything in the tests that refers to devices should first check if
that QEMU binary even has the device present. I'm saying here that we're
not going to do that because this test cannot be accidentally reached
via make check. Realistically, most people will consume this test
through the CI job only.

Re: TCG change broke MorphOS boot on sam460ex

2024-05-27 Thread BALATON Zoltan


On Tue, 28 May 2024, BALATON Zoltan wrote:

On Wed, 3 Apr 2024, Nicholas Piggin wrote:

On Tue Apr 2, 2024 at 9:32 PM AEST, BALATON Zoltan wrote:

On Thu, 21 Mar 2024, BALATON Zoltan wrote:

On 27/2/24 17:47, BALATON Zoltan wrote:

Hello,

Commit 18a536f1f8 (accel/tcg: Always require can_do_io) broke booting
MorphOS on sam460ex (this was before 8.2.0 and I thought I've verified it
before that release but apparently missed it back then). It can be
reproduced with https://www.morphos-team.net/morphos-3.18.iso and following
command:

qemu-system-ppc -M sam460ex -serial stdio -d unimp,guest_errors \
   -drive if=none,id=cd,format=raw,file=morphos-3.18.iso \
   -device ide-cd,drive=cd,bus=ide.1


Any idea on this one? While MorphOS boots on other machines and other OSes
seem to boot on this machine it may still suggest there's some problem
somewhere as this worked before. So it may be worth investigating it to make
sure there's no bug that could affect other OSes too even if they boot. I
don't know how to debug this so some help would be needed.


In the bad case it crashes after running this TB:


IN:
0x00c01354:  38c00040  li   r6, 0x40
0x00c01358:  38e10204  addi r7, r1, 0x204
0x00c0135c:  39010104  addi r8, r1, 0x104
0x00c01360:  39410004  addi r10, r1, 4
0x00c01364:  39200000  li   r9, 0
0x00c01368:  7cc903a6  mtctr    r6
0x00c0136c:  84c70004  lwzu r6, 4(r7)
0x00c01370:  7cc907a4  tlbwehi  r6, r9
0x00c01374:  84c80004  lwzu r6, 4(r8)
0x00c01378:  7cc90fa4  tlbwelo  r6, r9
0x00c0137c:  84ca0004  lwzu r6, 4(r10)
0x00c01380:  7cc917a4  tlbwehi  r6, r9
0x00c01384:  39290001  addi r9, r9, 1
0x00c01388:  4200ffe4  bdnz 0xc0136c

IN:
0x00c01374: unable to read memory


"unable to read memory" is the tracer, it does actually translate
the address, but it points to a wayward real address which returns
0 to TCG, which is an invalid instruction.

The good case instead doesn't exit the TB after 0x00c01370 but after
the complete loop at the bdnz. That looks like this after the same
first TB:


IN:
0x00c0136c:  84c70004  lwzu r6, 4(r7)
0x00c01370:  7cc907a4  tlbwehi  r6, r9
0x00c01374:  84c80004  lwzu r6, 4(r8)
0x00c01378:  7cc90fa4  tlbwelo  r6, r9
0x00c0137c:  84ca0004  lwzu r6, 4(r10)
0x00c01380:  7cc917a4  tlbwehi  r6, r9
0x00c01384:  39290001  addi r9, r9, 1
0x00c01388:  4200ffe4  bdnz 0xc0136c

IN:
0x00c0138c:  4c00012c  isync

All the tlbwe are executed in the same TB. MMU tracing shows the
first tlbwehi creates a new valid(!) TLB for 0x-0x1
that has a garbage RPN because the tlbwelo did not run yet.

What's happening in the bad case is that the translator breaks
and "re-fetches" instructions in the middle of that sequence, and
that's where the bogus translation causes 0 to be returned. The
good case the whole block is executed in the same fetch which
creates correct translations.

So it looks like a morphos bug, the can-do-io change just happens
to cause it to re-fetch in that place, but that could happen for
a number of reasons, so you can't rely on TLB *only* changing or
ifetch *only* re-fetching at a sync point like isync.

I would expect code like this to write an invalid entry with tlbwehi,
then tlbwelo to set the correct RPN, then make the entry valid with
the second tlbwehi. It would probably fix the bug if you just did the
first tlbwehi with r6=0 (or at least without the 0x200 bit set).


Revisiting this, I've found in the docs that PPC440 has shadow TLBs so this 
code can rely upon the TLB not being invalidated until isync and works on 
real machine but breaks on QEMU. We would either need to make sure the TB 
runs until the sync or somehow emulate the shadow TLB. I've experimented with 
the latter but I could not make it work (and unexpectedly keeping a cache of 
the most recently used entries is slower than always searching through all 
TLB entries as done now so I've abandoned that idea). The problem is that an 
entry is modified by multiple tlbwe instructions but these can come in any 
order (and sometimes only one of them is done like invalidating an entry 
seems to only do one write) so I don't know when to copy the new entry to the 
TLB and when to wait for more parts and keep the old one. Any idea how to fix 
this?


Also I'm not sure if it's related but by running the stream benchmark on 
sam460ex now I can reproduce some memory access problem but I'm not sure what 
causes it. The full output of that benchmark under AmigaOS on sam460ex is 
this:


-
STREAM version $Revision: 5.10 $
-
This system uses 8 bytes per array element.
-
Array size = 10000000 (elements), Offset = 0 (elements)
Memory per array = 76.3 MiB (= 0.1 GiB).
Total memory required = 228.9 MiB (= 0.

Re: [RFC PATCH 2/4] tests/qtest/migration: Add a test that runs vmstate-static-checker

2024-05-27 Thread Fabiano Rosas

Peter Xu  writes:

> On Thu, May 23, 2024 at 05:19:20PM -0300, Fabiano Rosas wrote:
>> We have the vmstate-static-checker script that takes the output of:
>> '$QEMU -M $machine -dump-vmstate' for two different QEMU versions and
>> compares them to check for compatibility breakages. This is just too
>> simple and useful for us to pass on it. Add a test that runs the
>> script.
>> 
>> Since this needs to use two different QEMU versions, the test is
>> skipped if only one QEMU is provided. The infrastructure for passing
>> more than one binary is already in place:
>> 
>> $ PYTHON=$(which python3.11) \
>>  QTEST_QEMU_BINARY_SRC=../build-previous/qemu-system-x86_64 \
>>  QTEST_QEMU_BINARY=./qemu-system-x86_64 \
>>  ./tests/qtest/migration-test -p /x86_64/migration/vmstate-checker-script
>> 
>> Signed-off-by: Fabiano Rosas 
>> ---
>> some code duplication for now, just so we can reason about this
>> without too much noise
>> ---
>>  tests/qtest/migration-test.c | 82 
>>  1 file changed, 82 insertions(+)
>> 
>> diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
>> index e8d3555f56..2253e0fc5b 100644
>> --- a/tests/qtest/migration-test.c
>> +++ b/tests/qtest/migration-test.c
>> @@ -63,6 +63,7 @@ static QTestMigrationState dst_state;
>>  #define DIRTYLIMIT_TOLERANCE_RANGE  25  /* MB/s */
>>  
>>  #define ANALYZE_SCRIPT "scripts/analyze-migration.py"
>> +#define VMSTATE_CHECKER_SCRIPT "scripts/vmstate-static-checker.py"
>>  
>>  #define QEMU_VM_FILE_MAGIC 0x5145564d
>>  #define FILE_TEST_FILENAME "migfile"
>> @@ -1611,6 +1612,85 @@ static void test_analyze_script(void)
>>  test_migrate_end(from, to, false);
>>  cleanup("migfile");
>>  }
>> +
>> +static void test_vmstate_checker_script(void)
>> +{
>> +g_autofree gchar *cmd_src = NULL;
>> +g_autofree gchar *cmd_dst = NULL;
>> +g_autofree gchar *vmstate_src = NULL;
>> +g_autofree gchar *vmstate_dst = NULL;
>> +const char *machine_alias, *machine_opts = "";
>> +g_autofree char *machine = NULL;
>> +const char *arch = qtest_get_arch();
>> +int pid, wstatus;
>> +const char *python = g_getenv("PYTHON");
>> +
>> +if (!getenv(QEMU_ENV_SRC) && !getenv(QEMU_ENV_DST)) {
>> +g_test_skip("Test needs two different QEMU versions");
>> +return;
>> +}
>> +
>> +if (!python) {
>> +g_test_skip("PYTHON variable not set");
>> +return;
>> +}
>> +
>> +if (strcmp(arch, "i386") == 0 || strcmp(arch, "x86_64") == 0) {
>> +if (g_str_equal(arch, "i386")) {
>> +machine_alias = "pc";
>> +} else {
>> +machine_alias = "q35";
>> +}
>> +} else if (g_str_equal(arch, "s390x")) {
>> +machine_alias = "s390-ccw-virtio";
>> +} else if (strcmp(arch, "ppc64") == 0) {
>> +machine_alias = "pseries";
>> +} else if (strcmp(arch, "aarch64") == 0) {
>> +machine_alias = "virt";
>> +} else {
>> +g_assert_not_reached();
>> +}
>> +
>> +if (!qtest_has_machine(machine_alias)) {
>> +g_autofree char *msg = g_strdup_printf("machine %s not supported", 
>> machine_alias);
>> +g_test_skip(msg);
>> +return;
>> +}
>> +
>> +machine = resolve_machine_version(machine_alias, QEMU_ENV_SRC,
>> +  QEMU_ENV_DST);
>> +
>> +vmstate_src = g_strdup_printf("%s/vmstate-src", tmpfs);
>> +vmstate_dst = g_strdup_printf("%s/vmstate-dst", tmpfs);
>> +
>> +cmd_dst = g_strdup_printf("-machine %s,%s -dump-vmstate %s",
>> +  machine, machine_opts, vmstate_dst);
>> +cmd_src = g_strdup_printf("-machine %s,%s -dump-vmstate %s",
>> +  machine, machine_opts, vmstate_src);
>> +
>> +qtest_init_with_env_no_handshake(QEMU_ENV_SRC, cmd_src);
>> +qtest_init_with_env_no_handshake(QEMU_ENV_DST, cmd_dst);
>> +
>> +pid = fork();
>> +if (!pid) {
>> +close(1);
>> +open("/dev/null", O_WRONLY);
>> +execl(python, python, VMSTATE_CHECKER_SCRIPT,
>> +  "-s", vmstate_src,
>> +  "-d", vmstate_dst,
>> +  NULL);
>> +g_assert_not_reached();
>> +}
>> +
>> +g_assert(waitpid(pid, &wstatus, 0) == pid);
>> +if (!WIFEXITED(wstatus) || WEXITSTATUS(wstatus) != 0) {
>> +g_test_message("Failed to run vmstate-static-checker.py");
>> +g_test_fail();
>> +}
>> +
>> +cleanup("vmstate-src");
>> +cleanup("vmstate-dst");
>> +}
>
> Did I ask before on whether this can be written without C?

If you did I forgot about it, sorry.

> I think this and also the analyze-script are more suitable to be written in
> other ways, e.g., bash or python, no?
>

I would prefer not to fragment the test framework. There's a bunch of
infra already present in migration-test/libqtest that we would end up
having to rewrite in the other languages.

>>  #endif
>>  
>>  static void test_precopy_common(MigrateCo

Re: TCG change broke MorphOS boot on sam460ex

2024-05-27 Thread BALATON Zoltan


On Wed, 3 Apr 2024, Nicholas Piggin wrote:

On Tue Apr 2, 2024 at 9:32 PM AEST, BALATON Zoltan wrote:

On Thu, 21 Mar 2024, BALATON Zoltan wrote:

On 27/2/24 17:47, BALATON Zoltan wrote:

Hello,

Commit 18a536f1f8 (accel/tcg: Always require can_do_io) broke booting
MorphOS on sam460ex (this was before 8.2.0 and I thought I've verified it
before that release but apparently missed it back then). It can be
reproduced with https://www.morphos-team.net/morphos-3.18.iso and following
command:

qemu-system-ppc -M sam460ex -serial stdio -d unimp,guest_errors \
   -drive if=none,id=cd,format=raw,file=morphos-3.18.iso \
   -device ide-cd,drive=cd,bus=ide.1


Any idea on this one? While MorphOS boots on other machines and other OSes
seem to boot on this machine it may still suggest there's some problem
somewhere as this worked before. So it may be worth investigating it to make
sure there's no bug that could affect other OSes too even if they boot. I
don't know how to debug this so some help would be needed.


In the bad case it crashes after running this TB:


IN:
0x00c01354:  38c00040  li   r6, 0x40
0x00c01358:  38e10204  addi r7, r1, 0x204
0x00c0135c:  39010104  addi r8, r1, 0x104
0x00c01360:  39410004  addi r10, r1, 4
0x00c01364:  39200000  li   r9, 0
0x00c01368:  7cc903a6  mtctr    r6
0x00c0136c:  84c70004  lwzu r6, 4(r7)
0x00c01370:  7cc907a4  tlbwehi  r6, r9
0x00c01374:  84c80004  lwzu r6, 4(r8)
0x00c01378:  7cc90fa4  tlbwelo  r6, r9
0x00c0137c:  84ca0004  lwzu r6, 4(r10)
0x00c01380:  7cc917a4  tlbwehi  r6, r9
0x00c01384:  39290001  addi r9, r9, 1
0x00c01388:  4200ffe4  bdnz 0xc0136c

IN:
0x00c01374: unable to read memory


"unable to read memory" is the tracer, it does actually translate
the address, but it points to a wayward real address which returns
0 to TCG, which is an invalid instruction.

The good case instead doesn't exit the TB after 0x00c01370 but after
the complete loop at the bdnz. That looks like this after the same
first TB:


IN:
0x00c0136c:  84c70004  lwzu r6, 4(r7)
0x00c01370:  7cc907a4  tlbwehi  r6, r9
0x00c01374:  84c80004  lwzu r6, 4(r8)
0x00c01378:  7cc90fa4  tlbwelo  r6, r9
0x00c0137c:  84ca0004  lwzu r6, 4(r10)
0x00c01380:  7cc917a4  tlbwehi  r6, r9
0x00c01384:  39290001  addi r9, r9, 1
0x00c01388:  4200ffe4  bdnz 0xc0136c

IN:
0x00c0138c:  4c00012c  isync

All the tlbwe are executed in the same TB. MMU tracing shows the
first tlbwehi creates a new valid(!) TLB for 0x-0x1
that has a garbage RPN because the tlbwelo did not run yet.

What's happening in the bad case is that the translator breaks
and "re-fetches" instructions in the middle of that sequence, and
that's where the bogus translation causes 0 to be returned. In the
good case the whole block is executed in the same fetch, which
creates correct translations.

So it looks like a morphos bug, the can-do-io change just happens
to cause it to re-fetch in that place, but that could happen for
a number of reasons, so you can't rely on TLB *only* changing or
ifetch *only* re-fetching at a sync point like isync.

I would expect code like this to write an invalid entry with tlbwehi,
then tlbwelo to set the correct RPN, then make the entry valid with
the second tlbwehi. It would probably fix the bug if you just did the
first tlbwehi with r6=0 (or at least without the 0x200 bit set).


Revisiting this, I've found in the docs that PPC440 has shadow TLBs so 
this code can rely upon the TLB not being invalidated until isync and 
works on real machine but breaks on QEMU. We would either need to make 
sure the TB runs until the sync or somehow emulate the shadow TLB. I've 
experimented with the latter but I could not make it work (and 
unexpectedly keeping a cache of the most recently used entries is slower 
than always searching through all TLB entries as done now so I've 
abandoned that idea). The problem is that an entry is modified by multiple 
tlbwe instructions but these can come in any order (and sometimes only one 
of them is done like invalidating an entry seems to only do one write) so 
I don't know when to copy the new entry to the TLB and when to wait for 
more parts and keep the old one. Any idea how to fix this?


Also I'm not sure if it's related but by running the stream benchmark on 
sam460ex now I can reproduce some memory access problem but I'm not sure 
what causes it. The full output of that benchmark under AmigaOS on 
sam460ex is this:


-
STREAM version $Revision: 5.10 $
-
This system uses 8 bytes per array element.
-
Array size = 10000000 (elements), Offset = 0 (elements)
Memory per array = 76.3 MiB (= 0.1 GiB).
Total memory required = 228.9 MiB (= 0.2 GiB).
Each kernel will be executed 10 times.

[PATCH 16/18] tcg/loongarch64: Split out vdvjukN in tcg_out_vec_op

2024-05-27 Thread Richard Henderson

Fixes a bug in the immediate shifts, because the exact
encoding depends on the element size.

Signed-off-by: Richard Henderson 
---
 tcg/loongarch64/tcg-target.c.inc | 58 ++--
 1 file changed, 32 insertions(+), 26 deletions(-)

diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index 54f7bc9d14..5d2a6b2ca2 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -1901,6 +1901,9 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
 static const LoongArchInsn rotrv_vec_insn[4] = {
 OPC_VROTR_B, OPC_VROTR_H, OPC_VROTR_W, OPC_VROTR_D
 };
+static const LoongArchInsn rotri_vec_insn[4] = {
+OPC_VROTRI_B, OPC_VROTRI_H, OPC_VROTRI_W, OPC_VROTRI_D
+};
 
 a0 = args[0];
 a1 = args[1];
@@ -2034,15 +2037,6 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
 case INDEX_op_sarv_vec:
 insn = sarv_vec_insn[vece];
 goto vdvjvk;
-case INDEX_op_shli_vec:
-tcg_out32(s, encode_vdvjuk3_insn(shli_vec_insn[vece], a0, a1, a2));
-break;
-case INDEX_op_shri_vec:
-tcg_out32(s, encode_vdvjuk3_insn(shri_vec_insn[vece], a0, a1, a2));
-break;
-case INDEX_op_sari_vec:
-tcg_out32(s, encode_vdvjuk3_insn(sari_vec_insn[vece], a0, a1, a2));
-break;
 case INDEX_op_rotlv_vec:
 /* rotlv_vec a1, a2 = rotrv_vec a1, -a2 */
 tcg_out32(s, encode_vdvj_insn(neg_vec_insn[vece], TCG_VEC_TMP0, a2));
@@ -2051,26 +2045,20 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
 case INDEX_op_rotrv_vec:
 insn = rotrv_vec_insn[vece];
 goto vdvjvk;
+case INDEX_op_shli_vec:
+insn = shli_vec_insn[vece];
+goto vdvjukN;
+case INDEX_op_shri_vec:
+insn = shri_vec_insn[vece];
+goto vdvjukN;
+case INDEX_op_sari_vec:
+insn = sari_vec_insn[vece];
+goto vdvjukN;
 case INDEX_op_rotli_vec:
 /* rotli_vec a1, a2 = rotri_vec a1, -a2 */
 a2 = extract32(-a2, 0, 3 + vece);
-switch (vece) {
-case MO_8:
-tcg_out_opc_vrotri_b(s, a0, a1, a2);
-break;
-case MO_16:
-tcg_out_opc_vrotri_h(s, a0, a1, a2);
-break;
-case MO_32:
-tcg_out_opc_vrotri_w(s, a0, a1, a2);
-break;
-case MO_64:
-tcg_out_opc_vrotri_d(s, a0, a1, a2);
-break;
-default:
-g_assert_not_reached();
-}
-break;
+insn = rotri_vec_insn[vece];
+goto vdvjukN;
 case INDEX_op_bitsel_vec:
 /* vbitsel vd, vj, vk, va = bitsel_vec vd, va, vk, vj */
 tcg_out_opc_vbitsel_v(s, a0, a3, a2, a1);
@@ -2083,6 +2071,24 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
 vdvjvk:
 tcg_out32(s, encode_vdvjvk_insn(insn, a0, a1, a2));
 break;
+vdvjukN:
+switch (vece) {
+case MO_8:
+tcg_out32(s, encode_vdvjuk3_insn(insn, a0, a1, a2));
+break;
+case MO_16:
+tcg_out32(s, encode_vdvjuk4_insn(insn, a0, a1, a2));
+break;
+case MO_32:
+tcg_out32(s, encode_vdvjuk5_insn(insn, a0, a1, a2));
+break;
+case MO_64:
+tcg_out32(s, encode_vdvjuk6_insn(insn, a0, a1, a2));
+break;
+default:
+g_assert_not_reached();
+}
+break;
 }
 }
 
-- 
2.34.1

[PATCH 07/18] tcg/loongarch64: Support LASX in tcg_out_dup_vec

2024-05-27 Thread Richard Henderson

Signed-off-by: Richard Henderson 
---
 tcg/loongarch64/tcg-target.c.inc | 10 +++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index b1d652355d..cc54bc4a53 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -1674,12 +1674,16 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
 TCGReg rd, TCGReg rs)
 {
-static const LoongArchInsn repl_insn[4] = {
-OPC_VREPLGR2VR_B, OPC_VREPLGR2VR_H, OPC_VREPLGR2VR_W, OPC_VREPLGR2VR_D
+static const LoongArchInsn repl_insn[2][4] = {
+{ OPC_VREPLGR2VR_B, OPC_VREPLGR2VR_H,
+  OPC_VREPLGR2VR_W, OPC_VREPLGR2VR_D },
+{ OPC_XVREPLGR2VR_B, OPC_XVREPLGR2VR_H,
+  OPC_XVREPLGR2VR_W, OPC_XVREPLGR2VR_D },
 };
+bool lasx = type == TCG_TYPE_V256;
 
 tcg_debug_assert(vece <= MO_64);
-tcg_out32(s, encode_vdj_insn(repl_insn[vece], rd, rs));
+tcg_out32(s, encode_vdj_insn(repl_insn[lasx][vece], rd, rs));
 return true;
 }
 
-- 
2.34.1

[PATCH 05/18] util/loongarch64: Detect LASX vector support

2024-05-27 Thread Richard Henderson

Signed-off-by: Richard Henderson 
---
 host/include/loongarch64/host/cpuinfo.h | 1 +
 util/cpuinfo-loongarch.c| 1 +
 2 files changed, 2 insertions(+)

diff --git a/host/include/loongarch64/host/cpuinfo.h 
b/host/include/loongarch64/host/cpuinfo.h
index fab664a10b..d7bf27501d 100644
--- a/host/include/loongarch64/host/cpuinfo.h
+++ b/host/include/loongarch64/host/cpuinfo.h
@@ -8,6 +8,7 @@
 
 #define CPUINFO_ALWAYS  (1u << 0)  /* so cpuinfo is nonzero */
 #define CPUINFO_LSX (1u << 1)
+#define CPUINFO_LASX(1u << 2)
 
 /* Initialized with a constructor. */
 extern unsigned cpuinfo;
diff --git a/util/cpuinfo-loongarch.c b/util/cpuinfo-loongarch.c
index 08b6d7460c..bb1f7f698b 100644
--- a/util/cpuinfo-loongarch.c
+++ b/util/cpuinfo-loongarch.c
@@ -29,6 +29,7 @@ unsigned __attribute__((constructor)) cpuinfo_init(void)
 
 info = CPUINFO_ALWAYS;
 info |= (hwcap & HWCAP_LOONGARCH_LSX ? CPUINFO_LSX : 0);
+info |= (hwcap & HWCAP_LOONGARCH_LASX ? CPUINFO_LASX : 0);
 
 cpuinfo = info;
 return info;
-- 
2.34.1

[PATCH 14/18] tcg/loongarch64: Support LASX in tcg_out_{mov,ld,st}

2024-05-27 Thread Richard Henderson

Signed-off-by: Richard Henderson 
---
 tcg/loongarch64/tcg-target.c.inc | 19 +++
 1 file changed, 19 insertions(+)

diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index 5f4915c6ac..e633d268d0 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -325,6 +325,9 @@ static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg 
ret, TCGReg arg)
 case TCG_TYPE_V128:
 tcg_out_opc_vori_b(s, ret, arg, 0);
 break;
+case TCG_TYPE_V256:
+tcg_out_opc_xvori_b(s, ret, arg, 0);
+break;
 default:
 g_assert_not_reached();
 }
@@ -854,6 +857,14 @@ static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg 
dest,
 tcg_out_opc_vldx(s, dest, base, TCG_REG_TMP0);
 }
 break;
+case TCG_TYPE_V256:
+if (-0x800 <= offset && offset <= 0x7ff) {
+tcg_out_opc_xvld(s, dest, base, offset);
+} else {
+tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP0, offset);
+tcg_out_opc_xvldx(s, dest, base, TCG_REG_TMP0);
+}
+break;
 default:
 g_assert_not_reached();
 }
@@ -886,6 +897,14 @@ static void tcg_out_st(TCGContext *s, TCGType type, TCGReg 
src,
 tcg_out_opc_vstx(s, src, base, TCG_REG_TMP0);
 }
 break;
+case TCG_TYPE_V256:
+if (-0x800 <= offset && offset <= 0x7ff) {
+tcg_out_opc_xvst(s, src, base, offset);
+} else {
+tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP0, offset);
+tcg_out_opc_xvstx(s, src, base, TCG_REG_TMP0);
+}
+break;
 default:
 g_assert_not_reached();
 }
-- 
2.34.1

[PATCH 10/18] tcg/loongarch64: Support LASX in tcg_out_dupi_vec

2024-05-27 Thread Richard Henderson

Signed-off-by: Richard Henderson 
---
 tcg/loongarch64/tcg-target.c.inc | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index 9a8f67cf3e..c7d0c7839b 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -1743,7 +1743,12 @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType 
type, unsigned vece,
 int64_t value = sextract64(v64, 0, 8 << vece);
 if (-0x200 <= value && value <= 0x1FF) {
 uint32_t imm = (vece << 10) | ((uint32_t)v64 & 0x3FF);
-tcg_out_opc_vldi(s, rd, imm);
+
+if (type == TCG_TYPE_V256) {
+tcg_out_opc_xvldi(s, rd, imm);
+} else {
+tcg_out_opc_vldi(s, rd, imm);
+}
 return;
 }
 
-- 
2.34.1

[PATCH 13/18] tcg/loongarch64: Split out vdvjvk in tcg_out_vec_op

2024-05-27 Thread Richard Henderson

Signed-off-by: Richard Henderson 
---
 tcg/loongarch64/tcg-target.c.inc | 119 ---
 1 file changed, 63 insertions(+), 56 deletions(-)

diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index 652aa261a3..5f4915c6ac 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -1900,49 +1900,55 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
 tcg_out_ld(s, type, a0, a1, a2);
 break;
 case INDEX_op_and_vec:
-tcg_out_opc_vand_v(s, a0, a1, a2);
-break;
+insn = OPC_VAND_V;
+goto vdvjvk;
 case INDEX_op_andc_vec:
 /*
  * vandn vd, vj, vk: vd = vk & ~vj
  * andc_vec vd, vj, vk: vd = vj & ~vk
- * vk and vk are swapped
+ * vj and vk are swapped
  */
-tcg_out_opc_vandn_v(s, a0, a2, a1);
-break;
+a1 = a2;
+a2 = args[1];
+insn = OPC_VANDN_V;
+goto vdvjvk;
 case INDEX_op_or_vec:
-tcg_out_opc_vor_v(s, a0, a1, a2);
-break;
+insn = OPC_VOR_V;
+goto vdvjvk;
 case INDEX_op_orc_vec:
-tcg_out_opc_vorn_v(s, a0, a1, a2);
-break;
+insn = OPC_VORN_V;
+goto vdvjvk;
 case INDEX_op_xor_vec:
-tcg_out_opc_vxor_v(s, a0, a1, a2);
-break;
-case INDEX_op_nor_vec:
-tcg_out_opc_vnor_v(s, a0, a1, a2);
-break;
+insn = OPC_VXOR_V;
+goto vdvjvk;
 case INDEX_op_not_vec:
-tcg_out_opc_vnor_v(s, a0, a1, a1);
-break;
+a2 = a1;
+/* fall through */
+case INDEX_op_nor_vec:
+insn = OPC_VNOR_V;
+goto vdvjvk;
 case INDEX_op_cmp_vec:
 {
 TCGCond cond = args[3];
+
 if (const_args[2]) {
 /*
  * cmp_vec dest, src, value
  * Try vseqi/vslei/vslti
  */
 int64_t value = sextract64(a2, 0, 8 << vece);
-if ((cond == TCG_COND_EQ || cond == TCG_COND_LE || \
- cond == TCG_COND_LT) && (-0x10 <= value && value <= 0x0f)) {
-tcg_out32(s, encode_vdvjsk5_insn(cmp_vec_imm_insn[cond][vece], \
- a0, a1, value));
+if ((cond == TCG_COND_EQ ||
+ cond == TCG_COND_LE ||
+ cond == TCG_COND_LT) &&
+(-0x10 <= value && value <= 0x0f)) {
+insn = cmp_vec_imm_insn[cond][vece];
+tcg_out32(s, encode_vdvjsk5_insn(insn, a0, a1, value));
 break;
-} else if ((cond == TCG_COND_LEU || cond == TCG_COND_LTU) &&
-(0x00 <= value && value <= 0x1f)) {
-tcg_out32(s, encode_vdvjuk5_insn(cmp_vec_imm_insn[cond][vece], \
- a0, a1, value));
+} else if ((cond == TCG_COND_LEU ||
+cond == TCG_COND_LTU) &&
+   (0x00 <= value && value <= 0x1f)) {
+insn = cmp_vec_imm_insn[cond][vece];
+tcg_out32(s, encode_vdvjuk5_insn(insn, a0, a1, value));
 break;
 }
 
@@ -1963,9 +1969,8 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
 insn = cmp_vec_insn[cond][vece];
 tcg_debug_assert(insn != 0);
 }
-tcg_out32(s, encode_vdvjvk_insn(insn, a0, a1, a2));
 }
-break;
+goto vdvjvk;
 case INDEX_op_add_vec:
 tcg_out_addsub_vec(s, false, vece, a0, a1, a2, const_args[2], true);
 break;
@@ -1976,41 +1981,41 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
 tcg_out32(s, encode_vdvj_insn(neg_vec_insn[vece], a0, a1));
 break;
 case INDEX_op_mul_vec:
-tcg_out32(s, encode_vdvjvk_insn(mul_vec_insn[vece], a0, a1, a2));
-break;
+insn = mul_vec_insn[vece];
+goto vdvjvk;
 case INDEX_op_smin_vec:
-tcg_out32(s, encode_vdvjvk_insn(smin_vec_insn[vece], a0, a1, a2));
-break;
+insn = smin_vec_insn[vece];
+goto vdvjvk;
 case INDEX_op_smax_vec:
-tcg_out32(s, encode_vdvjvk_insn(smax_vec_insn[vece], a0, a1, a2));
-break;
+insn = smax_vec_insn[vece];
+goto vdvjvk;
 case INDEX_op_umin_vec:
-tcg_out32(s, encode_vdvjvk_insn(umin_vec_insn[vece], a0, a1, a2));
-break;
+insn = umin_vec_insn[vece];
+goto vdvjvk;
 case INDEX_op_umax_vec:
-tcg_out32(s, encode_vdvjvk_insn(umax_vec_insn[vece], a0, a1, a2));
-break;
+insn = umax_vec_insn[vece];
+goto vdvjvk;
 case INDEX_op_ssadd_vec:
-tcg_out32(s, encode_vdvjvk_insn(ssadd_vec_insn[vece], a0, a1, a2));
-break;
+insn = ssadd_vec_insn[vece];
+goto vdvjvk;

[PATCH 04/18] tcg/loongarch64: Support TCG_TYPE_V64

2024-05-27 Thread Richard Henderson

We can implement this with fld_d, fst_d for load and store,
and then use the normal v128 operations in registers.
This will improve support for guests which use v64.

Signed-off-by: Richard Henderson 
---
 tcg/loongarch64/tcg-target.h | 2 +-
 tcg/loongarch64/tcg-target.c.inc | 8 ++--
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/tcg/loongarch64/tcg-target.h b/tcg/loongarch64/tcg-target.h
index 29e4860d20..990bad1d51 100644
--- a/tcg/loongarch64/tcg-target.h
+++ b/tcg/loongarch64/tcg-target.h
@@ -171,7 +171,7 @@ typedef enum {
 
 #define TCG_TARGET_HAS_tst  0
 
-#define TCG_TARGET_HAS_v64  0
+#define TCG_TARGET_HAS_v64  (cpuinfo & CPUINFO_LSX)
 #define TCG_TARGET_HAS_v128 (cpuinfo & CPUINFO_LSX)
 #define TCG_TARGET_HAS_v256 0
 
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index de5369536e..980ea10211 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -321,6 +321,7 @@ static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg 
ret, TCGReg arg)
 }
 }
 break;
+case TCG_TYPE_V64:
 case TCG_TYPE_V128:
 tcg_out_opc_vori_b(s, ret, arg, 0);
 break;
@@ -838,6 +839,7 @@ static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg 
dest,
 }
 break;
 case TCG_TYPE_I64:
+case TCG_TYPE_V64:
 if (dest < TCG_REG_V0) {
 tcg_out_ldst(s, OPC_LD_D, dest, base, offset);
 } else {
@@ -869,6 +871,7 @@ static void tcg_out_st(TCGContext *s, TCGType type, TCGReg 
src,
 }
 break;
 case TCG_TYPE_I64:
+case TCG_TYPE_V64:
 if (src < TCG_REG_V0) {
 tcg_out_ldst(s, OPC_ST_D, src, base, offset);
 } else {
@@ -1880,8 +1883,8 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
 a2 = args[2];
 a3 = args[3];
 
-/* Currently only supports V128 */
-tcg_debug_assert(type == TCG_TYPE_V128);
+/* Currently only supports V64 & V128 */
+tcg_debug_assert(type == TCG_TYPE_V64 || type == TCG_TYPE_V128);
 
 switch (opc) {
 case INDEX_op_st_vec:
@@ -2394,6 +2397,7 @@ static void tcg_target_init(TCGContext *s)
 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S9);
 
 if (cpuinfo & CPUINFO_LSX) {
+tcg_target_available_regs[TCG_TYPE_V64] = ALL_VECTOR_REGS;
 tcg_target_available_regs[TCG_TYPE_V128] = ALL_VECTOR_REGS;
 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V24);
 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V25);
-- 
2.34.1

[PATCH 12/18] tcg/loongarch64: Support LASX in tcg_out_addsub_vec

2024-05-27 Thread Richard Henderson

Signed-off-by: Richard Henderson 
---
 tcg/loongarch64/tcg-target.c.inc | 36 ++--
 1 file changed, 20 insertions(+), 16 deletions(-)

diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index 47011488dd..652aa261a3 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -1758,21 +1758,25 @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType 
type, unsigned vece,
 tcg_out_dup_vec(s, type, vece, rd, TCG_REG_TMP0);
 }
 
-static void tcg_out_addsub_vec(TCGContext *s, unsigned vece, const TCGArg a0,
-   const TCGArg a1, const TCGArg a2,
+static void tcg_out_addsub_vec(TCGContext *s, bool lasx, unsigned vece,
+   TCGArg a0, TCGArg a1, TCGArg a2,
bool a2_is_const, bool is_add)
 {
-static const LoongArchInsn add_vec_insn[4] = {
-OPC_VADD_B, OPC_VADD_H, OPC_VADD_W, OPC_VADD_D
+static const LoongArchInsn add_vec_insn[2][4] = {
+{ OPC_VADD_B, OPC_VADD_H, OPC_VADD_W, OPC_VADD_D },
+{ OPC_XVADD_B, OPC_XVADD_H, OPC_XVADD_W, OPC_XVADD_D },
 };
-static const LoongArchInsn add_vec_imm_insn[4] = {
-OPC_VADDI_BU, OPC_VADDI_HU, OPC_VADDI_WU, OPC_VADDI_DU
+static const LoongArchInsn add_vec_imm_insn[2][4] = {
+{ OPC_VADDI_BU, OPC_VADDI_HU, OPC_VADDI_WU, OPC_VADDI_DU },
+{ OPC_XVADDI_BU, OPC_XVADDI_HU, OPC_XVADDI_WU, OPC_XVADDI_DU },
 };
-static const LoongArchInsn sub_vec_insn[4] = {
-OPC_VSUB_B, OPC_VSUB_H, OPC_VSUB_W, OPC_VSUB_D
+static const LoongArchInsn sub_vec_insn[2][4] = {
+{ OPC_VSUB_B, OPC_VSUB_H, OPC_VSUB_W, OPC_VSUB_D },
+{ OPC_XVSUB_B, OPC_XVSUB_H, OPC_XVSUB_W, OPC_XVSUB_D },
 };
-static const LoongArchInsn sub_vec_imm_insn[4] = {
-OPC_VSUBI_BU, OPC_VSUBI_HU, OPC_VSUBI_WU, OPC_VSUBI_DU
+static const LoongArchInsn sub_vec_imm_insn[2][4] = {
+{ OPC_VSUBI_BU, OPC_VSUBI_HU, OPC_VSUBI_WU, OPC_VSUBI_DU },
+{ OPC_XVSUBI_BU, OPC_XVSUBI_HU, OPC_XVSUBI_WU, OPC_XVSUBI_DU },
 };
 LoongArchInsn insn;
 
@@ -1783,10 +1787,10 @@ static void tcg_out_addsub_vec(TCGContext *s, unsigned 
vece, const TCGArg a0,
 value = -value;
 }
 if (value < 0) {
-insn = sub_vec_imm_insn[vece];
+insn = sub_vec_imm_insn[lasx][vece];
 value = -value;
 } else {
-insn = add_vec_imm_insn[vece];
+insn = add_vec_imm_insn[lasx][vece];
 }
 
 /* Constraint TCG_CT_CONST_VADD ensures validity. */
@@ -1797,9 +1801,9 @@ static void tcg_out_addsub_vec(TCGContext *s, unsigned 
vece, const TCGArg a0,
 }
 
 if (is_add) {
-insn = add_vec_insn[vece];
+insn = add_vec_insn[lasx][vece];
 } else {
-insn = sub_vec_insn[vece];
+insn = sub_vec_insn[lasx][vece];
 }
 tcg_out32(s, encode_vdvjvk_insn(insn, a0, a1, a2));
 }
@@ -1963,10 +1967,10 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
 }
 break;
 case INDEX_op_add_vec:
-tcg_out_addsub_vec(s, vece, a0, a1, a2, const_args[2], true);
+tcg_out_addsub_vec(s, false, vece, a0, a1, a2, const_args[2], true);
 break;
 case INDEX_op_sub_vec:
-tcg_out_addsub_vec(s, vece, a0, a1, a2, const_args[2], false);
+tcg_out_addsub_vec(s, false, vece, a0, a1, a2, const_args[2], false);
 break;
 case INDEX_op_neg_vec:
 tcg_out32(s, encode_vdvj_insn(neg_vec_insn[vece], a0, a1));
-- 
2.34.1

[PATCH 18/18] tcg/loongarch64: Enable v256 with LASX

2024-05-27 Thread Richard Henderson

Signed-off-by: Richard Henderson 
---
 tcg/loongarch64/tcg-target.h |  2 +-
 tcg/loongarch64/tcg-target.c.inc | 11 ---
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/tcg/loongarch64/tcg-target.h b/tcg/loongarch64/tcg-target.h
index 990bad1d51..58bd7d258e 100644
--- a/tcg/loongarch64/tcg-target.h
+++ b/tcg/loongarch64/tcg-target.h
@@ -173,7 +173,7 @@ typedef enum {
 
 #define TCG_TARGET_HAS_v64  (cpuinfo & CPUINFO_LSX)
 #define TCG_TARGET_HAS_v128 (cpuinfo & CPUINFO_LSX)
-#define TCG_TARGET_HAS_v256 0
+#define TCG_TARGET_HAS_v256 (cpuinfo & CPUINFO_LASX)
 
 #define TCG_TARGET_HAS_not_vec  1
 #define TCG_TARGET_HAS_neg_vec  1
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index e2b5aad5e3..0b41b807e3 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -2484,9 +2484,14 @@ static void tcg_target_init(TCGContext *s)
 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S8);
 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S9);
 
-if (cpuinfo & CPUINFO_LSX) {
-tcg_target_available_regs[TCG_TYPE_V64] = ALL_VECTOR_REGS;
-tcg_target_available_regs[TCG_TYPE_V128] = ALL_VECTOR_REGS;
+if (cpuinfo & (CPUINFO_LSX | CPUINFO_LASX)) {
+if (cpuinfo & CPUINFO_LSX) {
+tcg_target_available_regs[TCG_TYPE_V64] = ALL_VECTOR_REGS;
+tcg_target_available_regs[TCG_TYPE_V128] = ALL_VECTOR_REGS;
+}
+if (cpuinfo & CPUINFO_LASX) {
+tcg_target_available_regs[TCG_TYPE_V256] = ALL_VECTOR_REGS;
+}
 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V24);
 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V25);
 tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V26);
-- 
2.34.1

[PATCH 11/18] tcg/loongarch64: Simplify tcg_out_addsub_vec

2024-05-27 Thread Richard Henderson

Signed-off-by: Richard Henderson 
---
 tcg/loongarch64/tcg-target.c.inc | 29 +++--
 1 file changed, 15 insertions(+), 14 deletions(-)

diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index c7d0c7839b..47011488dd 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -1774,33 +1774,34 @@ static void tcg_out_addsub_vec(TCGContext *s, unsigned 
vece, const TCGArg a0,
 static const LoongArchInsn sub_vec_imm_insn[4] = {
 OPC_VSUBI_BU, OPC_VSUBI_HU, OPC_VSUBI_WU, OPC_VSUBI_DU
 };
+LoongArchInsn insn;
 
 if (a2_is_const) {
 int64_t value = sextract64(a2, 0, 8 << vece);
+
 if (!is_add) {
 value = -value;
 }
-
-/* Try vaddi/vsubi */
-if (0 <= value && value <= 0x1f) {
-tcg_out32(s, encode_vdvjuk5_insn(add_vec_imm_insn[vece], a0, \
- a1, value));
-return;
-} else if (-0x1f <= value && value < 0) {
-tcg_out32(s, encode_vdvjuk5_insn(sub_vec_imm_insn[vece], a0, \
- a1, -value));
-return;
+if (value < 0) {
+insn = sub_vec_imm_insn[vece];
+value = -value;
+} else {
+insn = add_vec_imm_insn[vece];
 }
 
-/* constraint TCG_CT_CONST_VADD ensures unreachable */
-g_assert_not_reached();
+/* Constraint TCG_CT_CONST_VADD ensures validity. */
+tcg_debug_assert(0 <= value && value <= 0x1f);
+
+tcg_out32(s, encode_vdvjuk5_insn(insn, a0, a1, value));
+return;
 }
 
 if (is_add) {
-tcg_out32(s, encode_vdvjvk_insn(add_vec_insn[vece], a0, a1, a2));
+insn = add_vec_insn[vece];
 } else {
-tcg_out32(s, encode_vdvjvk_insn(sub_vec_insn[vece], a0, a1, a2));
+insn = sub_vec_insn[vece];
 }
+tcg_out32(s, encode_vdvjvk_insn(insn, a0, a1, a2));
 }
 
 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
-- 
2.34.1

[PATCH 00/18] tcg/loongarch64: Support v64 and v256

2024-05-27 Thread Richard Henderson

Some guests only have, or additionally have, 64-bit vectors
(for example arm, mips, and sparc), so it's best to enable this
whenever we can.  As with tcg/i386, use 64-bit loads and stores
but 128-bit vector registers.

If LASX is available (all current loongarch64 cpus?), we have
256-bit vectors as well.  This is useful for guests that support
such things, e.g. aarch64, i386, and loongarch64 itself.


r~


Richard Henderson (18):
  tcg/loongarch64: Import LASX, FP insns
  tcg/loongarch64: Use fp load/store for I32 and I64 into vector regs
  tcg/loongarch64: Handle i32 and i64 moves between gr and fr
  tcg/loongarch64: Support TCG_TYPE_V64
  util/loongarch64: Detect LASX vector support
  tcg/loongarch64: Simplify tcg_out_dup_vec
  tcg/loongarch64: Support LASX in tcg_out_dup_vec
  tcg/loongarch64: Support LASX in tcg_out_dupm_vec
  tcg/loongarch64: Use tcg_out_dup_vec in tcg_out_dupi_vec
  tcg/loongarch64: Support LASX in tcg_out_dupi_vec
  tcg/loongarch64: Simplify tcg_out_addsub_vec
  tcg/loongarch64: Support LASX in tcg_out_addsub_vec
  tcg/loongarch64: Split out vdvjvk in tcg_out_vec_op
  tcg/loongarch64: Support LASX in tcg_out_{mov,ld,st}
  tcg/loongarch64: Remove temp_vec from tcg_out_vec_op
  tcg/loongarch64: Split out vdvjukN in tcg_out_vec_op
  tcg/loongarch64: Support LASX in tcg_out_vec_op
  tcg/loongarch64: Enable v256 with LASX

 host/include/loongarch64/host/cpuinfo.h |1 +
 tcg/loongarch64/tcg-target.h|4 +-
 util/cpuinfo-loongarch.c|1 +
 tcg/loongarch64/tcg-insn-defs.c.inc | 6181 ---
 tcg/loongarch64/tcg-target.c.inc|  575 ++-
 5 files changed, 2461 insertions(+), 4301 deletions(-)

-- 
2.34.1

[PATCH 15/18] tcg/loongarch64: Remove temp_vec from tcg_out_vec_op

2024-05-27 Thread Richard Henderson

Use TCG_VEC_TMP0 directly.

Signed-off-by: Richard Henderson 
---
 tcg/loongarch64/tcg-target.c.inc | 9 -
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index e633d268d0..54f7bc9d14 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -1834,7 +1834,6 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
 {
 TCGType type = vecl + TCG_TYPE_V64;
 TCGArg a0, a1, a2, a3;
-TCGReg temp_vec = TCG_VEC_TMP0;
 
 static const LoongArchInsn cmp_vec_insn[16][4] = {
 [TCG_COND_EQ] = {OPC_VSEQ_B, OPC_VSEQ_H, OPC_VSEQ_W, OPC_VSEQ_D},
@@ -1976,8 +1975,8 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
  * dupi_vec temp, a2
  * cmp_vec a0, a1, temp, cond
  */
-tcg_out_dupi_vec(s, type, vece, temp_vec, a2);
-a2 = temp_vec;
+tcg_out_dupi_vec(s, type, vece, TCG_VEC_TMP0, a2);
+a2 = TCG_VEC_TMP0;
 }
 
 insn = cmp_vec_insn[cond][vece];
@@ -2046,8 +2045,8 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
 break;
 case INDEX_op_rotlv_vec:
 /* rotlv_vec a1, a2 = rotrv_vec a1, -a2 */
-tcg_out32(s, encode_vdvj_insn(neg_vec_insn[vece], temp_vec, a2));
-a2 = temp_vec;
+tcg_out32(s, encode_vdvj_insn(neg_vec_insn[vece], TCG_VEC_TMP0, a2));
+a2 = TCG_VEC_TMP0;
 /* fall through */
 case INDEX_op_rotrv_vec:
 insn = rotrv_vec_insn[vece];
-- 
2.34.1

[PATCH 06/18] tcg/loongarch64: Simplify tcg_out_dup_vec

2024-05-27 Thread Richard Henderson

Signed-off-by: Richard Henderson 
---
 tcg/loongarch64/tcg-target.c.inc | 22 ++
 1 file changed, 6 insertions(+), 16 deletions(-)

diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index 980ea10211..b1d652355d 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -1674,22 +1674,12 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
 TCGReg rd, TCGReg rs)
 {
-switch (vece) {
-case MO_8:
-tcg_out_opc_vreplgr2vr_b(s, rd, rs);
-break;
-case MO_16:
-tcg_out_opc_vreplgr2vr_h(s, rd, rs);
-break;
-case MO_32:
-tcg_out_opc_vreplgr2vr_w(s, rd, rs);
-break;
-case MO_64:
-tcg_out_opc_vreplgr2vr_d(s, rd, rs);
-break;
-default:
-g_assert_not_reached();
-}
+static const LoongArchInsn repl_insn[4] = {
+OPC_VREPLGR2VR_B, OPC_VREPLGR2VR_H, OPC_VREPLGR2VR_W, OPC_VREPLGR2VR_D
+};
+
+tcg_debug_assert(vece <= MO_64);
+tcg_out32(s, encode_vdj_insn(repl_insn[vece], rd, rs));
 return true;
 }
 
-- 
2.34.1

[PATCH 08/18] tcg/loongarch64: Support LASX in tcg_out_dupm_vec

2024-05-27 Thread Richard Henderson

Each element size has a different encoding, so code cannot
be shared in the same way as with tcg_out_dup_vec.

Signed-off-by: Richard Henderson 
---
 tcg/loongarch64/tcg-target.c.inc | 30 --
 1 file changed, 24 insertions(+), 6 deletions(-)

diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index cc54bc4a53..1e721b8b20 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -1690,8 +1690,10 @@ static bool tcg_out_dup_vec(TCGContext *s, TCGType type, 
unsigned vece,
 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
  TCGReg r, TCGReg base, intptr_t offset)
 {
-/* Handle imm overflow and division (vldrepl.d imm is divided by 8) */
-if (offset < -0x800 || offset > 0x7ff || \
+bool lasx = type == TCG_TYPE_V256;
+
+/* Handle imm overflow and division (vldrepl.d imm is divided by 8). */
+if (offset < -0x800 || offset > 0x7ff ||
 (offset & ((1 << vece) - 1)) != 0) {
 tcg_out_addi(s, TCG_TYPE_I64, TCG_REG_TMP0, base, offset);
 base = TCG_REG_TMP0;
@@ -1701,16 +1703,32 @@ static bool tcg_out_dupm_vec(TCGContext *s, TCGType 
type, unsigned vece,
 
 switch (vece) {
 case MO_8:
-tcg_out_opc_vldrepl_b(s, r, base, offset);
+if (lasx) {
+tcg_out_opc_xvldrepl_b(s, r, base, offset);
+} else {
+tcg_out_opc_vldrepl_b(s, r, base, offset);
+}
 break;
 case MO_16:
-tcg_out_opc_vldrepl_h(s, r, base, offset);
+if (lasx) {
+tcg_out_opc_xvldrepl_h(s, r, base, offset);
+} else {
+tcg_out_opc_vldrepl_h(s, r, base, offset);
+}
 break;
 case MO_32:
-tcg_out_opc_vldrepl_w(s, r, base, offset);
+if (lasx) {
+tcg_out_opc_xvldrepl_w(s, r, base, offset);
+} else {
+tcg_out_opc_vldrepl_w(s, r, base, offset);
+}
 break;
 case MO_64:
-tcg_out_opc_vldrepl_d(s, r, base, offset);
+if (lasx) {
+tcg_out_opc_xvldrepl_d(s, r, base, offset);
+} else {
+tcg_out_opc_vldrepl_d(s, r, base, offset);
+}
 break;
 default:
 g_assert_not_reached();
-- 
2.34.1

[PATCH 09/18] tcg/loongarch64: Use tcg_out_dup_vec in tcg_out_dupi_vec

2024-05-27 Thread Richard Henderson

Signed-off-by: Richard Henderson 
---
 tcg/loongarch64/tcg-target.c.inc | 18 +-
 1 file changed, 1 insertion(+), 17 deletions(-)

diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index 1e721b8b20..9a8f67cf3e 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -1749,24 +1749,8 @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType 
type, unsigned vece,
 
 /* TODO: vldi patterns when imm 12 is set */
 
-/* Fallback to vreplgr2vr */
 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, value);
-switch (vece) {
-case MO_8:
-tcg_out_opc_vreplgr2vr_b(s, rd, TCG_REG_TMP0);
-break;
-case MO_16:
-tcg_out_opc_vreplgr2vr_h(s, rd, TCG_REG_TMP0);
-break;
-case MO_32:
-tcg_out_opc_vreplgr2vr_w(s, rd, TCG_REG_TMP0);
-break;
-case MO_64:
-tcg_out_opc_vreplgr2vr_d(s, rd, TCG_REG_TMP0);
-break;
-default:
-g_assert_not_reached();
-}
+tcg_out_dup_vec(s, type, vece, rd, TCG_REG_TMP0);
 }
 
 static void tcg_out_addsub_vec(TCGContext *s, unsigned vece, const TCGArg a0,
-- 
2.34.1

1 2 3 4 >

1 - 100 of 370 matches

Mail list logo