[PATCH v2 2/9] spapr: prevent hdec timer being set up under virtual hypervisor

2022-02-16 Thread Nicholas Piggin
The spapr virtual hypervisor does not require the hdecr timer.
Remove it.

Reviewed-by: Daniel Henrique Barboza 
Reviewed-by: Cédric Le Goater 
Signed-off-by: Nicholas Piggin 
---
 hw/ppc/ppc.c| 2 +-
 hw/ppc/spapr_cpu_core.c | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c
index ba7fa0f3b5..c6dfc5975f 100644
--- a/hw/ppc/ppc.c
+++ b/hw/ppc/ppc.c
@@ -1072,7 +1072,7 @@ clk_setup_cb cpu_ppc_tb_init (CPUPPCState *env, uint32_t 
freq)
 }
 /* Create new timer */
 tb_env->decr_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, &cpu_ppc_decr_cb, 
cpu);
-if (env->has_hv_mode) {
+if (env->has_hv_mode && !cpu->vhyp) {
 tb_env->hdecr_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, 
&cpu_ppc_hdecr_cb,
 cpu);
 } else {
diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c
index a781e97f8d..ed84713960 100644
--- a/hw/ppc/spapr_cpu_core.c
+++ b/hw/ppc/spapr_cpu_core.c
@@ -261,12 +261,12 @@ static bool spapr_realize_vcpu(PowerPCCPU *cpu, 
SpaprMachineState *spapr,
 return false;
 }
 
-/* Set time-base frequency to 512 MHz */
-cpu_ppc_tb_init(env, SPAPR_TIMEBASE_FREQ);
-
 cpu_ppc_set_vhyp(cpu, PPC_VIRTUAL_HYPERVISOR(spapr));
 kvmppc_set_papr(cpu);
 
+/* Set time-base frequency to 512 MHz. vhyp must be set first. */
+cpu_ppc_tb_init(env, SPAPR_TIMEBASE_FREQ);
+
 if (spapr_irq_cpu_intc_create(spapr, cpu, errp) < 0) {
 qdev_unrealize(DEVICE(cpu));
 return false;
-- 
2.23.0




[PATCH v2 4/9] target/ppc: add vhyp addressing mode helper for radix MMU

2022-02-16 Thread Nicholas Piggin
The radix on vhyp MMU uses a single-level radix table walk, with the
partition scope mapping provided by the flat QEMU machine memory.

A subsequent change will use the two-level radix walk on vhyp in some
situations, so provide a helper which can abstract that logic.

Reviewed-by: Cédric Le Goater 
Signed-off-by: Nicholas Piggin 
---
 target/ppc/mmu-radix64.c | 19 +++
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c
index df2fec80ce..5535f0fe20 100644
--- a/target/ppc/mmu-radix64.c
+++ b/target/ppc/mmu-radix64.c
@@ -354,6 +354,17 @@ static int ppc_radix64_partition_scoped_xlate(PowerPCCPU 
*cpu,
 return 0;
 }
 
+/*
+ * The spapr vhc has a flat partition scope provided by qemu memory.
+ */
+static bool vhyp_flat_addressing(PowerPCCPU *cpu)
+{
+if (cpu->vhyp) {
+return true;
+}
+return false;
+}
+
 static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu,
 MMUAccessType access_type,
 vaddr eaddr, uint64_t pid,
@@ -385,7 +396,7 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu,
 }
 prtbe_addr = (pate.dw1 & PATE1_R_PRTB) + offset;
 
-if (cpu->vhyp) {
+if (vhyp_flat_addressing(cpu)) {
 prtbe0 = ldq_phys(cs->as, prtbe_addr);
 } else {
 /*
@@ -411,7 +422,7 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu,
 *g_page_size = PRTBE_R_GET_RTS(prtbe0);
 base_addr = prtbe0 & PRTBE_R_RPDB;
 nls = prtbe0 & PRTBE_R_RPDS;
-if (msr_hv || cpu->vhyp) {
+if (msr_hv || vhyp_flat_addressing(cpu)) {
 /*
  * Can treat process table addresses as real addresses
  */
@@ -515,7 +526,7 @@ static bool ppc_radix64_xlate_impl(PowerPCCPU *cpu, vaddr 
eaddr,
 relocation = !mmuidx_real(mmu_idx);
 
 /* HV or virtual hypervisor Real Mode Access */
-if (!relocation && (mmuidx_hv(mmu_idx) || cpu->vhyp)) {
+if (!relocation && (mmuidx_hv(mmu_idx) || vhyp_flat_addressing(cpu))) {
 /* In real mode top 4 effective addr bits (mostly) ignored */
 *raddr = eaddr & 0x0FFFULL;
 
@@ -592,7 +603,7 @@ static bool ppc_radix64_xlate_impl(PowerPCCPU *cpu, vaddr 
eaddr,
 g_raddr = eaddr & R_EADDR_MASK;
 }
 
-if (cpu->vhyp) {
+if (vhyp_flat_addressing(cpu)) {
 *raddr = g_raddr;
 } else {
 /*
-- 
2.23.0




[PATCH v2 1/9] target/ppc: raise HV interrupts for partition table entry problems

2022-02-16 Thread Nicholas Piggin
Invalid or missing partition table entry exceptions should cause HV
interrupts. HDSISR is set to bad MMU config, which is consistent with
the ISA and experimentally matches what POWER9 generates.

Reviewed-by: Fabiano Rosas 
Reviewed-by: Daniel Henrique Barboza 
Signed-off-by: Nicholas Piggin 
---
 target/ppc/mmu-radix64.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c
index d4e16bd7db..df2fec80ce 100644
--- a/target/ppc/mmu-radix64.c
+++ b/target/ppc/mmu-radix64.c
@@ -556,13 +556,13 @@ static bool ppc_radix64_xlate_impl(PowerPCCPU *cpu, vaddr 
eaddr,
 } else {
 if (!ppc64_v3_get_pate(cpu, lpid, &pate)) {
 if (guest_visible) {
-ppc_radix64_raise_si(cpu, access_type, eaddr, DSISR_NOPTE);
+ppc_radix64_raise_hsi(cpu, access_type, eaddr, eaddr, 
DSISR_R_BADCONFIG);
 }
 return false;
 }
 if (!validate_pate(cpu, lpid, &pate)) {
 if (guest_visible) {
-ppc_radix64_raise_si(cpu, access_type, eaddr, 
DSISR_R_BADCONFIG);
+ppc_radix64_raise_hsi(cpu, access_type, eaddr, eaddr, 
DSISR_R_BADCONFIG);
 }
 return false;
 }
-- 
2.23.0




[PATCH v2 5/9] target/ppc: make vhyp get_pate method take lpid and return success

2022-02-16 Thread Nicholas Piggin
In preparation for implementing a full partition table option for
vhyp, update the get_pate method to take an lpid and return a
success/fail indicator.

The spapr implementation currently just asserts lpid is always 0
and always returns success.

Reviewed-by: Cédric Le Goater 
Signed-off-by: Nicholas Piggin 
---
 hw/ppc/spapr.c   | 7 ++-
 target/ppc/cpu.h | 3 ++-
 target/ppc/mmu-radix64.c | 7 ++-
 3 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index fd7eccbdfd..2c95a09d25 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -1309,13 +1309,18 @@ void spapr_set_all_lpcrs(target_ulong value, 
target_ulong mask)
 }
 }
 
-static void spapr_get_pate(PPCVirtualHypervisor *vhyp, ppc_v3_pate_t *entry)
+static bool spapr_get_pate(PPCVirtualHypervisor *vhyp, PowerPCCPU *cpu,
+   target_ulong lpid, ppc_v3_pate_t *entry)
 {
 SpaprMachineState *spapr = SPAPR_MACHINE(vhyp);
 
+assert(lpid == 0);
+
 /* Copy PATE1:GR into PATE0:HR */
 entry->dw0 = spapr->patb_entry & PATE0_HR;
 entry->dw1 = spapr->patb_entry;
+
+return true;
 }
 
 #define HPTE(_table, _i)   (void *)(((uint64_t *)(_table)) + ((_i) * 2))
diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index 555c6b9245..c79ae74f10 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -1320,7 +1320,8 @@ struct PPCVirtualHypervisorClass {
 hwaddr ptex, int n);
 void (*hpte_set_c)(PPCVirtualHypervisor *vhyp, hwaddr ptex, uint64_t pte1);
 void (*hpte_set_r)(PPCVirtualHypervisor *vhyp, hwaddr ptex, uint64_t pte1);
-void (*get_pate)(PPCVirtualHypervisor *vhyp, ppc_v3_pate_t *entry);
+bool (*get_pate)(PPCVirtualHypervisor *vhyp, PowerPCCPU *cpu,
+ target_ulong lpid, ppc_v3_pate_t *entry);
 target_ulong (*encode_hpt_for_kvm_pr)(PPCVirtualHypervisor *vhyp);
 void (*cpu_exec_enter)(PPCVirtualHypervisor *vhyp, PowerPCCPU *cpu);
 void (*cpu_exec_exit)(PPCVirtualHypervisor *vhyp, PowerPCCPU *cpu);
diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c
index 5535f0fe20..3b6d75a292 100644
--- a/target/ppc/mmu-radix64.c
+++ b/target/ppc/mmu-radix64.c
@@ -563,7 +563,12 @@ static bool ppc_radix64_xlate_impl(PowerPCCPU *cpu, vaddr 
eaddr,
 if (cpu->vhyp) {
 PPCVirtualHypervisorClass *vhc;
 vhc = PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
-vhc->get_pate(cpu->vhyp, &pate);
+if (!vhc->get_pate(cpu->vhyp, cpu, lpid, &pate)) {
+if (guest_visible) {
+ppc_radix64_raise_hsi(cpu, access_type, eaddr, eaddr, 
DSISR_R_BADCONFIG);
+}
+return false;
+}
 } else {
 if (!ppc64_v3_get_pate(cpu, lpid, &pate)) {
 if (guest_visible) {
-- 
2.23.0




[PATCH v2 8/9] target/ppc: Introduce a vhyp framework for nested HV support

2022-02-16 Thread Nicholas Piggin
Introduce virtual hypervisor methods that can support a "Nested KVM HV"
implementation using the bare metal 2-level radix MMU, and using HV
exceptions to return from H_ENTER_NESTED (rather than cause interrupts).

HV exceptions can now be raised in the TCG spapr machine when running a
nested KVM HV guest. The main ones are the lev==1 syscall, the hdecr,
hdsi and hisi, hv fu, and hv emu, and h_virt external interrupts.

HV exceptions are intercepted in the exception handler code and instead
of causing interrupts in the guest and switching the machine to HV mode,
they go to the vhyp where it may exit the H_ENTER_NESTED hcall with the
interrupt vector number as return value as required by the hcall API.

Address translation is provided by the 2-level page table walker that is
implemented for the bare metal radix MMU. The partition scope page table
is pointed to the L1's partition scope by the get_pate vhc method.

Reviewed-by: Fabiano Rosas 
Signed-off-by: Nicholas Piggin 
---
 hw/ppc/pegasos2.c|  6 
 hw/ppc/spapr.c   |  6 
 target/ppc/cpu.h |  7 +
 target/ppc/excp_helper.c | 64 +---
 target/ppc/mmu-radix64.c | 11 +--
 5 files changed, 81 insertions(+), 13 deletions(-)

diff --git a/hw/ppc/pegasos2.c b/hw/ppc/pegasos2.c
index 298e6b93e2..d45008ac71 100644
--- a/hw/ppc/pegasos2.c
+++ b/hw/ppc/pegasos2.c
@@ -449,6 +449,11 @@ static target_ulong pegasos2_rtas(PowerPCCPU *cpu, 
Pegasos2MachineState *pm,
 }
 }
 
+static bool pegasos2_cpu_in_nested(PowerPCCPU *cpu)
+{
+return false;
+}
+
 static void pegasos2_hypercall(PPCVirtualHypervisor *vhyp, PowerPCCPU *cpu)
 {
 Pegasos2MachineState *pm = PEGASOS2_MACHINE(vhyp);
@@ -504,6 +509,7 @@ static void pegasos2_machine_class_init(ObjectClass *oc, 
void *data)
 mc->default_ram_id = "pegasos2.ram";
 mc->default_ram_size = 512 * MiB;
 
+vhc->cpu_in_nested = pegasos2_cpu_in_nested;
 vhc->hypercall = pegasos2_hypercall;
 vhc->cpu_exec_enter = vhyp_nop;
 vhc->cpu_exec_exit = vhyp_nop;
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 2c95a09d25..6fab70767f 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -4470,6 +4470,11 @@ PowerPCCPU *spapr_find_cpu(int vcpu_id)
 return NULL;
 }
 
+static bool spapr_cpu_in_nested(PowerPCCPU *cpu)
+{
+return false;
+}
+
 static void spapr_cpu_exec_enter(PPCVirtualHypervisor *vhyp, PowerPCCPU *cpu)
 {
 SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu);
@@ -4578,6 +4583,7 @@ static void spapr_machine_class_init(ObjectClass *oc, 
void *data)
 fwc->get_dev_path = spapr_get_fw_dev_path;
 nc->nmi_monitor_handler = spapr_nmi;
 smc->phb_placement = spapr_phb_placement;
+vhc->cpu_in_nested = spapr_cpu_in_nested;
 vhc->hypercall = emulate_spapr_hypercall;
 vhc->hpt_mask = spapr_hpt_mask;
 vhc->map_hptes = spapr_map_hptes;
diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index c79ae74f10..2baa750729 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -1311,6 +1311,8 @@ PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass 
*pcc);
 #ifndef CONFIG_USER_ONLY
 struct PPCVirtualHypervisorClass {
 InterfaceClass parent;
+bool (*cpu_in_nested)(PowerPCCPU *cpu);
+void (*deliver_hv_excp)(PowerPCCPU *cpu, int excp);
 void (*hypercall)(PPCVirtualHypervisor *vhyp, PowerPCCPU *cpu);
 hwaddr (*hpt_mask)(PPCVirtualHypervisor *vhyp);
 const ppc_hash_pte64_t *(*map_hptes)(PPCVirtualHypervisor *vhyp,
@@ -1330,6 +1332,11 @@ struct PPCVirtualHypervisorClass {
 #define TYPE_PPC_VIRTUAL_HYPERVISOR "ppc-virtual-hypervisor"
 DECLARE_OBJ_CHECKERS(PPCVirtualHypervisor, PPCVirtualHypervisorClass,
  PPC_VIRTUAL_HYPERVISOR, TYPE_PPC_VIRTUAL_HYPERVISOR)
+
+static inline bool vhyp_cpu_in_nested(PowerPCCPU *cpu)
+{
+return PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp)->cpu_in_nested(cpu);
+}
 #endif /* CONFIG_USER_ONLY */
 
 void ppc_cpu_dump_state(CPUState *cpu, FILE *f, int flags);
diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c
index 778eb4f3b0..a78d06d648 100644
--- a/target/ppc/excp_helper.c
+++ b/target/ppc/excp_helper.c
@@ -1279,6 +1279,18 @@ static void powerpc_excp_booke(PowerPCCPU *cpu, int excp)
 powerpc_set_excp_state(cpu, vector, new_msr);
 }
 
+/*
+ * When running a nested HV guest under vhyp, external interrupts are
+ * delivered as HVIRT.
+ */
+static bool books_vhyp_promotes_external_to_hvirt(PowerPCCPU *cpu)
+{
+if (cpu->vhyp) {
+return vhyp_cpu_in_nested(cpu);
+}
+return false;
+}
+
 #ifdef TARGET_PPC64
 /*
  * When running under vhyp, hcalls are always intercepted and sent to the
@@ -1287,7 +1299,21 @@ static void powerpc_excp_booke(PowerPCCPU *cpu, int excp)
 static bool books_vhyp_handles_hcall(PowerPCCPU *cpu)
 {
 if (cpu->vhyp) {
-return true;
+return !vhyp_cpu_in_nested(cpu);
+}
+return false;
+}
+
+/*

[PATCH v2 3/9] ppc: allow the hdecr timer to be created/destroyed

2022-02-16 Thread Nicholas Piggin
Machines which don't emulate the HDEC facility are able to use the
timer for something else. Provide functions to start and stop the
hdecr timer.

Signed-off-by: Nicholas Piggin 
---
 hw/ppc/ppc.c | 21 +
 include/hw/ppc/ppc.h |  3 +++
 2 files changed, 24 insertions(+)

diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c
index c6dfc5975f..ad64015551 100644
--- a/hw/ppc/ppc.c
+++ b/hw/ppc/ppc.c
@@ -1083,6 +1083,27 @@ clk_setup_cb cpu_ppc_tb_init (CPUPPCState *env, uint32_t 
freq)
 return &cpu_ppc_set_tb_clk;
 }
 
+/* cpu_ppc_hdecr_init may be used if the timer is not used by HDEC emulation */
+void cpu_ppc_hdecr_init(CPUPPCState *env)
+{
+PowerPCCPU *cpu = env_archcpu(env);
+
+assert(env->tb_env->hdecr_timer == NULL);
+
+env->tb_env->hdecr_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, 
&cpu_ppc_hdecr_cb,
+ cpu);
+}
+
+void cpu_ppc_hdecr_exit(CPUPPCState *env)
+{
+PowerPCCPU *cpu = env_archcpu(env);
+
+timer_free(env->tb_env->hdecr_timer);
+env->tb_env->hdecr_timer = NULL;
+
+cpu_ppc_hdecr_lower(cpu);
+}
+
 /*/
 /* PowerPC 40x timers */
 
diff --git a/include/hw/ppc/ppc.h b/include/hw/ppc/ppc.h
index 93e614cffd..b0ba4bd6b9 100644
--- a/include/hw/ppc/ppc.h
+++ b/include/hw/ppc/ppc.h
@@ -54,6 +54,9 @@ struct ppc_tb_t {
 
 uint64_t cpu_ppc_get_tb(ppc_tb_t *tb_env, uint64_t vmclk, int64_t tb_offset);
 clk_setup_cb cpu_ppc_tb_init (CPUPPCState *env, uint32_t freq);
+void cpu_ppc_hdecr_init(CPUPPCState *env);
+void cpu_ppc_hdecr_exit(CPUPPCState *env);
+
 /* Embedded PowerPC DCR management */
 typedef uint32_t (*dcr_read_cb)(void *opaque, int dcrn);
 typedef void (*dcr_write_cb)(void *opaque, int dcrn, uint32_t val);
-- 
2.23.0




[PATCH v2 6/9] target/ppc: add helper for books vhyp hypercall handler

2022-02-16 Thread Nicholas Piggin
The virtual hypervisor currently always intercepts and handles
hypercalls but with a future change this will not always be the case.

Add a helper for the test so the logic is abstracted from the mechanism.

Reviewed-by: Cédric Le Goater 
Signed-off-by: Nicholas Piggin 
---
 target/ppc/excp_helper.c | 14 +-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c
index fcc83a7701..6b6ec71bc2 100644
--- a/target/ppc/excp_helper.c
+++ b/target/ppc/excp_helper.c
@@ -1278,6 +1278,18 @@ static void powerpc_excp_booke(PowerPCCPU *cpu, int excp)
 }
 
 #ifdef TARGET_PPC64
+/*
+ * When running under vhyp, hcalls are always intercepted and sent to the
+ * vhc->hypercall handler.
+ */
+static bool books_vhyp_handles_hcall(PowerPCCPU *cpu)
+{
+if (cpu->vhyp) {
+return true;
+}
+return false;
+}
+
 static void powerpc_excp_books(PowerPCCPU *cpu, int excp)
 {
 CPUState *cs = CPU(cpu);
@@ -1439,7 +1451,7 @@ static void powerpc_excp_books(PowerPCCPU *cpu, int excp)
 env->nip += 4;
 
 /* "PAPR mode" built-in hypercall emulation */
-if ((lev == 1) && cpu->vhyp) {
+if ((lev == 1) && books_vhyp_handles_hcall(cpu)) {
 PPCVirtualHypervisorClass *vhc =
 PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
 vhc->hypercall(cpu->vhyp, cpu);
-- 
2.23.0




[PATCH v2 9/9] spapr: implement nested-hv capability for the virtual hypervisor

2022-02-16 Thread Nicholas Piggin
This implements the Nested KVM HV hcall API for spapr under TCG.

The L2 is switched in when the H_ENTER_NESTED hcall is made, and the
L1 is switched back in, returning from the hcall, when a HV exception
is sent to the vhyp. Register state is copied in and out according to
the nested KVM HV hcall API specification.

The hdecr timer is started when the L2 is switched in, and it provides
the HDEC / 0x980 return to L1.

The MMU re-uses the bare metal radix 2-level page table walker by
using the get_pate method to point the MMU to the nested partition
table entry. MMU faults due to partition scope errors raise HV
exceptions and accordingly are routed back to the L1.

The MMU does not tag translations for the L1 (direct) vs L2 (nested)
guests, so the TLB is flushed on any L1<->L2 transition (hcall entry
and exit).

Reviewed-by: Fabiano Rosas 
Signed-off-by: Nicholas Piggin 
---
 hw/ppc/spapr.c  |  37 +++-
 hw/ppc/spapr_caps.c |  14 +-
 hw/ppc/spapr_hcall.c| 333 
 include/hw/ppc/spapr.h  |  74 ++-
 include/hw/ppc/spapr_cpu_core.h |   5 +
 5 files changed, 452 insertions(+), 11 deletions(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 6fab70767f..87e68da77f 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -1270,6 +1270,8 @@ static void emulate_spapr_hypercall(PPCVirtualHypervisor 
*vhyp,
 /* The TCG path should also be holding the BQL at this point */
 g_assert(qemu_mutex_iothread_locked());
 
+g_assert(!vhyp_cpu_in_nested(cpu));
+
 if (msr_pr) {
 hcall_dprintf("Hypercall made with MSR[PR]=1\n");
 env->gpr[3] = H_PRIVILEGE;
@@ -1313,12 +1315,34 @@ static bool spapr_get_pate(PPCVirtualHypervisor *vhyp, 
PowerPCCPU *cpu,
target_ulong lpid, ppc_v3_pate_t *entry)
 {
 SpaprMachineState *spapr = SPAPR_MACHINE(vhyp);
+SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu);
 
-assert(lpid == 0);
+if (!spapr_cpu->in_nested) {
+assert(lpid == 0);
 
-/* Copy PATE1:GR into PATE0:HR */
-entry->dw0 = spapr->patb_entry & PATE0_HR;
-entry->dw1 = spapr->patb_entry;
+/* Copy PATE1:GR into PATE0:HR */
+entry->dw0 = spapr->patb_entry & PATE0_HR;
+entry->dw1 = spapr->patb_entry;
+
+} else {
+uint64_t patb, pats;
+
+assert(lpid != 0);
+
+patb = spapr->nested_ptcr & PTCR_PATB;
+pats = spapr->nested_ptcr & PTCR_PATS;
+
+/* Calculate number of entries */
+pats = 1ull << (pats + 12 - 4);
+if (pats <= lpid) {
+return false;
+}
+
+/* Grab entry */
+patb += 16 * lpid;
+entry->dw0 = ldq_phys(CPU(cpu)->as, patb);
+entry->dw1 = ldq_phys(CPU(cpu)->as, patb + 8);
+}
 
 return true;
 }
@@ -4472,7 +4496,9 @@ PowerPCCPU *spapr_find_cpu(int vcpu_id)
 
 static bool spapr_cpu_in_nested(PowerPCCPU *cpu)
 {
-return false;
+SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu);
+
+return spapr_cpu->in_nested;
 }
 
 static void spapr_cpu_exec_enter(PPCVirtualHypervisor *vhyp, PowerPCCPU *cpu)
@@ -4584,6 +4610,7 @@ static void spapr_machine_class_init(ObjectClass *oc, 
void *data)
 nc->nmi_monitor_handler = spapr_nmi;
 smc->phb_placement = spapr_phb_placement;
 vhc->cpu_in_nested = spapr_cpu_in_nested;
+vhc->deliver_hv_excp = spapr_exit_nested;
 vhc->hypercall = emulate_spapr_hypercall;
 vhc->hpt_mask = spapr_hpt_mask;
 vhc->map_hptes = spapr_map_hptes;
diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
index e2412aaa57..6d74345930 100644
--- a/hw/ppc/spapr_caps.c
+++ b/hw/ppc/spapr_caps.c
@@ -444,19 +444,23 @@ static void cap_nested_kvm_hv_apply(SpaprMachineState 
*spapr,
 {
 ERRP_GUARD();
 PowerPCCPU *cpu = POWERPC_CPU(first_cpu);
+CPUPPCState *env = &cpu->env;
 
 if (!val) {
 /* capability disabled by default */
 return;
 }
 
-if (tcg_enabled()) {
-error_setg(errp, "No Nested KVM-HV support in TCG");
+if (!(env->insns_flags2 & PPC2_ISA300)) {
+error_setg(errp, "Nested-HV only supported on POWER9 and later");
 error_append_hint(errp, "Try appending -machine cap-nested-hv=off\n");
-} else if (kvm_enabled()) {
+return;
+}
+
+if (kvm_enabled()) {
 if (!ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_3_00, 0,
   spapr->max_compat_pvr)) {
-error_setg(errp, "Nested KVM-HV only supported on POWER9");
+error_setg(errp, "Nested-HV only supported on POWER9 and later");
 error_append_hint(errp,
   "Try appending -machine 
max-cpu-compat=power9\n");
 return;
@@ -464,7 +468,7 @@ static void cap_nested_kvm_

[PATCH v2 7/9] target/ppc: Add powerpc_reset_excp_state helper

2022-02-16 Thread Nicholas Piggin
This moves the logic to reset the QEMU exception state into its own
function.

Reviewed-by: Cédric Le Goater 
Signed-off-by: Nicholas Piggin 
---
 target/ppc/excp_helper.c | 41 
 1 file changed, 21 insertions(+), 20 deletions(-)

diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c
index 6b6ec71bc2..778eb4f3b0 100644
--- a/target/ppc/excp_helper.c
+++ b/target/ppc/excp_helper.c
@@ -360,12 +360,20 @@ static void ppc_excp_apply_ail(PowerPCCPU *cpu, int excp, 
target_ulong msr,
 }
 #endif
 
-static void powerpc_set_excp_state(PowerPCCPU *cpu,
-  target_ulong vector, target_ulong 
msr)
+static void powerpc_reset_excp_state(PowerPCCPU *cpu)
 {
 CPUState *cs = CPU(cpu);
 CPUPPCState *env = &cpu->env;
 
+/* Reset exception state */
+cs->exception_index = POWERPC_EXCP_NONE;
+env->error_code = 0;
+}
+
+static void powerpc_set_excp_state(PowerPCCPU *cpu, target_ulong vector, 
target_ulong msr)
+{
+CPUPPCState *env = &cpu->env;
+
 assert((msr & env->msr_mask) == msr);
 
 /*
@@ -376,21 +384,20 @@ static void powerpc_set_excp_state(PowerPCCPU *cpu,
  * will prevent setting of the HV bit which some exceptions might need
  * to do.
  */
+env->nip = vector;
 env->msr = msr;
 hreg_compute_hflags(env);
-env->nip = vector;
-/* Reset exception state */
-cs->exception_index = POWERPC_EXCP_NONE;
-env->error_code = 0;
 
-/* Reset the reservation */
-env->reserve_addr = -1;
+powerpc_reset_excp_state(cpu);
 
 /*
  * Any interrupt is context synchronizing, check if TCG TLB needs
  * a delayed flush on ppc64
  */
 check_tlb_flush(env, false);
+
+/* Reset the reservation */
+env->reserve_addr = -1;
 }
 
 static void powerpc_excp_40x(PowerPCCPU *cpu, int excp)
@@ -471,8 +478,7 @@ static void powerpc_excp_40x(PowerPCCPU *cpu, int excp)
 case POWERPC_EXCP_FP:
 if ((msr_fe0 == 0 && msr_fe1 == 0) || msr_fp == 0) {
 trace_ppc_excp_fp_ignore();
-cs->exception_index = POWERPC_EXCP_NONE;
-env->error_code = 0;
+powerpc_reset_excp_state(cpu);
 return;
 }
 env->spr[SPR_40x_ESR] = ESR_FP;
@@ -609,8 +615,7 @@ static void powerpc_excp_6xx(PowerPCCPU *cpu, int excp)
 case POWERPC_EXCP_FP:
 if ((msr_fe0 == 0 && msr_fe1 == 0) || msr_fp == 0) {
 trace_ppc_excp_fp_ignore();
-cs->exception_index = POWERPC_EXCP_NONE;
-env->error_code = 0;
+powerpc_reset_excp_state(cpu);
 return;
 }
 
@@ -783,8 +788,7 @@ static void powerpc_excp_7xx(PowerPCCPU *cpu, int excp)
 case POWERPC_EXCP_FP:
 if ((msr_fe0 == 0 && msr_fe1 == 0) || msr_fp == 0) {
 trace_ppc_excp_fp_ignore();
-cs->exception_index = POWERPC_EXCP_NONE;
-env->error_code = 0;
+powerpc_reset_excp_state(cpu);
 return;
 }
 
@@ -969,8 +973,7 @@ static void powerpc_excp_74xx(PowerPCCPU *cpu, int excp)
 case POWERPC_EXCP_FP:
 if ((msr_fe0 == 0 && msr_fe1 == 0) || msr_fp == 0) {
 trace_ppc_excp_fp_ignore();
-cs->exception_index = POWERPC_EXCP_NONE;
-env->error_code = 0;
+powerpc_reset_excp_state(cpu);
 return;
 }
 
@@ -1168,8 +1171,7 @@ static void powerpc_excp_booke(PowerPCCPU *cpu, int excp)
 case POWERPC_EXCP_FP:
 if ((msr_fe0 == 0 && msr_fe1 == 0) || msr_fp == 0) {
 trace_ppc_excp_fp_ignore();
-cs->exception_index = POWERPC_EXCP_NONE;
-env->error_code = 0;
+powerpc_reset_excp_state(cpu);
 return;
 }
 
@@ -1406,8 +1408,7 @@ static void powerpc_excp_books(PowerPCCPU *cpu, int excp)
 case POWERPC_EXCP_FP:
 if ((msr_fe0 == 0 && msr_fe1 == 0) || msr_fp == 0) {
 trace_ppc_excp_fp_ignore();
-cs->exception_index = POWERPC_EXCP_NONE;
-env->error_code = 0;
+powerpc_reset_excp_state(cpu);
 return;
 }
 
-- 
2.23.0




Re: [PATCH v2 9/9] spapr: implement nested-hv capability for the virtual hypervisor

2022-02-16 Thread Nicholas Piggin
Excerpts from Cédric Le Goater's message of February 16, 2022 8:52 pm:
> On 2/16/22 11:25, Nicholas Piggin wrote:
>> This implements the Nested KVM HV hcall API for spapr under TCG.
>> 
>> The L2 is switched in when the H_ENTER_NESTED hcall is made, and the
>> L1 is switched back in returned from the hcall when a HV exception
>> is sent to the vhyp. Register state is copied in and out according to
>> the nested KVM HV hcall API specification.
>> 
>> The hdecr timer is started when the L2 is switched in, and it provides
>> the HDEC / 0x980 return to L1.
>> 
>> The MMU re-uses the bare metal radix 2-level page table walker by
>> using the get_pate method to point the MMU to the nested partition
>> table entry. MMU faults due to partition scope errors raise HV
>> exceptions and accordingly are routed back to the L1.
>> 
>> The MMU does not tag translations for the L1 (direct) vs L2 (nested)
>> guests, so the TLB is flushed on any L1<->L2 transition (hcall entry
>> and exit).>
>> Reviewed-by: Fabiano Rosas 
>> Signed-off-by: Nicholas Piggin 
> 
> Reviewed-by: Cédric Le Goater 
> 
> Some last comments below,

[...]

>> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
>> index edbf3eeed0..852fe61b36 100644
>> --- a/include/hw/ppc/spapr.h
>> +++ b/include/hw/ppc/spapr.h
>> @@ -199,6 +199,9 @@ struct SpaprMachineState {
>>   bool has_graphics;
>>   uint32_t vsmt;   /* Virtual SMT mode (KVM's "core stride") */
>>   
>> +/* Nested HV support (TCG only) */
>> +uint64_t nested_ptcr;
>> +
> 
> this is new state to migrate.
> 

[...]

>> +/* Linux 64-bit powerpc pt_regs struct, used by nested HV */
>> +struct kvmppc_pt_regs {
>> +uint64_t gpr[32];
>> +uint64_t nip;
>> +uint64_t msr;
>> +uint64_t orig_gpr3;/* Used for restarting system calls */
>> +uint64_t ctr;
>> +uint64_t link;
>> +uint64_t xer;
>> +uint64_t ccr;
>> +uint64_t softe;/* Soft enabled/disabled */
>> +uint64_t trap; /* Reason for being here */
>> +uint64_t dar;  /* Fault registers */
>> +uint64_t dsisr;/* on 4xx/Book-E used for ESR */
>> +uint64_t result;   /* Result of a system call */
>> +};
> 
> I think we need to start moving all the spapr hcall definitions under
> spapr_hcall.h. It can come later.

Sure.

[...]

>> diff --git a/include/hw/ppc/spapr_cpu_core.h 
>> b/include/hw/ppc/spapr_cpu_core.h
>> index dab3dfc76c..b560514560 100644
>> --- a/include/hw/ppc/spapr_cpu_core.h
>> +++ b/include/hw/ppc/spapr_cpu_core.h
>> @@ -48,6 +48,11 @@ typedef struct SpaprCpuState {
>>   bool prod; /* not migrated, only used to improve dispatch latencies */
>>   struct ICPState *icp;
>>   struct XiveTCTX *tctx;
>> +
>> +/* Fields for nested-HV support */
>> +bool in_nested; /* true while the L2 is executing */
>> +CPUPPCState *nested_host_state; /* holds the L1 state while L2 executes 
>> */
>> +int64_t nested_tb_offset; /* L1->L2 TB offset */
> 
> This needs a new vmstate.

How about instead of the vmstate (we would need all the L1 state in
nested_host_state as well), we just add a migration blocker in the
L2 entry path. We could limit the max hdecr to say 1 second to
ensure it unblocks before long.

I know migration blockers are not preferred but in this case it gives
us some iterations to debug and optimise first, which might change
the data to migrate.

Thanks,
Nick



Re: [PATCH v2 9/9] spapr: implement nested-hv capability for the virtual hypervisor

2022-02-16 Thread Nicholas Piggin
Excerpts from Nicholas Piggin's message of February 16, 2022 9:38 pm:
> Excerpts from Cédric Le Goater's message of February 16, 2022 8:52 pm:
>> On 2/16/22 11:25, Nicholas Piggin wrote:
>>> This implements the Nested KVM HV hcall API for spapr under TCG.
>>> 
>>> The L2 is switched in when the H_ENTER_NESTED hcall is made, and the
>>> L1 is switched back in returned from the hcall when a HV exception
>>> is sent to the vhyp. Register state is copied in and out according to
>>> the nested KVM HV hcall API specification.
>>> 
>>> The hdecr timer is started when the L2 is switched in, and it provides
>>> the HDEC / 0x980 return to L1.
>>> 
>>> The MMU re-uses the bare metal radix 2-level page table walker by
>>> using the get_pate method to point the MMU to the nested partition
>>> table entry. MMU faults due to partition scope errors raise HV
>>> exceptions and accordingly are routed back to the L1.
>>> 
>>> The MMU does not tag translations for the L1 (direct) vs L2 (nested)
>>> guests, so the TLB is flushed on any L1<->L2 transition (hcall entry
>>> and exit).>
>>> Reviewed-by: Fabiano Rosas 
>>> Signed-off-by: Nicholas Piggin 
>> 
>> Reviewed-by: Cédric Le Goater 
>> 
>> Some last comments below,
> 
> [...]
> 
>>> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
>>> index edbf3eeed0..852fe61b36 100644
>>> --- a/include/hw/ppc/spapr.h
>>> +++ b/include/hw/ppc/spapr.h
>>> @@ -199,6 +199,9 @@ struct SpaprMachineState {
>>>   bool has_graphics;
>>>   uint32_t vsmt;   /* Virtual SMT mode (KVM's "core stride") */
>>>   
>>> +/* Nested HV support (TCG only) */
>>> +uint64_t nested_ptcr;
>>> +
>> 
>> this is new state to migrate.
>> 
> 
> [...]
> 
>>> +/* Linux 64-bit powerpc pt_regs struct, used by nested HV */
>>> +struct kvmppc_pt_regs {
>>> +uint64_t gpr[32];
>>> +uint64_t nip;
>>> +uint64_t msr;
>>> +uint64_t orig_gpr3;/* Used for restarting system calls */
>>> +uint64_t ctr;
>>> +uint64_t link;
>>> +uint64_t xer;
>>> +uint64_t ccr;
>>> +uint64_t softe;/* Soft enabled/disabled */
>>> +uint64_t trap; /* Reason for being here */
>>> +uint64_t dar;  /* Fault registers */
>>> +uint64_t dsisr;/* on 4xx/Book-E used for ESR */
>>> +uint64_t result;   /* Result of a system call */
>>> +};
>> 
>> I think we need to start moving all the spapr hcall definitions under
>> spapr_hcall.h. It can come later.
> 
> Sure.
> 
> [...]
> 
>>> diff --git a/include/hw/ppc/spapr_cpu_core.h 
>>> b/include/hw/ppc/spapr_cpu_core.h
>>> index dab3dfc76c..b560514560 100644
>>> --- a/include/hw/ppc/spapr_cpu_core.h
>>> +++ b/include/hw/ppc/spapr_cpu_core.h
>>> @@ -48,6 +48,11 @@ typedef struct SpaprCpuState {
>>>   bool prod; /* not migrated, only used to improve dispatch latencies */
>>>   struct ICPState *icp;
>>>   struct XiveTCTX *tctx;
>>> +
>>> +/* Fields for nested-HV support */
>>> +bool in_nested; /* true while the L2 is executing */
>>> +CPUPPCState *nested_host_state; /* holds the L1 state while L2 
>>> executes */
>>> +int64_t nested_tb_offset; /* L1->L2 TB offset */
>> 
>> This needs a new vmstate.
> 
> How about instead of the vmstate (we would need all the L1 state in
> nested_host_state as well), we just add a migration blocker in the
> L2 entry path. We could limit the max hdecr to say 1 second to
> ensure it unblocks before long.
> 
> I know migration blockers are not preferred but in this case it gives
> us some iterations to debug and optimise first, which might change
> the data to migrate.

This should be roughly the incremental patch to do this.

Thanks,
Nick

--
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 87e68da77f..14e41b7d31 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -2882,6 +2882,13 @@ static void spapr_machine_init(MachineState *machine)
 "may run and log hardware error on the destination");
 }
 
+if (spapr_get_cap(spapr, SPAPR_CAP_NESTED_KVM_HV) == SPAPR_CAP_ON) {
+/* Create the error string for live migration blocker */
+error_setg(&spapr->nested_hv_migration_blocker,
+"A nested-hv L2 guest is running. Migration is blocked until it "
+ 

Re: [RFC PATCH v3 1/5] ppc64: Add semihosting support

2022-04-27 Thread Nicholas Piggin
Excerpts from Nicholas Piggin's message of April 21, 2022 12:04 pm:
> Excerpts from Leandro Lupori's message of April 21, 2022 4:09 am:
>> On 4/18/22 17:22, Cédric Le Goater wrote:
>>> On 4/18/22 21:10, Leandro Lupori wrote:
 Add semihosting support for PPC64. This implementation is
 based on the standard for ARM semihosting version 2.0, as
 implemented by QEMU and documented in

  https://github.com/ARM-software/abi-aa/releases

 The PPC64 specific differences are the following:

 Semihosting Trap Instruction: sc 7
 Operation Number Register: r3
 Parameter Register: r4
 Return Register: r3
 Data block field size: 64 bits
>>> 
>>> 'sc' is a good way to implement semi hosting but we should make sure
>>> that it is not colliding with future extensions, at least with the
>>> next POWERPC processor. Is that the case ? if not, then the lev could
>>> be reserved.
>>> 
>> 
>> Power ISA 3.1B says that LEV values greater that 2 are reserved.
>> Level 2 is the ultravisor, so I assumed that level 7 was far enough from 
>> current max level. I don't know if POWER11 will introduce new privilege 
>> levels. Is this info publicly available somewhere? Or do you have a 
>> better level in mind to use instead?
> 
> It's not available but there are no plans to use LEV=7.
> 
> It would be fine in practice I think, but it's kind of ugly and not 
> great precedent -- how would we find out all the projects which use 
> reserved instructions or values for something? Nominally the onus is on 
> the software to accept breakage, but in reality important software that
> breaks causes a headache for the ISA.
> 
> IBM's systemsim emulator actually has an instruction to call out to the 
> emulator to do various things like IO. It uses the opcode
> 
>   .long 0x000eaeb0
> 
> That is the primary op 0 reserved space, and there is actually another 
> op 'attn' or 'sp_attn' there which IBM CPUs implement, it is similar in 
> spirit (it calls out to the service processor and/or chip error handling 
> system to deal with a condition out-of-band). You don't want to use attn 
> here because the core under emulation might implement it, I'm just 
> noting the precedent with similar functionality under this primary 
> opcode.
> 
> So I think the systemsim emulator instruction should be a good choice. 
> But it should really be documented. I will bring this up at the Open 
> Power ISA working group meeting next week and see what the options are 
> with getting it formally allocated for semihosting emulators (or what 
> the alternatives are).

Update on the ISA TWG meeting

Semihosting was well received, the idea is not so new so I think it was
easily understood by attendees.

There were no objections to allocating a new opcode for this purpose.
The preference was a new opcode rather than using a reserved sc LEV
value.

The primary opcode 0 space is possibly unsuitable because it is said
to be "allocated to specific purposes that are outside the scope of the
Power ISA." whereas I think we want a first class instruction for this,
it may have implementation-dependent behaviour but on processors that
do not implement it, we would like it to have well-defined behaviour.

So we can probably just pick an opcode and submit a patch RFC to the
ISA (I can try help with that). First, there are a few questions to
resolve:

- What behaviour do we want for CPUs which do not implement it or
  disable it? E.g., no-op or illegal instruction interrupt. Ideally
  we would choose an opcode such that the architecture is compatible
  with existing CPUs.

- Would it be useful for KVM to implement semihosting support for
  guests on hard processors?

- Is there value in an endian-agnostic instruction? (Assuming we can
  find one). This question only comes to me because our BMC gdbserver
  for debugging the host CPUs implements breakpoints by inserting an
  'attn' instruction in the host code, and that does not work if the
  host switches endian. Any possibility the semihosting instruction
  would ever be injected out-of-band? Seems not so likely.

There were also some thoughts about bringing the semihosting spec
under the Open Power group but that's outside the scope of the ISA
group. This may be a possibility we could consider but I think for
now it should be enough to document it like riscv and put it
somewhere (even in the QEMU tree should be okay for now IMO).

Thanks,
Nick



[RFC PATCH 3/4] tcg/ppc: Optimize memory ordering generation with lwsync

2022-05-03 Thread Nicholas Piggin
lwsync orders more than just LD_LD, importantly it matches x86 and
s390 default memory ordering.

Signed-off-by: Nicholas Piggin 
---
 target/ppc/cpu.h | 2 ++
 tcg/ppc/tcg-target.c.inc | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index c2b6c987c0..0b0e9761cd 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -28,6 +28,8 @@
 
 #define TCG_GUEST_DEFAULT_MO 0
 
+#define PPC_LWSYNC_MO (TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST)
+
 #define TARGET_PAGE_BITS_64K 16
 #define TARGET_PAGE_BITS_16M 24
 
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
index 3ff845d063..b87fc2383e 100644
--- a/tcg/ppc/tcg-target.c.inc
+++ b/tcg/ppc/tcg-target.c.inc
@@ -1834,7 +1834,7 @@ static void tcg_out_mb(TCGContext *s, TCGArg a0)
 {
 uint32_t insn = HWSYNC;
 a0 &= TCG_MO_ALL;
-if (a0 == TCG_MO_LD_LD) {
+if ((a0 & PPC_LWSYNC_MO) == a0) {
 insn = LWSYNC;
 }
 tcg_out32(s, insn);
-- 
2.35.1




[RFC PATCH 1/4] target/ppc: Fix eieio memory ordering semantics

2022-05-03 Thread Nicholas Piggin
The generated eieio memory ordering semantics do not match the
instruction definition in the architecture. Add a big comment to
explain this strange instruction and correct the memory ordering
behaviour.

Signed-off-by: Nicholas Piggin 
---
 target/ppc/translate.c | 26 +-
 1 file changed, 25 insertions(+), 1 deletion(-)

diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index fa34f81c30..abb8807180 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -3513,7 +3513,31 @@ static void gen_stswx(DisasContext *ctx)
 /* eieio */
 static void gen_eieio(DisasContext *ctx)
 {
-TCGBar bar = TCG_MO_LD_ST;
+TCGBar bar = TCG_MO_ALL;
+
+/*
+ * eieio has complex semantics. It provides memory ordering between
+ * operations in the set:
+ * - loads from CI memory.
+ * - stores to CI memory.
+ * - stores to WT memory.
+ *
+ * It separately also orders memory for operations in the set:
+ * - stores to cacheable memory.
+ *
+ * It also serializes instructions:
+ * - dcbt and dcbst.
+ *
+ * It separately serializes:
+ * - tlbie and tlbsync.
+ *
+ * And separately serializes:
+ * - slbieg, slbiag, and slbsync.
+ *
+ * The end result is that CI memory ordering requires TCG_MO_ALL
+ * and it is not possible to special-case more relaxed ordering for
+ * cacheable accesses. TCG_BAR_SC is required to provide the serialization.
+ */
 
 /*
  * POWER9 has a eieio instruction variant using bit 6 as a hint to
-- 
2.35.1




[RFC PATCH 2/4] tcg/ppc: ST_ST memory ordering is not provided with eieio

2022-05-03 Thread Nicholas Piggin
eieio does not provide ordering between stores to CI memory and stores
to cacheable memory so it can't be used as a general ST_ST barrier.

Signed-off-by: Nicholas Piggin 
---
 tcg/ppc/tcg-target.c.inc | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
index cfcd121f9c..3ff845d063 100644
--- a/tcg/ppc/tcg-target.c.inc
+++ b/tcg/ppc/tcg-target.c.inc
@@ -1836,8 +1836,6 @@ static void tcg_out_mb(TCGContext *s, TCGArg a0)
 a0 &= TCG_MO_ALL;
 if (a0 == TCG_MO_LD_LD) {
 insn = LWSYNC;
-} else if (a0 == TCG_MO_ST_ST) {
-insn = EIEIO;
 }
 tcg_out32(s, insn);
 }
-- 
2.35.1




[RFC PATCH 4/4] target/ppc: Implement lwsync with weaker memory ordering

2022-05-03 Thread Nicholas Piggin
This allows an x86 host to no-op lwsyncs, and ppc host can use lwsync
rather than sync.

Signed-off-by: Nicholas Piggin 
---
 target/ppc/cpu.h   |  4 +++-
 target/ppc/cpu_init.c  | 13 +++--
 target/ppc/machine.c   |  3 ++-
 target/ppc/translate.c |  8 +++-
 4 files changed, 19 insertions(+), 9 deletions(-)

diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index 0b0e9761cd..bf5f226567 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -2287,6 +2287,8 @@ enum {
 PPC2_ISA300= 0x0008ULL,
 /* POWER ISA 3.1 */
 PPC2_ISA310= 0x0010ULL,
+/*   lwsync instruction  */
+PPC2_MEM_LWSYNC= 0x0020ULL,
 
 #define PPC_TCG_INSNS2 (PPC2_BOOKE206 | PPC2_VSX | PPC2_PRCNTL | PPC2_DBRX | \
 PPC2_ISA205 | PPC2_VSX207 | PPC2_PERM_ISA206 | \
@@ -2295,7 +2297,7 @@ enum {
 PPC2_BCTAR_ISA207 | PPC2_LSQ_ISA207 | \
 PPC2_ALTIVEC_207 | PPC2_ISA207S | PPC2_DFP | \
 PPC2_FP_CVT_S64 | PPC2_TM | PPC2_PM_ISA206 | \
-PPC2_ISA300 | PPC2_ISA310)
+PPC2_ISA300 | PPC2_ISA310 | PPC2_MEM_LWSYNC)
 };
 
 /*/
diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
index d42e2ba8e0..26d9277ffb 100644
--- a/target/ppc/cpu_init.c
+++ b/target/ppc/cpu_init.c
@@ -5769,7 +5769,7 @@ POWERPC_FAMILY(970)(ObjectClass *oc, void *data)
PPC_MEM_TLBIE | PPC_MEM_TLBSYNC |
PPC_64B | PPC_ALTIVEC |
PPC_SEGMENT_64B | PPC_SLBI;
-pcc->insns_flags2 = PPC2_FP_CVT_S64;
+pcc->insns_flags2 = PPC2_FP_CVT_S64 | PPC2_MEM_LWSYNC;
 pcc->msr_mask = (1ull << MSR_SF) |
 (1ull << MSR_VR) |
 (1ull << MSR_POW) |
@@ -5846,7 +5846,7 @@ POWERPC_FAMILY(POWER5P)(ObjectClass *oc, void *data)
PPC_64B |
PPC_POPCNTB |
PPC_SEGMENT_64B | PPC_SLBI;
-pcc->insns_flags2 = PPC2_FP_CVT_S64;
+pcc->insns_flags2 = PPC2_FP_CVT_S64 | PPC2_MEM_LWSYNC;
 pcc->msr_mask = (1ull << MSR_SF) |
 (1ull << MSR_VR) |
 (1ull << MSR_POW) |
@@ -5984,7 +5984,7 @@ POWERPC_FAMILY(POWER7)(ObjectClass *oc, void *data)
 PPC2_PERM_ISA206 | PPC2_DIVE_ISA206 |
 PPC2_ATOMIC_ISA206 | PPC2_FP_CVT_ISA206 |
 PPC2_FP_TST_ISA206 | PPC2_FP_CVT_S64 |
-PPC2_PM_ISA206;
+PPC2_PM_ISA206 | PPC2_MEM_LWSYNC;
 pcc->msr_mask = (1ull << MSR_SF) |
 (1ull << MSR_VR) |
 (1ull << MSR_VSX) |
@@ -6157,7 +6157,7 @@ POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data)
 PPC2_FP_TST_ISA206 | PPC2_BCTAR_ISA207 |
 PPC2_LSQ_ISA207 | PPC2_ALTIVEC_207 |
 PPC2_ISA205 | PPC2_ISA207S | PPC2_FP_CVT_S64 |
-PPC2_TM | PPC2_PM_ISA206;
+PPC2_TM | PPC2_PM_ISA206 | PPC2_MEM_LWSYNC;
 pcc->msr_mask = (1ull << MSR_SF) |
 (1ull << MSR_HV) |
 (1ull << MSR_TM) |
@@ -6375,7 +6375,7 @@ POWERPC_FAMILY(POWER9)(ObjectClass *oc, void *data)
 PPC2_FP_TST_ISA206 | PPC2_BCTAR_ISA207 |
 PPC2_LSQ_ISA207 | PPC2_ALTIVEC_207 |
 PPC2_ISA205 | PPC2_ISA207S | PPC2_FP_CVT_S64 |
-PPC2_TM | PPC2_ISA300 | PPC2_PRCNTL;
+PPC2_TM | PPC2_ISA300 | PPC2_PRCNTL | PPC2_MEM_LWSYNC;
 pcc->msr_mask = (1ull << MSR_SF) |
 (1ull << MSR_HV) |
 (1ull << MSR_TM) |
@@ -6590,7 +6590,8 @@ POWERPC_FAMILY(POWER10)(ObjectClass *oc, void *data)
 PPC2_FP_TST_ISA206 | PPC2_BCTAR_ISA207 |
 PPC2_LSQ_ISA207 | PPC2_ALTIVEC_207 |
 PPC2_ISA205 | PPC2_ISA207S | PPC2_FP_CVT_S64 |
-PPC2_TM | PPC2_ISA300 | PPC2_PRCNTL | PPC2_ISA310;
+PPC2_TM | PPC2_ISA300 | PPC2_PRCNTL | PPC2_ISA310 |
+PPC2_MEM_LWSYNC;
 pcc->msr_mask = (1ull << MSR_SF) |
 (1ull << MSR_HV) |
 (1ull << MSR_TM) |
diff --git a/target/ppc/machine.c b/target/ppc/machine.c
index e673944597..33b3d6cf30 100644
--- a/target/ppc/machine.c
+++ b/target/ppc/machine.c
@@ -157,7 +157,8 @@ static int cpu_pre_save(void *opaque)
 | PPC2_ATOMIC_ISA206 | PPC2_FP_CVT_ISA206
 | PPC2_FP_TST_ISA206 | PPC2_BCTAR_ISA2

[PATCH] target/ppc: restore powerpc_excp_booke doorbell interrupts

2022-09-24 Thread Nicholas Piggin
This partially reverts commit 9dc20cc37db9 ("target/ppc: Simplify
powerpc_excp_booke"), which removed DOORI and DOORCI interrupts.
Without this patch, a -cpu e5500 -smp 2 machine booting Linux
crashes with:

  qemu: fatal: Invalid PowerPC exception 36. Aborting

Signed-off-by: Nicholas Piggin 
---
 target/ppc/excp_helper.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c
index 214acf5ac4..43f2480e94 100644
--- a/target/ppc/excp_helper.c
+++ b/target/ppc/excp_helper.c
@@ -1247,6 +1247,12 @@ static void powerpc_excp_booke(PowerPCCPU *cpu, int excp)
 case POWERPC_EXCP_SPEU:   /* SPE/embedded floating-point unavailable/VPU  
*/
 env->spr[SPR_BOOKE_ESR] = ESR_SPV;
 break;
+case POWERPC_EXCP_DOORI: /* Embedded doorbell interrupt  */
+break;
+case POWERPC_EXCP_DOORCI:/* Embedded doorbell critical interrupt */
+srr0 = SPR_BOOKE_CSRR0;
+srr1 = SPR_BOOKE_CSRR1;
+break;
 case POWERPC_EXCP_RESET: /* System reset exception   */
 if (FIELD_EX64(env->msr, MSR, POW)) {
 cpu_abort(cs, "Trying to deliver power-saving system reset "
-- 
2.37.2




Re: [PATCH RFC V3 13/29] arm/virt: Make ARM vCPU *present* status ACPI *persistent*

2024-07-03 Thread Nicholas Piggin
On Fri Jun 14, 2024 at 9:36 AM AEST, Salil Mehta wrote:
> ARM arch does not allow CPUs presence to be changed [1] after kernel has 
> booted.
> Hence, firmware/ACPI/Qemu must ensure persistent view of the vCPUs to the 
> Guest
> kernel even when they are not present in the QoM i.e. are unplugged or are
> yet-to-be-plugged

Do you need arch-independent state for this? If ARM always requires
it then can it be implemented between arm and acpi interface?

If not, then perhaps could it be done in the patch that introduces
all the other state?

> References:
> [1] Check comment 5 in the bugzilla entry
>Link: https://bugzilla.tianocore.org/show_bug.cgi?id=4481#c5

If I understand correctly (and I don't know ACPI, so it's likely
I don't), that is and update to ACPI spec to say some bit in ACPI
table must remain set regardless of CPU hotplug state.

Reference links are good, I think it would be nice to add a small
summary in the changelog too.

Thanks,
Nick

>
> Signed-off-by: Salil Mehta 
> ---
>  cpu-common.c  |  6 ++
>  hw/arm/virt.c |  7 +++
>  include/hw/core/cpu.h | 21 +
>  3 files changed, 34 insertions(+)
>
> diff --git a/cpu-common.c b/cpu-common.c
> index 49d2a50835..e4b4dee99a 100644
> --- a/cpu-common.c
> +++ b/cpu-common.c
> @@ -128,6 +128,12 @@ bool qemu_enabled_cpu(CPUState *cpu)
>  return cpu && !cpu->disabled;
>  }
>  
> +bool qemu_persistent_cpu(CPUState *cpu)
> +{
> +/* cpu state can be faked to the guest via acpi */
> +return cpu && cpu->acpi_persistent;
> +}
> +
>  uint64_t qemu_get_cpu_archid(int cpu_index)
>  {
>  MachineState *ms = MACHINE(qdev_get_machine());
> diff --git a/hw/arm/virt.c b/hw/arm/virt.c
> index 5f98162587..9d33f30a6a 100644
> --- a/hw/arm/virt.c
> +++ b/hw/arm/virt.c
> @@ -3016,6 +3016,13 @@ static void virt_cpu_pre_plug(HotplugHandler 
> *hotplug_dev, DeviceState *dev,
>  return;
>  }
>  virt_cpu_set_properties(OBJECT(cs), cpu_slot, errp);
> +
> +/*
> + * To give persistent presence view of vCPUs to the guest, ACPI might 
> need
> + * to fake the presence of the vCPUs to the guest but keep them disabled.
> + * This shall be used during the init of ACPI Hotplug state and 
> hot-unplug
> + */
> + cs->acpi_persistent = true;
>  }
>  
>  static void virt_cpu_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
> diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
> index 62e68611c0..e13e542177 100644
> --- a/include/hw/core/cpu.h
> +++ b/include/hw/core/cpu.h
> @@ -540,6 +540,14 @@ struct CPUState {
>   * every CPUState is enabled across all architectures.
>   */
>  bool disabled;
> +/*
> + * On certain architectures, to provide a persistent view of the 
> 'presence'
> + * of vCPUs to the guest, ACPI might need to fake the 'presence' of the
> + * vCPUs but keep them ACPI-disabled for the guest. This is achieved by
> + * returning `_STA.PRES=True` and `_STA.Ena=False` for the unplugged 
> vCPUs
> + * in QEMU QoM.
> + */
> +bool acpi_persistent;
>  
>  /* TODO Move common fields from CPUArchState here. */
>  int cpu_index;
> @@ -959,6 +967,19 @@ bool qemu_present_cpu(CPUState *cpu);
>   */
>  bool qemu_enabled_cpu(CPUState *cpu);
>  
> +/**
> + * qemu_persistent_cpu:
> + * @cpu: The vCPU to check
> + *
> + * Checks if the vCPU state should always be reflected as *present* via ACPI
> + * to the Guest. By default, this is False on all architectures and has to be
> + * explicitly set during initialization.
> + *
> + * Returns: True if it is ACPI 'persistent' CPU
> + *
> + */
> +bool qemu_persistent_cpu(CPUState *cpu);
> +
>  /**
>   * qemu_get_cpu_archid:
>   * @cpu_index: possible vCPU for which arch-id needs to be retrieved




Re: [PATCH V13 3/8] hw/acpi: Update ACPI GED framework to support vCPU Hotplug

2024-07-03 Thread Nicholas Piggin
On Fri Jun 7, 2024 at 9:56 PM AEST, Salil Mehta wrote:
> @@ -400,6 +411,12 @@ static void acpi_ged_initfn(Object *obj)
>  memory_region_init_io(&ged_st->regs, obj, &ged_regs_ops, ged_st,
>TYPE_ACPI_GED "-regs", ACPI_GED_REG_COUNT);
>  sysbus_init_mmio(sbd, &ged_st->regs);
> +
> +memory_region_init(&s->container_cpuhp, OBJECT(dev), "cpuhp container",
> +   ACPI_CPU_HOTPLUG_REG_LEN);
> +sysbus_init_mmio(sbd, &s->container_cpuhp);
> +cpu_hotplug_hw_init(&s->container_cpuhp, OBJECT(dev),
> +&s->cpuhp_state, 0);
>  }

Could the ACPI persistent presence ARM requires be a property of
the ACPI device?

Thanks,
Nick



Re: [PATCH RFC V3 08/29] arm/virt: Init PMU at host for all possible vcpus

2024-07-03 Thread Nicholas Piggin
On Fri Jun 14, 2024 at 9:36 AM AEST, Salil Mehta wrote:
> PMU for all possible vCPUs must be initialized at the VM initialization time.
> Refactor existing code to accommodate possible vCPUs. This also assumes that 
> all
> processor being used are identical.
>
> Past discussion for reference:
> Link: https://lists.gnu.org/archive/html/qemu-devel/2020-06/msg00131.html

I guess it's something for the ARM people, but there's a lot of
information in there, could it be useful to summarise important
parts here, e.g., from Andrew:

 KVM requires all VCPUs to have a PMU if one does. If the ARM ARM
 says it's possible to have PMUs for only some CPUs, then, for TCG,
 the restriction could be relaxed.

(I assume he meant ARM arch)

>
> Co-developed-by: Salil Mehta 
> Signed-off-by: Salil Mehta 
> Co-developed-by: Keqian Zhu 
> Signed-off-by: Keqian Zhu 
> Signed-off-by: Salil Mehta 
> ---
>  hw/arm/virt.c | 12 
>  include/hw/arm/virt.h |  1 +
>  2 files changed, 9 insertions(+), 4 deletions(-)
>
> diff --git a/hw/arm/virt.c b/hw/arm/virt.c
> index ac53bfadca..57ec429022 100644
> --- a/hw/arm/virt.c
> +++ b/hw/arm/virt.c
> @@ -2045,12 +2045,14 @@ static void finalize_gic_version(VirtMachineState 
> *vms)
>   */
>  static void virt_cpu_post_init(VirtMachineState *vms, MemoryRegion *sysmem)
>  {
> +CPUArchIdList *possible_cpus = vms->parent.possible_cpus;
>  int max_cpus = MACHINE(vms)->smp.max_cpus;
> -bool aarch64, pmu, steal_time;
> +bool aarch64, steal_time;
>  CPUState *cpu;
> +int n;
>  
>  aarch64 = object_property_get_bool(OBJECT(first_cpu), "aarch64", NULL);
> -pmu = object_property_get_bool(OBJECT(first_cpu), "pmu", NULL);
> +vms->pmu = object_property_get_bool(OBJECT(first_cpu), "pmu", NULL);
>  steal_time = object_property_get_bool(OBJECT(first_cpu),
>"kvm-steal-time", NULL);
>  
> @@ -2077,8 +2079,10 @@ static void virt_cpu_post_init(VirtMachineState *vms, 
> MemoryRegion *sysmem)
>  memory_region_add_subregion(sysmem, pvtime_reg_base, pvtime);
>  }
>  
> -CPU_FOREACH(cpu) {
> -if (pmu) {
> +for (n = 0; n < possible_cpus->len; n++) {
> +cpu = qemu_get_possible_cpu(n);
> +

Maybe a CPU_FOREACH_POSSIBLE()?

Thanks,
Nick

> +if (vms->pmu) {
>  assert(arm_feature(&ARM_CPU(cpu)->env, ARM_FEATURE_PMU));
>  if (kvm_irqchip_in_kernel()) {
>  kvm_arm_pmu_set_irq(ARM_CPU(cpu), VIRTUAL_PMU_IRQ);
> diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
> index 36ac5ff4a2..d8dcc89a0d 100644
> --- a/include/hw/arm/virt.h
> +++ b/include/hw/arm/virt.h
> @@ -155,6 +155,7 @@ struct VirtMachineState {
>  bool ras;
>  bool mte;
>  bool dtb_randomness;
> +bool pmu;
>  OnOffAuto acpi;
>  VirtGICType gic_version;
>  VirtIOMMUType iommu;




Re: [PATCH RFC V3 02/29] cpu-common: Add common CPU utility for possible vCPUs

2024-07-03 Thread Nicholas Piggin
On Fri Jun 14, 2024 at 9:36 AM AEST, Salil Mehta wrote:

[...]

> diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
> index 60b160d0b4..60b4778da9 100644
> --- a/include/hw/core/cpu.h
> +++ b/include/hw/core/cpu.h

[...]

> +/**
> + * qemu_get_cpu_archid:
> + * @cpu_index: possible vCPU for which arch-id needs to be retrieved
> + *
> + * Fetches the vCPU arch-id from the present possible vCPUs.
> + *
> + * Returns: arch-id of the possible vCPU
> + */
> +uint64_t qemu_get_cpu_archid(int cpu_index);

Not sure if blind... I can't see where this is used.

I'd be interested to see why it needs to be in non-arch code,
presumably it's only relevant to arch specific code. I'm
guessing ACPI needs it, but then could it be put into some
ACPI state or helper?

Thanks,
Nick



Re: [PATCH RFC V3 25/29] target/arm/kvm: Write CPU state back to KVM on reset

2024-07-03 Thread Nicholas Piggin
On Fri Jun 14, 2024 at 9:36 AM AEST, Salil Mehta wrote:
> From: Jean-Philippe Brucker 
>
> When a KVM vCPU is reset following a PSCI CPU_ON call, its power state
> is not synchronized with KVM at the moment. Because the vCPU is not
> marked dirty, we miss the call to kvm_arch_put_registers() that writes
> to KVM's MP_STATE. Force mp_state synchronization.

Hmm. Is this a bug fix for upstream? arm does respond to CPU_ON calls
by the look, but maybe it's not doing KVM parking until your series?
Maybe just a slight change to say "When KVM parking is implemented for
ARM..." if so.

>
> Signed-off-by: Jean-Philippe Brucker 
> Signed-off-by: Salil Mehta 
> ---
>  target/arm/kvm.c | 7 +++
>  1 file changed, 7 insertions(+)
>
> diff --git a/target/arm/kvm.c b/target/arm/kvm.c
> index 1121771c4a..7acd83ce64 100644
> --- a/target/arm/kvm.c
> +++ b/target/arm/kvm.c
> @@ -980,6 +980,7 @@ void kvm_arm_cpu_post_load(ARMCPU *cpu)
>  void kvm_arm_reset_vcpu(ARMCPU *cpu)
>  {
>  int ret;
> +CPUState *cs = CPU(cpu);
>  
>  /* Re-init VCPU so that all registers are set to
>   * their respective reset values.
> @@ -1001,6 +1002,12 @@ void kvm_arm_reset_vcpu(ARMCPU *cpu)
>   * for the same reason we do so in kvm_arch_get_registers().
>   */
>  write_list_to_cpustate(cpu);
> +
> +/*
> + * Ensure we call kvm_arch_put_registers(). The vCPU isn't marked dirty 
> if
> + * it was parked in KVM and is now booting from a PSCI CPU_ON call.
> + */
> +cs->vcpu_dirty = true;
>  }
>  
>  void kvm_arm_create_host_vcpu(ARMCPU *cpu)

Also above my pay grade, but arm_set_cpu_on_async_work() which seems
to be what calls the CPU reset you refer to does a bunch of CPU register
and state setting including the power state setting that you mention.
Would the vcpu_dirty be better to go there?

Thanks,
Nick



Re: [PATCH 01/43] target/ppc: Reorganise and rename ppc_hash32_pp_prot()

2024-07-03 Thread Nicholas Piggin
On Mon May 27, 2024 at 9:12 AM AEST, BALATON Zoltan wrote:
> Reorganise ppc_hash32_pp_prot() swapping the if legs so it does not
> test for negative first and clean up to make it shorter. Also rename
> it to ppc_hash32_prot().

Rename makes sense since it's not entirely derived from pp but
also key and nx.

Reviewed-by: Nicholas Piggin 

>
> Signed-off-by: BALATON Zoltan 
> ---
>  target/ppc/mmu-hash32.c |  2 +-
>  target/ppc/mmu-hash32.h | 35 +--
>  target/ppc/mmu_common.c |  2 +-
>  3 files changed, 15 insertions(+), 24 deletions(-)
>
> diff --git a/target/ppc/mmu-hash32.c b/target/ppc/mmu-hash32.c
> index d5f2057eb1..8a446c8a7d 100644
> --- a/target/ppc/mmu-hash32.c
> +++ b/target/ppc/mmu-hash32.c
> @@ -45,7 +45,7 @@ static int ppc_hash32_pte_prot(int mmu_idx,
>  key = !!(mmuidx_pr(mmu_idx) ? (sr & SR32_KP) : (sr & SR32_KS));
>  pp = pte.pte1 & HPTE32_R_PP;
>  
> -return ppc_hash32_pp_prot(key, pp, !!(sr & SR32_NX));
> +return ppc_hash32_prot(key, pp, !!(sr & SR32_NX));
>  }
>  
>  static target_ulong hash32_bat_size(int mmu_idx,
> diff --git a/target/ppc/mmu-hash32.h b/target/ppc/mmu-hash32.h
> index f0ce6951b4..bc4eedbecc 100644
> --- a/target/ppc/mmu-hash32.h
> +++ b/target/ppc/mmu-hash32.h
> @@ -102,49 +102,40 @@ static inline void ppc_hash32_store_hpte1(PowerPCCPU 
> *cpu,
>  stl_phys(CPU(cpu)->as, base + pte_offset + HASH_PTE_SIZE_32 / 2, pte1);
>  }
>  
> -static inline int ppc_hash32_pp_prot(bool key, int pp, bool nx)
> +static inline int ppc_hash32_prot(bool key, int pp, bool nx)
>  {
>  int prot;
>  
> -if (key == 0) {
> +if (key) {
>  switch (pp) {
>  case 0x0:
> -case 0x1:
> -case 0x2:
> -prot = PAGE_READ | PAGE_WRITE;
> +prot = 0;
>  break;
> -
> +case 0x1:
>  case 0x3:
>  prot = PAGE_READ;
>  break;
> -
> +case 0x2:
> +prot = PAGE_READ | PAGE_WRITE;
> +break;
>  default:
> -abort();
> +g_assert_not_reached();
>  }
>  } else {
>  switch (pp) {
>  case 0x0:
> -prot = 0;
> -break;
> -
>  case 0x1:
> -case 0x3:
> -prot = PAGE_READ;
> -break;
> -
>  case 0x2:
>  prot = PAGE_READ | PAGE_WRITE;
>  break;
> -
> +case 0x3:
> +prot = PAGE_READ;
> +break;
>  default:
> -abort();
> +g_assert_not_reached();
>  }
>  }
> -if (nx == 0) {
> -prot |= PAGE_EXEC;
> -}
> -
> -return prot;
> +return nx ? prot : prot | PAGE_EXEC;
>  }
>  
>  typedef struct {
> diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c
> index e2542694f0..08c5b61f76 100644
> --- a/target/ppc/mmu_common.c
> +++ b/target/ppc/mmu_common.c
> @@ -120,7 +120,7 @@ static int ppc6xx_tlb_pte_check(mmu_ctx_t *ctx, 
> target_ulong pte0,
>  }
>  /* Keep the matching PTE information */
>  ctx->raddr = pte1;
> -ctx->prot = ppc_hash32_pp_prot(ctx->key, pp, ctx->nx);
> +ctx->prot = ppc_hash32_prot(ctx->key, pp, ctx->nx);
>  if (check_prot_access_type(ctx->prot, access_type)) {
>  /* Access granted */
>  qemu_log_mask(CPU_LOG_MMU, "PTE access granted !\n");




Re: [PATCH 02/43] target/ppc/mmu_common.c: Remove local name for a constant

2024-07-03 Thread Nicholas Piggin
On Mon May 27, 2024 at 9:12 AM AEST, BALATON Zoltan wrote:
> The mmask local variable is a less descriptive local name for a
> constant. Drop it and use the constant directly in the two places it
> is needed.
>
> Signed-off-by: BALATON Zoltan 

Reviewed-by: Nicholas Piggin 

> ---
>  target/ppc/mmu_common.c | 5 ++---
>  1 file changed, 2 insertions(+), 3 deletions(-)
>
> diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c
> index 08c5b61f76..2618cdec6a 100644
> --- a/target/ppc/mmu_common.c
> +++ b/target/ppc/mmu_common.c
> @@ -98,7 +98,7 @@ static int ppc6xx_tlb_pte_check(mmu_ctx_t *ctx, 
> target_ulong pte0,
>  target_ulong pte1, int h,
>  MMUAccessType access_type)
>  {
> -target_ulong ptem, mmask;
> +target_ulong ptem;
>  int ret, pteh, ptev, pp;
>  
>  ret = -1;
> @@ -108,12 +108,11 @@ static int ppc6xx_tlb_pte_check(mmu_ctx_t *ctx, 
> target_ulong pte0,
>  if (ptev && h == pteh) {
>  /* Check vsid & api */
>  ptem = pte0 & PTE_PTEM_MASK;
> -mmask = PTE_CHECK_MASK;
>  pp = pte1 & 0x0003;
>  if (ptem == ctx->ptem) {
>  if (ctx->raddr != (hwaddr)-1ULL) {
>  /* all matches should have equal RPN, WIMG & PP */
> -if ((ctx->raddr & mmask) != (pte1 & mmask)) {
> +if ((ctx->raddr & PTE_CHECK_MASK) != (pte1 & 
> PTE_CHECK_MASK)) {
>  qemu_log_mask(CPU_LOG_MMU, "Bad RPN/WIMG/PP\n");
>  return -3;
>  }




Re: [PATCH 03/43] target/ppc/mmu_common.c: Remove single use local variable

2024-07-03 Thread Nicholas Piggin
On Mon May 27, 2024 at 9:12 AM AEST, BALATON Zoltan wrote:
> The ptem variable in ppc6xx_tlb_pte_check() is used only once,
> simplify by removing it as the value is already clear itself without
> adding a local name for it.
>
> Signed-off-by: BALATON Zoltan 

Reviewed-by: Nicholas Piggin 

> ---
>  target/ppc/mmu_common.c | 4 +---
>  1 file changed, 1 insertion(+), 3 deletions(-)
>
> diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c
> index 2618cdec6a..371ec24485 100644
> --- a/target/ppc/mmu_common.c
> +++ b/target/ppc/mmu_common.c
> @@ -98,7 +98,6 @@ static int ppc6xx_tlb_pte_check(mmu_ctx_t *ctx, 
> target_ulong pte0,
>  target_ulong pte1, int h,
>  MMUAccessType access_type)
>  {
> -target_ulong ptem;
>  int ret, pteh, ptev, pp;
>  
>  ret = -1;
> @@ -107,9 +106,8 @@ static int ppc6xx_tlb_pte_check(mmu_ctx_t *ctx, 
> target_ulong pte0,
>  pteh = (pte0 >> 6) & 1;
>  if (ptev && h == pteh) {
>  /* Check vsid & api */
> -ptem = pte0 & PTE_PTEM_MASK;
>  pp = pte1 & 0x0003;
> -if (ptem == ctx->ptem) {
> +if ((pte0 & PTE_PTEM_MASK) == ctx->ptem) {
>  if (ctx->raddr != (hwaddr)-1ULL) {
>  /* all matches should have equal RPN, WIMG & PP */
>  if ((ctx->raddr & PTE_CHECK_MASK) != (pte1 & 
> PTE_CHECK_MASK)) {




Re: [PATCH 04/43] target/ppc/mmu_common.c: Remove single use local variable

2024-07-03 Thread Nicholas Piggin
On Mon May 27, 2024 at 9:12 AM AEST, BALATON Zoltan wrote:
> The ptev variable in ppc6xx_tlb_pte_check() is used only once and just
> obfuscates an otherwise clear value. Get rid of it.
>
> Signed-off-by: BALATON Zoltan 

Reviewed-by: Nicholas Piggin 

> ---
>  target/ppc/mmu_common.c | 5 ++---
>  1 file changed, 2 insertions(+), 3 deletions(-)
>
> diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c
> index 371ec24485..16578f7fa5 100644
> --- a/target/ppc/mmu_common.c
> +++ b/target/ppc/mmu_common.c
> @@ -98,13 +98,12 @@ static int ppc6xx_tlb_pte_check(mmu_ctx_t *ctx, 
> target_ulong pte0,
>  target_ulong pte1, int h,
>  MMUAccessType access_type)
>  {
> -int ret, pteh, ptev, pp;
> +int ret, pteh, pp;
>  
>  ret = -1;
>  /* Check validity and table match */
> -ptev = pte_is_valid(pte0);
>  pteh = (pte0 >> 6) & 1;
> -if (ptev && h == pteh) {
> +if (pte_is_valid(pte0) && h == pteh) {
>  /* Check vsid & api */
>  pp = pte1 & 0x0003;
>  if ((pte0 & PTE_PTEM_MASK) == ctx->ptem) {




Re: [PATCH 05/43] target/ppc/mmu_common.c: Remove another single use local variable

2024-07-03 Thread Nicholas Piggin
On Mon May 27, 2024 at 9:12 AM AEST, BALATON Zoltan wrote:
> In ppc6xx_tlb_pte_check() the pteh variable is used only once to
> compare to the h parameter of the function. Inline its value and use
> pteh name for the function parameter which is more descriptive.
>
> Signed-off-by: BALATON Zoltan 

Reviewed-by: Nicholas Piggin 

> ---
>  target/ppc/mmu_common.c | 7 +++
>  1 file changed, 3 insertions(+), 4 deletions(-)
>
> diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c
> index 16578f7fa5..b21f52290f 100644
> --- a/target/ppc/mmu_common.c
> +++ b/target/ppc/mmu_common.c
> @@ -95,15 +95,14 @@ int ppc6xx_tlb_getnum(CPUPPCState *env, target_ulong 
> eaddr,
>  }
>  
>  static int ppc6xx_tlb_pte_check(mmu_ctx_t *ctx, target_ulong pte0,
> -target_ulong pte1, int h,
> +target_ulong pte1, int pteh,
>  MMUAccessType access_type)
>  {
> -int ret, pteh, pp;
> +int ret, pp;
>  
>  ret = -1;
>  /* Check validity and table match */
> -pteh = (pte0 >> 6) & 1;
> -if (pte_is_valid(pte0) && h == pteh) {
> +if (pte_is_valid(pte0) && ((pte0 >> 6) & 1) == pteh) {
>  /* Check vsid & api */
>  pp = pte1 & 0x0003;
>  if ((pte0 & PTE_PTEM_MASK) == ctx->ptem) {




Re: [PATCH 06/43] target/ppc/mmu_common.c: Remove yet another single use local variable

2024-07-03 Thread Nicholas Piggin
On Mon May 27, 2024 at 9:12 AM AEST, BALATON Zoltan wrote:
> In ppc6xx_tlb_pte_check() the pp variable is used only once to pass it
> to a function parameter with the same name. Remove the local and
> inline the value. Also use named constant for the hex value to make it
> clearer.
>

Reviewed-by: Nicholas Piggin 

> Signed-off-by: BALATON Zoltan 
> ---
>  target/ppc/mmu_common.c | 5 ++---
>  1 file changed, 2 insertions(+), 3 deletions(-)
>
> diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c
> index b21f52290f..799d2ced9b 100644
> --- a/target/ppc/mmu_common.c
> +++ b/target/ppc/mmu_common.c
> @@ -98,13 +98,12 @@ static int ppc6xx_tlb_pte_check(mmu_ctx_t *ctx, 
> target_ulong pte0,
>  target_ulong pte1, int pteh,
>  MMUAccessType access_type)
>  {
> -int ret, pp;
> +int ret;
>  
>  ret = -1;
>  /* Check validity and table match */
>  if (pte_is_valid(pte0) && ((pte0 >> 6) & 1) == pteh) {
>  /* Check vsid & api */
> -pp = pte1 & 0x0003;
>  if ((pte0 & PTE_PTEM_MASK) == ctx->ptem) {
>  if (ctx->raddr != (hwaddr)-1ULL) {
>  /* all matches should have equal RPN, WIMG & PP */
> @@ -115,7 +114,7 @@ static int ppc6xx_tlb_pte_check(mmu_ctx_t *ctx, 
> target_ulong pte0,
>  }
>  /* Keep the matching PTE information */
>  ctx->raddr = pte1;
> -ctx->prot = ppc_hash32_prot(ctx->key, pp, ctx->nx);
> +ctx->prot = ppc_hash32_prot(ctx->key, pte1 & HPTE32_R_PP, 
> ctx->nx);
>  if (check_prot_access_type(ctx->prot, access_type)) {
>  /* Access granted */
>  qemu_log_mask(CPU_LOG_MMU, "PTE access granted !\n");




Re: [PATCH 07/43] target/ppc/mmu_common.c: Return directly in ppc6xx_tlb_pte_check()

2024-07-03 Thread Nicholas Piggin
On Mon May 27, 2024 at 9:12 AM AEST, BALATON Zoltan wrote:
> Instead of using a local ret variable return directly and remove the
> local.
>
> Signed-off-by: BALATON Zoltan 

Some people probably prefer the existing style but I don't
mind early returns.

Reviewed-by: Nicholas Piggin 

> ---
>  target/ppc/mmu_common.c | 10 +++---
>  1 file changed, 3 insertions(+), 7 deletions(-)
>
> diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c
> index 799d2ced9b..a5ae11394d 100644
> --- a/target/ppc/mmu_common.c
> +++ b/target/ppc/mmu_common.c
> @@ -98,9 +98,6 @@ static int ppc6xx_tlb_pte_check(mmu_ctx_t *ctx, 
> target_ulong pte0,
>  target_ulong pte1, int pteh,
>  MMUAccessType access_type)
>  {
> -int ret;
> -
> -ret = -1;
>  /* Check validity and table match */
>  if (pte_is_valid(pte0) && ((pte0 >> 6) & 1) == pteh) {
>  /* Check vsid & api */
> @@ -118,16 +115,15 @@ static int ppc6xx_tlb_pte_check(mmu_ctx_t *ctx, 
> target_ulong pte0,
>  if (check_prot_access_type(ctx->prot, access_type)) {
>  /* Access granted */
>  qemu_log_mask(CPU_LOG_MMU, "PTE access granted !\n");
> -ret = 0;
> +return 0;
>  } else {
>  /* Access right violation */
>  qemu_log_mask(CPU_LOG_MMU, "PTE access rejected\n");
> -ret = -2;
> +return -2;
>  }
>  }
>  }
> -
> -return ret;
> +return -1;
>  }
>  
>  static int pte_update_flags(mmu_ctx_t *ctx, target_ulong *pte1p,




Re: [PATCH 08/43] target/ppc/mmu_common.c: Simplify ppc6xx_tlb_pte_check()

2024-07-03 Thread Nicholas Piggin
On Mon May 27, 2024 at 9:12 AM AEST, BALATON Zoltan wrote:
> Invert conditions to avoid deep nested ifs and return early instead.
> Remove some obvious comments that don't add more clarity.
>

Reviewed-by: Nicholas Piggin 

> Signed-off-by: BALATON Zoltan 
> ---
>  target/ppc/mmu_common.c | 43 ++---
>  1 file changed, 19 insertions(+), 24 deletions(-)
>
> diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c
> index a5ae11394d..28adb3ca10 100644
> --- a/target/ppc/mmu_common.c
> +++ b/target/ppc/mmu_common.c
> @@ -99,31 +99,26 @@ static int ppc6xx_tlb_pte_check(mmu_ctx_t *ctx, 
> target_ulong pte0,
>  MMUAccessType access_type)
>  {
>  /* Check validity and table match */
> -if (pte_is_valid(pte0) && ((pte0 >> 6) & 1) == pteh) {
> -/* Check vsid & api */
> -if ((pte0 & PTE_PTEM_MASK) == ctx->ptem) {
> -if (ctx->raddr != (hwaddr)-1ULL) {
> -/* all matches should have equal RPN, WIMG & PP */
> -if ((ctx->raddr & PTE_CHECK_MASK) != (pte1 & 
> PTE_CHECK_MASK)) {
> -qemu_log_mask(CPU_LOG_MMU, "Bad RPN/WIMG/PP\n");
> -return -3;
> -}
> -}
> -/* Keep the matching PTE information */
> -ctx->raddr = pte1;
> -ctx->prot = ppc_hash32_prot(ctx->key, pte1 & HPTE32_R_PP, 
> ctx->nx);
> -if (check_prot_access_type(ctx->prot, access_type)) {
> -/* Access granted */
> -qemu_log_mask(CPU_LOG_MMU, "PTE access granted !\n");
> -return 0;
> -} else {
> -/* Access right violation */
> -qemu_log_mask(CPU_LOG_MMU, "PTE access rejected\n");
> -return -2;
> -}
> -}
> +if (!pte_is_valid(pte0) || ((pte0 >> 6) & 1) != pteh ||
> +(pte0 & PTE_PTEM_MASK) != ctx->ptem) {
> +return -1;
> +}
> +/* all matches should have equal RPN, WIMG & PP */
> +if (ctx->raddr != (hwaddr)-1ULL &&
> +(ctx->raddr & PTE_CHECK_MASK) != (pte1 & PTE_CHECK_MASK)) {
> +qemu_log_mask(CPU_LOG_MMU, "Bad RPN/WIMG/PP\n");
> +return -3;
> +}
> +/* Keep the matching PTE information */
> +ctx->raddr = pte1;
> +ctx->prot = ppc_hash32_prot(ctx->key, pte1 & HPTE32_R_PP, ctx->nx);
> +if (check_prot_access_type(ctx->prot, access_type)) {
> +qemu_log_mask(CPU_LOG_MMU, "PTE access granted !\n");
> +return 0;
> +} else {
> +qemu_log_mask(CPU_LOG_MMU, "PTE access rejected\n");
> +return -2;
>  }
> -return -1;
>  }
>  
>  static int pte_update_flags(mmu_ctx_t *ctx, target_ulong *pte1p,




Re: [PATCH 09/43] target/ppc/mmu_common.c: Remove unused field from mmu_ctx_t

2024-07-03 Thread Nicholas Piggin
On Mon May 27, 2024 at 9:12 AM AEST, BALATON Zoltan wrote:
> The eaddr field of mmu_ctx_t is set once but never used so can be
> removed.
>
> Signed-off-by: BALATON Zoltan 

Reviewed-by: Nicholas Piggin 

> ---
>  target/ppc/mmu_common.c | 2 --
>  1 file changed, 2 deletions(-)
>
> diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c
> index 28adb3ca10..0a07023f48 100644
> --- a/target/ppc/mmu_common.c
> +++ b/target/ppc/mmu_common.c
> @@ -40,7 +40,6 @@
>  /* Context used internally during MMU translations */
>  typedef struct {
>  hwaddr raddr;  /* Real address */
> -hwaddr eaddr;  /* Effective address*/
>  int prot;  /* Protection bits  */
>  hwaddr hash[2];/* Pagetable hash values*/
>  target_ulong ptem; /* Virtual segment ID | API */
> @@ -348,7 +347,6 @@ static int mmu6xx_get_physical_address(CPUPPCState *env, 
> mmu_ctx_t *ctx,
>  
>  /* Perform segment based translation when no BATs matched */
>  pr = FIELD_EX64(env->msr, MSR, PR);
> -ctx->eaddr = eaddr;
>  
>  sr = env->sr[eaddr >> 28];
>  ctx->key = (((sr & 0x2000) && pr) ||




Re: [PATCH 10/43] target/ppc/mmu_common.c: Remove hash field from mmu_ctx_t

2024-07-03 Thread Nicholas Piggin
On Mon May 27, 2024 at 9:12 AM AEST, BALATON Zoltan wrote:
> Return hash value via a parameter and remove it from mmu_ctx_t.
>
> Signed-off-by: BALATON Zoltan 

Reviewed-by: Nicholas Piggin 

> ---
>  target/ppc/mmu_common.c | 19 ---
>  1 file changed, 8 insertions(+), 11 deletions(-)
>
> diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c
> index 0a07023f48..e3537c63c0 100644
> --- a/target/ppc/mmu_common.c
> +++ b/target/ppc/mmu_common.c
> @@ -41,7 +41,6 @@
>  typedef struct {
>  hwaddr raddr;  /* Real address */
>  int prot;  /* Protection bits  */
> -hwaddr hash[2];/* Pagetable hash values*/
>  target_ulong ptem; /* Virtual segment ID | API */
>  int key;   /* Access key   */
>  int nx;/* Non-execute area */
> @@ -331,7 +330,7 @@ static int get_bat_6xx_tlb(CPUPPCState *env, mmu_ctx_t 
> *ctx,
>  }
>  
>  static int mmu6xx_get_physical_address(CPUPPCState *env, mmu_ctx_t *ctx,
> -   target_ulong eaddr,
> +   target_ulong eaddr, hwaddr *hashp,
> MMUAccessType access_type, int type)
>  {
>  PowerPCCPU *cpu = env_archcpu(env);
> @@ -379,8 +378,7 @@ static int mmu6xx_get_physical_address(CPUPPCState *env, 
> mmu_ctx_t *ctx,
>  qemu_log_mask(CPU_LOG_MMU, "htab_base " HWADDR_FMT_plx " htab_mask "
>HWADDR_FMT_plx " hash " HWADDR_FMT_plx "\n",
>ppc_hash32_hpt_base(cpu), ppc_hash32_hpt_mask(cpu), 
> hash);
> -ctx->hash[0] = hash;
> -ctx->hash[1] = ~hash;
> +*hashp = hash;
>  
>  /* Initialize real address with an invalid value */
>  ctx->raddr = (hwaddr)-1ULL;
> @@ -761,8 +759,8 @@ static bool ppc_6xx_xlate(PowerPCCPU *cpu, vaddr eaddr,
>  CPUState *cs = CPU(cpu);
>  CPUPPCState *env = &cpu->env;
>  mmu_ctx_t ctx;
> -int type;
> -int ret;
> +hwaddr hash = 0; /* init to 0 to avoid used uninit warning */
> +int type, ret;
>  
>  if (ppc_real_mode_xlate(cpu, eaddr, access_type, raddrp, psizep, protp)) 
> {
>  return true;
> @@ -779,9 +777,8 @@ static bool ppc_6xx_xlate(PowerPCCPU *cpu, vaddr eaddr,
>  }
>  
>  ctx.prot = 0;
> -ctx.hash[0] = 0;
> -ctx.hash[1] = 0;
> -ret = mmu6xx_get_physical_address(env, &ctx, eaddr, access_type, type);
> +ret = mmu6xx_get_physical_address(env, &ctx, eaddr, &hash,
> +  access_type, type);
>  if (ret == 0) {
>  *raddrp = ctx.raddr;
>  *protp = ctx.prot;
> @@ -834,9 +831,9 @@ static bool ppc_6xx_xlate(PowerPCCPU *cpu, vaddr eaddr,
>  tlb_miss:
>  env->error_code |= ctx.key << 19;
>  env->spr[SPR_HASH1] = ppc_hash32_hpt_base(cpu) +
> -  get_pteg_offset32(cpu, ctx.hash[0]);
> +  get_pteg_offset32(cpu, hash);
>  env->spr[SPR_HASH2] = ppc_hash32_hpt_base(cpu) +
> -  get_pteg_offset32(cpu, ctx.hash[1]);
> +  get_pteg_offset32(cpu, ~hash);
>  break;
>  case -2:
>  /* Access rights violation */




Re: [PATCH 11/43] target/ppc/mmu_common.c: Remove pte_update_flags()

2024-07-03 Thread Nicholas Piggin
On Mon May 27, 2024 at 9:12 AM AEST, BALATON Zoltan wrote:
> This function is used only once, its return value is ignored and one
> of its parameters is a return value from a previous call. It is better
> to inline it in the caller and remove it.
>
> Signed-off-by: BALATON Zoltan 
> ---
>  target/ppc/mmu_common.c | 41 +
>  1 file changed, 13 insertions(+), 28 deletions(-)
>
> diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c
> index e3537c63c0..c4902b7632 100644
> --- a/target/ppc/mmu_common.c
> +++ b/target/ppc/mmu_common.c
> @@ -119,39 +119,14 @@ static int ppc6xx_tlb_pte_check(mmu_ctx_t *ctx, 
> target_ulong pte0,
>  }
>  }
>  
> -static int pte_update_flags(mmu_ctx_t *ctx, target_ulong *pte1p,
> -int ret, MMUAccessType access_type)
> -{
> -int store = 0;
> -
> -/* Update page flags */
> -if (!(*pte1p & 0x0100)) {
> -/* Update accessed flag */
> -*pte1p |= 0x0100;
> -store = 1;
> -}
> -if (!(*pte1p & 0x0080)) {
> -if (access_type == MMU_DATA_STORE && ret == 0) {
> -/* Update changed flag */
> -*pte1p |= 0x0080;
> -store = 1;
> -} else {
> -/* Force page fault for first write access */
> -ctx->prot &= ~PAGE_WRITE;
> -}
> -}
> -
> -return store;
> -}
> -
>  /* Software driven TLB helpers */
>  
>  static int ppc6xx_tlb_check(CPUPPCState *env, mmu_ctx_t *ctx,
>  target_ulong eaddr, MMUAccessType access_type)
>  {
>  ppc6xx_tlb_t *tlb;
> -int nr, best, way;
> -int ret;
> +target_ulong *pte1p;
> +int nr, best, way, ret;
>  
>  best = -1;
>  ret = -1; /* No TLB found */
> @@ -204,7 +179,17 @@ done:
>" prot=%01x ret=%d\n",
>ctx->raddr & TARGET_PAGE_MASK, ctx->prot, ret);
>  /* Update page flags */
> -pte_update_flags(ctx, &env->tlb.tlb6[best].pte1, ret, access_type);
> +pte1p = &env->tlb.tlb6[best].pte1;
> +*pte1p |= 0x0100; /* Update accessed flag */
> +if (!(*pte1p & 0x0080)) {
> +if (access_type == MMU_DATA_STORE && ret == 0) {
> +/* Update changed flag */
> +*pte1p |= 0x0080;
> +} else {
> +/* Force page fault for first write access */
> +ctx->prot &= ~PAGE_WRITE;

Out of curiosity, I guess this unusual part is because ctx->prot can get
PAGE_WRITE set in the bat lookup, then it has to be cleared if the PTE
does not have changed bit?

> +}
> +}
>  }

Reviewed-by: Nicholas Piggin 

>  #if defined(DUMP_PAGE_TABLES)
>  if (qemu_loglevel_mask(CPU_LOG_MMU)) {




Re: [PATCH 12/43] target/ppc/mmu_common.c: Remove nx field from mmu_ctx_t

2024-07-03 Thread Nicholas Piggin
On Mon May 27, 2024 at 9:12 AM AEST, BALATON Zoltan wrote:
> Pass it as a parameter instead. Also use named constants instead of
> hex values when extracting bits from SR.

Reviewed-by: Nicholas Piggin 

>
> Signed-off-by: BALATON Zoltan 
> ---
>  target/ppc/mmu_common.c | 24 
>  1 file changed, 12 insertions(+), 12 deletions(-)
>
> diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c
> index c4902b7632..9f402a979d 100644
> --- a/target/ppc/mmu_common.c
> +++ b/target/ppc/mmu_common.c
> @@ -43,7 +43,6 @@ typedef struct {
>  int prot;  /* Protection bits  */
>  target_ulong ptem; /* Virtual segment ID | API */
>  int key;   /* Access key   */
> -int nx;/* Non-execute area */
>  } mmu_ctx_t;
>  
>  void ppc_store_sdr1(CPUPPCState *env, target_ulong value)
> @@ -94,7 +93,7 @@ int ppc6xx_tlb_getnum(CPUPPCState *env, target_ulong eaddr,
>  
>  static int ppc6xx_tlb_pte_check(mmu_ctx_t *ctx, target_ulong pte0,
>  target_ulong pte1, int pteh,
> -MMUAccessType access_type)
> +MMUAccessType access_type, bool nx)
>  {
>  /* Check validity and table match */
>  if (!pte_is_valid(pte0) || ((pte0 >> 6) & 1) != pteh ||
> @@ -109,7 +108,7 @@ static int ppc6xx_tlb_pte_check(mmu_ctx_t *ctx, 
> target_ulong pte0,
>  }
>  /* Keep the matching PTE information */
>  ctx->raddr = pte1;
> -ctx->prot = ppc_hash32_prot(ctx->key, pte1 & HPTE32_R_PP, ctx->nx);
> +ctx->prot = ppc_hash32_prot(ctx->key, pte1 & HPTE32_R_PP, nx);
>  if (check_prot_access_type(ctx->prot, access_type)) {
>  qemu_log_mask(CPU_LOG_MMU, "PTE access granted !\n");
>  return 0;
> @@ -121,8 +120,9 @@ static int ppc6xx_tlb_pte_check(mmu_ctx_t *ctx, 
> target_ulong pte0,
>  
>  /* Software driven TLB helpers */
>  
> -static int ppc6xx_tlb_check(CPUPPCState *env, mmu_ctx_t *ctx,
> -target_ulong eaddr, MMUAccessType access_type)
> +static int ppc6xx_tlb_check(CPUPPCState *env,
> +mmu_ctx_t *ctx, target_ulong eaddr,
> +MMUAccessType access_type, bool nx)
>  {
>  ppc6xx_tlb_t *tlb;
>  target_ulong *pte1p;
> @@ -150,7 +150,7 @@ static int ppc6xx_tlb_check(CPUPPCState *env, mmu_ctx_t 
> *ctx,
>access_type == MMU_DATA_STORE ? 'S' : 'L',
>access_type == MMU_INST_FETCH ? 'I' : 'D');
>  switch (ppc6xx_tlb_pte_check(ctx, tlb->pte0, tlb->pte1,
> - 0, access_type)) {
> + 0, access_type, nx)) {
>  case -2:
>  /* Access violation */
>  ret = -2;
> @@ -322,7 +322,7 @@ static int mmu6xx_get_physical_address(CPUPPCState *env, 
> mmu_ctx_t *ctx,
>  hwaddr hash;
>  target_ulong vsid, sr, pgidx;
>  int ds, target_page_bits;
> -bool pr;
> +bool pr, nx;
>  
>  /* First try to find a BAT entry if there are any */
>  if (env->nb_BATs && get_bat_6xx_tlb(env, ctx, eaddr, access_type) == 0) {
> @@ -336,8 +336,8 @@ static int mmu6xx_get_physical_address(CPUPPCState *env, 
> mmu_ctx_t *ctx,
>  ctx->key = (((sr & 0x2000) && pr) ||
>  ((sr & 0x4000) && !pr)) ? 1 : 0;
>  ds = sr & 0x8000 ? 1 : 0;
> -ctx->nx = sr & 0x1000 ? 1 : 0;
> -vsid = sr & 0x00FF;
> +nx = sr & SR32_NX;
> +vsid = sr & SR32_VSID;
>  target_page_bits = TARGET_PAGE_BITS;
>  qemu_log_mask(CPU_LOG_MMU,
>"Check segment v=" TARGET_FMT_lx " %d " TARGET_FMT_lx
> @@ -352,10 +352,10 @@ static int mmu6xx_get_physical_address(CPUPPCState 
> *env, mmu_ctx_t *ctx,
>  ctx->ptem = (vsid << 7) | (pgidx >> 10);
>  
>  qemu_log_mask(CPU_LOG_MMU, "pte segment: key=%d ds %d nx %d vsid "
> -  TARGET_FMT_lx "\n", ctx->key, ds, ctx->nx, vsid);
> +  TARGET_FMT_lx "\n", ctx->key, ds, nx, vsid);

This could be !!nx for consistency.

Reviewed-by: Nicholas Piggin 

>  if (!ds) {
>  /* Check if instruction fetch is allowed, if needed */
> -if (type == ACCESS_CODE && ctx->nx) {
> +if (type == ACCESS_CODE && nx) {
>  qemu_log_mask(CPU_LOG_MMU, "No access allowed\n");
>  return -3;
>  }
> @@ -368,7 +368,7 @@ static int mmu6xx_get_physical_address(CPUPPCState *env, 
> mmu_ctx_t *ctx,
>  /* Initialize real address with an invalid value */
>  ctx->raddr = (hwaddr)-1ULL;
>  /* Software TLB search */
> -return ppc6xx_tlb_check(env, ctx, eaddr, access_type);
> +return ppc6xx_tlb_check(env, ctx, eaddr, access_type, nx);
>  }
>  
>  /* Direct-store segment : absolutely *BUGGY* for now */




Re: [PATCH 13/43] target/ppc/mmu_common.c: Convert local variable to bool

2024-07-03 Thread Nicholas Piggin
On Mon May 27, 2024 at 9:12 AM AEST, BALATON Zoltan wrote:
> In mmu6xx_get_physical_address() ds is used as bool, declare it as
> such. Also use named constant instead of hex value.

Oh nx was bool, ignore my previous comment then.

Reviewed-by: Nicholas Piggin 

>
> Signed-off-by: BALATON Zoltan 
> ---
>  target/ppc/mmu_common.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c
> index 9f402a979d..5145bde7f9 100644
> --- a/target/ppc/mmu_common.c
> +++ b/target/ppc/mmu_common.c
> @@ -321,8 +321,8 @@ static int mmu6xx_get_physical_address(CPUPPCState *env, 
> mmu_ctx_t *ctx,
>  PowerPCCPU *cpu = env_archcpu(env);
>  hwaddr hash;
>  target_ulong vsid, sr, pgidx;
> -int ds, target_page_bits;
> -bool pr, nx;
> +int target_page_bits;
> +bool pr, ds, nx;
>  
>  /* First try to find a BAT entry if there are any */
>  if (env->nb_BATs && get_bat_6xx_tlb(env, ctx, eaddr, access_type) == 0) {
> @@ -335,7 +335,7 @@ static int mmu6xx_get_physical_address(CPUPPCState *env, 
> mmu_ctx_t *ctx,
>  sr = env->sr[eaddr >> 28];
>  ctx->key = (((sr & 0x2000) && pr) ||
>  ((sr & 0x4000) && !pr)) ? 1 : 0;
> -ds = sr & 0x8000 ? 1 : 0;
> +ds = sr & SR32_T;
>  nx = sr & SR32_NX;
>  vsid = sr & SR32_VSID;
>  target_page_bits = TARGET_PAGE_BITS;




Re: [PATCH 14/43] target/ppc/mmu_common.c: Remove single use local variable

2024-07-03 Thread Nicholas Piggin
On Mon May 27, 2024 at 9:12 AM AEST, BALATON Zoltan wrote:
> In mmu6xx_get_physical_address() the target_page_bits local is declared
> only to use TARGET_PAGE_BITS once. Drop the unneeded variable.

Reviewed-by: Nicholas Piggin 

>
> Signed-off-by: BALATON Zoltan 
> ---
>  target/ppc/mmu_common.c | 4 +---
>  1 file changed, 1 insertion(+), 3 deletions(-)
>
> diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c
> index 5145bde7f9..0152e8d875 100644
> --- a/target/ppc/mmu_common.c
> +++ b/target/ppc/mmu_common.c
> @@ -321,7 +321,6 @@ static int mmu6xx_get_physical_address(CPUPPCState *env, 
> mmu_ctx_t *ctx,
>  PowerPCCPU *cpu = env_archcpu(env);
>  hwaddr hash;
>  target_ulong vsid, sr, pgidx;
> -int target_page_bits;
>  bool pr, ds, nx;
>  
>  /* First try to find a BAT entry if there are any */
> @@ -338,7 +337,6 @@ static int mmu6xx_get_physical_address(CPUPPCState *env, 
> mmu_ctx_t *ctx,
>  ds = sr & SR32_T;
>  nx = sr & SR32_NX;
>  vsid = sr & SR32_VSID;
> -target_page_bits = TARGET_PAGE_BITS;
>  qemu_log_mask(CPU_LOG_MMU,
>"Check segment v=" TARGET_FMT_lx " %d " TARGET_FMT_lx
>" nip=" TARGET_FMT_lx " lr=" TARGET_FMT_lx
> @@ -347,7 +345,7 @@ static int mmu6xx_get_physical_address(CPUPPCState *env, 
> mmu_ctx_t *ctx,
>(int)FIELD_EX64(env->msr, MSR, IR),
>(int)FIELD_EX64(env->msr, MSR, DR), pr ? 1 : 0,
>access_type == MMU_DATA_STORE, type);
> -pgidx = (eaddr & ~SEGMENT_MASK_256M) >> target_page_bits;
> +pgidx = (eaddr & ~SEGMENT_MASK_256M) >> TARGET_PAGE_BITS;
>  hash = vsid ^ pgidx;
>  ctx->ptem = (vsid << 7) | (pgidx >> 10);
>  




Re: [PATCH 15/43] target/ppc/mmu_common.c: Simplify a switch statement

2024-07-03 Thread Nicholas Piggin
On Mon May 27, 2024 at 9:12 AM AEST, BALATON Zoltan wrote:
> In mmu6xx_get_physical_address() the switch handles all cases so the
> default is never reached and can be dropped. Also group together cases
> which just return -4.

Reviewed-by: Nicholas Piggin 

>
> Signed-off-by: BALATON Zoltan 
> ---
>  target/ppc/mmu_common.c | 19 ---
>  1 file changed, 4 insertions(+), 15 deletions(-)
>
> diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c
> index 0152e8d875..b2993e8563 100644
> --- a/target/ppc/mmu_common.c
> +++ b/target/ppc/mmu_common.c
> @@ -375,15 +375,6 @@ static int mmu6xx_get_physical_address(CPUPPCState *env, 
> mmu_ctx_t *ctx,
>  case ACCESS_INT:
>  /* Integer load/store : only access allowed */
>  break;
> -case ACCESS_CODE:
> -/* No code fetch is allowed in direct-store areas */
> -return -4;
> -case ACCESS_FLOAT:
> -/* Floating point load/store */
> -return -4;
> -case ACCESS_RES:
> -/* lwarx, ldarx or srwcx. */
> -return -4;
>  case ACCESS_CACHE:
>  /*
>   * dcba, dcbt, dcbtst, dcbf, dcbi, dcbst, dcbz, or icbi
> @@ -393,12 +384,10 @@ static int mmu6xx_get_physical_address(CPUPPCState 
> *env, mmu_ctx_t *ctx,
>   */
>  ctx->raddr = eaddr;
>  return 0;
> -case ACCESS_EXT:
> -/* eciwx or ecowx */
> -return -4;
> -default:
> -qemu_log_mask(CPU_LOG_MMU, "ERROR: instruction should not need 
> address"
> -   " translation\n");
> +case ACCESS_CODE: /* No code fetch is allowed in direct-store areas */
> +case ACCESS_FLOAT: /* Floating point load/store */
> +case ACCESS_RES: /* lwarx, ldarx or srwcx. */
> +case ACCESS_EXT: /* eciwx or ecowx */
>  return -4;
>  }
>  if ((access_type == MMU_DATA_STORE || ctx->key != 1) &&




Re: [PATCH 16/43] target/ppc/mmu_common.c: Inline and remove ppc6xx_tlb_pte_check()

2024-07-03 Thread Nicholas Piggin
On Mon May 27, 2024 at 9:12 AM AEST, BALATON Zoltan wrote:
> This function is only called once and we can make the caller simpler
> by inlining it.

I'm inclined to agree. Splitting into functions can be nice,
but translating return values here is pretty horrible.

I think it looks right.

Reviewed-by: Nicholas Piggin 

>
> Signed-off-by: BALATON Zoltan 
> ---
>  target/ppc/mmu_common.c | 71 +
>  1 file changed, 22 insertions(+), 49 deletions(-)
>
> diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c
> index b2993e8563..784e833ff2 100644
> --- a/target/ppc/mmu_common.c
> +++ b/target/ppc/mmu_common.c
> @@ -91,33 +91,6 @@ int ppc6xx_tlb_getnum(CPUPPCState *env, target_ulong eaddr,
>  return nr;
>  }
>  
> -static int ppc6xx_tlb_pte_check(mmu_ctx_t *ctx, target_ulong pte0,
> -target_ulong pte1, int pteh,
> -MMUAccessType access_type, bool nx)
> -{
> -/* Check validity and table match */
> -if (!pte_is_valid(pte0) || ((pte0 >> 6) & 1) != pteh ||
> -(pte0 & PTE_PTEM_MASK) != ctx->ptem) {
> -return -1;
> -}
> -/* all matches should have equal RPN, WIMG & PP */
> -if (ctx->raddr != (hwaddr)-1ULL &&
> -(ctx->raddr & PTE_CHECK_MASK) != (pte1 & PTE_CHECK_MASK)) {
> -qemu_log_mask(CPU_LOG_MMU, "Bad RPN/WIMG/PP\n");
> -return -3;
> -}
> -/* Keep the matching PTE information */
> -ctx->raddr = pte1;
> -ctx->prot = ppc_hash32_prot(ctx->key, pte1 & HPTE32_R_PP, nx);
> -if (check_prot_access_type(ctx->prot, access_type)) {
> -qemu_log_mask(CPU_LOG_MMU, "PTE access granted !\n");
> -return 0;
> -} else {
> -qemu_log_mask(CPU_LOG_MMU, "PTE access rejected\n");
> -return -2;
> -}
> -}
> -
>  /* Software driven TLB helpers */
>  
>  static int ppc6xx_tlb_check(CPUPPCState *env,
> @@ -149,32 +122,32 @@ static int ppc6xx_tlb_check(CPUPPCState *env,
>tlb->EPN, eaddr, tlb->pte1,
>access_type == MMU_DATA_STORE ? 'S' : 'L',
>access_type == MMU_INST_FETCH ? 'I' : 'D');
> -switch (ppc6xx_tlb_pte_check(ctx, tlb->pte0, tlb->pte1,
> - 0, access_type, nx)) {
> -case -2:
> -/* Access violation */
> -ret = -2;
> -best = nr;
> -break;
> -case -1: /* No match */
> -case -3: /* TLB inconsistency */
> -default:
> -break;
> -case 0:
> -/* access granted */
> -/*
> - * XXX: we should go on looping to check all TLBs
> - *  consistency but we can speed-up the whole thing as
> - *  the result would be undefined if TLBs are not
> - *  consistent.
> - */
> +/* Check validity and table match */
> +if (!pte_is_valid(tlb->pte0) || ((tlb->pte0 >> 6) & 1) != 0 ||
> +(tlb->pte0 & PTE_PTEM_MASK) != ctx->ptem) {
> +continue;
> +}
> +/* all matches should have equal RPN, WIMG & PP */
> +if (ctx->raddr != (hwaddr)-1ULL &&
> +(ctx->raddr & PTE_CHECK_MASK) != (tlb->pte1 & PTE_CHECK_MASK)) {
> +qemu_log_mask(CPU_LOG_MMU, "Bad RPN/WIMG/PP\n");
> +/* TLB inconsistency */
> +continue;
> +}
> +/* Keep the matching PTE information */
> +best = nr;
> +ctx->raddr = tlb->pte1;
> +ctx->prot = ppc_hash32_prot(ctx->key, tlb->pte1 & HPTE32_R_PP, nx);
> +if (check_prot_access_type(ctx->prot, access_type)) {
> +qemu_log_mask(CPU_LOG_MMU, "PTE access granted !\n");
>  ret = 0;
> -best = nr;
> -goto done;
> +break;
> +} else {
> +qemu_log_mask(CPU_LOG_MMU, "PTE access rejected\n");
> +ret = -2;
>  }
>  }
>  if (best != -1) {
> -done:
>  qemu_log_mask(CPU_LOG_MMU, "found TLB at addr " HWADDR_FMT_plx
>" prot=%01x ret=%d\n",
>ctx->raddr & TARGET_PAGE_MASK, ctx->prot, ret);




Re: [PATCH 17/43] target/ppc/mmu_common.c: Remove ptem field from mmu_ctx_t

2024-07-03 Thread Nicholas Piggin
On Mon May 27, 2024 at 9:12 AM AEST, BALATON Zoltan wrote:
> Instead of passing around ptem in context use it once in the same
> function so it can be removed from mmu_ctx_t.

The downside here is now updating the SPRs in different places.

You already passed hash back with a pointer elsewhere, what about
passing ptem back? I would prefer that but you're maintaining
this code so if you prefer this... Either way,

Reviewed-by: Nicholas Piggin 

>
> Signed-off-by: BALATON Zoltan 
> ---
>  target/ppc/mmu_common.c | 23 ++-
>  1 file changed, 14 insertions(+), 9 deletions(-)
>
> diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c
> index 784e833ff2..339df377e8 100644
> --- a/target/ppc/mmu_common.c
> +++ b/target/ppc/mmu_common.c
> @@ -41,7 +41,6 @@
>  typedef struct {
>  hwaddr raddr;  /* Real address */
>  int prot;  /* Protection bits  */
> -target_ulong ptem; /* Virtual segment ID | API */
>  int key;   /* Access key   */
>  } mmu_ctx_t;
>  
> @@ -95,16 +94,18 @@ int ppc6xx_tlb_getnum(CPUPPCState *env, target_ulong 
> eaddr,
>  
>  static int ppc6xx_tlb_check(CPUPPCState *env,
>  mmu_ctx_t *ctx, target_ulong eaddr,
> -MMUAccessType access_type, bool nx)
> +MMUAccessType access_type, target_ulong ptem,
> +bool nx)
>  {
>  ppc6xx_tlb_t *tlb;
>  target_ulong *pte1p;
>  int nr, best, way, ret;
> +bool is_code = (access_type == MMU_INST_FETCH);
>  
>  best = -1;
>  ret = -1; /* No TLB found */
>  for (way = 0; way < env->nb_ways; way++) {
> -nr = ppc6xx_tlb_getnum(env, eaddr, way, access_type == 
> MMU_INST_FETCH);
> +nr = ppc6xx_tlb_getnum(env, eaddr, way, is_code);
>  tlb = &env->tlb.tlb6[nr];
>  /* This test "emulates" the PTE index match for hardware TLBs */
>  if ((eaddr & TARGET_PAGE_MASK) != tlb->EPN) {
> @@ -124,7 +125,7 @@ static int ppc6xx_tlb_check(CPUPPCState *env,
>access_type == MMU_INST_FETCH ? 'I' : 'D');
>  /* Check validity and table match */
>  if (!pte_is_valid(tlb->pte0) || ((tlb->pte0 >> 6) & 1) != 0 ||
> -(tlb->pte0 & PTE_PTEM_MASK) != ctx->ptem) {
> +(tlb->pte0 & PTE_PTEM_MASK) != ptem) {
>  continue;
>  }
>  /* all matches should have equal RPN, WIMG & PP */
> @@ -164,6 +165,10 @@ static int ppc6xx_tlb_check(CPUPPCState *env,
>  }
>  }
>  }
> +if (ret == -1) {
> +int r = is_code ? SPR_ICMP : SPR_DCMP;
> +env->spr[r] = ptem;
> +}
>  #if defined(DUMP_PAGE_TABLES)
>  if (qemu_loglevel_mask(CPU_LOG_MMU)) {
>  CPUState *cs = env_cpu(env);
> @@ -293,7 +298,7 @@ static int mmu6xx_get_physical_address(CPUPPCState *env, 
> mmu_ctx_t *ctx,
>  {
>  PowerPCCPU *cpu = env_archcpu(env);
>  hwaddr hash;
> -target_ulong vsid, sr, pgidx;
> +target_ulong vsid, sr, pgidx, ptem;
>  bool pr, ds, nx;
>  
>  /* First try to find a BAT entry if there are any */
> @@ -320,7 +325,7 @@ static int mmu6xx_get_physical_address(CPUPPCState *env, 
> mmu_ctx_t *ctx,
>access_type == MMU_DATA_STORE, type);
>  pgidx = (eaddr & ~SEGMENT_MASK_256M) >> TARGET_PAGE_BITS;
>  hash = vsid ^ pgidx;
> -ctx->ptem = (vsid << 7) | (pgidx >> 10);
> +ptem = (vsid << 7) | (pgidx >> 10); /* Virtual segment ID | API */
>  
>  qemu_log_mask(CPU_LOG_MMU, "pte segment: key=%d ds %d nx %d vsid "
>TARGET_FMT_lx "\n", ctx->key, ds, nx, vsid);
> @@ -339,7 +344,7 @@ static int mmu6xx_get_physical_address(CPUPPCState *env, 
> mmu_ctx_t *ctx,
>  /* Initialize real address with an invalid value */
>  ctx->raddr = (hwaddr)-1ULL;
>  /* Software TLB search */
> -return ppc6xx_tlb_check(env, ctx, eaddr, access_type, nx);
> +return ppc6xx_tlb_check(env, ctx, eaddr, access_type, ptem, nx);
>  }
>  
>  /* Direct-store segment : absolutely *BUGGY* for now */
> @@ -741,7 +746,7 @@ static bool ppc_6xx_xlate(PowerPCCPU *cpu, vaddr eaddr,
>  cs->exception_index = POWERPC_EXCP_IFTLB;
>  env->error_code = 1 << 18;
>  env->spr[SPR_IMISS] = eaddr;
> -env->spr[SPR_ICMP] = 0x8000 | ctx.ptem;
> +env->spr[SPR_ICMP] |= 0x8000;
>  goto tlb_miss;
>  case -2:
>  /* Access rights violation */
> @@ -772,7 +777,7 @@ static bool ppc_6xx_xlate(PowerPCCPU *cpu, vaddr eaddr,
>  env->error_code = 0;
>  }
>  env->spr[SPR_DMISS] = eaddr;
> -env->spr[SPR_DCMP] = 0x8000 | ctx.ptem;
> +env->spr[SPR_DCMP] |= 0x8000;
>  tlb_miss:
>  env->error_code |= ctx.key << 19;
>  env->spr[SPR_HASH1] = ppc_hash32_hpt_base(cpu) +




Re: [PATCH 18/43] target/ppc: Add function to get protection key for hash32 MMU

2024-07-03 Thread Nicholas Piggin
On Mon May 27, 2024 at 9:12 AM AEST, BALATON Zoltan wrote:
> Add a function to get key bit from SR and use it instead of open coded
> version.
>

Nice.

Reviewed-by: Nicholas Piggin 

> Signed-off-by: BALATON Zoltan 
> ---
>  target/ppc/mmu-hash32.c | 9 ++---
>  target/ppc/mmu-hash32.h | 5 +
>  target/ppc/mmu_common.c | 3 +--
>  3 files changed, 12 insertions(+), 5 deletions(-)
>
> diff --git a/target/ppc/mmu-hash32.c b/target/ppc/mmu-hash32.c
> index 8a446c8a7d..93559447ff 100644
> --- a/target/ppc/mmu-hash32.c
> +++ b/target/ppc/mmu-hash32.c
> @@ -42,7 +42,7 @@ static int ppc_hash32_pte_prot(int mmu_idx,
>  {
>  unsigned pp, key;
>  
> -key = !!(mmuidx_pr(mmu_idx) ? (sr & SR32_KP) : (sr & SR32_KS));
> +key = ppc_hash32_key(mmuidx_pr(mmu_idx), sr);
>  pp = pte.pte1 & HPTE32_R_PP;
>  
>  return ppc_hash32_prot(key, pp, !!(sr & SR32_NX));
> @@ -145,7 +145,6 @@ static bool ppc_hash32_direct_store(PowerPCCPU *cpu, 
> target_ulong sr,
>  {
>  CPUState *cs = CPU(cpu);
>  CPUPPCState *env = &cpu->env;
> -int key = !!(mmuidx_pr(mmu_idx) ? (sr & SR32_KP) : (sr & SR32_KS));
>  
>  qemu_log_mask(CPU_LOG_MMU, "direct store...\n");
>  
> @@ -206,7 +205,11 @@ static bool ppc_hash32_direct_store(PowerPCCPU *cpu, 
> target_ulong sr,
>  cpu_abort(cs, "ERROR: insn should not need address translation\n");
>  }
>  
> -*prot = key ? PAGE_READ | PAGE_WRITE : PAGE_READ;
> +if (ppc_hash32_key(mmuidx_pr(mmu_idx), sr)) {
> +*prot = PAGE_READ | PAGE_WRITE;
> +} else {
> +*prot = PAGE_READ;
> +}
>  if (check_prot_access_type(*prot, access_type)) {
>  *raddr = eaddr;
>  return true;
> diff --git a/target/ppc/mmu-hash32.h b/target/ppc/mmu-hash32.h
> index bc4eedbecc..5902cf8333 100644
> --- a/target/ppc/mmu-hash32.h
> +++ b/target/ppc/mmu-hash32.h
> @@ -102,6 +102,11 @@ static inline void ppc_hash32_store_hpte1(PowerPCCPU 
> *cpu,
>  stl_phys(CPU(cpu)->as, base + pte_offset + HASH_PTE_SIZE_32 / 2, pte1);
>  }
>  
> +static inline bool ppc_hash32_key(bool pr, target_ulong sr)
> +{
> +return pr ? (sr & SR32_KP) : (sr & SR32_KS);
> +}
> +
>  static inline int ppc_hash32_prot(bool key, int pp, bool nx)
>  {
>  int prot;
> diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c
> index 339df377e8..1ed2f45ac7 100644
> --- a/target/ppc/mmu_common.c
> +++ b/target/ppc/mmu_common.c
> @@ -310,8 +310,7 @@ static int mmu6xx_get_physical_address(CPUPPCState *env, 
> mmu_ctx_t *ctx,
>  pr = FIELD_EX64(env->msr, MSR, PR);
>  
>  sr = env->sr[eaddr >> 28];
> -ctx->key = (((sr & 0x2000) && pr) ||
> -((sr & 0x4000) && !pr)) ? 1 : 0;
> +ctx->key = ppc_hash32_key(pr, sr);
>  ds = sr & SR32_T;
>  nx = sr & SR32_NX;
>  vsid = sr & SR32_VSID;




Re: [PATCH 19/43] target/ppc/mmu-hash32.c: Inline and remove ppc_hash32_pte_prot()

2024-07-03 Thread Nicholas Piggin
On Mon May 27, 2024 at 9:12 AM AEST, BALATON Zoltan wrote:
> This is used only once and can be inlined.
>

Reviewed-by: Nicholas Piggin 

> Signed-off-by: BALATON Zoltan 
> ---
>  target/ppc/mmu-hash32.c | 19 ---
>  1 file changed, 4 insertions(+), 15 deletions(-)
>
> diff --git a/target/ppc/mmu-hash32.c b/target/ppc/mmu-hash32.c
> index 93559447ff..160311de87 100644
> --- a/target/ppc/mmu-hash32.c
> +++ b/target/ppc/mmu-hash32.c
> @@ -37,17 +37,6 @@
>  #  define LOG_BATS(...) do { } while (0)
>  #endif
>  
> -static int ppc_hash32_pte_prot(int mmu_idx,
> -   target_ulong sr, ppc_hash_pte32_t pte)
> -{
> -unsigned pp, key;
> -
> -key = ppc_hash32_key(mmuidx_pr(mmu_idx), sr);
> -pp = pte.pte1 & HPTE32_R_PP;
> -
> -return ppc_hash32_prot(key, pp, !!(sr & SR32_NX));
> -}
> -
>  static target_ulong hash32_bat_size(int mmu_idx,
>  target_ulong batu, target_ulong batl)
>  {
> @@ -341,10 +330,10 @@ bool ppc_hash32_xlate(PowerPCCPU *cpu, vaddr eaddr, 
> MMUAccessType access_type,
>  CPUState *cs = CPU(cpu);
>  CPUPPCState *env = &cpu->env;
>  target_ulong sr;
> -hwaddr pte_offset;
> +hwaddr pte_offset, raddr;
>  ppc_hash_pte32_t pte;
> +bool key;
>  int prot;
> -hwaddr raddr;
>  
>  /* There are no hash32 large pages. */
>  *psizep = TARGET_PAGE_BITS;
> @@ -426,8 +415,8 @@ bool ppc_hash32_xlate(PowerPCCPU *cpu, vaddr eaddr, 
> MMUAccessType access_type,
>  "found PTE at offset %08" HWADDR_PRIx "\n", pte_offset);
>  
>  /* 7. Check access permissions */
> -
> -prot = ppc_hash32_pte_prot(mmu_idx, sr, pte);
> +key = ppc_hash32_key(mmuidx_pr(mmu_idx), sr);
> +prot = ppc_hash32_prot(key, pte.pte1 & HPTE32_R_PP, sr & SR32_NX);
>  
>  if (!check_prot_access_type(prot, access_type)) {
>  /* Access right violation */




Re: [PATCH 20/43] target/ppc/mmu_common.c: Init variable in function that relies on it

2024-07-03 Thread Nicholas Piggin
On Mon May 27, 2024 at 9:12 AM AEST, BALATON Zoltan wrote:
> The ppc6xx_tlb_check() relies on the caller to initialise raddr field
> in ctx. Move this init from the only caller into the function.

Reviewed-by: Nicholas Piggin 

>
> Signed-off-by: BALATON Zoltan 
> ---
>  target/ppc/mmu_common.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c
> index 1ed2f45ac7..fe321ab49c 100644
> --- a/target/ppc/mmu_common.c
> +++ b/target/ppc/mmu_common.c
> @@ -102,6 +102,8 @@ static int ppc6xx_tlb_check(CPUPPCState *env,
>  int nr, best, way, ret;
>  bool is_code = (access_type == MMU_INST_FETCH);
>  
> +/* Initialize real address with an invalid value */
> +ctx->raddr = (hwaddr)-1ULL;
>  best = -1;
>  ret = -1; /* No TLB found */
>  for (way = 0; way < env->nb_ways; way++) {
> @@ -340,8 +342,6 @@ static int mmu6xx_get_physical_address(CPUPPCState *env, 
> mmu_ctx_t *ctx,
>ppc_hash32_hpt_base(cpu), ppc_hash32_hpt_mask(cpu), 
> hash);
>  *hashp = hash;
>  
> -/* Initialize real address with an invalid value */
> -ctx->raddr = (hwaddr)-1ULL;
>  /* Software TLB search */
>  return ppc6xx_tlb_check(env, ctx, eaddr, access_type, ptem, nx);
>  }




Re: [PATCH 21/43] target/ppc/mmu_common.c: Remove key field from mmu_ctx_t

2024-07-03 Thread Nicholas Piggin
On Mon May 27, 2024 at 9:12 AM AEST, BALATON Zoltan wrote:
> Pass it as a function parameter and remove it from mmu_ctx_t.

Reviewed-by: Nicholas Piggin 

>
> Signed-off-by: BALATON Zoltan 
> ---
>  target/ppc/mmu_common.c | 26 ++
>  1 file changed, 14 insertions(+), 12 deletions(-)
>
> diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c
> index fe321ab49c..be09c3b1a3 100644
> --- a/target/ppc/mmu_common.c
> +++ b/target/ppc/mmu_common.c
> @@ -41,7 +41,6 @@
>  typedef struct {
>  hwaddr raddr;  /* Real address */
>  int prot;  /* Protection bits  */
> -int key;   /* Access key   */
>  } mmu_ctx_t;
>  
>  void ppc_store_sdr1(CPUPPCState *env, target_ulong value)
> @@ -95,7 +94,7 @@ int ppc6xx_tlb_getnum(CPUPPCState *env, target_ulong eaddr,
>  static int ppc6xx_tlb_check(CPUPPCState *env,
>  mmu_ctx_t *ctx, target_ulong eaddr,
>  MMUAccessType access_type, target_ulong ptem,
> -bool nx)
> +bool key, bool nx)
>  {
>  ppc6xx_tlb_t *tlb;
>  target_ulong *pte1p;
> @@ -140,7 +139,7 @@ static int ppc6xx_tlb_check(CPUPPCState *env,
>  /* Keep the matching PTE information */
>  best = nr;
>  ctx->raddr = tlb->pte1;
> -ctx->prot = ppc_hash32_prot(ctx->key, tlb->pte1 & HPTE32_R_PP, nx);
> +ctx->prot = ppc_hash32_prot(key, tlb->pte1 & HPTE32_R_PP, nx);
>  if (check_prot_access_type(ctx->prot, access_type)) {
>  qemu_log_mask(CPU_LOG_MMU, "PTE access granted !\n");
>  ret = 0;
> @@ -295,13 +294,14 @@ static int get_bat_6xx_tlb(CPUPPCState *env, mmu_ctx_t 
> *ctx,
>  }
>  
>  static int mmu6xx_get_physical_address(CPUPPCState *env, mmu_ctx_t *ctx,
> -   target_ulong eaddr, hwaddr *hashp,
> +   target_ulong eaddr,
> +   hwaddr *hashp, bool *keyp,
> MMUAccessType access_type, int type)
>  {
>  PowerPCCPU *cpu = env_archcpu(env);
>  hwaddr hash;
>  target_ulong vsid, sr, pgidx, ptem;
> -bool pr, ds, nx;
> +bool key, pr, ds, nx;
>  
>  /* First try to find a BAT entry if there are any */
>  if (env->nb_BATs && get_bat_6xx_tlb(env, ctx, eaddr, access_type) == 0) {
> @@ -312,7 +312,8 @@ static int mmu6xx_get_physical_address(CPUPPCState *env, 
> mmu_ctx_t *ctx,
>  pr = FIELD_EX64(env->msr, MSR, PR);
>  
>  sr = env->sr[eaddr >> 28];
> -ctx->key = ppc_hash32_key(pr, sr);
> +key = ppc_hash32_key(pr, sr);
> +*keyp = key;
>  ds = sr & SR32_T;
>  nx = sr & SR32_NX;
>  vsid = sr & SR32_VSID;
> @@ -329,7 +330,7 @@ static int mmu6xx_get_physical_address(CPUPPCState *env, 
> mmu_ctx_t *ctx,
>  ptem = (vsid << 7) | (pgidx >> 10); /* Virtual segment ID | API */
>  
>  qemu_log_mask(CPU_LOG_MMU, "pte segment: key=%d ds %d nx %d vsid "
> -  TARGET_FMT_lx "\n", ctx->key, ds, nx, vsid);
> +  TARGET_FMT_lx "\n", key, ds, nx, vsid);
>  if (!ds) {
>  /* Check if instruction fetch is allowed, if needed */
>  if (type == ACCESS_CODE && nx) {
> @@ -343,7 +344,7 @@ static int mmu6xx_get_physical_address(CPUPPCState *env, 
> mmu_ctx_t *ctx,
>  *hashp = hash;
>  
>  /* Software TLB search */
> -return ppc6xx_tlb_check(env, ctx, eaddr, access_type, ptem, nx);
> +return ppc6xx_tlb_check(env, ctx, eaddr, access_type, ptem, key, nx);
>  }
>  
>  /* Direct-store segment : absolutely *BUGGY* for now */
> @@ -367,8 +368,8 @@ static int mmu6xx_get_physical_address(CPUPPCState *env, 
> mmu_ctx_t *ctx,
>  case ACCESS_EXT: /* eciwx or ecowx */
>  return -4;
>  }
> -if ((access_type == MMU_DATA_STORE || ctx->key != 1) &&
> -(access_type == MMU_DATA_LOAD || ctx->key != 0)) {
> +if ((access_type == MMU_DATA_STORE || !key) &&
> +(access_type == MMU_DATA_LOAD || key)) {
>  ctx->raddr = eaddr;
>  return 2;
>  }
> @@ -709,6 +710,7 @@ static bool ppc_6xx_xlate(PowerPCCPU *cpu, vaddr eaddr,
>  CPUPPCState *env = &cpu->env;
>  mmu_ctx_t ctx;
>  hwaddr hash = 0; /* init to 0 to avoid used uninit warning */
> +bool key;
>  int type, ret;
>  
>  if (ppc_real_mode_xlate(cpu, eaddr, access_type, raddrp, psizep, protp)) 
> {

Re: [PATCH 22/43] target/ppc/mmu_common.c: Stop using ctx in ppc6xx_tlb_check()

2024-07-03 Thread Nicholas Piggin
On Mon May 27, 2024 at 9:12 AM AEST, BALATON Zoltan wrote:
> Pass raddr and prot in function parameters instead.
>
> Signed-off-by: BALATON Zoltan 

Reviewed-by: Nicholas Piggin 

> ---
>  target/ppc/mmu_common.c | 26 +-
>  1 file changed, 13 insertions(+), 13 deletions(-)
>
> diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c
> index be09c3b1a3..ede409eb99 100644
> --- a/target/ppc/mmu_common.c
> +++ b/target/ppc/mmu_common.c
> @@ -91,10 +91,9 @@ int ppc6xx_tlb_getnum(CPUPPCState *env, target_ulong eaddr,
>  
>  /* Software driven TLB helpers */
>  
> -static int ppc6xx_tlb_check(CPUPPCState *env,
> -mmu_ctx_t *ctx, target_ulong eaddr,
> -MMUAccessType access_type, target_ulong ptem,
> -bool key, bool nx)
> +static int ppc6xx_tlb_check(CPUPPCState *env, hwaddr *raddr, int *prot,
> +target_ulong eaddr, MMUAccessType access_type,
> +target_ulong ptem, bool key, bool nx)
>  {
>  ppc6xx_tlb_t *tlb;
>  target_ulong *pte1p;
> @@ -102,7 +101,7 @@ static int ppc6xx_tlb_check(CPUPPCState *env,
>  bool is_code = (access_type == MMU_INST_FETCH);
>  
>  /* Initialize real address with an invalid value */
> -ctx->raddr = (hwaddr)-1ULL;
> +*raddr = (hwaddr)-1ULL;
>  best = -1;
>  ret = -1; /* No TLB found */
>  for (way = 0; way < env->nb_ways; way++) {
> @@ -130,17 +129,17 @@ static int ppc6xx_tlb_check(CPUPPCState *env,
>  continue;
>  }
>  /* all matches should have equal RPN, WIMG & PP */
> -if (ctx->raddr != (hwaddr)-1ULL &&
> -(ctx->raddr & PTE_CHECK_MASK) != (tlb->pte1 & PTE_CHECK_MASK)) {
> +if (*raddr != (hwaddr)-1ULL &&
> +(*raddr & PTE_CHECK_MASK) != (tlb->pte1 & PTE_CHECK_MASK)) {
>  qemu_log_mask(CPU_LOG_MMU, "Bad RPN/WIMG/PP\n");
>  /* TLB inconsistency */
>  continue;
>  }
>  /* Keep the matching PTE information */
>  best = nr;
> -ctx->raddr = tlb->pte1;
> -ctx->prot = ppc_hash32_prot(key, tlb->pte1 & HPTE32_R_PP, nx);
> -if (check_prot_access_type(ctx->prot, access_type)) {
> +*raddr = tlb->pte1;
> +*prot = ppc_hash32_prot(key, tlb->pte1 & HPTE32_R_PP, nx);
> +if (check_prot_access_type(*prot, access_type)) {
>  qemu_log_mask(CPU_LOG_MMU, "PTE access granted !\n");
>  ret = 0;
>  break;
> @@ -152,7 +151,7 @@ static int ppc6xx_tlb_check(CPUPPCState *env,
>  if (best != -1) {
>  qemu_log_mask(CPU_LOG_MMU, "found TLB at addr " HWADDR_FMT_plx
>" prot=%01x ret=%d\n",
> -  ctx->raddr & TARGET_PAGE_MASK, ctx->prot, ret);
> +  *raddr & TARGET_PAGE_MASK, *prot, ret);
>  /* Update page flags */
>  pte1p = &env->tlb.tlb6[best].pte1;
>  *pte1p |= 0x0100; /* Update accessed flag */
> @@ -162,7 +161,7 @@ static int ppc6xx_tlb_check(CPUPPCState *env,
>  *pte1p |= 0x0080;
>  } else {
>  /* Force page fault for first write access */
> -ctx->prot &= ~PAGE_WRITE;
> +*prot &= ~PAGE_WRITE;
>  }
>  }
>  }
> @@ -344,7 +343,8 @@ static int mmu6xx_get_physical_address(CPUPPCState *env, 
> mmu_ctx_t *ctx,
>  *hashp = hash;
>  
>  /* Software TLB search */
> -return ppc6xx_tlb_check(env, ctx, eaddr, access_type, ptem, key, nx);
> +return ppc6xx_tlb_check(env, &ctx->raddr, &ctx->prot, eaddr,
> +access_type, ptem, key, nx);
>  }
>  
>  /* Direct-store segment : absolutely *BUGGY* for now */




Re: [PATCH 23/43] target/ppc/mmu_common.c: Rename function parameter

2024-07-03 Thread Nicholas Piggin
On Mon May 27, 2024 at 9:12 AM AEST, BALATON Zoltan wrote:
> Rename parameter of get_bat_6xx_tlb() from virtual to eaddr to match
> other functions.

Reviewed-by: Nicholas Piggin 

>
> Signed-off-by: BALATON Zoltan 
> ---
>  target/ppc/mmu_common.c | 18 +-
>  1 file changed, 9 insertions(+), 9 deletions(-)
>
> diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c
> index ede409eb99..110936ca83 100644
> --- a/target/ppc/mmu_common.c
> +++ b/target/ppc/mmu_common.c
> @@ -221,7 +221,7 @@ static inline void bat_size_prot(CPUPPCState *env, 
> target_ulong *blp,
>  }
>  
>  static int get_bat_6xx_tlb(CPUPPCState *env, mmu_ctx_t *ctx,
> -   target_ulong virtual, MMUAccessType access_type)
> +   target_ulong eaddr, MMUAccessType access_type)
>  {
>  target_ulong *BATlt, *BATut, *BATu, *BATl;
>  target_ulong BEPIl, BEPIu, bl;
> @@ -230,7 +230,7 @@ static int get_bat_6xx_tlb(CPUPPCState *env, mmu_ctx_t 
> *ctx,
>  bool ifetch = access_type == MMU_INST_FETCH;
>  
>  qemu_log_mask(CPU_LOG_MMU, "%s: %cBAT v " TARGET_FMT_lx "\n", __func__,
> -  ifetch ? 'I' : 'D', virtual);
> +  ifetch ? 'I' : 'D', eaddr);
>  if (ifetch) {
>  BATlt = env->IBAT[1];
>  BATut = env->IBAT[0];
> @@ -246,15 +246,15 @@ static int get_bat_6xx_tlb(CPUPPCState *env, mmu_ctx_t 
> *ctx,
>  bat_size_prot(env, &bl, &valid, &prot, BATu, BATl);
>  qemu_log_mask(CPU_LOG_MMU, "%s: %cBAT%d v " TARGET_FMT_lx " BATu "
>TARGET_FMT_lx " BATl " TARGET_FMT_lx "\n", __func__,
> -  ifetch ? 'I' : 'D', i, virtual, *BATu, *BATl);
> -if ((virtual & 0xF000) == BEPIu &&
> -((virtual & 0x0FFE) & ~bl) == BEPIl) {
> +  ifetch ? 'I' : 'D', i, eaddr, *BATu, *BATl);
> +if ((eaddr & 0xF000) == BEPIu &&
> +((eaddr & 0x0FFE) & ~bl) == BEPIl) {
>  /* BAT matches */
>  if (valid != 0) {
>  /* Get physical address */
>  ctx->raddr = (*BATl & 0xF000) |
> -((virtual & 0x0FFE & bl) | (*BATl & 0x0FFE)) |
> -(virtual & 0x0001F000);
> +((eaddr & 0x0FFE & bl) | (*BATl & 0x0FFE)) |
> +(eaddr & 0x0001F000);
>  /* Compute access rights */
>  ctx->prot = prot;
>  if (check_prot_access_type(ctx->prot, access_type)) {
> @@ -273,7 +273,7 @@ static int get_bat_6xx_tlb(CPUPPCState *env, mmu_ctx_t 
> *ctx,
>  if (ret < 0) {
>  if (qemu_log_enabled()) {
>  qemu_log_mask(CPU_LOG_MMU, "no BAT match for "
> -  TARGET_FMT_lx ":\n", virtual);
> +  TARGET_FMT_lx ":\n", eaddr);
>  for (i = 0; i < 4; i++) {
>  BATu = &BATut[i];
>  BATl = &BATlt[i];
> @@ -284,7 +284,7 @@ static int get_bat_6xx_tlb(CPUPPCState *env, mmu_ctx_t 
> *ctx,
>" BATu " TARGET_FMT_lx " BATl " TARGET_FMT_lx
>"\n\t" TARGET_FMT_lx " " TARGET_FMT_lx " "
>TARGET_FMT_lx "\n", __func__, ifetch ? 'I' : 
> 'D',
> -  i, virtual, *BATu, *BATl, BEPIu, BEPIl, bl);
> +  i, eaddr, *BATu, *BATl, BEPIu, BEPIl, bl);
>  }
>  }
>  }




Re: [PATCH 24/43] target/ppc/mmu_common.c: Use defines instead of numeric constants

2024-07-03 Thread Nicholas Piggin
On Mon May 27, 2024 at 9:13 AM AEST, BALATON Zoltan wrote:
> Replace some BAT related constants with defines from mmu-hash32.h

Reviewed-by: Nicholas Piggin 

>
> Signed-off-by: BALATON Zoltan 
> ---
>  target/ppc/mmu_common.c | 26 +-
>  1 file changed, 13 insertions(+), 13 deletions(-)
>
> diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c
> index 110936ca83..aa002bba35 100644
> --- a/target/ppc/mmu_common.c
> +++ b/target/ppc/mmu_common.c
> @@ -201,7 +201,7 @@ static inline void bat_size_prot(CPUPPCState *env, 
> target_ulong *blp,
>  target_ulong bl;
>  int pp, valid, prot;
>  
> -bl = (*BATu & 0x1FFC0000) << 15;
> +bl = (*BATu & BATU32_BL) << 15;
>  valid = 0;
>  prot = 0;
>  if ((!FIELD_EX64(env->msr, MSR, PR) && (*BATu & 0x0002)) ||
> @@ -241,19 +241,19 @@ static int get_bat_6xx_tlb(CPUPPCState *env, mmu_ctx_t 
> *ctx,
>  for (i = 0; i < env->nb_BATs; i++) {
>  BATu = &BATut[i];
>  BATl = &BATlt[i];
> -BEPIu = *BATu & 0xF0000000;
> -BEPIl = *BATu & 0x0FFE0000;
> +BEPIu = *BATu & BATU32_BEPIU;
> +BEPIl = *BATu & BATU32_BEPIL;
>  bat_size_prot(env, &bl, &valid, &prot, BATu, BATl);
>  qemu_log_mask(CPU_LOG_MMU, "%s: %cBAT%d v " TARGET_FMT_lx " BATu "
>TARGET_FMT_lx " BATl " TARGET_FMT_lx "\n", __func__,
>ifetch ? 'I' : 'D', i, eaddr, *BATu, *BATl);
> -if ((eaddr & 0xF0000000) == BEPIu &&
> -((eaddr & 0x0FFE0000) & ~bl) == BEPIl) {
> +if ((eaddr & BATU32_BEPIU) == BEPIu &&
> +((eaddr & BATU32_BEPIL) & ~bl) == BEPIl) {
>  /* BAT matches */
>  if (valid != 0) {
>  /* Get physical address */
> -ctx->raddr = (*BATl & 0xF0000000) |
> -((eaddr & 0x0FFE0000 & bl) | (*BATl & 0x0FFE0000)) |
> +ctx->raddr = (*BATl & BATU32_BEPIU) |
> +((eaddr & BATU32_BEPIL & bl) | (*BATl & BATU32_BEPIL)) |
>  (eaddr & 0x0001F000);
>  /* Compute access rights */
>  ctx->prot = prot;
> @@ -277,9 +277,9 @@ static int get_bat_6xx_tlb(CPUPPCState *env, mmu_ctx_t 
> *ctx,
>  for (i = 0; i < 4; i++) {
>  BATu = &BATut[i];
>  BATl = &BATlt[i];
> -BEPIu = *BATu & 0xF0000000;
> -BEPIl = *BATu & 0x0FFE0000;
> -bl = (*BATu & 0x1FFC0000) << 15;
> +BEPIu = *BATu & BATU32_BEPIU;
> +BEPIl = *BATu & BATU32_BEPIL;
> +bl = (*BATu & BATU32_BL) << 15;
>  qemu_log_mask(CPU_LOG_MMU, "%s: %cBAT%d v " TARGET_FMT_lx
>" BATu " TARGET_FMT_lx " BATl " TARGET_FMT_lx
>"\n\t" TARGET_FMT_lx " " TARGET_FMT_lx " "
> @@ -520,9 +520,9 @@ static void mmu6xx_dump_BATs(CPUPPCState *env, int type)
>  for (i = 0; i < env->nb_BATs; i++) {
>  BATu = &BATut[i];
>  BATl = &BATlt[i];
> -BEPIu = *BATu & 0xF0000000;
> -BEPIl = *BATu & 0x0FFE0000;
> -bl = (*BATu & 0x1FFC0000) << 15;
> +BEPIu = *BATu & BATU32_BEPIU;
> +BEPIl = *BATu & BATU32_BEPIL;
> +bl = (*BATu & BATU32_BL) << 15;
>  qemu_printf("%s BAT%d BATu " TARGET_FMT_lx
>  " BATl " TARGET_FMT_lx "\n\t" TARGET_FMT_lx " "
>  TARGET_FMT_lx " " TARGET_FMT_lx "\n",




Re: [PATCH 25/43] target/ppc: Remove bat_size_prot()

2024-07-03 Thread Nicholas Piggin
On Mon May 27, 2024 at 9:13 AM AEST, BALATON Zoltan wrote:
> There is already a hash32_bat_prot() function that does most if this
> and the rest can be inlined. Export hash32_bat_prot() and rename it to
> ppc_hash32_bat_prot() to match other functions and use it in
> get_bat_6xx_tlb().

Reviewed-by: Nicholas Piggin 

>
> Signed-off-by: BALATON Zoltan 
> ---
>  target/ppc/mmu-hash32.c | 18 +-
>  target/ppc/mmu-hash32.h | 14 +++
>  target/ppc/mmu_common.c | 52 ++---
>  3 files changed, 27 insertions(+), 57 deletions(-)
>
> diff --git a/target/ppc/mmu-hash32.c b/target/ppc/mmu-hash32.c
> index 160311de87..6f0f0bbb00 100644
> --- a/target/ppc/mmu-hash32.c
> +++ b/target/ppc/mmu-hash32.c
> @@ -48,22 +48,6 @@ static target_ulong hash32_bat_size(int mmu_idx,
>  return BATU32_BEPI & ~((batu & BATU32_BL) << 15);
>  }
>  
> -static int hash32_bat_prot(PowerPCCPU *cpu,
> -   target_ulong batu, target_ulong batl)
> -{
> -int pp, prot;
> -
> -prot = 0;
> -pp = batl & BATL32_PP;
> -if (pp != 0) {
> -prot = PAGE_READ | PAGE_EXEC;
> -if (pp == 0x2) {
> -prot |= PAGE_WRITE;
> -}
> -}
> -return prot;
> -}
> -
>  static hwaddr ppc_hash32_bat_lookup(PowerPCCPU *cpu, target_ulong ea,
>  MMUAccessType access_type, int *prot,
>  int mmu_idx)
> @@ -95,7 +79,7 @@ static hwaddr ppc_hash32_bat_lookup(PowerPCCPU *cpu, 
> target_ulong ea,
>  if (mask && ((ea & mask) == (batu & BATU32_BEPI))) {
>  hwaddr raddr = (batl & mask) | (ea & ~mask);
>  
> -*prot = hash32_bat_prot(cpu, batu, batl);
> +*prot = ppc_hash32_bat_prot(batu, batl);
>  
>  return raddr & TARGET_PAGE_MASK;
>  }
> diff --git a/target/ppc/mmu-hash32.h b/target/ppc/mmu-hash32.h
> index 5902cf8333..bd75f7d647 100644
> --- a/target/ppc/mmu-hash32.h
> +++ b/target/ppc/mmu-hash32.h
> @@ -143,6 +143,20 @@ static inline int ppc_hash32_prot(bool key, int pp, bool 
> nx)
>  return nx ? prot : prot | PAGE_EXEC;
>  }
>  
> +static inline int ppc_hash32_bat_prot(target_ulong batu, target_ulong batl)
> +{
> +int prot = 0;
> +int pp = batl & BATL32_PP;
> +
> +if (pp) {
> +prot = PAGE_READ | PAGE_EXEC;
> +if (pp == 0x2) {
> +prot |= PAGE_WRITE;
> +}
> +}
> +return prot;
> +}
> +
>  typedef struct {
>  uint32_t pte0, pte1;
>  } ppc_hash_pte32_t;
> diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c
> index aa002bba35..624ed51a92 100644
> --- a/target/ppc/mmu_common.c
> +++ b/target/ppc/mmu_common.c
> @@ -193,40 +193,13 @@ static int ppc6xx_tlb_check(CPUPPCState *env, hwaddr 
> *raddr, int *prot,
>  return ret;
>  }
>  
> -/* Perform BAT hit & translation */
> -static inline void bat_size_prot(CPUPPCState *env, target_ulong *blp,
> - int *validp, int *protp, target_ulong *BATu,
> - target_ulong *BATl)
> -{
> -target_ulong bl;
> -int pp, valid, prot;
> -
> -bl = (*BATu & BATU32_BL) << 15;
> -valid = 0;
> -prot = 0;
> -if ((!FIELD_EX64(env->msr, MSR, PR) && (*BATu & 0x0002)) ||
> -(FIELD_EX64(env->msr, MSR, PR) && (*BATu & 0x0001))) {
> -valid = 1;
> -pp = *BATl & 0x0003;
> -if (pp != 0) {
> -prot = PAGE_READ | PAGE_EXEC;
> -if (pp == 0x2) {
> -prot |= PAGE_WRITE;
> -}
> -}
> -}
> -*blp = bl;
> -*validp = valid;
> -*protp = prot;
> -}
> -
>  static int get_bat_6xx_tlb(CPUPPCState *env, mmu_ctx_t *ctx,
> -   target_ulong eaddr, MMUAccessType access_type)
> +   target_ulong eaddr, MMUAccessType access_type,
> +   bool pr)
>  {
>  target_ulong *BATlt, *BATut, *BATu, *BATl;
>  target_ulong BEPIl, BEPIu, bl;
> -int i, valid, prot;
> -int ret = -1;
> +int i, ret = -1;
>  bool ifetch = access_type == MMU_INST_FETCH;
>  
>  qemu_log_mask(CPU_LOG_MMU, "%s: %cBAT v " TARGET_FMT_lx "\n", __func__,
> @@ -243,20 +216,19 @@ static int get_bat_6xx_tlb(CPUPPCState *env, mmu_ctx_t 
> *ctx,
>  BATl = &BATlt[i];
>  BEPIu = *BATu & BATU32_BEPIU;
>  BEPIl = *BATu & BATU32_BEPIL;
> -bat_size_prot(env, &bl, &valid, &prot, BATu, BATl);

Re: [PATCH 27/43] target/ppc/mmu_common.c: Remove mmu_ctx_t

2024-07-04 Thread Nicholas Piggin
On Mon May 27, 2024 at 9:13 AM AEST, BALATON Zoltan wrote:
> Completely get rid of mmu_ctx_t after converting the remaining
> functions to pass raddr and prot without the context struct.

Reviewed-by: Nicholas Piggin 

>
> Signed-off-by: BALATON Zoltan 
> ---
>  target/ppc/mmu_common.c | 25 +++--
>  1 file changed, 7 insertions(+), 18 deletions(-)
>
> diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c
> index 4770b43630..60f8736210 100644
> --- a/target/ppc/mmu_common.c
> +++ b/target/ppc/mmu_common.c
> @@ -37,12 +37,6 @@
>  
>  /* #define DUMP_PAGE_TABLES */
>  
> -/* Context used internally during MMU translations */
> -typedef struct {
> -hwaddr raddr;  /* Real address */
> -int prot;  /* Protection bits  */
> -} mmu_ctx_t;
> -
>  void ppc_store_sdr1(CPUPPCState *env, target_ulong value)
>  {
>  PowerPCCPU *cpu = env_archcpu(env);
> @@ -264,8 +258,8 @@ static int get_bat_6xx_tlb(CPUPPCState *env, hwaddr 
> *raddr, int *prot,
>  return ret;
>  }
>  
> -static int mmu6xx_get_physical_address(CPUPPCState *env, mmu_ctx_t *ctx,
> -   target_ulong eaddr,
> +static int mmu6xx_get_physical_address(CPUPPCState *env, hwaddr *raddr,
> +   int *prot, target_ulong eaddr,
> hwaddr *hashp, bool *keyp,
> MMUAccessType access_type, int type)
>  {
> @@ -277,8 +271,7 @@ static int mmu6xx_get_physical_address(CPUPPCState *env, 
> mmu_ctx_t *ctx,
>  
>  /* First try to find a BAT entry if there are any */
>  if (env->nb_BATs &&
> -get_bat_6xx_tlb(env, &ctx->raddr, &ctx->prot, eaddr,
> -access_type, pr) == 0) {
> +get_bat_6xx_tlb(env, raddr, prot, eaddr, access_type, pr) == 0) {
>  return 0;
>  }
>  
> @@ -316,7 +309,7 @@ static int mmu6xx_get_physical_address(CPUPPCState *env, 
> mmu_ctx_t *ctx,
>  *hashp = hash;
>  
>  /* Software TLB search */
> -return ppc6xx_tlb_check(env, &ctx->raddr, &ctx->prot, eaddr,
> +return ppc6xx_tlb_check(env, raddr, prot, eaddr,
>  access_type, ptem, key, nx);
>  }
>  
> @@ -333,7 +326,7 @@ static int mmu6xx_get_physical_address(CPUPPCState *env, 
> mmu_ctx_t *ctx,
>   * Should make the instruction do no-op.  As it already do
>   * no-op, it's quite easy :-)
>   */
> -ctx->raddr = eaddr;
> +*raddr = eaddr;
>  return 0;
>  case ACCESS_CODE: /* No code fetch is allowed in direct-store areas */
>  case ACCESS_FLOAT: /* Floating point load/store */
> @@ -343,7 +336,7 @@ static int mmu6xx_get_physical_address(CPUPPCState *env, 
> mmu_ctx_t *ctx,
>  }
>  if ((access_type == MMU_DATA_STORE || !key) &&
>  (access_type == MMU_DATA_LOAD || key)) {
> -ctx->raddr = eaddr;
> +*raddr = eaddr;
>  return 2;
>  }
>  return -2;
> @@ -681,7 +674,6 @@ static bool ppc_6xx_xlate(PowerPCCPU *cpu, vaddr eaddr,
>  {
>  CPUState *cs = CPU(cpu);
>  CPUPPCState *env = &cpu->env;
> -mmu_ctx_t ctx;
>  hwaddr hash = 0; /* init to 0 to avoid used uninit warning */
>  bool key;
>  int type, ret;
> @@ -700,12 +692,9 @@ static bool ppc_6xx_xlate(PowerPCCPU *cpu, vaddr eaddr,
>  type = ACCESS_INT;
>  }
>  
> -ctx.prot = 0;
> -ret = mmu6xx_get_physical_address(env, &ctx, eaddr, &hash, &key,
> +ret = mmu6xx_get_physical_address(env, raddrp, protp, eaddr, &hash, &key,
>access_type, type);
>  if (ret == 0) {
> -*raddrp = ctx.raddr;
> -*protp = ctx.prot;
>  *psizep = TARGET_PAGE_BITS;
>  return true;
>  } else if (!guest_visible) {




Re: [PATCH 26/43] target/ppc/mmu_common.c: Stop using ctx in get_bat_6xx_tlb()

2024-07-04 Thread Nicholas Piggin
On Mon May 27, 2024 at 9:13 AM AEST, BALATON Zoltan wrote:
> Pass raddr and prot in function parameters instead

Reviewed-by: Nicholas Piggin 

>
> Signed-off-by: BALATON Zoltan 
> ---
>  target/ppc/mmu_common.c | 17 +
>  1 file changed, 9 insertions(+), 8 deletions(-)
>
> diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c
> index 624ed51a92..4770b43630 100644
> --- a/target/ppc/mmu_common.c
> +++ b/target/ppc/mmu_common.c
> @@ -193,7 +193,7 @@ static int ppc6xx_tlb_check(CPUPPCState *env, hwaddr 
> *raddr, int *prot,
>  return ret;
>  }
>  
> -static int get_bat_6xx_tlb(CPUPPCState *env, mmu_ctx_t *ctx,
> +static int get_bat_6xx_tlb(CPUPPCState *env, hwaddr *raddr, int *prot,
> target_ulong eaddr, MMUAccessType access_type,
> bool pr)
>  {
> @@ -224,16 +224,16 @@ static int get_bat_6xx_tlb(CPUPPCState *env, mmu_ctx_t 
> *ctx,
>  if ((eaddr & BATU32_BEPIU) == BEPIu &&
>  ((eaddr & BATU32_BEPIL) & ~bl) == BEPIl) {
>  /* Get physical address */
> -ctx->raddr = (*BATl & BATU32_BEPIU) |
> +*raddr = (*BATl & BATU32_BEPIU) |
>  ((eaddr & BATU32_BEPIL & bl) | (*BATl & BATU32_BEPIL)) |
>  (eaddr & 0x0001F000);
>  /* Compute access rights */
> -ctx->prot = ppc_hash32_bat_prot(*BATu, *BATl);
> -if (check_prot_access_type(ctx->prot, access_type)) {
> +*prot = ppc_hash32_bat_prot(*BATu, *BATl);
> +if (check_prot_access_type(*prot, access_type)) {
>  qemu_log_mask(CPU_LOG_MMU, "BAT %d match: r " 
> HWADDR_FMT_plx
> -  " prot=%c%c\n", i, ctx->raddr,
> -  ctx->prot & PAGE_READ ? 'R' : '-',
> -  ctx->prot & PAGE_WRITE ? 'W' : '-');
> +  " prot=%c%c\n", i, *raddr,
> +  *prot & PAGE_READ ? 'R' : '-',
> +  *prot & PAGE_WRITE ? 'W' : '-');
>  ret = 0;
>  } else {
>  ret = -2;
> @@ -277,7 +277,8 @@ static int mmu6xx_get_physical_address(CPUPPCState *env, 
> mmu_ctx_t *ctx,
>  
>  /* First try to find a BAT entry if there are any */
>  if (env->nb_BATs &&
> -get_bat_6xx_tlb(env, ctx, eaddr, access_type, pr) == 0) {
> +get_bat_6xx_tlb(env, &ctx->raddr, &ctx->prot, eaddr,
> +access_type, pr) == 0) {
>  return 0;
>  }
>  




Re: [PATCH 28/43] target/ppc/mmu-hash32.c: Inline and remove ppc_hash32_pte_raddr()

2024-07-04 Thread Nicholas Piggin
On Mon May 27, 2024 at 9:13 AM AEST, BALATON Zoltan wrote:
> This function is used only once and does not add more clarity than
> doing it inline.
>
> Signed-off-by: BALATON Zoltan 

Ah, not really sure I agree. Yes I suppose in this case because it
has that comment. But you could instead remove the comment and
leave the function there (because the comment is redundant with
the function name), and then your main function is 1 line
instead of 4.

Don't remove functions just because they're called once, if they
are a nice self-contained and well named thing. But okay for here
I suppose.

Reviewed-by: Nicholas Piggin 

> ---
>  target/ppc/mmu-hash32.c | 18 +-
>  1 file changed, 5 insertions(+), 13 deletions(-)
>
> diff --git a/target/ppc/mmu-hash32.c b/target/ppc/mmu-hash32.c
> index 6f0f0bbb00..c4de1647e2 100644
> --- a/target/ppc/mmu-hash32.c
> +++ b/target/ppc/mmu-hash32.c
> @@ -298,15 +298,6 @@ static hwaddr ppc_hash32_htab_lookup(PowerPCCPU *cpu,
>  return pte_offset;
>  }
>  
> -static hwaddr ppc_hash32_pte_raddr(target_ulong sr, ppc_hash_pte32_t pte,
> -   target_ulong eaddr)
> -{
> -hwaddr rpn = pte.pte1 & HPTE32_R_RPN;
> -hwaddr mask = ~TARGET_PAGE_MASK;
> -
> -return (rpn & ~mask) | (eaddr & mask);
> -}
> -
>  bool ppc_hash32_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType 
> access_type,
>hwaddr *raddrp, int *psizep, int *protp, int mmu_idx,
>bool guest_visible)
> @@ -440,11 +431,12 @@ bool ppc_hash32_xlate(PowerPCCPU *cpu, vaddr eaddr, 
> MMUAccessType access_type,
>   */
>  prot &= ~PAGE_WRITE;
>  }
> - }
> +}
> +*protp = prot;
>  
>  /* 9. Determine the real address from the PTE */
> -
> -*raddrp = ppc_hash32_pte_raddr(sr, pte, eaddr);
> -*protp = prot;
> +*raddrp = pte.pte1 & HPTE32_R_RPN;
> +*raddrp &= TARGET_PAGE_MASK;
> +*raddrp |= eaddr & ~TARGET_PAGE_MASK;
>  return true;
>  }




Re: [PATCH 29/43] target/ppc/mmu-hash32.c: Move get_pteg_offset32() to the header

2024-07-04 Thread Nicholas Piggin
On Mon May 27, 2024 at 9:13 AM AEST, BALATON Zoltan wrote:
> This function is a simple shared function, move it to other similar
> static inline functions in the header.

Reviewed-by: Nicholas Piggin 

>
> Signed-off-by: BALATON Zoltan 
> ---
>  target/ppc/mmu-hash32.c | 7 ---
>  target/ppc/mmu-hash32.h | 6 +-
>  2 files changed, 5 insertions(+), 8 deletions(-)
>
> diff --git a/target/ppc/mmu-hash32.c b/target/ppc/mmu-hash32.c
> index c4de1647e2..44b16142ab 100644
> --- a/target/ppc/mmu-hash32.c
> +++ b/target/ppc/mmu-hash32.c
> @@ -201,13 +201,6 @@ static bool ppc_hash32_direct_store(PowerPCCPU *cpu, 
> target_ulong sr,
>  return false;
>  }
>  
> -hwaddr get_pteg_offset32(PowerPCCPU *cpu, hwaddr hash)
> -{
> -target_ulong mask = ppc_hash32_hpt_mask(cpu);
> -
> -return (hash * HASH_PTEG_SIZE_32) & mask;
> -}
> -
>  static hwaddr ppc_hash32_pteg_search(PowerPCCPU *cpu, hwaddr pteg_off,
>   bool secondary, target_ulong ptem,
>   ppc_hash_pte32_t *pte)
> diff --git a/target/ppc/mmu-hash32.h b/target/ppc/mmu-hash32.h
> index bd75f7d647..2838de031c 100644
> --- a/target/ppc/mmu-hash32.h
> +++ b/target/ppc/mmu-hash32.h
> @@ -3,7 +3,6 @@
>  
>  #ifndef CONFIG_USER_ONLY
>  
> -hwaddr get_pteg_offset32(PowerPCCPU *cpu, hwaddr hash);
>  bool ppc_hash32_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType 
> access_type,
>hwaddr *raddrp, int *psizep, int *protp, int mmu_idx,
>bool guest_visible);
> @@ -102,6 +101,11 @@ static inline void ppc_hash32_store_hpte1(PowerPCCPU 
> *cpu,
>  stl_phys(CPU(cpu)->as, base + pte_offset + HASH_PTE_SIZE_32 / 2, pte1);
>  }
>  
> +static inline hwaddr get_pteg_offset32(PowerPCCPU *cpu, hwaddr hash)
> +{
> +return (hash * HASH_PTEG_SIZE_32) & ppc_hash32_hpt_mask(cpu);
> +}
> +
>  static inline bool ppc_hash32_key(bool pr, target_ulong sr)
>  {
>  return pr ? (sr & SR32_KP) : (sr & SR32_KS);




Re: [PATCH 30/43] target/ppc: Unexport some functions from mmu-book3s-v3.h

2024-07-04 Thread Nicholas Piggin
On Mon May 27, 2024 at 9:13 AM AEST, BALATON Zoltan wrote:
> The ppc_hash64_hpt_base() and ppc_hash64_hpt_mask() functions are
> mostly used by mmu-hash64.c only but there is one call to
> ppc_hash64_hpt_mask() in hw/ppc/spapr_vhyp_mmu.c.in a helper function
> that can be moved to mmu-hash64.c which allows these functions to be
> removed from the header.
>

Fine. Probably too big to inline anyway.

Reviewed-by: Nicholas Piggin 

> Signed-off-by: BALATON Zoltan 
> ---
>  hw/ppc/spapr_vhyp_mmu.c| 21 
>  target/ppc/mmu-book3s-v3.h | 40 ---
>  target/ppc/mmu-hash64.c| 49 ++
>  target/ppc/mmu-hash64.h|  1 +
>  4 files changed, 54 insertions(+), 57 deletions(-)
>
> diff --git a/hw/ppc/spapr_vhyp_mmu.c b/hw/ppc/spapr_vhyp_mmu.c
> index b3dd8b3a59..2d41d7f77b 100644
> --- a/hw/ppc/spapr_vhyp_mmu.c
> +++ b/hw/ppc/spapr_vhyp_mmu.c
> @@ -15,19 +15,6 @@
>  #include "helper_regs.h"
>  #include "hw/ppc/spapr.h"
>  #include "mmu-hash64.h"
> -#include "mmu-book3s-v3.h"
> -
> -
> -static inline bool valid_ptex(PowerPCCPU *cpu, target_ulong ptex)
> -{
> -/*
> - * hash value/pteg group index is normalized by HPT mask
> - */
> -if (((ptex & ~7ULL) / HPTES_PER_GROUP) & ~ppc_hash64_hpt_mask(cpu)) {
> -return false;
> -}
> -return true;
> -}
>  
>  static target_ulong h_enter(PowerPCCPU *cpu, SpaprMachineState *spapr,
>  target_ulong opcode, target_ulong *args)
> @@ -70,7 +57,7 @@ static target_ulong h_enter(PowerPCCPU *cpu, 
> SpaprMachineState *spapr,
>  
>  pteh &= ~0x60ULL;
>  
> -if (!valid_ptex(cpu, ptex)) {
> +if (!ppc_hash64_valid_ptex(cpu, ptex)) {
>  return H_PARAMETER;
>  }
>  
> @@ -119,7 +106,7 @@ static RemoveResult remove_hpte(PowerPCCPU *cpu
>  const ppc_hash_pte64_t *hptes;
>  target_ulong v, r;
>  
> -if (!valid_ptex(cpu, ptex)) {
> +if (!ppc_hash64_valid_ptex(cpu, ptex)) {
>  return REMOVE_PARM;
>  }
>  
> @@ -250,7 +237,7 @@ static target_ulong h_protect(PowerPCCPU *cpu, 
> SpaprMachineState *spapr,
>  const ppc_hash_pte64_t *hptes;
>  target_ulong v, r;
>  
> -if (!valid_ptex(cpu, ptex)) {
> +if (!ppc_hash64_valid_ptex(cpu, ptex)) {
>  return H_PARAMETER;
>  }
>  
> @@ -287,7 +274,7 @@ static target_ulong h_read(PowerPCCPU *cpu, 
> SpaprMachineState *spapr,
>  int i, ridx, n_entries = 1;
>  const ppc_hash_pte64_t *hptes;
>  
> -if (!valid_ptex(cpu, ptex)) {
> +if (!ppc_hash64_valid_ptex(cpu, ptex)) {
>  return H_PARAMETER;
>  }
>  
> diff --git a/target/ppc/mmu-book3s-v3.h b/target/ppc/mmu-book3s-v3.h
> index f3f7993958..263ce55c1f 100644
> --- a/target/ppc/mmu-book3s-v3.h
> +++ b/target/ppc/mmu-book3s-v3.h
> @@ -83,46 +83,6 @@ static inline bool ppc64_v3_radix(PowerPCCPU *cpu)
>  return !!(cpu->env.spr[SPR_LPCR] & LPCR_HR);
>  }
>  
> -static inline hwaddr ppc_hash64_hpt_base(PowerPCCPU *cpu)
> -{
> -uint64_t base;
> -
> -if (cpu->vhyp) {
> -return 0;
> -}
> -if (cpu->env.mmu_model == POWERPC_MMU_3_00) {
> -ppc_v3_pate_t pate;
> -
> -if (!ppc64_v3_get_pate(cpu, cpu->env.spr[SPR_LPIDR], &pate)) {
> -return 0;
> -}
> -base = pate.dw0;
> -} else {
> -base = cpu->env.spr[SPR_SDR1];
> -}
> -return base & SDR_64_HTABORG;
> -}
> -
> -static inline hwaddr ppc_hash64_hpt_mask(PowerPCCPU *cpu)
> -{
> -uint64_t base;
> -
> -if (cpu->vhyp) {
> -return cpu->vhyp_class->hpt_mask(cpu->vhyp);
> -}
> -if (cpu->env.mmu_model == POWERPC_MMU_3_00) {
> -ppc_v3_pate_t pate;
> -
> -if (!ppc64_v3_get_pate(cpu, cpu->env.spr[SPR_LPIDR], &pate)) {
> -return 0;
> -}
> -base = pate.dw0;
> -} else {
> -base = cpu->env.spr[SPR_SDR1];
> -}
> -return (1ULL << ((base & SDR_64_HTABSIZE) + 18 - 7)) - 1;
> -}
> -
>  #endif /* TARGET_PPC64 */
>  
>  #endif /* CONFIG_USER_ONLY */
> diff --git a/target/ppc/mmu-hash64.c b/target/ppc/mmu-hash64.c
> index cbc8efa0c3..7bc0323f26 100644
> --- a/target/ppc/mmu-hash64.c
> +++ b/target/ppc/mmu-hash64.c
> @@ -508,6 +508,46 @@ static int ppc_hash64_amr_prot(PowerPCCPU *cpu, 
> ppc_hash_pte64_t pte)
>  return prot;
>  }
>  
> +static hwaddr ppc_hash64_hpt_base(PowerPCCPU *cpu)
> +{
> +uint64_t base;
> +
> +if (cpu-&g

Re: [PATCH 32/43] target/ppc: Remove includes from mmu-book3s-v3.h

2024-07-04 Thread Nicholas Piggin
On Mon May 27, 2024 at 9:13 AM AEST, BALATON Zoltan wrote:
> Drop includes from header that are not needed by the header itself and
> only include them from C files that really need it.

Acked-by: Nicholas Piggin 

>
> Signed-off-by: BALATON Zoltan 
> ---
>  target/ppc/mmu-book3s-v3.h | 3 ---
>  target/ppc/mmu-hash64.c| 1 +
>  target/ppc/mmu-radix64.c   | 1 +
>  3 files changed, 2 insertions(+), 3 deletions(-)
>
> diff --git a/target/ppc/mmu-book3s-v3.h b/target/ppc/mmu-book3s-v3.h
> index 263ce55c1f..be66e26604 100644
> --- a/target/ppc/mmu-book3s-v3.h
> +++ b/target/ppc/mmu-book3s-v3.h
> @@ -20,9 +20,6 @@
>  #ifndef PPC_MMU_BOOK3S_V3_H
>  #define PPC_MMU_BOOK3S_V3_H
>  
> -#include "mmu-hash64.h"
> -#include "mmu-books.h"
> -
>  #ifndef CONFIG_USER_ONLY
>  
>  /*
> diff --git a/target/ppc/mmu-hash64.c b/target/ppc/mmu-hash64.c
> index 7bc0323f26..5e1983e334 100644
> --- a/target/ppc/mmu-hash64.c
> +++ b/target/ppc/mmu-hash64.c
> @@ -31,6 +31,7 @@
>  #include "hw/hw.h"
>  #include "internal.h"
>  #include "mmu-book3s-v3.h"
> +#include "mmu-books.h"
>  #include "helper_regs.h"
>  
>  #ifdef CONFIG_TCG
> diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c
> index cf9619e847..be7a45f254 100644
> --- a/target/ppc/mmu-radix64.c
> +++ b/target/ppc/mmu-radix64.c
> @@ -28,6 +28,7 @@
>  #include "internal.h"
>  #include "mmu-radix64.h"
>  #include "mmu-book3s-v3.h"
> +#include "mmu-books.h"
>  
>  /* Radix Partition Table Entry Fields */
>  #define PATE1_R_PRTB   0x0000




Re: [PATCH 31/43] target/ppc/mmu-radix64: Remove externally unused parts from header

2024-07-04 Thread Nicholas Piggin
On Mon May 27, 2024 at 9:13 AM AEST, BALATON Zoltan wrote:
> Move the parts not needed outside of mmu-radix64.c from the header to
> the C file to leave only parts in the header that need to be exported.
> Also drop unneeded include of this header.
>
> Signed-off-by: BALATON Zoltan 

Acked-by: Nicholas Piggin 

> ---
>  target/ppc/mmu-book3s-v3.c |  1 -
>  target/ppc/mmu-radix64.c   | 49 +++
>  target/ppc/mmu-radix64.h   | 53 +-
>  3 files changed, 50 insertions(+), 53 deletions(-)
>
> diff --git a/target/ppc/mmu-book3s-v3.c b/target/ppc/mmu-book3s-v3.c
> index c8f69b3df9..a812cb5113 100644
> --- a/target/ppc/mmu-book3s-v3.c
> +++ b/target/ppc/mmu-book3s-v3.c
> @@ -21,7 +21,6 @@
>  #include "cpu.h"
>  #include "mmu-hash64.h"
>  #include "mmu-book3s-v3.h"
> -#include "mmu-radix64.h"
>  
>  bool ppc64_v3_get_pate(PowerPCCPU *cpu, target_ulong lpid, ppc_v3_pate_t 
> *entry)
>  {
> diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c
> index 5a02e4963b..cf9619e847 100644
> --- a/target/ppc/mmu-radix64.c
> +++ b/target/ppc/mmu-radix64.c
> @@ -29,6 +29,37 @@
>  #include "mmu-radix64.h"
>  #include "mmu-book3s-v3.h"
>  
> +/* Radix Partition Table Entry Fields */
> +#define PATE1_R_PRTB   0x0000
> +#define PATE1_R_PRTS   0x001F
> +
> +/* Radix Process Table Entry Fields */
> +#define PRTBE_R_GET_RTS(rts) \
> +rts >> 58) & 0x18) | ((rts >> 5) & 0x7)) + 31)
> +#define PRTBE_R_RPDB0x0F00
> +#define PRTBE_R_RPDS0x001F
> +
> +/* Radix Page Directory/Table Entry Fields */
> +#define R_PTE_VALID 0x8000
> +#define R_PTE_LEAF  0x4000
> +#define R_PTE_SW0   0x2000
> +#define R_PTE_RPN   0x01FFF000
> +#define R_PTE_SW1   0x0E00
> +#define R_GET_SW(sw)(((sw >> 58) & 0x8) | ((sw >> 9) & 0x7))
> +#define R_PTE_R 0x0100
> +#define R_PTE_C 0x0080
> +#define R_PTE_ATT   0x0030
> +#define R_PTE_ATT_NORMAL0x
> +#define R_PTE_ATT_SAO   0x0010
> +#define R_PTE_ATT_NI_IO 0x0020
> +#define R_PTE_ATT_TOLERANT_IO   0x0030
> +#define R_PTE_EAA_PRIV  0x0008
> +#define R_PTE_EAA_R 0x0004
> +#define R_PTE_EAA_RW0x0002
> +#define R_PTE_EAA_X 0x0001
> +#define R_PDE_NLB   PRTBE_R_RPDB
> +#define R_PDE_NLS   PRTBE_R_RPDS
> +
>  static bool ppc_radix64_get_fully_qualified_addr(const CPUPPCState *env,
>   vaddr eaddr,
>   uint64_t *lpid, uint64_t 
> *pid)
> @@ -180,6 +211,24 @@ static void ppc_radix64_raise_hsi(PowerPCCPU *cpu, 
> MMUAccessType access_type,
>  }
>  }
>  
> +static int ppc_radix64_get_prot_eaa(uint64_t pte)
> +{
> +return (pte & R_PTE_EAA_R ? PAGE_READ : 0) |
> +   (pte & R_PTE_EAA_RW ? PAGE_READ | PAGE_WRITE : 0) |
> +   (pte & R_PTE_EAA_X ? PAGE_EXEC : 0);
> +}
> +
> +static int ppc_radix64_get_prot_amr(const PowerPCCPU *cpu)
> +{
> +const CPUPPCState *env = &cpu->env;
> +int amr = env->spr[SPR_AMR] >> 62; /* We only care about key0 AMR63:62 */
> +int iamr = env->spr[SPR_IAMR] >> 62; /* We only care about key0 
> IAMR63:62 */
> +
> +return (amr & 0x2 ? 0 : PAGE_WRITE) | /* Access denied if bit is set */
> +   (amr & 0x1 ? 0 : PAGE_READ) |
> +   (iamr & 0x1 ? 0 : PAGE_EXEC);
> +}
> +
>  static bool ppc_radix64_check_prot(PowerPCCPU *cpu, MMUAccessType 
> access_type,
> uint64_t pte, int *fault_cause, int *prot,
> int mmu_idx, bool partition_scoped)
> diff --git a/target/ppc/mmu-radix64.h b/target/ppc/mmu-radix64.h
> index c5c04a1527..6620b3d648 100644
> --- a/target/ppc/mmu-radix64.h
> +++ b/target/ppc/mmu-radix64.h
> @@ -3,7 +3,7 @@
>  
>  #ifndef CONFIG_USER_ONLY
>  
> -#include "exec/page-protection.h"
> +#ifdef TARGET_PPC64
>  
>  /* Radix Quadrants */
>  #define R_EADDR_MASK0x3FFF
> @@ -14,61 +14,10 @@
>  #define R_EADDR_QUADRANT2   0x8000
>  #define R_EADDR_QUADRANT3   0xC000
>  
> -/* Radix Par

Re: [PATCH 33/43] target/ppc: Remove single use static inline function

2024-07-04 Thread Nicholas Piggin
On Mon May 27, 2024 at 9:13 AM AEST, BALATON Zoltan wrote:
> The ger_pack_masks() function is only used once and the inverse of
> this operation is already inlined so it can be inlined too in the only
> caller and removed from the header.

Is this needed for later patches? I might prefer to keep it, even
move it into vsx-impl.c.inc and pull its inverse out into its own
function too even.

Thanks,
Nick

>
> Signed-off-by: BALATON Zoltan 
> ---
>  target/ppc/internal.h   | 9 -
>  target/ppc/translate/vsx-impl.c.inc | 6 --
>  2 files changed, 4 insertions(+), 11 deletions(-)
>
> diff --git a/target/ppc/internal.h b/target/ppc/internal.h
> index 20fb2ec593..8e5a241f74 100644
> --- a/target/ppc/internal.h
> +++ b/target/ppc/internal.h
> @@ -293,13 +293,4 @@ FIELD(GER_MSK, XMSK, 0, 4)
>  FIELD(GER_MSK, YMSK, 4, 4)
>  FIELD(GER_MSK, PMSK, 8, 8)
>  
> -static inline int ger_pack_masks(int pmsk, int ymsk, int xmsk)
> -{
> -int msk = 0;
> -msk = FIELD_DP32(msk, GER_MSK, XMSK, xmsk);
> -msk = FIELD_DP32(msk, GER_MSK, YMSK, ymsk);
> -msk = FIELD_DP32(msk, GER_MSK, PMSK, pmsk);
> -return msk;
> -}
> -
>  #endif /* PPC_INTERNAL_H */
> diff --git a/target/ppc/translate/vsx-impl.c.inc 
> b/target/ppc/translate/vsx-impl.c.inc
> index 0266f09119..62950d348a 100644
> --- a/target/ppc/translate/vsx-impl.c.inc
> +++ b/target/ppc/translate/vsx-impl.c.inc
> @@ -2819,7 +2819,7 @@ static bool trans_XXSETACCZ(DisasContext *ctx, arg_X_a 
> *a)
>  static bool do_ger(DisasContext *ctx, arg_MMIRR_XX3 *a,
>  void (*helper)(TCGv_env, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32))
>  {
> -uint32_t mask;
> +uint32_t mask = 0;
>  TCGv_ptr xt, xa, xb;
>  REQUIRE_INSNS_FLAGS2(ctx, ISA310);
>  REQUIRE_VSX(ctx);
> @@ -2832,7 +2832,9 @@ static bool do_ger(DisasContext *ctx, arg_MMIRR_XX3 *a,
>  xa = gen_vsr_ptr(a->xa);
>  xb = gen_vsr_ptr(a->xb);
>  
> -mask = ger_pack_masks(a->pmsk, a->ymsk, a->xmsk);
> +mask = FIELD_DP32(mask, GER_MSK, XMSK, a->xmsk);
> +mask = FIELD_DP32(mask, GER_MSK, YMSK, a->ymsk);
> +mask = FIELD_DP32(mask, GER_MSK, PMSK, a->pmsk);
>  helper(tcg_env, xa, xb, xt, tcg_constant_i32(mask));
>  return true;
>  }




Re: [PATCH 35/43] target/ppc/mmu-hash32.c: Change parameter type of ppc_hash32_bat_lookup()

2024-07-04 Thread Nicholas Piggin
On Mon May 27, 2024 at 9:13 AM AEST, BALATON Zoltan wrote:
> This function takes PowerPCCPU but only needs the env from it. Change
> its parameter to CPUPPCState *env.
>

Reviewed-by: Nicholas Piggin 

> Signed-off-by: BALATON Zoltan 
> ---
>  target/ppc/mmu-hash32.c | 5 ++---
>  1 file changed, 2 insertions(+), 3 deletions(-)
>
> diff --git a/target/ppc/mmu-hash32.c b/target/ppc/mmu-hash32.c
> index 44b16142ab..a2c0ac05d2 100644
> --- a/target/ppc/mmu-hash32.c
> +++ b/target/ppc/mmu-hash32.c
> @@ -48,11 +48,10 @@ static target_ulong hash32_bat_size(int mmu_idx,
>  return BATU32_BEPI & ~((batu & BATU32_BL) << 15);
>  }
>  
> -static hwaddr ppc_hash32_bat_lookup(PowerPCCPU *cpu, target_ulong ea,
> +static hwaddr ppc_hash32_bat_lookup(CPUPPCState *env, target_ulong ea,
>  MMUAccessType access_type, int *prot,
>  int mmu_idx)
>  {
> -CPUPPCState *env = &cpu->env;
>  target_ulong *BATlt, *BATut;
>  bool ifetch = access_type == MMU_INST_FETCH;
>  int i;
> @@ -316,7 +315,7 @@ bool ppc_hash32_xlate(PowerPCCPU *cpu, vaddr eaddr, 
> MMUAccessType access_type,
>  
>  /* 2. Check Block Address Translation entries (BATs) */
>  if (env->nb_BATs != 0) {
> -raddr = ppc_hash32_bat_lookup(cpu, eaddr, access_type, protp, 
> mmu_idx);
> +raddr = ppc_hash32_bat_lookup(env, eaddr, access_type, protp, 
> mmu_idx);
>  if (raddr != -1) {
>  if (!check_prot_access_type(*protp, access_type)) {
>  if (guest_visible) {




Re: [PATCH 36/43] target/ppc/mmu-hash32: Remove some static inlines from header

2024-07-04 Thread Nicholas Piggin
On Mon May 27, 2024 at 9:13 AM AEST, BALATON Zoltan wrote:
> Two of these are not used anywhere and the other two are used only
> once and can be inlined and removed from the header.

I'd prefer to put these in the .c file. Probably calculating the
base once would generate marginally better code since it would not
have to keep reloading it (since there is a barrier there it can't
cache the value).

Thanks,
Nick

>
> Signed-off-by: BALATON Zoltan 
> ---
>  target/ppc/mmu-hash32.c |  5 +++--
>  target/ppc/mmu-hash32.h | 32 
>  2 files changed, 3 insertions(+), 34 deletions(-)
>
> diff --git a/target/ppc/mmu-hash32.c b/target/ppc/mmu-hash32.c
> index a2c0ac05d2..7a6a674f8a 100644
> --- a/target/ppc/mmu-hash32.c
> +++ b/target/ppc/mmu-hash32.c
> @@ -206,17 +206,18 @@ static hwaddr ppc_hash32_pteg_search(PowerPCCPU *cpu, 
> hwaddr pteg_off,
>  {
>  hwaddr pte_offset = pteg_off;
>  target_ulong pte0, pte1;
> +hwaddr base = ppc_hash32_hpt_base(cpu);
>  int i;
>  
>  for (i = 0; i < HPTES_PER_GROUP; i++) {
> -pte0 = ppc_hash32_load_hpte0(cpu, pte_offset);
> +pte0 = ldl_phys(CPU(cpu)->as, base + pte_offset);
>  /*
>   * pte0 contains the valid bit and must be read before pte1,
>   * otherwise we might see an old pte1 with a new valid bit and
>   * thus an inconsistent hpte value
>   */
>  smp_rmb();
> -pte1 = ppc_hash32_load_hpte1(cpu, pte_offset);
> +pte1 = ldl_phys(CPU(cpu)->as, base + pte_offset + HASH_PTE_SIZE_32 / 
> 2);
>  
>  if ((pte0 & HPTE32_V_VALID)
>  && (secondary == !!(pte0 & HPTE32_V_SECONDARY))
> diff --git a/target/ppc/mmu-hash32.h b/target/ppc/mmu-hash32.h
> index 2838de031c..4db55fb0a0 100644
> --- a/target/ppc/mmu-hash32.h
> +++ b/target/ppc/mmu-hash32.h
> @@ -69,38 +69,6 @@ static inline hwaddr ppc_hash32_hpt_mask(PowerPCCPU *cpu)
>  return ((cpu->env.spr[SPR_SDR1] & SDR_32_HTABMASK) << 16) | 0x;
>  }
>  
> -static inline target_ulong ppc_hash32_load_hpte0(PowerPCCPU *cpu,
> - hwaddr pte_offset)
> -{
> -target_ulong base = ppc_hash32_hpt_base(cpu);
> -
> -return ldl_phys(CPU(cpu)->as, base + pte_offset);
> -}
> -
> -static inline target_ulong ppc_hash32_load_hpte1(PowerPCCPU *cpu,
> - hwaddr pte_offset)
> -{
> -target_ulong base = ppc_hash32_hpt_base(cpu);
> -
> -return ldl_phys(CPU(cpu)->as, base + pte_offset + HASH_PTE_SIZE_32 / 2);
> -}
> -
> -static inline void ppc_hash32_store_hpte0(PowerPCCPU *cpu,
> -  hwaddr pte_offset, target_ulong 
> pte0)
> -{
> -target_ulong base = ppc_hash32_hpt_base(cpu);
> -
> -stl_phys(CPU(cpu)->as, base + pte_offset, pte0);
> -}
> -
> -static inline void ppc_hash32_store_hpte1(PowerPCCPU *cpu,
> -  hwaddr pte_offset, target_ulong 
> pte1)
> -{
> -target_ulong base = ppc_hash32_hpt_base(cpu);
> -
> -stl_phys(CPU(cpu)->as, base + pte_offset + HASH_PTE_SIZE_32 / 2, pte1);
> -}
> -
>  static inline hwaddr get_pteg_offset32(PowerPCCPU *cpu, hwaddr hash)
>  {
>  return (hash * HASH_PTEG_SIZE_32) & ppc_hash32_hpt_mask(cpu);




Re: [PATCH 37/43] target/ppc/mmu-hash32.c: Return and use pte address instead of base + offset

2024-07-04 Thread Nicholas Piggin
On Mon May 27, 2024 at 9:13 AM AEST, BALATON Zoltan wrote:
> Change ppc_hash32_pteg_search() to return pte address instead of an
> offset to avoid needing to get the base and add offset to it when we
> already have the address we need.

I think this looks good, but would need small rebase if the previous
patch is changed.

Reviewed-by: Nicholas Piggin 

>
> Signed-off-by: BALATON Zoltan 
> ---
>  target/ppc/mmu-hash32.c | 51 -
>  1 file changed, 20 insertions(+), 31 deletions(-)
>
> diff --git a/target/ppc/mmu-hash32.c b/target/ppc/mmu-hash32.c
> index 7a6a674f8a..cc1e790d0e 100644
> --- a/target/ppc/mmu-hash32.c
> +++ b/target/ppc/mmu-hash32.c
> @@ -204,58 +204,48 @@ static hwaddr ppc_hash32_pteg_search(PowerPCCPU *cpu, 
> hwaddr pteg_off,
>   bool secondary, target_ulong ptem,
>   ppc_hash_pte32_t *pte)
>  {
> -hwaddr pte_offset = pteg_off;
> +hwaddr pte_addr = ppc_hash32_hpt_base(cpu) + pteg_off;
>  target_ulong pte0, pte1;
> -hwaddr base = ppc_hash32_hpt_base(cpu);
>  int i;
>  
> -for (i = 0; i < HPTES_PER_GROUP; i++) {
> -pte0 = ldl_phys(CPU(cpu)->as, base + pte_offset);
> +for (i = 0; i < HPTES_PER_GROUP; i++, pte_addr += HASH_PTE_SIZE_32) {
> +pte0 = ldl_phys(CPU(cpu)->as, pte_addr);
>  /*
>   * pte0 contains the valid bit and must be read before pte1,
>   * otherwise we might see an old pte1 with a new valid bit and
>   * thus an inconsistent hpte value
>   */
>  smp_rmb();
> -pte1 = ldl_phys(CPU(cpu)->as, base + pte_offset + HASH_PTE_SIZE_32 / 
> 2);
> +pte1 = ldl_phys(CPU(cpu)->as, pte_addr + HASH_PTE_SIZE_32 / 2);
>  
>  if ((pte0 & HPTE32_V_VALID)
>  && (secondary == !!(pte0 & HPTE32_V_SECONDARY))
>  && HPTE32_V_COMPARE(pte0, ptem)) {
>  pte->pte0 = pte0;
>  pte->pte1 = pte1;
> -return pte_offset;
> +return pte_addr;
>  }
> -
> -pte_offset += HASH_PTE_SIZE_32;
>  }
> -
>  return -1;
>  }
>  
> -static void ppc_hash32_set_r(PowerPCCPU *cpu, hwaddr pte_offset, uint32_t 
> pte1)
> +static void ppc_hash32_set_r(PowerPCCPU *cpu, hwaddr pte_addr, uint32_t pte1)
>  {
> -target_ulong base = ppc_hash32_hpt_base(cpu);
> -hwaddr offset = pte_offset + 6;
> -
>  /* The HW performs a non-atomic byte update */
> -stb_phys(CPU(cpu)->as, base + offset, ((pte1 >> 8) & 0xff) | 0x01);
> +stb_phys(CPU(cpu)->as, pte_addr + 6, ((pte1 >> 8) & 0xff) | 0x01);
>  }
>  
> -static void ppc_hash32_set_c(PowerPCCPU *cpu, hwaddr pte_offset, uint64_t 
> pte1)
> +static void ppc_hash32_set_c(PowerPCCPU *cpu, hwaddr pte_addr, uint64_t pte1)
>  {
> -target_ulong base = ppc_hash32_hpt_base(cpu);
> -hwaddr offset = pte_offset + 7;
> -
>  /* The HW performs a non-atomic byte update */
> -stb_phys(CPU(cpu)->as, base + offset, (pte1 & 0xff) | 0x80);
> +stb_phys(CPU(cpu)->as, pte_addr + 7, (pte1 & 0xff) | 0x80);
>  }
>  
>  static hwaddr ppc_hash32_htab_lookup(PowerPCCPU *cpu,
>   target_ulong sr, target_ulong eaddr,
>   ppc_hash_pte32_t *pte)
>  {
> -hwaddr pteg_off, pte_offset;
> +hwaddr pteg_off, pte_addr;
>  hwaddr hash;
>  uint32_t vsid, pgidx, ptem;
>  
> @@ -277,18 +267,18 @@ static hwaddr ppc_hash32_htab_lookup(PowerPCCPU *cpu,
>  ppc_hash32_hpt_base(cpu), ppc_hash32_hpt_mask(cpu),
>  vsid, ptem, hash);
>  pteg_off = get_pteg_offset32(cpu, hash);
> -pte_offset = ppc_hash32_pteg_search(cpu, pteg_off, 0, ptem, pte);
> -if (pte_offset == -1) {
> +pte_addr = ppc_hash32_pteg_search(cpu, pteg_off, 0, ptem, pte);
> +if (pte_addr == -1) {
>  /* Secondary PTEG lookup */
>  qemu_log_mask(CPU_LOG_MMU, "1 htab=" HWADDR_FMT_plx "/" 
> HWADDR_FMT_plx
>  " vsid=%" PRIx32 " api=%" PRIx32
>  " hash=" HWADDR_FMT_plx "\n", ppc_hash32_hpt_base(cpu),
>  ppc_hash32_hpt_mask(cpu), vsid, ptem, ~hash);
>  pteg_off = get_pteg_offset32(cpu, ~hash);
> -pte_offset = ppc_hash32_pteg_search(cpu, pteg_off, 1, ptem, pte);
> +pte_addr = ppc_hash32_pteg_search(cpu, pteg_off, 1, ptem, pte);
>  }
>  
> -return pte_offset;
> +return pte_addr;
>  }
>  
>  bool ppc_hash32_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUA

Re: [PATCH 39/43] target/ppc: Change parameter type of some inline functions

2024-07-04 Thread Nicholas Piggin
On Mon May 27, 2024 at 9:13 AM AEST, BALATON Zoltan wrote:
> These functions take PowerPCCPU but only need the env from it. Change
> their parameter to CPUPPCState *env.

I suppose that's okay. Probably generates a little better code.

Acked-by: Nicholas Piggin 

>
> Signed-off-by: BALATON Zoltan 
> ---
>  target/ppc/mmu-hash32.c | 13 +++--
>  target/ppc/mmu-hash32.h | 12 ++--
>  target/ppc/mmu_common.c | 20 +---
>  3 files changed, 22 insertions(+), 23 deletions(-)
>
> diff --git a/target/ppc/mmu-hash32.c b/target/ppc/mmu-hash32.c
> index 6d0adf3357..f18faf0f46 100644
> --- a/target/ppc/mmu-hash32.c
> +++ b/target/ppc/mmu-hash32.c
> @@ -244,10 +244,11 @@ static hwaddr ppc_hash32_htab_lookup(PowerPCCPU *cpu,
>   target_ulong sr, target_ulong eaddr,
>   ppc_hash_pte32_t *pte)
>  {
> +CPUPPCState *env = &cpu->env;
>  hwaddr hpt_base, pteg_off, pte_addr, hash;
>  uint32_t vsid, pgidx, ptem;
>  
> -hpt_base = ppc_hash32_hpt_base(cpu);
> +hpt_base = ppc_hash32_hpt_base(env);
>  vsid = sr & SR32_VSID;
>  pgidx = (eaddr & ~SEGMENT_MASK_256M) >> TARGET_PAGE_BITS;
>  hash = vsid ^ pgidx;
> @@ -256,21 +257,21 @@ static hwaddr ppc_hash32_htab_lookup(PowerPCCPU *cpu,
>  /* Page address translation */
>  qemu_log_mask(CPU_LOG_MMU, "htab_base " HWADDR_FMT_plx " htab_mask "
>HWADDR_FMT_plx " hash " HWADDR_FMT_plx "\n",
> -  hpt_base, ppc_hash32_hpt_mask(cpu), hash);
> +  hpt_base, ppc_hash32_hpt_mask(env), hash);
>  
>  /* Primary PTEG lookup */
>  qemu_log_mask(CPU_LOG_MMU, "0 htab=" HWADDR_FMT_plx "/" HWADDR_FMT_plx
>" vsid=%" PRIx32 " ptem=%" PRIx32 " hash=" HWADDR_FMT_plx
> -  "\n", hpt_base, ppc_hash32_hpt_mask(cpu), vsid, ptem, 
> hash);
> -pteg_off = get_pteg_offset32(cpu, hash);
> +  "\n", hpt_base, ppc_hash32_hpt_mask(env), vsid, ptem, 
> hash);
> +pteg_off = get_pteg_offset32(env, hash);
>  pte_addr = ppc_hash32_pteg_search(cpu, hpt_base + pteg_off, 0, ptem, 
> pte);
>  if (pte_addr == -1) {
>  /* Secondary PTEG lookup */
>  qemu_log_mask(CPU_LOG_MMU, "1 htab=" HWADDR_FMT_plx "/" 
> HWADDR_FMT_plx
>" vsid=%" PRIx32 " api=%" PRIx32 " hash=" 
> HWADDR_FMT_plx
> -  "\n", hpt_base, ppc_hash32_hpt_mask(cpu), vsid, ptem,
> +  "\n", hpt_base, ppc_hash32_hpt_mask(env), vsid, ptem,
>~hash);
> -pteg_off = get_pteg_offset32(cpu, ~hash);
> +pteg_off = get_pteg_offset32(env, ~hash);
>  pte_addr = ppc_hash32_pteg_search(cpu, hpt_base + pteg_off, 1, ptem,
>pte);
>  }
> diff --git a/target/ppc/mmu-hash32.h b/target/ppc/mmu-hash32.h
> index 4db55fb0a0..ec8d881def 100644
> --- a/target/ppc/mmu-hash32.h
> +++ b/target/ppc/mmu-hash32.h
> @@ -59,19 +59,19 @@ bool ppc_hash32_xlate(PowerPCCPU *cpu, vaddr eaddr, 
> MMUAccessType access_type,
>  #define HPTE32_R_WIMG   0x0078
>  #define HPTE32_R_PP 0x0003
>  
> -static inline hwaddr ppc_hash32_hpt_base(PowerPCCPU *cpu)
> +static inline hwaddr ppc_hash32_hpt_base(CPUPPCState *env)
>  {
> -return cpu->env.spr[SPR_SDR1] & SDR_32_HTABORG;
> +return env->spr[SPR_SDR1] & SDR_32_HTABORG;
>  }
>  
> -static inline hwaddr ppc_hash32_hpt_mask(PowerPCCPU *cpu)
> +static inline hwaddr ppc_hash32_hpt_mask(CPUPPCState *env)
>  {
> -return ((cpu->env.spr[SPR_SDR1] & SDR_32_HTABMASK) << 16) | 0x;
> +return ((env->spr[SPR_SDR1] & SDR_32_HTABMASK) << 16) | 0x;
>  }
>  
> -static inline hwaddr get_pteg_offset32(PowerPCCPU *cpu, hwaddr hash)
> +static inline hwaddr get_pteg_offset32(CPUPPCState *env, hwaddr hash)
>  {
> -return (hash * HASH_PTEG_SIZE_32) & ppc_hash32_hpt_mask(cpu);
> +return (hash * HASH_PTEG_SIZE_32) & ppc_hash32_hpt_mask(env);
>  }
>  
>  static inline bool ppc_hash32_key(bool pr, target_ulong sr)
> diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c
> index 60f8736210..b45eb64f6e 100644
> --- a/target/ppc/mmu_common.c
> +++ b/target/ppc/mmu_common.c
> @@ -166,8 +166,8 @@ static int ppc6xx_tlb_check(CPUPPCState *env, hwaddr 
> *raddr, int *prot,
>  #if defined(DUMP_PAGE_TABLES)
>  if (qemu_loglevel_mask(CPU_LOG_MMU)) {
>   

Re: [PATCH 40/43] target/ppc: Change parameter type of ppc64_v3_radix()

2024-07-04 Thread Nicholas Piggin
On Mon May 27, 2024 at 9:13 AM AEST, BALATON Zoltan wrote:
> This function takes PowerPCCPU but only needs the env from it. Change
> its parameter to CPUPPCState *env.

Acked-by: Nicholas Piggin 

>
> Signed-off-by: BALATON Zoltan 
> ---
>  hw/ppc/spapr_rtas.c| 2 +-
>  target/ppc/mmu-book3s-v3.h | 4 ++--
>  target/ppc/mmu_common.c| 4 ++--
>  3 files changed, 5 insertions(+), 5 deletions(-)
>
> diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
> index f329693c55..38e94fc0d7 100644
> --- a/hw/ppc/spapr_rtas.c
> +++ b/hw/ppc/spapr_rtas.c
> @@ -177,7 +177,7 @@ static void rtas_start_cpu(PowerPCCPU *callcpu, 
> SpaprMachineState *spapr,
>   * New cpus are expected to start in the same radix/hash mode
>   * as the existing CPUs
>   */
> -if (ppc64_v3_radix(callcpu)) {
> +if (ppc64_v3_radix(&callcpu->env)) {
>  lpcr |= LPCR_UPRT | LPCR_GTSE | LPCR_HR;
>  } else {
>  lpcr &= ~(LPCR_UPRT | LPCR_GTSE | LPCR_HR);
> diff --git a/target/ppc/mmu-book3s-v3.h b/target/ppc/mmu-book3s-v3.h
> index be66e26604..e52129ff7f 100644
> --- a/target/ppc/mmu-book3s-v3.h
> +++ b/target/ppc/mmu-book3s-v3.h
> @@ -75,9 +75,9 @@ bool ppc64_v3_get_pate(PowerPCCPU *cpu, target_ulong lpid,
>   * dig out the partition table in the fast path. This is
>   * also how the HW uses it.
>   */
> -static inline bool ppc64_v3_radix(PowerPCCPU *cpu)
> +static inline bool ppc64_v3_radix(CPUPPCState *env)
>  {
> -return !!(cpu->env.spr[SPR_LPCR] & LPCR_HR);
> +return !!(env->spr[SPR_LPCR] & LPCR_HR);
>  }
>  
>  #endif /* TARGET_PPC64 */
> diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c
> index b45eb64f6e..ab055ca96b 100644
> --- a/target/ppc/mmu_common.c
> +++ b/target/ppc/mmu_common.c
> @@ -565,7 +565,7 @@ void dump_mmu(CPUPPCState *env)
>  dump_slb(env_archcpu(env));
>  break;
>  case POWERPC_MMU_3_00:
> -if (ppc64_v3_radix(env_archcpu(env))) {
> +if (ppc64_v3_radix(env)) {
>  qemu_log_mask(LOG_UNIMP, "%s: the PPC64 MMU is unsupported\n",
>__func__);
>  } else {
> @@ -810,7 +810,7 @@ bool ppc_xlate(PowerPCCPU *cpu, vaddr eaddr, 
> MMUAccessType access_type,
>  switch (cpu->env.mmu_model) {
>  #if defined(TARGET_PPC64)
>  case POWERPC_MMU_3_00:
> -if (ppc64_v3_radix(cpu)) {
> +if (ppc64_v3_radix(&cpu->env)) {
>  return ppc_radix64_xlate(cpu, eaddr, access_type, raddrp,
>   psizep, protp, mmu_idx, guest_visible);
>  }




Re: [PATCH 41/43] target/ppc: Change MMU xlate functions to take CPUState

2024-07-04 Thread Nicholas Piggin
On Mon May 27, 2024 at 9:13 AM AEST, BALATON Zoltan wrote:
> The callers of xlate functions get CPUState which is then cast to
> PowerPCCPU that is then cast back to CPUState by most xlate functions.
> Avoid this back and forth casting by passing the existing CPUState to
> xlate functions and let them convert it as needed.

I guess. Is this faster?

Thanks,
Nick



Re: [PATCH V13 1/8] accel/kvm: Extract common KVM vCPU {creation,parking} code

2024-07-04 Thread Nicholas Piggin
Looks like there is a bit of noise around this recently. Do we
think the hotplug patches can get over the line this time?

If not, perhaps we work with Salil to get this patch 1 upstream
at least.

Thanks,
Nick

On Tue Jun 25, 2024 at 3:08 PM AEST, Harsh Prateek Bora wrote:
> +qemu-devel, qemu-ppc
>
> Ping!
>
> On 6/17/24 15:18, Harsh Prateek Bora wrote:
> > 
> > + MST, Igor - to help with early review/merge. TIA.
> > 
> > On 6/14/24 16:06, Salil Mehta wrote:
> >> Hello
> >>
> >>>   From: Harsh Prateek Bora 
> >>>   Sent: Friday, June 14, 2024 6:24 AM
> >>>   Hi Paolo, Nick,
> >>>   Can this patch 1/8 be merged earlier provided we have got 
> >>> sufficient R-bys
> >>>   for it and the review of entire series may take a longer time?
> >>>   We have some ppc64 patches based on it, hence the ask.
> >>>   Hi Salil,
> >>>   I am hoping we are not expecting anymore changes to this patch, please
> >>>   confirm.
> >>
> >>
> >> I do not expect any change. I had requested Michael to merge the complete
> >> series as it is stranding other users. He then requested Igor to take 
> >> a final look but
> >> he has not reverted yet. I'll remind Michael again. BTW, can you reply 
> >> to below
> >> patch explicitly indicating your interest in the series so that MST 
> >> knows who else
> >> are the stake holders here
> >>
> >> https://lore.kernel.org/qemu-devel/20240605160327.3c71f...@imammedo.users.ipa.redhat.com/
> >>
> >>
> >> Hi Paolo,
> >>
> >> A request, would it be possible to skim through this series from KVM 
> >> perspective?
> >> (although nothing has changed which will affect the KVM and this is 
> >> architecture
> >> agnostic patch-set)
> >>
> >> Many thanks!
> >>
> >> Best
> >> Salil.
> >>
> >>
> >>>   regards,
> >>>   Harsh
> >>>   On 6/7/24 17:26, Salil Mehta wrote:
> >>>   > KVM vCPU creation is done once during the vCPU realization when Qemu
> >>>   > vCPU thread is spawned. This is common to all the architectures 
> >>> as of now.
> >>>   >
> >>>   > Hot-unplug of vCPU results in destruction of the vCPU object in QOM
> >>>   > but the corresponding KVM vCPU object in the Host KVM is not 
> >>> destroyed
> >>>   > as KVM doesn't support vCPU removal. Therefore, its 
> >>> representative KVM
> >>>   > vCPU object/context in Qemu is parked.
> >>>   >
> >>>   > Refactor architecture common logic so that some APIs could be reused
> >>>   > by vCPU Hotplug code of some architectures likes ARM, Loongson etc.
> >>>   > Update new/old APIs with trace events. No functional change is 
> >>> intended
> >>>   here.
> >>>   >
> >>>   > Signed-off-by: Salil Mehta 
> >>>   > Reviewed-by: Gavin Shan 
> >>>   > Tested-by: Vishnu Pajjuri 
> >>>   > Reviewed-by: Jonathan Cameron 
> >>>   > Tested-by: Xianglai Li 
> >>>   > Tested-by: Miguel Luis 
> >>>   > Reviewed-by: Shaoqin Huang 
> >>>   > Reviewed-by: Vishnu Pajjuri 
> >>>   > Reviewed-by: Nicholas Piggin 
> >>>   > Tested-by: Zhao Liu 
> >>>   > Reviewed-by: Zhao Liu 
> >>>   > Reviewed-by: Harsh Prateek Bora 
> >>>   > ---
> >>>   >   accel/kvm/kvm-all.c    | 95 
> >>> 
> >>>   --
> >>>   >   accel/kvm/kvm-cpus.h   |  1 -
> >>>   >   accel/kvm/trace-events |  5 ++-
> >>>   >   include/sysemu/kvm.h   | 25 +++
> >>>   >   4 files changed, 92 insertions(+), 34 deletions(-)
> >>>   >
> >>>   > diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index
> >>>   > c0be9f5eed..8f9128bb92 100644
> >>>   > --- a/accel/kvm/kvm-all.c
> >>>   > +++ b/accel/kvm/kvm-all.c
> >>>   > @@ -340,14 +340,71 @@ err:
> >>>   >   return ret;
> >>>   >   }
> >>>   >
> >>>   > +void kvm_park_vcpu(CPUState *cpu)
> >>>   > +{
> >>>   > +    struct KVMParkedVcpu *vcpu;
> >>>   > +
> >>>   > +    trace_kvm_park_vcpu(cpu->cpu_in

Re: [PATCH v2 1/7] target/ppc: use locally stored msr and avoid indirect access

2024-07-04 Thread Nicholas Piggin
On Thu May 23, 2024 at 3:14 PM AEST, Harsh Prateek Bora wrote:
> hreg_compute_hflags_value already stores msr locally to be used in most
> of the logic in the routine however some instances are still using
> env->msr which is unnecessary. Use locally stored value as available.
>
> Signed-off-by: Harsh Prateek Bora 

Reviewed-by: Nicholas Piggin 

> ---
>  target/ppc/helper_regs.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/target/ppc/helper_regs.c b/target/ppc/helper_regs.c
> index 25258986e3..945fa1a596 100644
> --- a/target/ppc/helper_regs.c
> +++ b/target/ppc/helper_regs.c
> @@ -106,10 +106,10 @@ static uint32_t hreg_compute_hflags_value(CPUPPCState 
> *env)
>  
>  if (ppc_flags & POWERPC_FLAG_DE) {
>  target_ulong dbcr0 = env->spr[SPR_BOOKE_DBCR0];
> -if ((dbcr0 & DBCR0_ICMP) && FIELD_EX64(env->msr, MSR, DE)) {
> +if ((dbcr0 & DBCR0_ICMP) && FIELD_EX64(msr, MSR, DE)) {
>  hflags |= 1 << HFLAGS_SE;
>  }
> -if ((dbcr0 & DBCR0_BRT) && FIELD_EX64(env->msr, MSR, DE)) {
> +if ((dbcr0 & DBCR0_BRT) && FIELD_EX64(msr, MSR, DE)) {
>  hflags |= 1 << HFLAGS_BE;
>  }
>  } else {




Re: [PATCH v2 2/7] target/ppc: optimize hreg_compute_pmu_hflags_value

2024-07-04 Thread Nicholas Piggin
On Thu May 23, 2024 at 3:14 PM AEST, Harsh Prateek Bora wrote:
> Cache env->spr[SPR_POWER_MMCR0] in a local variable as used in multiple
> conditions to avoid multiple indirect accesses.
>
> Signed-off-by: Harsh Prateek Bora 

Compiler might cache it in a reg, but anyway I like it.

Reviewed-by: Nicholas Piggin 

> ---
>  target/ppc/helper_regs.c | 9 +
>  1 file changed, 5 insertions(+), 4 deletions(-)
>
> diff --git a/target/ppc/helper_regs.c b/target/ppc/helper_regs.c
> index 945fa1a596..d09dcacd5e 100644
> --- a/target/ppc/helper_regs.c
> +++ b/target/ppc/helper_regs.c
> @@ -50,15 +50,16 @@ void hreg_swap_gpr_tgpr(CPUPPCState *env)
>  static uint32_t hreg_compute_pmu_hflags_value(CPUPPCState *env)
>  {
>  uint32_t hflags = 0;
> -
>  #if defined(TARGET_PPC64)
> -if (env->spr[SPR_POWER_MMCR0] & MMCR0_PMCC0) {
> +target_ulong mmcr0 = env->spr[SPR_POWER_MMCR0];
> +
> +if (mmcr0 & MMCR0_PMCC0) {
>  hflags |= 1 << HFLAGS_PMCC0;
>  }
> -if (env->spr[SPR_POWER_MMCR0] & MMCR0_PMCC1) {
> +if (mmcr0 & MMCR0_PMCC1) {
>  hflags |= 1 << HFLAGS_PMCC1;
>  }
> -if (env->spr[SPR_POWER_MMCR0] & MMCR0_PMCjCE) {
> +if (mmcr0 & MMCR0_PMCjCE) {
>  hflags |= 1 << HFLAGS_PMCJCE;
>  }
>  




Re: [PATCH v2 3/7] target/ppc: optimize hreg_compute_pmu_hflags_value

2024-07-04 Thread Nicholas Piggin
On Thu May 23, 2024 at 3:14 PM AEST, Harsh Prateek Bora wrote:
> The second if-condition can be true only if the first one above is true.
> Enclose the latter into the former to avoid un-necessary check if first
> condition fails.
>
> Signed-off-by: Harsh Prateek Bora 
> Reviewed-by: BALATON Zoltan 

Ditto for this it's possible compiler can transform it, but I
like the code.

Reviewed-by: Nicholas Piggin 

> ---
>  target/ppc/helper_regs.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/target/ppc/helper_regs.c b/target/ppc/helper_regs.c
> index d09dcacd5e..261a8ba79f 100644
> --- a/target/ppc/helper_regs.c
> +++ b/target/ppc/helper_regs.c
> @@ -66,9 +66,9 @@ static uint32_t hreg_compute_pmu_hflags_value(CPUPPCState 
> *env)
>  #ifndef CONFIG_USER_ONLY
>  if (env->pmc_ins_cnt) {
>  hflags |= 1 << HFLAGS_INSN_CNT;
> -}
> -if (env->pmc_ins_cnt & 0x1e) {
> -hflags |= 1 << HFLAGS_PMC_OTHER;
> +if (env->pmc_ins_cnt & 0x1e) {
> +hflags |= 1 << HFLAGS_PMC_OTHER;
> +}
>  }
>  #endif
>  #endif




Re: [PATCH v2 4/7] target/ppc: optimize p9 exception handling routines

2024-07-04 Thread Nicholas Piggin
On Thu May 23, 2024 at 3:14 PM AEST, Harsh Prateek Bora wrote:
> Currently, p9 exception handling has multiple if-condition checks where
> it does an indirect access to pending_interrupts via env. Pass the
> value during entry to avoid multiple indirect accesses.

Does code change? I don't mind, would like all CPU funtions done
the same way if we're going to do this though.

Thanks,
Nick

>
> Signed-off-by: Harsh Prateek Bora  
> ---
>  target/ppc/excp_helper.c | 47 +---
>  1 file changed, 25 insertions(+), 22 deletions(-)
>
> diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c
> index 0712098cf7..704eddac63 100644
> --- a/target/ppc/excp_helper.c
> +++ b/target/ppc/excp_helper.c
> @@ -1842,10 +1842,12 @@ static int p8_next_unmasked_interrupt(CPUPPCState 
> *env)
>   PPC_INTERRUPT_WDT | PPC_INTERRUPT_CDOORBELL | PPC_INTERRUPT_FIT |  \
>   PPC_INTERRUPT_PIT | PPC_INTERRUPT_THERM)
>  
> -static int p9_interrupt_powersave(CPUPPCState *env)
> +static int p9_interrupt_powersave(CPUPPCState *env,
> +  uint32_t pending_interrupts)
>  {
> +
>  /* External Exception */
> -if ((env->pending_interrupts & PPC_INTERRUPT_EXT) &&
> +if ((pending_interrupts & PPC_INTERRUPT_EXT) &&
>  (env->spr[SPR_LPCR] & LPCR_EEE)) {
>  bool heic = !!(env->spr[SPR_LPCR] & LPCR_HEIC);
>  if (!heic || !FIELD_EX64_HV(env->msr) ||
> @@ -1854,48 +1856,49 @@ static int p9_interrupt_powersave(CPUPPCState *env)
>  }
>  }
>  /* Decrementer Exception */
> -if ((env->pending_interrupts & PPC_INTERRUPT_DECR) &&
> +if ((pending_interrupts & PPC_INTERRUPT_DECR) &&
>  (env->spr[SPR_LPCR] & LPCR_DEE)) {
>  return PPC_INTERRUPT_DECR;
>  }
>  /* Machine Check or Hypervisor Maintenance Exception */
>  if (env->spr[SPR_LPCR] & LPCR_OEE) {
> -if (env->pending_interrupts & PPC_INTERRUPT_MCK) {
> +if (pending_interrupts & PPC_INTERRUPT_MCK) {
>  return PPC_INTERRUPT_MCK;
>  }
> -if (env->pending_interrupts & PPC_INTERRUPT_HMI) {
> +if (pending_interrupts & PPC_INTERRUPT_HMI) {
>  return PPC_INTERRUPT_HMI;
>  }
>  }
>  /* Privileged Doorbell Exception */
> -if ((env->pending_interrupts & PPC_INTERRUPT_DOORBELL) &&
> +if ((pending_interrupts & PPC_INTERRUPT_DOORBELL) &&
>  (env->spr[SPR_LPCR] & LPCR_PDEE)) {
>  return PPC_INTERRUPT_DOORBELL;
>  }
>  /* Hypervisor Doorbell Exception */
> -if ((env->pending_interrupts & PPC_INTERRUPT_HDOORBELL) &&
> +if ((pending_interrupts & PPC_INTERRUPT_HDOORBELL) &&
>  (env->spr[SPR_LPCR] & LPCR_HDEE)) {
>  return PPC_INTERRUPT_HDOORBELL;
>  }
>  /* Hypervisor virtualization exception */
> -if ((env->pending_interrupts & PPC_INTERRUPT_HVIRT) &&
> +if ((pending_interrupts & PPC_INTERRUPT_HVIRT) &&
>  (env->spr[SPR_LPCR] & LPCR_HVEE)) {
>  return PPC_INTERRUPT_HVIRT;
>  }
> -if (env->pending_interrupts & PPC_INTERRUPT_RESET) {
> +if (pending_interrupts & PPC_INTERRUPT_RESET) {
>  return PPC_INTERRUPT_RESET;
>  }
>  return 0;
>  }
>  
> -static int p9_next_unmasked_interrupt(CPUPPCState *env)
> +static int p9_next_unmasked_interrupt(CPUPPCState *env,
> +  uint32_t pending_interrupts)
>  {
>  CPUState *cs = env_cpu(env);
>  
>  /* Ignore MSR[EE] when coming out of some power management states */
>  bool msr_ee = FIELD_EX64(env->msr, MSR, EE) || env->resume_as_sreset;
>  
> -assert((env->pending_interrupts & P9_UNUSED_INTERRUPTS) == 0);
> +assert((pending_interrupts & P9_UNUSED_INTERRUPTS) == 0);
>  
>  if (cs->halted) {
>  if (env->spr[SPR_PSSCR] & PSSCR_EC) {
> @@ -1903,7 +1906,7 @@ static int p9_next_unmasked_interrupt(CPUPPCState *env)
>   * When PSSCR[EC] is set, LPCR[PECE] controls which interrupts 
> can
>   * wakeup the processor
>   */
> -return p9_interrupt_powersave(env);
> +return p9_interrupt_powersave(env, pending_interrupts);
>  } else {
>  /*
>   * When it's clear, any system-caused exception exits 
> power-saving
> @@ -1914,12 +1917,12 @@ static int p9_next_unmasked_interrupt(CPUPPCState 
> *env)
>  }
>  
>  /* Machine check exception */
> -if (env->pending_interrupts & PPC_INTERRUPT_MCK) {
> +if (pending_interrupts & PPC_INTERRUPT_MCK) {
>  return PPC_INTERRUPT_MCK;
>  }
>  
>  /* Hypervisor decrementer exception */
> -if (env->pending_interrupts & PPC_INTERRUPT_HDECR) {
> +if (pending_interrupts & PPC_INTERRUPT_HDECR) {
>  /* LPCR will be clear when not supported so this will work */
>  bool hdice = !!(env->spr[SPR_LPCR] & LPCR_HDICE);
>  if ((msr_ee || !FIELD_EX64_HV(env->msr)) && hdice) {
> @@ -1929,7 +1932,7 @@ static int p9

Re: [PATCH v2 1/7] target/ppc: use locally stored msr and avoid indirect access

2024-07-04 Thread Nicholas Piggin
On Thu May 23, 2024 at 3:14 PM AEST, Harsh Prateek Bora wrote:
> hreg_compute_hflags_value already stores msr locally to be used in most
> of the logic in the routine however some instances are still using
> env->msr which is unnecessary. Use locally stored value as available.

BTW hreg_store_msr uses env->msr a bunch of times. Would a local
variable improve that too?

Thanks,
Nick

>
> Signed-off-by: Harsh Prateek Bora 
> ---
>  target/ppc/helper_regs.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/target/ppc/helper_regs.c b/target/ppc/helper_regs.c
> index 25258986e3..945fa1a596 100644
> --- a/target/ppc/helper_regs.c
> +++ b/target/ppc/helper_regs.c
> @@ -106,10 +106,10 @@ static uint32_t hreg_compute_hflags_value(CPUPPCState 
> *env)
>  
>  if (ppc_flags & POWERPC_FLAG_DE) {
>  target_ulong dbcr0 = env->spr[SPR_BOOKE_DBCR0];
> -if ((dbcr0 & DBCR0_ICMP) && FIELD_EX64(env->msr, MSR, DE)) {
> +if ((dbcr0 & DBCR0_ICMP) && FIELD_EX64(msr, MSR, DE)) {
>  hflags |= 1 << HFLAGS_SE;
>  }
> -if ((dbcr0 & DBCR0_BRT) && FIELD_EX64(env->msr, MSR, DE)) {
> +if ((dbcr0 & DBCR0_BRT) && FIELD_EX64(msr, MSR, DE)) {
>  hflags |= 1 << HFLAGS_BE;
>  }
>  } else {




Re: [PATCH v2 5/7] target/ppc: optimize p9 exception handling routines for lpcr

2024-07-04 Thread Nicholas Piggin
On Thu May 23, 2024 at 3:14 PM AEST, Harsh Prateek Bora wrote:
> Like pending_interrupts, env->spr[SPR_LPCR] is being used at multiple
> places across p9 exception handlers. Pass the value during entry and
> avoid multiple indirect accesses.

Ditto for this (does it help code, other CPU functions should
be converted similarly).

Thanks,
Nick

>
> Signed-off-by: Harsh Prateek Bora 
> ---
>  target/ppc/excp_helper.c | 33 ++---
>  1 file changed, 18 insertions(+), 15 deletions(-)
>
> diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c
> index 704eddac63..d3db81e6ae 100644
> --- a/target/ppc/excp_helper.c
> +++ b/target/ppc/excp_helper.c
> @@ -1843,13 +1843,14 @@ static int p8_next_unmasked_interrupt(CPUPPCState 
> *env)
>   PPC_INTERRUPT_PIT | PPC_INTERRUPT_THERM)
>  
>  static int p9_interrupt_powersave(CPUPPCState *env,
> -  uint32_t pending_interrupts)
> +  uint32_t pending_interrupts,
> +  target_ulong lpcr)
>  {
>  
>  /* External Exception */
>  if ((pending_interrupts & PPC_INTERRUPT_EXT) &&
> -(env->spr[SPR_LPCR] & LPCR_EEE)) {
> -bool heic = !!(env->spr[SPR_LPCR] & LPCR_HEIC);
> +(lpcr & LPCR_EEE)) {
> +bool heic = !!(lpcr & LPCR_HEIC);
>  if (!heic || !FIELD_EX64_HV(env->msr) ||
>  FIELD_EX64(env->msr, MSR, PR)) {
>  return PPC_INTERRUPT_EXT;
> @@ -1857,11 +1858,11 @@ static int p9_interrupt_powersave(CPUPPCState *env,
>  }
>  /* Decrementer Exception */
>  if ((pending_interrupts & PPC_INTERRUPT_DECR) &&
> -(env->spr[SPR_LPCR] & LPCR_DEE)) {
> +(lpcr & LPCR_DEE)) {
>  return PPC_INTERRUPT_DECR;
>  }
>  /* Machine Check or Hypervisor Maintenance Exception */
> -if (env->spr[SPR_LPCR] & LPCR_OEE) {
> +if (lpcr & LPCR_OEE) {
>  if (pending_interrupts & PPC_INTERRUPT_MCK) {
>  return PPC_INTERRUPT_MCK;
>  }
> @@ -1871,17 +1872,17 @@ static int p9_interrupt_powersave(CPUPPCState *env,
>  }
>  /* Privileged Doorbell Exception */
>  if ((pending_interrupts & PPC_INTERRUPT_DOORBELL) &&
> -(env->spr[SPR_LPCR] & LPCR_PDEE)) {
> +(lpcr & LPCR_PDEE)) {
>  return PPC_INTERRUPT_DOORBELL;
>  }
>  /* Hypervisor Doorbell Exception */
>  if ((pending_interrupts & PPC_INTERRUPT_HDOORBELL) &&
> -(env->spr[SPR_LPCR] & LPCR_HDEE)) {
> +(lpcr & LPCR_HDEE)) {
>  return PPC_INTERRUPT_HDOORBELL;
>  }
>  /* Hypervisor virtualization exception */
>  if ((pending_interrupts & PPC_INTERRUPT_HVIRT) &&
> -(env->spr[SPR_LPCR] & LPCR_HVEE)) {
> +(lpcr & LPCR_HVEE)) {
>  return PPC_INTERRUPT_HVIRT;
>  }
>  if (pending_interrupts & PPC_INTERRUPT_RESET) {
> @@ -1891,7 +1892,8 @@ static int p9_interrupt_powersave(CPUPPCState *env,
>  }
>  
>  static int p9_next_unmasked_interrupt(CPUPPCState *env,
> -  uint32_t pending_interrupts)
> +  uint32_t pending_interrupts,
> +  target_ulong lpcr)
>  {
>  CPUState *cs = env_cpu(env);
>  
> @@ -1906,7 +1908,7 @@ static int p9_next_unmasked_interrupt(CPUPPCState *env,
>   * When PSSCR[EC] is set, LPCR[PECE] controls which interrupts 
> can
>   * wakeup the processor
>   */
> -return p9_interrupt_powersave(env, pending_interrupts);
> +return p9_interrupt_powersave(env, pending_interrupts, lpcr);
>  } else {
>  /*
>   * When it's clear, any system-caused exception exits 
> power-saving
> @@ -1924,7 +1926,7 @@ static int p9_next_unmasked_interrupt(CPUPPCState *env,
>  /* Hypervisor decrementer exception */
>  if (pending_interrupts & PPC_INTERRUPT_HDECR) {
>  /* LPCR will be clear when not supported so this will work */
> -bool hdice = !!(env->spr[SPR_LPCR] & LPCR_HDICE);
> +bool hdice = !!(lpcr & LPCR_HDICE);
>  if ((msr_ee || !FIELD_EX64_HV(env->msr)) && hdice) {
>  /* HDEC clears on delivery */
>  return PPC_INTERRUPT_HDECR;
> @@ -1934,7 +1936,7 @@ static int p9_next_unmasked_interrupt(CPUPPCState *env,
>  /* Hypervisor virtualization interrupt */
>  if (pending_interrupts & PPC_INTERRUPT_HVIRT) {
>  /* LPCR will be clear when not supported so this will work */
> -bool hvice = !!(env->spr[SPR_LPCR] & LPCR_HVICE);
> +bool hvice = !!(lpcr & LPCR_HVICE);
>  if ((msr_ee || !FIELD_EX64_HV(env->msr)) && hvice) {
>  return PPC_INTERRUPT_HVIRT;
>  }
> @@ -1942,8 +1944,8 @@ static int p9_next_unmasked_interrupt(CPUPPCState *env,
>  
>  /* External interrupt can ignore MSR:EE under some circumstances */
>  if (pending_interrupts & PPC_INTERRUPT_EXT) {
> -bool lpes0 = !

Re: [PATCH v2 7/7] target/ppc: reduce code duplication across Power9/10 init code

2024-07-04 Thread Nicholas Piggin
On Thu May 23, 2024 at 3:14 PM AEST, Harsh Prateek Bora wrote:
> Power9/10 initialization code consists of a lot of logical OR of
> various flag bits as supported by respective Power platform during its
> initialization, most of which is duplicated and only selected bits are
> added or removed as needed with each new platform support being added.
> Remove the duplicate code and share using common macros.

This and the previous patch are fiddly to verify, but they are good cleanups I
think. Couple of small things.

>
> Signed-off-by: Harsh Prateek Bora 
> ---
>  target/ppc/cpu_init.h |  77 ++
>  target/ppc/cpu_init.c | 123 ++
>  2 files changed, 92 insertions(+), 108 deletions(-)
>  create mode 100644 target/ppc/cpu_init.h
>
> diff --git a/target/ppc/cpu_init.h b/target/ppc/cpu_init.h
> new file mode 100644
> index 00..53909987b0
> --- /dev/null
> +++ b/target/ppc/cpu_init.h
> @@ -0,0 +1,77 @@

This should have a SPDX license tag I guess.

I suppose doing a new header for it is the way to go. That cpu_init.c
file is enormous...

> +#ifndef TARGET_PPC_CPU_INIT_H
> +#define TARGET_PPC_CPU_INIT_H
> +
> +#define POWERPC_FAMILY_POWER9_INSNS_FLAGS   \
> +PPC_INSNS_BASE | PPC_ISEL | PPC_STRING | PPC_MFTB | \
> +PPC_FLOAT | PPC_FLOAT_FSEL | PPC_FLOAT_FRES |   \
> +PPC_FLOAT_FSQRT | PPC_FLOAT_FRSQRTE | PPC_FLOAT_FRSQRTES |  \
> +PPC_FLOAT_STFIWX | PPC_FLOAT_EXT |PPC_CACHE | PPC_CACHE_ICBI |  \
> +PPC_CACHE_DCBZ | PPC_MEM_SYNC | PPC_MEM_EIEIO | PPC_MEM_TLBIE | \
> +PPC_MEM_TLBSYNC | PPC_64B | PPC_64H | PPC_64BX | PPC_ALTIVEC |  \
> +PPC_SEGMENT_64B | PPC_SLBI | PPC_POPCNTB | PPC_POPCNTWD |   \
> +PPC_CILDST
> +
> +#define POWERPC_FAMILY_POWER9_INSNS_FLAGS2_COMMON   \
> +PPC2_VSX | PPC2_VSX207 | PPC2_DFP | PPC2_DBRX | \
> +PPC2_PERM_ISA206 | PPC2_DIVE_ISA206 | PPC2_ATOMIC_ISA206 |  \
> +PPC2_FP_CVT_ISA206 | PPC2_FP_TST_ISA206 | PPC2_BCTAR_ISA207 |   \
> +PPC2_LSQ_ISA207 | PPC2_ALTIVEC_207 | PPC2_ISA205 |  \
> +PPC2_ISA207S | PPC2_FP_CVT_S64 | PPC2_ISA300 | PPC2_PRCNTL |\
> +PPC2_MEM_LWSYNC | PPC2_BCDA_ISA206
> +
> +#define POWERPC_FAMILY_POWER9_INSNS_FLAGS2  \
> +POWERPC_FAMILY_POWER9_INSNS_FLAGS2_COMMON | PPC2_TM
> +#define POWERPC_FAMILY_POWER10_INSNS_FLAGS2 \
> +POWERPC_FAMILY_POWER9_INSNS_FLAGS2_COMMON | PPC2_ISA310
> +
> +#define POWERPC_POWER9_COMMON_PCC_MSR_MASK \
> +(1ull << MSR_SF) | \
> +(1ull << MSR_HV) | \
> +(1ull << MSR_VR) | \
> +(1ull << MSR_VSX) |\
> +(1ull << MSR_EE) | \
> +(1ull << MSR_PR) | \
> +(1ull << MSR_FP) | \
> +(1ull << MSR_ME) | \
> +(1ull << MSR_FE0) |\
> +(1ull << MSR_SE) | \
> +(1ull << MSR_DE) | \
> +(1ull << MSR_FE1) |\
> +(1ull << MSR_IR) | \
> +(1ull << MSR_DR) | \
> +(1ull << MSR_PMM) |\
> +(1ull << MSR_RI) | \
> +(1ull << MSR_LE)
> +
> +#define POWERPC_POWER9_PCC_MSR_MASK \
> +POWERPC_POWER9_COMMON_PCC_MSR_MASK | (1ull << MSR_TM)
> +#define POWERPC_POWER10_PCC_MSR_MASK \
> +POWERPC_POWER9_COMMON_PCC_MSR_MASK
> +#define POWERPC_POWER9_PCC_PCR_MASK \
> +PCR_COMPAT_2_05 | PCR_COMPAT_2_06 | PCR_COMPAT_2_07
> +#define POWERPC_POWER10_PCC_PCR_MASK \
> +POWERPC_POWER9_PCC_PCR_MASK | PCR_COMPAT_3_00
> +#define POWERPC_POWER9_PCC_PCR_SUPPORTED \
> +PCR_COMPAT_3_00 | PCR_COMPAT_2_07 | PCR_COMPAT_2_06 | PCR_COMPAT_2_05
> +#define POWERPC_POWER10_PCC_PCR_SUPPORTED \
> +POWERPC_POWER9_PCC_PCR_SUPPORTED | PCR_COMPAT_3_10
> +#define POWERPC_POWER9_PCC_LPCR_MASK\
> +LPCR_VPM1 | LPCR_ISL | LPCR_KBV | LPCR_DPFD |   \
> +(LPCR_PECE_U_MASK & LPCR_HVEE) | LPCR_ILE | LPCR_AIL |  \
> +LPCR_UPRT | LPCR_EVIRT | LPCR_ONL | LPCR_HR | LPCR_LD | \
> +(LPCR_PECE_L_MASK & (LPCR_PDEE|LPCR_HDEE|LPCR_EEE|LPCR_DEE|LPCR_OEE)) | \
> +LPCR_MER | LPCR_GTSE | LPCR_TC | LPCR_HEIC | LPCR_LPES0 | LPCR_HVICE |  \
> +LPCR_HDICE
> +/* DD2 adds an extra HAIL bit */
> +#define POWERPC_POWER10_PCC_LPCR_MASK \
> +POWERPC_POWER9_PCC_LPCR_MASK | LPCR_HAIL
> +#define POWERPC_POWER9_PCC_FLAGS_COMMON \
> +POWERPC_FLAG_VRE | POWERPC_FLAG_SE | POWERPC_FLAG_BE |  \
> +POWERPC_FLAG_PMM | POWERPC_FLAG_BUS_CLK | POWERPC_FLAG_CFAR |   \
> +POWERPC_FLAG_VSX | POWERPC_FLAG_SCV
> +
> +#define POWERPC_POWER9_PCC_FLAGS  \
> +POWERPC_POWER9_PCC_FLAGS_COMMON | POWERPC_FLAG_TM
> +#

Re: u-boot-sam460ex fixes

2024-08-05 Thread Nicholas Piggin
On Sat Aug 3, 2024 at 6:40 PM AEST, Michael Tokarev wrote:
> Hi!
>
> It's been a long time since everyone's fighting with u-boot-sam460ex code 
> which is
> very bad, suffers from countless issues.
>
> For one, it does not compile for quite a long time with current compilers.
>
> For example, here are changes which I apply to this code when building things 
> on
> Debian: https://salsa.debian.org/qemu-team/qemu/-/tree/master/debian/patches/
> (see u-boot-sam460ex-* files in there).  I just created another patch,
> u-boot-sam460ex-build.patch, to address numerous new issues revealed by gcc-14
> and its new defaults in Debian.
>
> Please note that most of the last patch are actually just workarounds, not 
> real
> fixes, - real fixes needs much more than that.
>
> For example, there are a LOT of *conflicting* function declarations in .c 
> files
> where the functions are being used, instead of writing them in a common .h 
> file
> and including in both users and where it's defined.
>
> There are a lot of free conversions between pointer and integer. Some of the
> functions are almost always used with a pointer but expect an integer, or vice
> versa.
>
> This code is awful.
>
> But.
>
> Can at least this minimal set of changes be comitted, to let this source to
> be compiled at least somehow?  For the benefit of everyone.
>
> The last patch (-build) also fixes a real bug:
>
>   char arr[8] = { 0 };
> - i2c_write(0x68, 0x08, 1, &arr, 8);
> + i2c_write(0x68, 0x08, 1, arr, 8);

Not sure about u-boot. Have you tried to get the patches upstreamed?

Thanks,
Nick



Re: [PATCH v3 20/24] tests/functional: Convert the ppc_hv avocado test into a standalone test

2024-08-05 Thread Nicholas Piggin
Thanks for doing this.

I have a fix for this problem, it's a bug with the test harness
code but was not merged yet. I'll re-send it.

Thanks,
Nick

On Wed Jul 31, 2024 at 3:03 AM AEST, Daniel P. Berrangé wrote:
> From: Thomas Huth 
>
> Note: The original Avocado test seems currently to be broken, it hangs
> when the guest is trying to install additional packages. So mark it as
> broken for now until it gets fixed.
>
> Signed-off-by: Thomas Huth 
> ---
>  .../test_ppc64_hv.py} | 48 ---
>  1 file changed, 20 insertions(+), 28 deletions(-)
>  rename tests/{avocado/ppc_hv_tests.py => functional/test_ppc64_hv.py} (88%)
>  mode change 100644 => 100755
>
> diff --git a/tests/avocado/ppc_hv_tests.py b/tests/functional/test_ppc64_hv.py
> old mode 100644
> new mode 100755
> similarity index 88%
> rename from tests/avocado/ppc_hv_tests.py
> rename to tests/functional/test_ppc64_hv.py
> index bf8822bb97..a45657c87e
> --- a/tests/avocado/ppc_hv_tests.py
> +++ b/tests/functional/test_ppc64_hv.py
> @@ -1,3 +1,5 @@
> +#!/usr/bin/env python3
> +#
>  # Tests that specifically try to exercise hypervisor features of the
>  # target machines. powernv supports the Power hypervisor ISA, and
>  # pseries supports the nested-HV hypervisor spec.
> @@ -7,10 +9,10 @@
>  # This work is licensed under the terms of the GNU GPL, version 2 or
>  # later.  See the COPYING file in the top-level directory.
>  
> -from avocado import skipIf, skipUnless
> -from avocado.utils import archive
> -from avocado_qemu import QemuSystemTest
> -from avocado_qemu import wait_for_console_pattern, exec_command
> +from unittest import skipIf, skipUnless
> +from qemu_test.utils import archive_extract
> +from qemu_test import QemuSystemTest, Asset
> +from qemu_test import wait_for_console_pattern, exec_command
>  import os
>  import time
>  import subprocess
> @@ -47,7 +49,7 @@ def missing_deps():
>  @skipIf(missing_deps(), 'dependencies (%s) not installed' % ','.join(deps))
>  @skipUnless(os.getenv('QEMU_TEST_FLAKY_TESTS'), 'Test sometimes gets stuck 
> due to console handling problem')
>  @skipUnless(os.getenv('AVOCADO_ALLOW_LARGE_STORAGE'), 'storage limited')
> -@skipUnless(os.getenv('SPEED') == 'slow', 'runtime limited')
> +@skip('broken test - guest fails to install packages')
>  class HypervisorTest(QemuSystemTest):
>  
>  timeout = 1000
> @@ -55,6 +57,12 @@ class HypervisorTest(QemuSystemTest):
>  panic_message = 'Kernel panic - not syncing'
>  good_message = 'VFS: Cannot open root device'
>  
> +# Alpine use sha256 so I recalculated this myself
> +ASSET_ISO = Asset(
> +('https://dl-cdn.alpinelinux.org/alpine/v3.18/'
> + 'releases/ppc64le/alpine-standard-3.18.4-ppc64le.iso'),
> +'c26b8d3e17c2f3f0fed02b4b1296589c2390e6d5548610099af75300edd7b3ff')
> +
>  def extract_from_iso(self, iso, path):
>  """
>  Extracts a file from an iso file into the test workdir
> @@ -84,14 +92,7 @@ def extract_from_iso(self, iso, path):
>  def setUp(self):
>  super().setUp()
>  
> -iso_url = 
> ('https://dl-cdn.alpinelinux.org/alpine/v3.18/releases/ppc64le/alpine-standard-3.18.4-ppc64le.iso')
> -
> -# Alpine use sha256 so I recalculated this myself
> -iso_sha256 = 
> 'c26b8d3e17c2f3f0fed02b4b1296589c2390e6d5548610099af75300edd7b3ff'
> -iso_path = self.fetch_asset(iso_url, asset_hash=iso_sha256,
> -algorithm = "sha256")
> -
> -self.iso_path = iso_path
> +self.iso_path = self.ASSET_ISO.fetch()
>  self.vmlinuz = self.extract_from_iso(iso_path, '/boot/vmlinuz-lts')
>  self.initramfs = self.extract_from_iso(iso_path, 
> '/boot/initramfs-lts')
>  
> @@ -159,12 +160,8 @@ def do_test_kvm(self, hpt=False):
>  wait_for_console_pattern(self, 'alpine:~#')
>  
>  def test_hv_pseries(self):
> -"""
> -:avocado: tags=arch:ppc64
> -:avocado: tags=machine:pseries
> -:avocado: tags=accel:tcg
> -"""
>  self.require_accelerator("tcg")
> +self.set_machine('pseries')
>  self.vm.add_args("-accel", "tcg,thread=multi")
>  self.vm.add_args('-device', 'nvme,serial=1234,drive=drive0')
>  self.vm.add_args("-machine", "x-vof=on,cap-nested-hv=on")
> @@ -174,12 +171,8 @@ def test_hv_pseries(self):
>  self.do_stop_alpine()
>  
>  def test_hv_pseries_kvm(self):
> -"""
> -:avocado: tags=arch:ppc64
> -:avocado: tags=machine:pseries
> -:avocado: tags=accel:kvm
> -"""
>  self.require_accelerator("kvm")
> +self.set_machine('pseries')
>  self.vm.add_args("-accel", "kvm")
>  self.vm.add_args('-device', 'nvme,serial=1234,drive=drive0')
>  self.vm.add_args("-machine", 
> "x-vof=on,cap-nested-hv=on,cap-ccf-assist=off")
> @@ -189,12 +182,8 @@ def test_hv_pseries_kvm(self):
>  self.do_stop_alpine()
>  
>  def test_hv

[PATCH 1/2] tests/avocado: exec_command should not consume console output

2024-08-05 Thread Nicholas Piggin
_console_interaction reads data from the console even when there is only
an input string to send, and no output data to wait on. This can cause
lines to be missed by wait_for_console_pattern calls that follows an
exec_command. Fix this by not reading the console if there is no pattern
to wait for.

This solves occasional hangs in ppc_hv_tests.py, usually when run on KVM
hosts that are fast enough to output important lines quickly enough to be
consumed by exec_command, so they get missed by subsequent wait for
pattern calls.

Signed-off-by: Nicholas Piggin 
---
 tests/avocado/avocado_qemu/__init__.py | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/tests/avocado/avocado_qemu/__init__.py 
b/tests/avocado/avocado_qemu/__init__.py
index a3da2a96bb..ef935614cf 100644
--- a/tests/avocado/avocado_qemu/__init__.py
+++ b/tests/avocado/avocado_qemu/__init__.py
@@ -135,6 +135,13 @@ def _console_interaction(test, success_message, 
failure_message,
 vm.console_socket.sendall(send_string.encode())
 if not keep_sending:
 send_string = None # send only once
+
+# Only consume console output if waiting for something
+if success_message is None and failure_message is None:
+if send_string is None:
+break
+continue
+
 try:
 msg = console.readline().decode().strip()
 except UnicodeDecodeError:
-- 
2.45.2




[PATCH 0/2] tests/avocado: Fix exec_command and enable ppc_hv_tests.py

2024-08-05 Thread Nicholas Piggin
This fixes an issue with exec_command eating console output and causing
the ppc_hv_tests.py to fail. A few other tests also use exec_command but
I didn't see any that subsequently checked console output.

Thanks,
Nick

Nicholas Piggin (2):
  tests/avocado: exec_command should not consume console output
  tests/avocado: Mark ppc_hv_tests.py as non-flaky after fixed console
interaction

 tests/avocado/avocado_qemu/__init__.py | 7 +++
 tests/avocado/ppc_hv_tests.py  | 1 -
 2 files changed, 7 insertions(+), 1 deletion(-)

-- 
2.45.2




[PATCH 2/2] tests/avocado: Mark ppc_hv_tests.py as non-flaky after fixed console interaction

2024-08-05 Thread Nicholas Piggin
Now that exec_command doesn't incorrectly consume console output,
and guest time is set correctly, ppc_hv_tests.py is working more
reliably. Try marking it non-flaky.

Signed-off-by: Nicholas Piggin 
---
 tests/avocado/ppc_hv_tests.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/avocado/ppc_hv_tests.py b/tests/avocado/ppc_hv_tests.py
index bf8822bb97..0e83bbac71 100644
--- a/tests/avocado/ppc_hv_tests.py
+++ b/tests/avocado/ppc_hv_tests.py
@@ -45,7 +45,6 @@ def missing_deps():
 # QEMU already installed and use that.
 # XXX: The order of these tests seems to matter, see git blame.
 @skipIf(missing_deps(), 'dependencies (%s) not installed' % ','.join(deps))
-@skipUnless(os.getenv('QEMU_TEST_FLAKY_TESTS'), 'Test sometimes gets stuck due 
to console handling problem')
 @skipUnless(os.getenv('AVOCADO_ALLOW_LARGE_STORAGE'), 'storage limited')
 @skipUnless(os.getenv('SPEED') == 'slow', 'runtime limited')
 class HypervisorTest(QemuSystemTest):
-- 
2.45.2




[PATCH 3/7] target/ppc: Fix mtDPDES targeting SMT siblings

2024-08-06 Thread Nicholas Piggin
A typo in the loop over SMT threads to set irq level for doorbells
when storing to DPDES meant everything was aimed at the CPU executing
the instruction.

Signed-off-by: Nicholas Piggin 
---
 target/ppc/misc_helper.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target/ppc/misc_helper.c b/target/ppc/misc_helper.c
index 1b83971375..f0ca80153b 100644
--- a/target/ppc/misc_helper.c
+++ b/target/ppc/misc_helper.c
@@ -288,7 +288,7 @@ void helper_store_dpdes(CPUPPCState *env, target_ulong val)
 PowerPCCPU *ccpu = POWERPC_CPU(ccs);
 uint32_t thread_id = ppc_cpu_tir(ccpu);
 
-ppc_set_irq(cpu, PPC_INTERRUPT_DOORBELL, val & (0x1 << thread_id));
+ppc_set_irq(ccpu, PPC_INTERRUPT_DOORBELL, val & (0x1 << thread_id));
 }
 bql_unlock();
 }
-- 
2.45.2




[PATCH 2/7] ppc/pnv: Fix LPC POWER8 register sanity check

2024-08-06 Thread Nicholas Piggin
POWER8 does not have the ISA IRQ -> SERIRQ routing system of later
CPUs, instead all ISA IRQs are sent to the CPU via a single PSI
interrupt. There is a sanity check in the POWER8 case to ensure the
routing bits have not been set, because that would indicate a
programming error.

Those bits were incorrectly specified because of ppc bit numbering
fun. Coverity detected this as an always-zero expression.

Reported-by: Cédric Le Goater 
Resolves: Coverity CID 1558829 (partially)
Signed-off-by: Nicholas Piggin 
---
 hw/ppc/pnv_lpc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hw/ppc/pnv_lpc.c b/hw/ppc/pnv_lpc.c
index 80b79dfbbc..8c203d2059 100644
--- a/hw/ppc/pnv_lpc.c
+++ b/hw/ppc/pnv_lpc.c
@@ -427,8 +427,8 @@ static void pnv_lpc_eval_serirq_routes(PnvLpcController 
*lpc)
 int irq;
 
 if (!lpc->psi_has_serirq) {
-if ((lpc->opb_irq_route0 & PPC_BITMASK(8, 13)) ||
-(lpc->opb_irq_route1 & PPC_BITMASK(4, 31))) {
+if ((lpc->opb_irq_route0 & PPC_BITMASK32(8, 13)) ||
+(lpc->opb_irq_route1 & PPC_BITMASK32(4, 31))) {
 qemu_log_mask(LOG_GUEST_ERROR,
 "OPB: setting serirq routing on POWER8 system, ignoring.\n");
 }
-- 
2.45.2




[PATCH 1/7] ppc/pnv: Fix LPC serirq routing calculation

2024-08-06 Thread Nicholas Piggin
The serirq routing table is split over two registers, the calculation
for the high irqs in the second register did not subtract the irq
offset. This was spotted by Coverity as a shift-by-negative. Fix this
and change the open-coded shifting and masking to use extract32()
function so it's less error-prone.

This went unnoticed because irqs >= 14 are not used in a standard
QEMU/OPAL boot, changing the first QEMU serial-isa irq to 14 to test
does demonstrate serial irqs aren't received, and that this change
fixes that.

Reported-by: Cédric Le Goater 
Resolves: Coverity CID 1558829 (partially)
Signed-off-by: Nicholas Piggin 
---
 target/ppc/cpu.h |  1 +
 hw/ppc/pnv_lpc.c | 10 --
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index 321ed2da75..bd32a1a5f8 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -40,6 +40,7 @@
 
 #define PPC_BIT_NR(bit) (63 - (bit))
 #define PPC_BIT(bit)(0x8000ULL >> (bit))
+#define PPC_BIT32_NR(bit)   (31 - (bit))
 #define PPC_BIT32(bit)  (0x8000 >> (bit))
 #define PPC_BIT8(bit)   (0x80 >> (bit))
 #define PPC_BITMASK(bs, be) ((PPC_BIT(bs) - PPC_BIT(be)) | PPC_BIT(bs))
diff --git a/hw/ppc/pnv_lpc.c b/hw/ppc/pnv_lpc.c
index f8aad955b5..80b79dfbbc 100644
--- a/hw/ppc/pnv_lpc.c
+++ b/hw/ppc/pnv_lpc.c
@@ -435,13 +435,19 @@ static void pnv_lpc_eval_serirq_routes(PnvLpcController 
*lpc)
 return;
 }
 
+/*
+ * Each of the ISA irqs is routed to one of the 4 SERIRQ irqs with 2
+ * bits, split across 2 OPB registers.
+ */
 for (irq = 0; irq <= 13; irq++) {
-int serirq = (lpc->opb_irq_route1 >> (31 - 5 - (irq * 2))) & 0x3;
+int serirq = extract32(lpc->opb_irq_route1,
+PPC_BIT32_NR(5 + irq * 2), 2);
 lpc->irq_to_serirq_route[irq] = serirq;
 }
 
 for (irq = 14; irq < ISA_NUM_IRQS; irq++) {
-int serirq = (lpc->opb_irq_route0 >> (31 - 9 - (irq * 2))) & 0x3;
+int serirq = extract32(lpc->opb_irq_route0,
+   PPC_BIT32_NR(9 + (irq - 14) * 2), 2);
 lpc->irq_to_serirq_route[irq] = serirq;
 }
 }
-- 
2.45.2




[PATCH 0/7] various ppc fixes

2024-08-06 Thread Nicholas Piggin
This fixes LPC serirq Coverity issues introduced in the merge
window that Cedric reported. Also fixes for an assorted bunch of 
emulation issues recently turned up when running PowerVM firmware
on the model.

Thanks,
Nick

Nicholas Piggin (7):
  ppc/pnv: Fix LPC serirq routing calculation
  ppc/pnv: Fix LPC POWER8 register sanity check
  target/ppc: Fix mtDPDES targeting SMT siblings
  target/ppc: PMIs are level triggered
  target/ppc: Fix doorbell delivery to threads in powersave
  target/ppc: Fix HFSCR facility checks
  target/ppc: Fix VRMA to not check virtual page class key protection

 target/ppc/cpu.h |  5 +++--
 hw/ppc/pnv_lpc.c | 14 ++
 target/ppc/excp_helper.c | 21 +
 target/ppc/misc_helper.c |  2 +-
 target/ppc/mmu-hash64.c  |  9 -
 5 files changed, 35 insertions(+), 16 deletions(-)

-- 
2.45.2




[PATCH 5/7] target/ppc: Fix doorbell delivery to threads in powersave

2024-08-06 Thread Nicholas Piggin
Doorbell exceptions are not cleared when they cause a wake from
powersave state, only when they take the corresponding interrupt.
The sreset-on-wake logic must avoid clearing the interrupt in this
case.

Signed-off-by: Nicholas Piggin 
---
 target/ppc/excp_helper.c | 17 +
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c
index 701abe1b6d..b619a6adde 100644
--- a/target/ppc/excp_helper.c
+++ b/target/ppc/excp_helper.c
@@ -2237,7 +2237,9 @@ static void p8_deliver_interrupt(CPUPPCState *env, int 
interrupt)
 powerpc_excp(cpu, POWERPC_EXCP_DECR);
 break;
 case PPC_INTERRUPT_DOORBELL:
-env->pending_interrupts &= ~PPC_INTERRUPT_DOORBELL;
+if (!env->resume_as_sreset) {
+env->pending_interrupts &= ~PPC_INTERRUPT_DOORBELL;
+}
 if (is_book3s_arch2x(env)) {
 powerpc_excp(cpu, POWERPC_EXCP_SDOOR);
 } else {
@@ -2245,7 +2247,9 @@ static void p8_deliver_interrupt(CPUPPCState *env, int 
interrupt)
 }
 break;
 case PPC_INTERRUPT_HDOORBELL:
-env->pending_interrupts &= ~PPC_INTERRUPT_HDOORBELL;
+if (!env->resume_as_sreset) {
+env->pending_interrupts &= ~PPC_INTERRUPT_HDOORBELL;
+}
 powerpc_excp(cpu, POWERPC_EXCP_SDOOR_HV);
 break;
 case PPC_INTERRUPT_PERFM:
@@ -2301,6 +2305,7 @@ static void p9_deliver_interrupt(CPUPPCState *env, int 
interrupt)
 
 case PPC_INTERRUPT_HDECR: /* Hypervisor decrementer exception */
 /* HDEC clears on delivery */
+/* XXX: should not see an HDEC if resume_as_sreset. assert? */
 env->pending_interrupts &= ~PPC_INTERRUPT_HDECR;
 powerpc_excp(cpu, POWERPC_EXCP_HDECR);
 break;
@@ -2320,11 +2325,15 @@ static void p9_deliver_interrupt(CPUPPCState *env, int 
interrupt)
 powerpc_excp(cpu, POWERPC_EXCP_DECR);
 break;
 case PPC_INTERRUPT_DOORBELL:
-env->pending_interrupts &= ~PPC_INTERRUPT_DOORBELL;
+if (!env->resume_as_sreset) {
+env->pending_interrupts &= ~PPC_INTERRUPT_DOORBELL;
+}
 powerpc_excp(cpu, POWERPC_EXCP_SDOOR);
 break;
 case PPC_INTERRUPT_HDOORBELL:
-env->pending_interrupts &= ~PPC_INTERRUPT_HDOORBELL;
+if (!env->resume_as_sreset) {
+env->pending_interrupts &= ~PPC_INTERRUPT_HDOORBELL;
+}
 powerpc_excp(cpu, POWERPC_EXCP_SDOOR_HV);
 break;
 case PPC_INTERRUPT_PERFM:
-- 
2.45.2




[PATCH 7/7] target/ppc: Fix VRMA to not check virtual page class key protection

2024-08-06 Thread Nicholas Piggin
Hash virtual real mode addressing is defined by the architecture
to not perform virtual page class key protection checks.

Signed-off-by: Nicholas Piggin 
---
 target/ppc/mmu-hash64.c | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/target/ppc/mmu-hash64.c b/target/ppc/mmu-hash64.c
index 5e1983e334..c8c2f8910a 100644
--- a/target/ppc/mmu-hash64.c
+++ b/target/ppc/mmu-hash64.c
@@ -993,6 +993,7 @@ bool ppc_hash64_xlate(PowerPCCPU *cpu, vaddr eaddr, 
MMUAccessType access_type,
 int exec_prot, pp_prot, amr_prot, prot;
 int need_prot;
 hwaddr raddr;
+bool vrma = false;
 
 /*
  * Note on LPCR usage: 970 uses HID4, but our special variant of
@@ -1022,6 +1023,7 @@ bool ppc_hash64_xlate(PowerPCCPU *cpu, vaddr eaddr, 
MMUAccessType access_type,
 }
 } else if (ppc_hash64_use_vrma(env)) {
 /* Emulated VRMA mode */
+vrma = true;
 slb = &vrma_slbe;
 if (build_vrma_slbe(cpu, slb) != 0) {
 /* Invalid VRMA setup, machine check */
@@ -1136,7 +1138,12 @@ bool ppc_hash64_xlate(PowerPCCPU *cpu, vaddr eaddr, 
MMUAccessType access_type,
 
 exec_prot = ppc_hash64_pte_noexec_guard(cpu, pte);
 pp_prot = ppc_hash64_pte_prot(mmu_idx, slb, pte);
-amr_prot = ppc_hash64_amr_prot(cpu, pte);
+if (vrma) {
+/* VRMA does not check keys */
+amr_prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
+} else {
+amr_prot = ppc_hash64_amr_prot(cpu, pte);
+}
 prot = exec_prot & pp_prot & amr_prot;
 
 need_prot = check_prot_access_type(PAGE_RWX, access_type);
-- 
2.45.2




[PATCH 6/7] target/ppc: Fix HFSCR facility checks

2024-08-06 Thread Nicholas Piggin
The HFSCR defines were being encoded as bit masks, but the users
expect (and analogous FSCR defines are) bit numbers.

Signed-off-by: Nicholas Piggin 
---
 target/ppc/cpu.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index bd32a1a5f8..f7a2da2bbe 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -635,8 +635,8 @@ FIELD(MSR, LE, MSR_LE, 1)
 #define PSSCR_EC  PPC_BIT(43) /* Exit Criterion */
 
 /* HFSCR bits */
-#define HFSCR_MSGP PPC_BIT(53) /* Privileged Message Send Facilities */
-#define HFSCR_BHRB PPC_BIT(59) /* BHRB Instructions */
+#define HFSCR_MSGP PPC_BIT_NR(53) /* Privileged Message Send Facilities */
+#define HFSCR_BHRB PPC_BIT_NR(59) /* BHRB Instructions */
 #define HFSCR_IC_MSGP  0xA
 
 #define DBCR0_ICMP (1 << 27)
-- 
2.45.2




[PATCH 4/7] target/ppc: PMIs are level triggered

2024-08-06 Thread Nicholas Piggin
In Book-S / Power processors, the performance monitor interrupts are
driven by the MMCR0[PMAO] bit, which is level triggered and not cleared
by the interrupt.

Others may have different performance monitor architecture, but none of
those are implemented by QEMU.

Signed-off-by: Nicholas Piggin 
---
 target/ppc/excp_helper.c | 4 
 1 file changed, 4 deletions(-)

diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c
index f33fc36db2..701abe1b6d 100644
--- a/target/ppc/excp_helper.c
+++ b/target/ppc/excp_helper.c
@@ -2187,7 +2187,6 @@ static void p7_deliver_interrupt(CPUPPCState *env, int 
interrupt)
 powerpc_excp(cpu, POWERPC_EXCP_DECR);
 break;
 case PPC_INTERRUPT_PERFM:
-env->pending_interrupts &= ~PPC_INTERRUPT_PERFM;
 powerpc_excp(cpu, POWERPC_EXCP_PERFM);
 break;
 case 0:
@@ -2250,7 +2249,6 @@ static void p8_deliver_interrupt(CPUPPCState *env, int 
interrupt)
 powerpc_excp(cpu, POWERPC_EXCP_SDOOR_HV);
 break;
 case PPC_INTERRUPT_PERFM:
-env->pending_interrupts &= ~PPC_INTERRUPT_PERFM;
 powerpc_excp(cpu, POWERPC_EXCP_PERFM);
 break;
 case PPC_INTERRUPT_EBB: /* EBB exception */
@@ -2330,7 +2328,6 @@ static void p9_deliver_interrupt(CPUPPCState *env, int 
interrupt)
 powerpc_excp(cpu, POWERPC_EXCP_SDOOR_HV);
 break;
 case PPC_INTERRUPT_PERFM:
-env->pending_interrupts &= ~PPC_INTERRUPT_PERFM;
 powerpc_excp(cpu, POWERPC_EXCP_PERFM);
 break;
 case PPC_INTERRUPT_EBB: /* EBB exception */
@@ -2444,7 +2441,6 @@ static void ppc_deliver_interrupt(CPUPPCState *env, int 
interrupt)
 powerpc_excp(cpu, POWERPC_EXCP_SDOOR_HV);
 break;
 case PPC_INTERRUPT_PERFM:
-env->pending_interrupts &= ~PPC_INTERRUPT_PERFM;
 powerpc_excp(cpu, POWERPC_EXCP_PERFM);
 break;
 case PPC_INTERRUPT_THERM:  /* Thermal interrupt */
-- 
2.45.2




[PATCH] ppc/pnv: ADU fix possible buffer overrun with invalid size

2024-08-06 Thread Nicholas Piggin
The ADU LPC transfer-size field is 7 bits, but the supported sizes for
LPC access via ADU appear to be 1, 2, 4, 8. The data buffer could
overrun if firmware set an invalid size field, so add checks to reject
them with a message.

Reported-by: Cédric Le Goater 
Resolves: Coverity CID 1558830
Fixes: 24bd283bccb33 ("ppc/pnv: Implement ADU access to LPC space")
Signed-off-by: Nicholas Piggin 
---
 hw/ppc/pnv_adu.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/hw/ppc/pnv_adu.c b/hw/ppc/pnv_adu.c
index 81b7d6e526..f636dedf79 100644
--- a/hw/ppc/pnv_adu.c
+++ b/hw/ppc/pnv_adu.c
@@ -116,6 +116,12 @@ static void pnv_adu_xscom_write(void *opaque, hwaddr addr, 
uint64_t val,
 uint32_t lpc_size = lpc_cmd_size(adu);
 uint64_t data = 0;
 
+if (!is_power_of_2(lpc_size) || lpc_size > sizeof(data)) {
+qemu_log_mask(LOG_GUEST_ERROR, "ADU: Unsupported LPC access "
+   "size:%" PRId32 "\n", lpc_size);
+break;
+}
+
 pnv_lpc_opb_read(adu->lpc, lpc_addr, (void *)&data, lpc_size);
 
 /*
@@ -135,6 +141,12 @@ static void pnv_adu_xscom_write(void *opaque, hwaddr addr, 
uint64_t val,
 uint32_t lpc_size = lpc_cmd_size(adu);
 uint64_t data;
 
+if (!is_power_of_2(lpc_size) || lpc_size > sizeof(data)) {
+qemu_log_mask(LOG_GUEST_ERROR, "ADU: Unsupported LPC access "
+   "size:%" PRId32 "\n", lpc_size);
+break;
+}
+
 data = cpu_to_be64(val) >> ((lpc_addr & 7) * 8); /* See above */
 pnv_lpc_opb_write(adu->lpc, lpc_addr, (void *)&data, lpc_size);
 }
-- 
2.45.2




Re: [PATCH for-9.1] tcg/ppc: Sync tcg_out_test and constraints

2024-08-08 Thread Nicholas Piggin
On Thu Aug 8, 2024 at 2:46 AM AEST, Philippe Mathieu-Daudé wrote:
> On 7/8/24 06:08, Richard Henderson wrote:
> > Ensure the code structure is the same for matching constraints
> > and emitting code, lest we allow constants that cannot be
> > trivially tested.
> > 
> > Cc: qemu-sta...@nongnu.org
> > Fixes: ad788aebbab ("tcg/ppc: Support TCG_COND_TST{EQ,NE}")
> > Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2487
> > Signed-off-by: Richard Henderson 
> > ---
> >   tcg/ppc/tcg-target.c.inc | 21 ++---
> >   1 file changed, 10 insertions(+), 11 deletions(-)
> > 
> > diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
> > index 7f3829beeb..3553a47ba9 100644
> > --- a/tcg/ppc/tcg-target.c.inc
> > +++ b/tcg/ppc/tcg-target.c.inc
> > @@ -325,9 +325,11 @@ static bool tcg_target_const_match(int64_t sval, int 
> > ct,
> >   if ((uval & ~0x) == 0 || (uval & ~0xull) == 0) {
> >   return 1;
> >   }
> > -if (TCG_TARGET_REG_BITS == 32 || type == TCG_TYPE_I32
> > -? mask_operand(uval, &mb, &me)
> > -: mask64_operand(uval << clz64(uval), &mb, &me)) {
> > +if (uval == (uint32_t)uval && mask_operand(uval, &mb, &me)) {
> > +return 1;
> > +}
> > +if (TCG_TARGET_REG_BITS == 64 &&
> > +mask64_operand(uval << clz64(uval), &mb, &me)) {
> >   return 1;
> >   }
> >   return 0;
> > @@ -1749,8 +1751,6 @@ static void tcg_out_test(TCGContext *s, TCGReg dest, 
> > TCGReg arg1, TCGArg arg2,
> >   
> >   if (type == TCG_TYPE_I32) {
> >   arg2 = (uint32_t)arg2;
> > -} else if (arg2 == (uint32_t)arg2) {
> > -type = TCG_TYPE_I32;
> >   }
> >   
> >   if ((arg2 & ~0x) == 0) {
> > @@ -1761,12 +1761,11 @@ static void tcg_out_test(TCGContext *s, TCGReg 
> > dest, TCGReg arg1, TCGArg arg2,
> >   tcg_out32(s, ANDIS | SAI(arg1, dest, arg2 >> 16));
> >   return;
> >   }
> > -if (TCG_TARGET_REG_BITS == 32 || type == TCG_TYPE_I32) {
> > -if (mask_operand(arg2, &mb, &me)) {
> > -tcg_out_rlw_rc(s, RLWINM, dest, arg1, 0, mb, me, rc);
> > -return;
> > -}
> > -} else {
> > +if (arg2 == (uint32_t)arg2 && mask_operand(arg2, &mb, &me)) {
> > +tcg_out_rlw_rc(s, RLWINM, dest, arg1, 0, mb, me, rc);
> > +return;
> > +}
> > +if (TCG_TARGET_REG_BITS == 64) {
> >   int sh = clz64(arg2);
> >   if (mask64_operand(arg2 << sh, &mb, &me)) {
> >   tcg_out_rld_rc(s, RLDICR, dest, arg1, sh, me, rc);
>
> Preferably having someone from PPC also reviewing this,
>
> Reviewed-by: Philippe Mathieu-Daudé 

I don't know much of the back end, but I think I follow where the
match was letting through a constant that could not be emitted.
Thanks for the fix and review.

Bigger problem seems to be I'm not testing x86 on ppc so I'll have
to rectify that. Also IIRC we were supposed to be adding a ppc
runner to gitlab CI, I'll have to check what's happening with that,
I think it got stalled on IT security rigmarole :(

Thanks,
Nick



[RFC PATCH] accel/tcg: clear all TBs from a page when it is written to

2024-08-09 Thread Nicholas Piggin
This is not a clean patch, but does fix a problem I hit with TB
invalidation due to the target software writing to memory with TBs.

Lockup messages are triggering in Linux due to page clearing taking a
long time when a code page has been freed, because it takes a lot of
notdirty notifiers, which massively slows things down. Linux might
possibly have a bug here too because it seems to hang indefinitely in
some cases, but even if it didn't, the latency of clearing these pages
is very high.

This showed when running KVM on the emulated machine, starting and
stopping guests. That causes lots of instruction pages to be freed.
Usually if you're just running Linux, executable pages remain in
pagecache so you get fewer of these bombs in the kernel memory
allocator. But page reclaim, JITs, deleting executable files, etc.,
could trigger it too.

Invalidating all TBs from the page on any hit seems to avoid the problem
and generally speeds things up.

How important is the precise invalidation? These days I assume the
tricky kind of SMC that frequently writes code close to where it's
executing is pretty rare and might not be something we really care about
for performance. Could we remove sub-page TB invalidation entirely?

Thanks,
Nick
---
 accel/tcg/tb-maint.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/accel/tcg/tb-maint.c b/accel/tcg/tb-maint.c
index cc0f5afd47..d9a76b1665 100644
--- a/accel/tcg/tb-maint.c
+++ b/accel/tcg/tb-maint.c
@@ -1107,6 +1107,9 @@ tb_invalidate_phys_page_range__locked(struct 
page_collection *pages,
 TranslationBlock *current_tb = retaddr ? tcg_tb_lookup(retaddr) : NULL;
 #endif /* TARGET_HAS_PRECISE_SMC */
 
+start &= TARGET_PAGE_MASK;
+last |= ~TARGET_PAGE_MASK;
+
 /* Range may not cross a page. */
 tcg_debug_assert(((start ^ last) & TARGET_PAGE_MASK) == 0);
 
-- 
2.45.2




[PATCH v6 00/10] replay: fixes and new test cases

2024-08-12 Thread Nicholas Piggin
Since v5, I cut down the series significantly to just the better
reviewed parts, without adding new CI testing, since there are
still a few hiccups. aarch64 had some hangs Alex noticed, and
x86_64 doesn't seem to be working anymore for me (with the big
replay_linux.py test). But with this series, things are much closer,
ppc64 does get through replay_linux.py (but requires some ppc
specific fixes and the new test to be added, so I leave that out
for now).

Hopefully we can get this minimal series in and in the next
release I'll try to get something stable enough for CI so it
doesn't keep breaking.

Thanks,
Nick

Nicholas Piggin (10):
  scripts/replay-dump.py: Update to current rr record format
  scripts/replay-dump.py: rejig decoders in event number order
  tests/avocado: exercise scripts/replay-dump.py in replay tests
  replay: allow runstate shutdown->running when replaying trace
  Revert "replay: stop us hanging in rr_wait_io_event"
  tests/avocado: replay_kernel.py add x86-64 q35 machine test
  chardev: set record/replay on the base device of a muxed device
  virtio-net: Use replay_schedule_bh_event for bhs that affect machine
state
  virtio-net: Use virtual time for RSC timers
  savevm: Fix load_snapshot error path crash

 include/sysemu/replay.h|   5 -
 include/sysemu/runstate.h  |   1 +
 accel/tcg/tcg-accel-ops-rr.c   |   2 +-
 chardev/char.c |  71 +-
 hw/net/virtio-net.c|  17 ++--
 migration/savevm.c |   1 +
 replay/replay.c|  23 +
 system/runstate.c  |  31 +-
 scripts/replay-dump.py | 167 ++---
 tests/avocado/replay_kernel.py |  31 +-
 tests/avocado/replay_linux.py  |  10 ++
 11 files changed, 245 insertions(+), 114 deletions(-)

-- 
2.45.2




[PATCH v6 02/10] scripts/replay-dump.py: rejig decoders in event number order

2024-08-12 Thread Nicholas Piggin
Sort decoder functions to be ascending in order of event number,
same as the decoder tables.

Reviewed-by: Alex Bennée 
Signed-off-by: Nicholas Piggin 
---
 scripts/replay-dump.py | 56 +-
 1 file changed, 28 insertions(+), 28 deletions(-)

diff --git a/scripts/replay-dump.py b/scripts/replay-dump.py
index 419ee3257b..b82659cfb6 100755
--- a/scripts/replay-dump.py
+++ b/scripts/replay-dump.py
@@ -139,6 +139,19 @@ def swallow_bytes(eid, name, dumpfile, nr):
 """Swallow nr bytes of data without looking at it"""
 dumpfile.seek(nr, os.SEEK_CUR)
 
+total_insns = 0
+
+def decode_instruction(eid, name, dumpfile):
+global total_insns
+ins_diff = read_dword(dumpfile)
+total_insns += ins_diff
+print_event(eid, name, "+ %d -> %d" % (ins_diff, total_insns))
+return True
+
+def decode_interrupt(eid, name, dumpfile):
+print_event(eid, name)
+return True
+
 def decode_exception(eid, name, dumpfile):
 print_event(eid, name)
 return True
@@ -198,15 +211,6 @@ def decode_async_net(eid, name, dumpfile):
 print_event(eid, name, "net:%x flags:%x bytes:%d" % (net_id, flags, size))
 return True
 
-total_insns = 0
-
-def decode_instruction(eid, name, dumpfile):
-global total_insns
-ins_diff = read_dword(dumpfile)
-total_insns += ins_diff
-print_event(eid, name, "+ %d -> %d" % (ins_diff, total_insns))
-return True
-
 def decode_shutdown(eid, name, dumpfile):
 print_event(eid, name)
 return True
@@ -222,6 +226,21 @@ def decode_audio_out(eid, name, dumpfile):
 print_event(eid, name, "%d" % (audio_data))
 return True
 
+def decode_random(eid, name, dumpfile):
+ret = read_dword(dumpfile)
+size = read_dword(dumpfile)
+swallow_bytes(eid, name, dumpfile, size)
+if (ret):
+print_event(eid, name, "%d bytes (getrandom failed)" % (size))
+else:
+print_event(eid, name, "%d bytes" % (size))
+return True
+
+def decode_clock(eid, name, dumpfile):
+clock_data = read_qword(dumpfile)
+print_event(eid, name, "0x%x" % (clock_data))
+return True
+
 def __decode_checkpoint(eid, name, dumpfile, old):
 """Decode a checkpoint.
 
@@ -252,25 +271,6 @@ def decode_checkpoint_init(eid, name, dumpfile):
 print_event(eid, name)
 return True
 
-def decode_interrupt(eid, name, dumpfile):
-print_event(eid, name)
-return True
-
-def decode_clock(eid, name, dumpfile):
-clock_data = read_qword(dumpfile)
-print_event(eid, name, "0x%x" % (clock_data))
-return True
-
-def decode_random(eid, name, dumpfile):
-ret = read_dword(dumpfile)
-size = read_dword(dumpfile)
-swallow_bytes(eid, name, dumpfile, size)
-if (ret):
-print_event(eid, name, "%d bytes (getrandom failed)" % (size))
-else:
-print_event(eid, name, "%d bytes" % (size))
-return True
-
 def decode_end(eid, name, dumpfile):
 print_event(eid, name)
 return False
-- 
2.45.2




[PATCH v6 07/10] chardev: set record/replay on the base device of a muxed device

2024-08-12 Thread Nicholas Piggin
chardev events to a muxed device don't get recorded because e.g.,
qemu_chr_be_write() checks whether the base device has the record flag
set.

This can be seen when replaying a trace that has characters typed into
the console, an examination of the log shows they are not recorded.

Setting QEMU_CHAR_FEATURE_REPLAY on the base chardev fixes the problem.

Reviewed-by: Alex Bennée 
Signed-off-by: Nicholas Piggin 
---
 chardev/char.c | 71 +++---
 1 file changed, 50 insertions(+), 21 deletions(-)

diff --git a/chardev/char.c b/chardev/char.c
index 3c43fb1278..ba847b6e9e 100644
--- a/chardev/char.c
+++ b/chardev/char.c
@@ -615,11 +615,24 @@ ChardevBackend *qemu_chr_parse_opts(QemuOpts *opts, Error 
**errp)
 return backend;
 }
 
-Chardev *qemu_chr_new_from_opts(QemuOpts *opts, GMainContext *context,
-Error **errp)
+static void qemu_chardev_set_replay(Chardev *chr, Error **errp)
+{
+if (replay_mode != REPLAY_MODE_NONE) {
+if (CHARDEV_GET_CLASS(chr)->chr_ioctl) {
+error_setg(errp, "Replay: ioctl is not supported "
+ "for serial devices yet");
+return;
+}
+qemu_chr_set_feature(chr, QEMU_CHAR_FEATURE_REPLAY);
+replay_register_char_driver(chr);
+}
+}
+
+static Chardev *__qemu_chr_new_from_opts(QemuOpts *opts, GMainContext *context,
+ bool replay, Error **errp)
 {
 const ChardevClass *cc;
-Chardev *chr = NULL;
+Chardev *base = NULL, *chr = NULL;
 ChardevBackend *backend = NULL;
 const char *name = qemu_opt_get(opts, "backend");
 const char *id = qemu_opts_id(opts);
@@ -657,11 +670,11 @@ Chardev *qemu_chr_new_from_opts(QemuOpts *opts, 
GMainContext *context,
 chr = qemu_chardev_new(bid ? bid : id,
object_class_get_name(OBJECT_CLASS(cc)),
backend, context, errp);
-
 if (chr == NULL) {
 goto out;
 }
 
+base = chr;
 if (bid) {
 Chardev *mux;
 qapi_free_ChardevBackend(backend);
@@ -681,11 +694,25 @@ Chardev *qemu_chr_new_from_opts(QemuOpts *opts, 
GMainContext *context,
 out:
 qapi_free_ChardevBackend(backend);
 g_free(bid);
+
+if (replay && base) {
+/* RR should be set on the base device, not the mux */
+qemu_chardev_set_replay(base, errp);
+}
+
 return chr;
 }
 
-Chardev *qemu_chr_new_noreplay(const char *label, const char *filename,
-   bool permit_mux_mon, GMainContext *context)
+Chardev *qemu_chr_new_from_opts(QemuOpts *opts, GMainContext *context,
+Error **errp)
+{
+/* XXX: should this really not record/replay? */
+return __qemu_chr_new_from_opts(opts, context, false, errp);
+}
+
+static Chardev *__qemu_chr_new(const char *label, const char *filename,
+   bool permit_mux_mon, GMainContext *context,
+   bool replay)
 {
 const char *p;
 Chardev *chr;
@@ -693,14 +720,22 @@ Chardev *qemu_chr_new_noreplay(const char *label, const 
char *filename,
 Error *err = NULL;
 
 if (strstart(filename, "chardev:", &p)) {
-return qemu_chr_find(p);
+chr = qemu_chr_find(p);
+if (replay) {
+qemu_chardev_set_replay(chr, &err);
+if (err) {
+error_report_err(err);
+return NULL;
+}
+}
+return chr;
 }
 
 opts = qemu_chr_parse_compat(label, filename, permit_mux_mon);
 if (!opts)
 return NULL;
 
-chr = qemu_chr_new_from_opts(opts, context, &err);
+chr = __qemu_chr_new_from_opts(opts, context, replay, &err);
 if (!chr) {
 error_report_err(err);
 goto out;
@@ -722,24 +757,18 @@ out:
 return chr;
 }
 
+Chardev *qemu_chr_new_noreplay(const char *label, const char *filename,
+   bool permit_mux_mon, GMainContext *context)
+{
+return __qemu_chr_new(label, filename, permit_mux_mon, context, false);
+}
+
 static Chardev *qemu_chr_new_permit_mux_mon(const char *label,
   const char *filename,
   bool permit_mux_mon,
   GMainContext *context)
 {
-Chardev *chr;
-chr = qemu_chr_new_noreplay(label, filename, permit_mux_mon, context);
-if (chr) {
-if (replay_mode != REPLAY_MODE_NONE) {
-qemu_chr_set_feature(chr, QEMU_CHAR_FEATURE_REPLAY);
-}
-if (qemu_chr_replay(chr) && CHARDEV_GET_CLASS(chr)->chr_ioctl) {
-error_report("Replay: ioctl is not supported "
- "for serial devices yet");
-}
-replay_register_char_driver(chr);
-}
-return chr;
+r

[PATCH v6 03/10] tests/avocado: exercise scripts/replay-dump.py in replay tests

2024-08-12 Thread Nicholas Piggin
This runs replay-dump.py after recording a trace, and fails the test if
the script fails.

replay-dump.py is modified to exit with non-zero if an error is
encountered while parsing, to support this.

Reviewed-by: Alex Bennée 
Reviewed-by: Pavel Dovgalyuk 
Signed-off-by: Nicholas Piggin 

v5: Update timeout to 180s because x86 was just exceeding 120s in
gitlab with this change
---
 scripts/replay-dump.py |  6 --
 tests/avocado/replay_kernel.py | 13 -
 tests/avocado/replay_linux.py  | 10 ++
 3 files changed, 26 insertions(+), 3 deletions(-)

diff --git a/scripts/replay-dump.py b/scripts/replay-dump.py
index b82659cfb6..4ce7ff51cc 100755
--- a/scripts/replay-dump.py
+++ b/scripts/replay-dump.py
@@ -21,6 +21,7 @@
 import argparse
 import struct
 import os
+import sys
 from collections import namedtuple
 from os import path
 
@@ -100,7 +101,7 @@ def call_decode(table, index, dumpfile):
 print("Could not decode index: %d" % (index))
 print("Entry is: %s" % (decoder))
 print("Decode Table is:\n%s" % (table))
-return False
+raise(Exception("unknown event"))
 else:
 return decoder.fn(decoder.eid, decoder.name, dumpfile)
 
@@ -121,7 +122,7 @@ def print_event(eid, name, string=None, event_count=None):
 def decode_unimp(eid, name, _unused_dumpfile):
 "Unimplemented decoder, will trigger exit"
 print("%s not handled - will now stop" % (name))
-return False
+raise(Exception("unhandled event"))
 
 def decode_plain(eid, name, _unused_dumpfile):
 "Plain events without additional data"
@@ -434,6 +435,7 @@ def decode_file(filename):
 dumpfile)
 except Exception as inst:
 print(f"error {inst}")
+sys.exit(1)
 
 finally:
 print(f"Reached {dumpfile.tell()} of {dumpsize} bytes")
diff --git a/tests/avocado/replay_kernel.py b/tests/avocado/replay_kernel.py
index 232d287c27..a668af9d36 100644
--- a/tests/avocado/replay_kernel.py
+++ b/tests/avocado/replay_kernel.py
@@ -13,6 +13,7 @@
 import shutil
 import logging
 import time
+import subprocess
 
 from avocado import skip
 from avocado import skipUnless
@@ -31,7 +32,7 @@ class ReplayKernelBase(LinuxKernelTest):
 terminates.
 """
 
-timeout = 120
+timeout = 180
 KERNEL_COMMON_COMMAND_LINE = 'printk.time=1 panic=-1 '
 
 def run_vm(self, kernel_path, kernel_command_line, console_pattern,
@@ -63,6 +64,8 @@ def run_vm(self, kernel_path, kernel_command_line, 
console_pattern,
 vm.shutdown()
 logger.info('finished the recording with log size %s bytes'
 % os.path.getsize(replay_path))
+self.run_replay_dump(replay_path)
+logger.info('successfully tested replay-dump.py')
 else:
 vm.wait()
 logger.info('successfully finished the replay')
@@ -70,6 +73,14 @@ def run_vm(self, kernel_path, kernel_command_line, 
console_pattern,
 logger.info('elapsed time %.2f sec' % elapsed)
 return elapsed
 
+def run_replay_dump(self, replay_path):
+try:
+subprocess.check_call(["./scripts/replay-dump.py",
+   "-f", replay_path],
+  stdout=subprocess.DEVNULL)
+except subprocess.CalledProcessError:
+self.fail('replay-dump.py failed')
+
 def run_rr(self, kernel_path, kernel_command_line, console_pattern,
shift=7, args=None):
 replay_path = os.path.join(self.workdir, 'replay.bin')
diff --git a/tests/avocado/replay_linux.py b/tests/avocado/replay_linux.py
index b4673261ce..5916922435 100644
--- a/tests/avocado/replay_linux.py
+++ b/tests/avocado/replay_linux.py
@@ -94,6 +94,8 @@ def launch_and_wait(self, record, args, shift):
 vm.shutdown()
 logger.info('finished the recording with log size %s bytes'
 % os.path.getsize(replay_path))
+self.run_replay_dump(replay_path)
+logger.info('successfully tested replay-dump.py')
 else:
 vm.event_wait('SHUTDOWN', self.timeout)
 vm.wait()
@@ -108,6 +110,14 @@ def run_rr(self, args=None, shift=7):
 logger = logging.getLogger('replay')
 logger.info('replay overhead {:.2%}'.format(t2 / t1 - 1))
 
+def run_replay_dump(self, replay_path):
+try:
+subprocess.check_call(["./scripts/replay-dump.py",
+   "-f", replay_path],
+  stdout=subprocess.DEVNULL)
+except subprocess.CalledProcessError:
+self.fail('replay-dump.py failed')
+
 @skipUnless(os.getenv('AVOCADO_TIMEOUT_EXPECTED'), 'Test might timeout')
 class ReplayLinuxX8664(ReplayLinux):
 """
-- 
2.45.2




[PATCH v6 10/10] savevm: Fix load_snapshot error path crash

2024-08-12 Thread Nicholas Piggin
An error path missed setting *errp, which can cause a NULL deref.

Reviewed-by: Alex Bennée 
Signed-off-by: Nicholas Piggin 
---
 migration/savevm.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/migration/savevm.c b/migration/savevm.c
index 85958d7b09..6bb404b9c8 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -3288,6 +3288,7 @@ bool load_snapshot(const char *name, const char *vmstate,
 /* Don't even try to load empty VM states */
 ret = bdrv_snapshot_find(bs_vm_state, &sn, name);
 if (ret < 0) {
+error_setg(errp, "Snapshot can not be found");
 return false;
 } else if (sn.vm_state_size == 0) {
 error_setg(errp, "This is a disk-only snapshot. Revert to it "
-- 
2.45.2




[PATCH v6 04/10] replay: allow runstate shutdown->running when replaying trace

2024-08-12 Thread Nicholas Piggin
When replaying a trace, it is possible to go from shutdown to running
with a reverse-debugging step. This can be useful if the problem being
debugged triggers a reset or shutdown.

This can be tested by making a recording of a machine that shuts down,
then using -action shutdown=pause when replaying it. Continuing to the
end of the trace then reverse-stepping in gdb crashes due to invalid
runstate transition.

Just permitting the transition seems to be all that's necessary for
reverse-debugging to work well in such a state.

Reviewed-by: Alex Bennée 
Reviewed-by: Pavel Dovgalyuk 
Signed-off-by: Nicholas Piggin 
---
 include/sysemu/runstate.h |  1 +
 replay/replay.c   |  2 ++
 system/runstate.c | 31 ---
 3 files changed, 31 insertions(+), 3 deletions(-)

diff --git a/include/sysemu/runstate.h b/include/sysemu/runstate.h
index e210a37abf..11c7ff3ffb 100644
--- a/include/sysemu/runstate.h
+++ b/include/sysemu/runstate.h
@@ -9,6 +9,7 @@ void runstate_set(RunState new_state);
 RunState runstate_get(void);
 bool runstate_is_running(void);
 bool runstate_needs_reset(void);
+void runstate_replay_enable(void);
 
 typedef void VMChangeStateHandler(void *opaque, bool running, RunState state);
 
diff --git a/replay/replay.c b/replay/replay.c
index a2c576c16e..b8564a4813 100644
--- a/replay/replay.c
+++ b/replay/replay.c
@@ -385,6 +385,8 @@ static void replay_enable(const char *fname, int mode)
 replay_fetch_data_kind();
 }
 
+runstate_replay_enable();
+
 replay_init_events();
 }
 
diff --git a/system/runstate.c b/system/runstate.c
index c833316f6d..a0e2a5fd22 100644
--- a/system/runstate.c
+++ b/system/runstate.c
@@ -181,6 +181,12 @@ static const RunStateTransition runstate_transitions_def[] 
= {
 { RUN_STATE__MAX, RUN_STATE__MAX },
 };
 
+static const RunStateTransition replay_play_runstate_transitions_def[] = {
+{ RUN_STATE_SHUTDOWN, RUN_STATE_RUNNING},
+
+{ RUN_STATE__MAX, RUN_STATE__MAX },
+};
+
 static bool runstate_valid_transitions[RUN_STATE__MAX][RUN_STATE__MAX];
 
 bool runstate_check(RunState state)
@@ -188,14 +194,33 @@ bool runstate_check(RunState state)
 return current_run_state == state;
 }
 
-static void runstate_init(void)
+static void transitions_set_valid(const RunStateTransition *rst)
 {
 const RunStateTransition *p;
 
-memset(&runstate_valid_transitions, 0, sizeof(runstate_valid_transitions));
-for (p = &runstate_transitions_def[0]; p->from != RUN_STATE__MAX; p++) {
+for (p = rst; p->from != RUN_STATE__MAX; p++) {
 runstate_valid_transitions[p->from][p->to] = true;
 }
+}
+
+void runstate_replay_enable(void)
+{
+assert(replay_mode != REPLAY_MODE_NONE);
+
+if (replay_mode == REPLAY_MODE_PLAY) {
+/*
+ * When reverse-debugging, it is possible to move state from
+ * shutdown to running.
+ */
+transitions_set_valid(&replay_play_runstate_transitions_def[0]);
+}
+}
+
+static void runstate_init(void)
+{
+memset(&runstate_valid_transitions, 0, sizeof(runstate_valid_transitions));
+
+transitions_set_valid(&runstate_transitions_def[0]);
 
 qemu_mutex_init(&vmstop_lock);
 }
-- 
2.45.2




[PATCH v6 01/10] scripts/replay-dump.py: Update to current rr record format

2024-08-12 Thread Nicholas Piggin
The v12 format support for replay-dump has a few issues still. This
fixes async decoding; adds event, shutdown, and end decoding; fixes
audio in / out events, fixes checkpoint checking of following async
events.

Reviewed-by: Alex Bennée 
Signed-off-by: Nicholas Piggin 
---
 scripts/replay-dump.py | 127 ++---
 1 file changed, 93 insertions(+), 34 deletions(-)

diff --git a/scripts/replay-dump.py b/scripts/replay-dump.py
index d668193e79..419ee3257b 100755
--- a/scripts/replay-dump.py
+++ b/scripts/replay-dump.py
@@ -20,6 +20,7 @@
 
 import argparse
 import struct
+import os
 from collections import namedtuple
 from os import path
 
@@ -134,6 +135,17 @@ def swallow_async_qword(eid, name, dumpfile):
 print("  %s(%d) @ %d" % (name, eid, step_id))
 return True
 
+def swallow_bytes(eid, name, dumpfile, nr):
+"""Swallow nr bytes of data without looking at it"""
+dumpfile.seek(nr, os.SEEK_CUR)
+
+def decode_exception(eid, name, dumpfile):
+print_event(eid, name)
+return True
+
+# v12 does away with the additional event byte and encodes it in the main type
+# Between v8 and v9, REPLAY_ASYNC_BH_ONESHOT was added, but we don't decode
+# those versions so leave it out.
 async_decode_table = [ Decoder(0, "REPLAY_ASYNC_EVENT_BH", 
swallow_async_qword),
Decoder(1, "REPLAY_ASYNC_INPUT", decode_unimp),
Decoder(2, "REPLAY_ASYNC_INPUT_SYNC", decode_unimp),
@@ -142,8 +154,8 @@ def swallow_async_qword(eid, name, dumpfile):
Decoder(5, "REPLAY_ASYNC_EVENT_NET", decode_unimp),
 ]
 # See replay_read_events/replay_read_event
-def decode_async(eid, name, dumpfile):
-"""Decode an ASYNC event"""
+def decode_async_old(eid, name, dumpfile):
+"""Decode an ASYNC event (pre-v8)"""
 
 print_event(eid, name)
 
@@ -157,6 +169,35 @@ def decode_async(eid, name, dumpfile):
 
 return call_decode(async_decode_table, async_event_kind, dumpfile)
 
+def decode_async_bh(eid, name, dumpfile):
+op_id = read_qword(dumpfile)
+print_event(eid, name)
+return True
+
+def decode_async_bh_oneshot(eid, name, dumpfile):
+op_id = read_qword(dumpfile)
+print_event(eid, name)
+return True
+
+def decode_async_char_read(eid, name, dumpfile):
+char_id = read_byte(dumpfile)
+size = read_dword(dumpfile)
+print_event(eid, name, "device:%x chars:%s" % (char_id, 
dumpfile.read(size)))
+return True
+
+def decode_async_block(eid, name, dumpfile):
+op_id = read_qword(dumpfile)
+print_event(eid, name)
+return True
+
+def decode_async_net(eid, name, dumpfile):
+net_id = read_byte(dumpfile)
+flags = read_dword(dumpfile)
+size = read_dword(dumpfile)
+swallow_bytes(eid, name, dumpfile, size)
+print_event(eid, name, "net:%x flags:%x bytes:%d" % (net_id, flags, size))
+return True
+
 total_insns = 0
 
 def decode_instruction(eid, name, dumpfile):
@@ -166,6 +207,10 @@ def decode_instruction(eid, name, dumpfile):
 print_event(eid, name, "+ %d -> %d" % (ins_diff, total_insns))
 return True
 
+def decode_shutdown(eid, name, dumpfile):
+print_event(eid, name)
+return True
+
 def decode_char_write(eid, name, dumpfile):
 res = read_dword(dumpfile)
 offset = read_dword(dumpfile)
@@ -177,7 +222,7 @@ def decode_audio_out(eid, name, dumpfile):
 print_event(eid, name, "%d" % (audio_data))
 return True
 
-def decode_checkpoint(eid, name, dumpfile):
+def __decode_checkpoint(eid, name, dumpfile, old):
 """Decode a checkpoint.
 
 Checkpoints contain a series of async events with their own specific data.
@@ -189,14 +234,20 @@ def decode_checkpoint(eid, name, dumpfile):
 
 # if the next event is EVENT_ASYNC there are a bunch of
 # async events to read, otherwise we are done
-if next_event != 3:
-print_event(eid, name, "no additional data", event_number)
-else:
+if (old and next_event == 3) or (not old and next_event >= 3 and 
next_event <= 9):
 print_event(eid, name, "more data follows", event_number)
+else:
+print_event(eid, name, "no additional data", event_number)
 
 replay_state.reuse_event(next_event)
 return True
 
+def decode_checkpoint_old(eid, name, dumpfile):
+return __decode_checkpoint(eid, name, dumpfile, False)
+
+def decode_checkpoint(eid, name, dumpfile):
+return __decode_checkpoint(eid, name, dumpfile, True)
+
 def decode_checkpoint_init(eid, name, dumpfile):
 print_event(eid, name)
 return True
@@ -212,15 +263,23 @@ def decode_clock(eid, name, dumpfile):
 
 def decode_random(eid, name, dumpfile):
 ret = read_dword(dumpfile)
-data = read_array(dumpfile)
-print_event(eid, "%d bytes of random da

[PATCH v6 05/10] Revert "replay: stop us hanging in rr_wait_io_event"

2024-08-12 Thread Nicholas Piggin
This reverts commit 1f881ea4a444ef36a8b6907b0b82be4b3af253a2.

That commit causes reverse_debugging.py test failures, and does
not seem to solve the root cause of the problem x86-64 still
hangs in record/replay tests.

The problem with short-cutting the iowait that was taken during
record phase is that related events will not get consumed at the
same points (e.g., reading the clock).

A hang with zero icount always seems to be a symptom of an earlier
problem that has caused the recording to become out of synch with
the execution and consumption of events by replay.

Acked-by: Alex Bennée 
Signed-off-by: Nicholas Piggin 
---
 include/sysemu/replay.h  |  5 -
 accel/tcg/tcg-accel-ops-rr.c |  2 +-
 replay/replay.c  | 21 -
 3 files changed, 1 insertion(+), 27 deletions(-)

diff --git a/include/sysemu/replay.h b/include/sysemu/replay.h
index f229b2109c..8102fa54f0 100644
--- a/include/sysemu/replay.h
+++ b/include/sysemu/replay.h
@@ -73,11 +73,6 @@ int replay_get_instructions(void);
 /*! Updates instructions counter in replay mode. */
 void replay_account_executed_instructions(void);
 
-/**
- * replay_can_wait: check if we should pause for wait-io
- */
-bool replay_can_wait(void);
-
 /* Processing clocks and other time sources */
 
 /*! Save the specified clock */
diff --git a/accel/tcg/tcg-accel-ops-rr.c b/accel/tcg/tcg-accel-ops-rr.c
index 48c38714bd..c59c77da4b 100644
--- a/accel/tcg/tcg-accel-ops-rr.c
+++ b/accel/tcg/tcg-accel-ops-rr.c
@@ -109,7 +109,7 @@ static void rr_wait_io_event(void)
 {
 CPUState *cpu;
 
-while (all_cpu_threads_idle() && replay_can_wait()) {
+while (all_cpu_threads_idle()) {
 rr_stop_kick_timer();
 qemu_cond_wait_bql(first_cpu->halt_cond);
 }
diff --git a/replay/replay.c b/replay/replay.c
index b8564a4813..895fa6b67a 100644
--- a/replay/replay.c
+++ b/replay/replay.c
@@ -451,27 +451,6 @@ void replay_start(void)
 replay_enable_events();
 }
 
-/*
- * For none/record the answer is yes.
- */
-bool replay_can_wait(void)
-{
-if (replay_mode == REPLAY_MODE_PLAY) {
-/*
- * For playback we shouldn't ever be at a point we wait. If
- * the instruction count has reached zero and we have an
- * unconsumed event we should go around again and consume it.
- */
-if (replay_state.instruction_count == 0 && 
replay_state.has_unread_data) {
-return false;
-} else {
-replay_sync_error("Playback shouldn't have to iowait");
-}
-}
-return true;
-}
-
-
 void replay_finish(void)
 {
 if (replay_mode == REPLAY_MODE_NONE) {
-- 
2.45.2




[PATCH v6 08/10] virtio-net: Use replay_schedule_bh_event for bhs that affect machine state

2024-08-12 Thread Nicholas Piggin
The regular qemu_bh_schedule() calls result in non-deterministic
execution of the bh in record-replay mode, which causes replay failure.

Reviewed-by: Alex Bennée 
Reviewed-by: Pavel Dovgalyuk 
Signed-off-by: Nicholas Piggin 
---
 hw/net/virtio-net.c | 11 ++-
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 08aa0b65e3..10ebaae5e2 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -40,6 +40,7 @@
 #include "migration/misc.h"
 #include "standard-headers/linux/ethtool.h"
 #include "sysemu/sysemu.h"
+#include "sysemu/replay.h"
 #include "trace.h"
 #include "monitor/qdev.h"
 #include "monitor/monitor.h"
@@ -417,7 +418,7 @@ static void virtio_net_set_status(struct VirtIODevice 
*vdev, uint8_t status)
 timer_mod(q->tx_timer,
qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 
n->tx_timeout);
 } else {
-qemu_bh_schedule(q->tx_bh);
+replay_bh_schedule_event(q->tx_bh);
 }
 } else {
 if (q->tx_timer) {
@@ -2672,7 +2673,7 @@ static void virtio_net_tx_complete(NetClientState *nc, 
ssize_t len)
  */
 virtio_queue_set_notification(q->tx_vq, 0);
 if (q->tx_bh) {
-qemu_bh_schedule(q->tx_bh);
+replay_bh_schedule_event(q->tx_bh);
 } else {
 timer_mod(q->tx_timer,
   qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
@@ -2838,7 +2839,7 @@ static void virtio_net_handle_tx_bh(VirtIODevice *vdev, 
VirtQueue *vq)
 return;
 }
 virtio_queue_set_notification(vq, 0);
-qemu_bh_schedule(q->tx_bh);
+replay_bh_schedule_event(q->tx_bh);
 }
 
 static void virtio_net_tx_timer(void *opaque)
@@ -2921,7 +2922,7 @@ static void virtio_net_tx_bh(void *opaque)
 /* If we flush a full burst of packets, assume there are
  * more coming and immediately reschedule */
 if (ret >= n->tx_burst) {
-qemu_bh_schedule(q->tx_bh);
+replay_bh_schedule_event(q->tx_bh);
 q->tx_waiting = 1;
 return;
 }
@@ -2935,7 +2936,7 @@ static void virtio_net_tx_bh(void *opaque)
 return;
 } else if (ret > 0) {
 virtio_queue_set_notification(q->tx_vq, 0);
-qemu_bh_schedule(q->tx_bh);
+replay_bh_schedule_event(q->tx_bh);
 q->tx_waiting = 1;
 }
 }
-- 
2.45.2




[PATCH v6 06/10] tests/avocado: replay_kernel.py add x86-64 q35 machine test

2024-08-12 Thread Nicholas Piggin
The x86-64 pc machine is flaky with record/replay, but q35 is more
stable. Add a q35 test to replay_kernel.py.

Reviewed-by: Alex Bennée 
Tested-by: Alex Bennée 
Signed-off-by: Nicholas Piggin 
---
 tests/avocado/replay_kernel.py | 18 +-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/tests/avocado/replay_kernel.py b/tests/avocado/replay_kernel.py
index a668af9d36..e22c200a36 100644
--- a/tests/avocado/replay_kernel.py
+++ b/tests/avocado/replay_kernel.py
@@ -110,7 +110,7 @@ def test_i386_pc(self):
 self.run_rr(kernel_path, kernel_command_line, console_pattern, shift=5)
 
 # See https://gitlab.com/qemu-project/qemu/-/issues/2094
-@skipUnless(os.getenv('QEMU_TEST_FLAKY_TESTS'), 'Test sometimes gets 
stuck')
+@skipUnless(os.getenv('QEMU_TEST_FLAKY_TESTS'), 'pc machine is unstable 
with replay')
 def test_x86_64_pc(self):
 """
 :avocado: tags=arch:x86_64
@@ -128,6 +128,22 @@ def test_x86_64_pc(self):
 
 self.run_rr(kernel_path, kernel_command_line, console_pattern, shift=5)
 
+def test_x86_64_q35(self):
+"""
+:avocado: tags=arch:x86_64
+:avocado: tags=machine:q35
+"""
+kernel_url = ('https://archives.fedoraproject.org/pub/archive/fedora'
+  '/linux/releases/29/Everything/x86_64/os/images/pxeboot'
+  '/vmlinuz')
+kernel_hash = '23bebd2680757891cf7adedb033532163a792495'
+kernel_path = self.fetch_asset(kernel_url, asset_hash=kernel_hash)
+
+kernel_command_line = self.KERNEL_COMMON_COMMAND_LINE + 'console=ttyS0'
+console_pattern = 'VFS: Cannot open root device'
+
+self.run_rr(kernel_path, kernel_command_line, console_pattern, shift=5)
+
 def test_mips_malta(self):
 """
 :avocado: tags=arch:mips
-- 
2.45.2




[PATCH v6 09/10] virtio-net: Use virtual time for RSC timers

2024-08-12 Thread Nicholas Piggin
Receive coalescing is visible to the target machine, so its timers
should use virtual time like other timers in virtio-net, to be
compatible with record-replay.

Signed-off-by: Nicholas Piggin 
---
 hw/net/virtio-net.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 10ebaae5e2..ed33a32877 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -2124,7 +2124,7 @@ static void virtio_net_rsc_purge(void *opq)
 chain->stat.timer++;
 if (!QTAILQ_EMPTY(&chain->buffers)) {
 timer_mod(chain->drain_timer,
-  qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
+  qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + chain->n->rsc_timeout);
 }
 }
 
@@ -2360,7 +2360,7 @@ static size_t 
virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain,
 chain->stat.empty_cache++;
 virtio_net_rsc_cache_buf(chain, nc, buf, size);
 timer_mod(chain->drain_timer,
-  qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout);
+  qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + chain->n->rsc_timeout);
 return size;
 }
 
@@ -2598,7 +2598,7 @@ static VirtioNetRscChain 
*virtio_net_rsc_lookup_chain(VirtIONet *n,
 chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
 chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
 }
-chain->drain_timer = timer_new_ns(QEMU_CLOCK_HOST,
+chain->drain_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
   virtio_net_rsc_purge, chain);
 memset(&chain->stat, 0, sizeof(chain->stat));
 
-- 
2.45.2




Re: [PATCH v2 16/21] virtio-net: Use replay_schedule_bh_event for bhs that affect machine state

2024-08-13 Thread Nicholas Piggin
On Wed Aug 14, 2024 at 6:48 AM AEST, Michael S. Tsirkin wrote:
> On Tue, Aug 13, 2024 at 09:23:24PM +0100, Alex Bennée wrote:
> > From: Nicholas Piggin 
> > 
> > The regular qemu_bh_schedule() calls result in non-deterministic
> > execution of the bh in record-replay mode, which causes replay failure.
> > 
> > Reviewed-by: Alex Bennée 
> > Reviewed-by: Pavel Dovgalyuk 
> > Signed-off-by: Nicholas Piggin 
> > Message-Id: <20240813050638.446172-9-npig...@gmail.com>
> > Signed-off-by: Alex Bennée 
> > ---
> >  hw/net/virtio-net.c | 11 ++-
> >  1 file changed, 6 insertions(+), 5 deletions(-)
> > 
> > diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
> > index 08aa0b65e3..10ebaae5e2 100644
> > --- a/hw/net/virtio-net.c
> > +++ b/hw/net/virtio-net.c
> > @@ -40,6 +40,7 @@
> >  #include "migration/misc.h"
> >  #include "standard-headers/linux/ethtool.h"
> >  #include "sysemu/sysemu.h"
> > +#include "sysemu/replay.h"
> >  #include "trace.h"
> >  #include "monitor/qdev.h"
> >  #include "monitor/monitor.h"
> > @@ -417,7 +418,7 @@ static void virtio_net_set_status(struct VirtIODevice 
> > *vdev, uint8_t status)
> >  timer_mod(q->tx_timer,
> > qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 
> > n->tx_timeout);
> >  } else {
> > -qemu_bh_schedule(q->tx_bh);
> > +replay_bh_schedule_event(q->tx_bh);
> >  }
> >  } else {
> >  if (q->tx_timer) {
> > @@ -2672,7 +2673,7 @@ static void virtio_net_tx_complete(NetClientState 
> > *nc, ssize_t len)
> >   */
> >  virtio_queue_set_notification(q->tx_vq, 0);
> >  if (q->tx_bh) {
> > -qemu_bh_schedule(q->tx_bh);
> > +replay_bh_schedule_event(q->tx_bh);
> >  } else {
> >  timer_mod(q->tx_timer,
> >qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 
> > n->tx_timeout);
> > @@ -2838,7 +2839,7 @@ static void virtio_net_handle_tx_bh(VirtIODevice 
> > *vdev, VirtQueue *vq)
> >  return;
> >  }
> >  virtio_queue_set_notification(vq, 0);
> > -qemu_bh_schedule(q->tx_bh);
> > +replay_bh_schedule_event(q->tx_bh);
> >  }
> >  
> >  static void virtio_net_tx_timer(void *opaque)
> > @@ -2921,7 +2922,7 @@ static void virtio_net_tx_bh(void *opaque)
> >  /* If we flush a full burst of packets, assume there are
> >   * more coming and immediately reschedule */
> >  if (ret >= n->tx_burst) {
> > -qemu_bh_schedule(q->tx_bh);
> > +replay_bh_schedule_event(q->tx_bh);
> >  q->tx_waiting = 1;
> >  return;
> >  }
> > @@ -2935,7 +2936,7 @@ static void virtio_net_tx_bh(void *opaque)
> >  return;
> >  } else if (ret > 0) {
> >  virtio_queue_set_notification(q->tx_vq, 0);
> > -qemu_bh_schedule(q->tx_bh);
> > +replay_bh_schedule_event(q->tx_bh);
> >  q->tx_waiting = 1;
> >  }
> >  }
> > -- 
> > 2.39.2
>
>
> Is this really the only way to fix this? I do not think
> virtio has any business knowing about replay.
> What does this API do, even? BH but not broken with replay?
> Do we ever want replay broken? Why not fix qemu_bh_schedule?
> And when we add another feature which we do not want to break
> will we do foo_bar_replay_bh_schedule_event or what?

I agree with you. We need to do this (a couple of other hw
subsystems already do and likely some are still broken vs
replay and would need to be converted), but I think it's
mostly a case of bad naming. You're right the caller should
not know about replay at all, what it should be is whether
the event is for the target machine or the host harness,
same as timers are VIRTUAL / HOST.

So I think we just need to make a qemu_bh_schedule_,
or qemu_bh_schedule_event(... QEMU_EVENT_VIRTUAL/HOST/etc).
I had started on a conversion once but not completed it.
I could resurrect if there is agreement on the API?

Thanks,
Nick




Re: [RFC PATCH] accel/tcg: clear all TBs from a page when it is written to

2024-08-13 Thread Nicholas Piggin
On Mon Aug 12, 2024 at 11:25 AM AEST, Richard Henderson wrote:
> On 8/9/24 17:47, Nicholas Piggin wrote:
> > This is not a clean patch, but does fix a problem I hit with TB
> > invalidation due to the target software writing to memory with TBs.
> > 
> > Lockup messages are triggering in Linux due to page clearing taking a
> > long time when a code page has been freed, because it takes a lot of
> > notdirty notifiers, which massively slows things down. Linux might
> > possibly have a bug here too because it seems to hang indefinitely in
> > some cases, but even if it didn't, the latency of clearing these pages
> > is very high.
> > 
> > This showed when running KVM on the emulated machine, starting and
> > stopping guests. That causes lots of instruction pages to be freed.
> > Usually if you're just running Linux, executable pages remain in
> > pagecache so you get fewer of these bombs in the kernel memory
> > allocator. But page reclaim, JITs, deleting executable files, etc.,
> > could trigger it too.
> > 
> > Invalidating all TBs from the page on any hit seems to avoid the problem
> > and generally speeds things up.
> > 
> > How important is the precise invalidation? These days I assume the
> > tricky kind of SMC that frequently writes code close to where it's
> > executing is pretty rare and might not be something we really care about
> > for performance. Could we remove sub-page TB invalidation entirely?
>
> Happens on x86 and s390 regularly enough, so we can't remove it.
>
> > @@ -1107,6 +1107,9 @@ tb_invalidate_phys_page_range__locked(struct 
> > page_collection *pages,
> >   TranslationBlock *current_tb = retaddr ? tcg_tb_lookup(retaddr) : 
> > NULL;
> >   #endif /* TARGET_HAS_PRECISE_SMC */
> >   
> > +start &= TARGET_PAGE_MASK;
> > +last |= ~TARGET_PAGE_MASK;
> > +
> >   /* Range may not cross a page. */
> >   tcg_debug_assert(((start ^ last) & TARGET_PAGE_MASK) == 0);
>
> This would definitely break SMC.

They can't invalidate the instruction currently being executed?
I'll experiment a bit more.

> However, there's a better solution.  We're already iterating over all of the 
> TBs on the 
> current page only.  Move *everything* except the tb_phys_invalidate__locked 
> call into the 
> SMC ifdef, and unconditionally invalidate every TB selected in the loop.

Okay. I suspect *most* of the time even the strict SMC archs would
not be writing to the same page they're executing either. But I can
start with the !SMC.

> We experimented with something like this for aarch64, which used to spend a 
> lot of the 
> kernel startup time invalidating code pages from the (somewhat bloated) EDK2 
> bios.  But it 
> turned out the bigger problem was address space randomization, and with 
> CF_PCREL the 
> problem appeared to go away.

Interesting.

> I don't think we've done any kvm-under-tcg performance testing, but lockup 
> messages would 
> certainly be something to look for...

Yeah, actually Linux is throwing the messages a bit more recently
at least on distros that enable page clearing at alloc for security,
because that clearing is a big chunk that can happen in critical
sections.

Thanks for the suggestion, I'll give it a try.

Thanks,
Nick



Re: [PATCH v2 16/21] virtio-net: Use replay_schedule_bh_event for bhs that affect machine state

2024-08-15 Thread Nicholas Piggin
On Thu Aug 15, 2024 at 3:25 AM AEST, Alex Bennée wrote:
> "Michael S. Tsirkin"  writes:
>
> > On Wed, Aug 14, 2024 at 04:05:34PM +1000, Nicholas Piggin wrote:
> >> On Wed Aug 14, 2024 at 6:48 AM AEST, Michael S. Tsirkin wrote:
> >> > On Tue, Aug 13, 2024 at 09:23:24PM +0100, Alex Bennée wrote:
> >> > > From: Nicholas Piggin 
> >> > > 
> >> > > The regular qemu_bh_schedule() calls result in non-deterministic
> >> > > execution of the bh in record-replay mode, which causes replay failure.
> >> > > 
> >> > > Reviewed-by: Alex Bennée 
> >> > > Reviewed-by: Pavel Dovgalyuk 
> >> > > Signed-off-by: Nicholas Piggin 
> >> > > Message-Id: <20240813050638.446172-9-npig...@gmail.com>
> >> > > Signed-off-by: Alex Bennée 
> >> > > ---
> >> > >  hw/net/virtio-net.c | 11 ++-
> >> > >  1 file changed, 6 insertions(+), 5 deletions(-)
> >> > > 
> >> > > diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
> >> > > index 08aa0b65e3..10ebaae5e2 100644
> >> > > --- a/hw/net/virtio-net.c
> >> > > +++ b/hw/net/virtio-net.c
> >> > > @@ -40,6 +40,7 @@
> >> > >  #include "migration/misc.h"
> >> > >  #include "standard-headers/linux/ethtool.h"
> >> > >  #include "sysemu/sysemu.h"
> >> > > +#include "sysemu/replay.h"
> >> > >  #include "trace.h"
> >> > >  #include "monitor/qdev.h"
> >> > >  #include "monitor/monitor.h"
> >> > > @@ -417,7 +418,7 @@ static void virtio_net_set_status(struct 
> >> > > VirtIODevice *vdev, uint8_t status)
> >> > >  timer_mod(q->tx_timer,
> >> > > qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) 
> >> > > + n->tx_timeout);
> >> > >  } else {
> >> > > -qemu_bh_schedule(q->tx_bh);
> >> > > +replay_bh_schedule_event(q->tx_bh);
> >> > >  }
> >> > >  } else {
> >> > >  if (q->tx_timer) {
> >> > > @@ -2672,7 +2673,7 @@ static void 
> >> > > virtio_net_tx_complete(NetClientState *nc, ssize_t len)
> >> > >   */
> >> > >  virtio_queue_set_notification(q->tx_vq, 0);
> >> > >  if (q->tx_bh) {
> >> > > -qemu_bh_schedule(q->tx_bh);
> >> > > +replay_bh_schedule_event(q->tx_bh);
> >> > >  } else {
> >> > >  timer_mod(q->tx_timer,
> >> > >qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 
> >> > > n->tx_timeout);
> >> > > @@ -2838,7 +2839,7 @@ static void virtio_net_handle_tx_bh(VirtIODevice 
> >> > > *vdev, VirtQueue *vq)
> >> > >  return;
> >> > >  }
> >> > >  virtio_queue_set_notification(vq, 0);
> >> > > -qemu_bh_schedule(q->tx_bh);
> >> > > +replay_bh_schedule_event(q->tx_bh);
> >> > >  }
> >> > >  
> >> > >  static void virtio_net_tx_timer(void *opaque)
> >> > > @@ -2921,7 +2922,7 @@ static void virtio_net_tx_bh(void *opaque)
> >> > >  /* If we flush a full burst of packets, assume there are
> >> > >   * more coming and immediately reschedule */
> >> > >  if (ret >= n->tx_burst) {
> >> > > -qemu_bh_schedule(q->tx_bh);
> >> > > +replay_bh_schedule_event(q->tx_bh);
> >> > >  q->tx_waiting = 1;
> >> > >  return;
> >> > >  }
> >> > > @@ -2935,7 +2936,7 @@ static void virtio_net_tx_bh(void *opaque)
> >> > >  return;
> >> > >  } else if (ret > 0) {
> >> > >  virtio_queue_set_notification(q->tx_vq, 0);
> >> > > -qemu_bh_schedule(q->tx_bh);
> >> > > +replay_bh_schedule_event(q->tx_bh);
> >> > >  q->tx_waiting = 1;
> >> > >  }
> >> > >  }
> >> > > -- 
> >> > > 2.39.2
> >> >
> >> >
> >> > Is this really the only way to fix this? I do not th

Re: [PATCH v3] hw/ppc: Implement -dtb support for PowerNV

2024-08-15 Thread Nicholas Piggin
On Tue Aug 13, 2024 at 11:45 PM AEST, Aditya Gupta wrote:
> Currently any device tree passed with -dtb option in QEMU, was ignored
> by the PowerNV code.
>
> Read and pass the passed -dtb to the kernel, thus enabling easier
> debugging with custom DTBs.
>
> The existing behaviour when -dtb is 'not' passed, is preserved as-is.
>
> But when a '-dtb' is passed, it completely overrides any dtb nodes or
> changes QEMU might have done, such as '-append' arguments to the kernel
> (which are mentioned in /chosen/bootargs in the dtb), hence add warning
> when -dtb is being used
>
> Signed-off-by: Aditya Gupta 

This looks pretty good, I'm inclined to take it as a bug fix for this
release.  One little nit is MachineState.fdt vs PnvMachineState.fdt
which is now confusing. I would call the new PnvMachineState member
something like fdt_from_dtb, or fdt_override?

The other question... Some machines rebuild fdt at init, others at
reset time. As far as I understood, spapr has to rebuild on reset
because C-A-S call can update the fdt so you have to undo that on
reset. Did powernv just copy that without really needing it, I wonder?
Maybe that could be rearranged to just do it at init time (e.g., see
hw/riscv/virt.c which is simpler).

Thanks,
Nick

>
> ---
> Changelog
> ===
> v3:
>  + use 'load_device_tree' to read the device tree, instead of 
> g_file_get_contents
>  + tested that passed dtb does NOT get ignored on system_reset
>
> v2:
>  + move reading dtb and warning to pnv_init
>
> v1:
>  + use 'g_file_get_contents' and add check for -append & -dtb as suggested by 
> Daniel
> ---
> ---
>  hw/ppc/pnv.c | 34 ++
>  include/hw/ppc/pnv.h |  2 ++
>  2 files changed, 32 insertions(+), 4 deletions(-)
>
> diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
> index 3526852685b4..14225f7e48af 100644
> --- a/hw/ppc/pnv.c
> +++ b/hw/ppc/pnv.c
> @@ -736,10 +736,13 @@ static void pnv_reset(MachineState *machine, 
> ShutdownCause reason)
>  }
>  }
>  
> -fdt = pnv_dt_create(machine);
> -
> -/* Pack resulting tree */
> -_FDT((fdt_pack(fdt)));
> +if (pnv->fdt) {
> +fdt = pnv->fdt;
> +} else {
> +fdt = pnv_dt_create(machine);
> +/* Pack resulting tree */
> +_FDT((fdt_pack(fdt)));
> +}
>  
>  qemu_fdt_dumpdtb(fdt, fdt_totalsize(fdt));
>  cpu_physical_memory_write(PNV_FDT_ADDR, fdt, fdt_totalsize(fdt));
> @@ -952,6 +955,14 @@ static void pnv_init(MachineState *machine)
>  g_free(sz);
>  exit(EXIT_FAILURE);
>  }
> +
> +/* checks for invalid option combinations */
> +if (machine->dtb && (strlen(machine->kernel_cmdline) != 0)) {
> +error_report("-append and -dtb cannot be used together, as passed"
> +" command line is ignored in case of custom dtb");
> +exit(EXIT_FAILURE);
> +}
> +
>  memory_region_add_subregion(get_system_memory(), 0, machine->ram);
>  
>  /*
> @@ -1003,6 +1014,21 @@ static void pnv_init(MachineState *machine)
>  }
>  }
>  
> +/* load dtb if passed */
> +if (machine->dtb) {
> +int fdt_size;
> +
> +warn_report("with manually passed dtb, some options like '-append'"
> +" will get ignored and the dtb passed will be used as-is");
> +
> +/* read the file 'machine->dtb', and load it into 'fdt' buffer */
> +pnv->fdt = load_device_tree(machine->dtb, &fdt_size);
> +if (!pnv->fdt) {
> +error_report("Could not load dtb '%s'", machine->dtb);
> +exit(1);
> +}
> +}
> +
>  /* MSIs are supported on this platform */
>  msi_nonbroken = true;
>  
> diff --git a/include/hw/ppc/pnv.h b/include/hw/ppc/pnv.h
> index fcb6699150c8..20b68fd9264e 100644
> --- a/include/hw/ppc/pnv.h
> +++ b/include/hw/ppc/pnv.h
> @@ -91,6 +91,8 @@ struct PnvMachineState {
>  uint32_t initrd_base;
>  long initrd_size;
>  
> +void *fdt;
> +
>  uint32_t num_chips;
>  PnvChip  **chips;
>  




<    2   3   4   5   6   7   8   9   10   11   >