[RFC v2 2/2] hw/riscv: Add server platform reference machine

2024-03-12 Thread Fei Wu
The RISC-V Server Platform specification[1] defines a standardized set
of hardware and software capabilities, that portable system software,
such as OS and hypervisors can rely on being present in a RISC-V server
platform.

A corresponding Qemu RISC-V server platform reference (rvsp-ref for
short) machine type is added to provide an environment for firmware/OS
development and testing. The main features included in rvsp-ref are:

 - Based on riscv virt machine type
 - A new memory map as close to the virt machine's as possible
 - A new virt CPU type rvsp-ref-cpu for server platform compliance
 - AIA
 - PCIe AHCI
 - PCIe NIC
 - No virtio device
 - No fw_cfg device
 - No ACPI table provided
 - Only minimal device tree nodes

[1] https://github.com/riscv-non-isa/riscv-server-platform

Signed-off-by: Fei Wu 
---
 configs/devices/riscv64-softmmu/default.mak |1 +
 hw/riscv/Kconfig|   12 +
 hw/riscv/meson.build|1 +
 hw/riscv/server_platform_ref.c  | 1276 +++
 4 files changed, 1290 insertions(+)
 create mode 100644 hw/riscv/server_platform_ref.c

diff --git a/configs/devices/riscv64-softmmu/default.mak 
b/configs/devices/riscv64-softmmu/default.mak
index 3f68059448..a1d98e49ef 100644
--- a/configs/devices/riscv64-softmmu/default.mak
+++ b/configs/devices/riscv64-softmmu/default.mak
@@ -10,5 +10,6 @@ CONFIG_SPIKE=y
 CONFIG_SIFIVE_E=y
 CONFIG_SIFIVE_U=y
 CONFIG_RISCV_VIRT=y
+CONFIG_SERVER_PLATFORM_REF=y
 CONFIG_MICROCHIP_PFSOC=y
 CONFIG_SHAKTI_C=y
diff --git a/hw/riscv/Kconfig b/hw/riscv/Kconfig
index 5d644eb7b1..5674589e66 100644
--- a/hw/riscv/Kconfig
+++ b/hw/riscv/Kconfig
@@ -48,6 +48,18 @@ config RISCV_VIRT
 select ACPI
 select ACPI_PCI
 
+config SERVER_PLATFORM_REF
+bool
+select RISCV_NUMA
+select GOLDFISH_RTC
+select PCI
+select PCI_EXPRESS_GENERIC_BRIDGE
+select PFLASH_CFI01
+select SERIAL
+select RISCV_ACLINT
+select RISCV_APLIC
+select RISCV_IMSIC
+
 config SHAKTI_C
 bool
 select RISCV_ACLINT
diff --git a/hw/riscv/meson.build b/hw/riscv/meson.build
index 2f7ee81be3..bb3aff91ea 100644
--- a/hw/riscv/meson.build
+++ b/hw/riscv/meson.build
@@ -4,6 +4,7 @@ riscv_ss.add(when: 'CONFIG_RISCV_NUMA', if_true: 
files('numa.c'))
 riscv_ss.add(files('riscv_hart.c'))
 riscv_ss.add(when: 'CONFIG_OPENTITAN', if_true: files('opentitan.c'))
 riscv_ss.add(when: 'CONFIG_RISCV_VIRT', if_true: files('virt.c'))
+riscv_ss.add(when: 'CONFIG_SERVER_PLATFORM_REF', if_true: 
files('server_platform_ref.c'))
 riscv_ss.add(when: 'CONFIG_SHAKTI_C', if_true: files('shakti_c.c'))
 riscv_ss.add(when: 'CONFIG_SIFIVE_E', if_true: files('sifive_e.c'))
 riscv_ss.add(when: 'CONFIG_SIFIVE_U', if_true: files('sifive_u.c'))
diff --git a/hw/riscv/server_platform_ref.c b/hw/riscv/server_platform_ref.c
new file mode 100644
index 00..b552650265
--- /dev/null
+++ b/hw/riscv/server_platform_ref.c
@@ -0,0 +1,1276 @@
+/*
+ * QEMU RISC-V Server Platform (RVSP) Reference Board
+ *
+ * Copyright (c) 2024 Intel, Inc.
+ *
+ * This board is compliant with the RISC-V Server Platform specification and
+ * leverages a lot of riscv virt code.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/units.h"
+#include "qemu/error-report.h"
+#include "qemu/guest-random.h"
+#include "qapi/error.h"
+#include "qapi/qapi-visit-common.h"
+#include "hw/boards.h"
+#include "hw/loader.h"
+#include "hw/sysbus.h"
+#include "hw/qdev-properties.h"
+#include "hw/char/serial.h"
+#include "hw/block/flash.h"
+#include "hw/ide/pci.h"
+#include "hw/ide/ahci-pci.h"
+#include "hw/pci/pci.h"
+#include "hw/pci-host/gpex.h"
+#include "hw/core/sysbus-fdt.h"
+#include "hw/riscv/riscv_hart.h"
+#include "hw/riscv/boot.h"
+#include "hw/riscv/numa.h"
+#include "hw/intc/riscv_aclint.h"
+#include "hw/intc/riscv_aplic.h"
+#include "hw/intc/riscv_imsic.h"
+#include "chardev/char.h"
+#include "sysemu/device_tree.h"
+#include "sysemu/runstate.h"
+#include "

[RFC v2 1/2] target/riscv: Add server platform reference cpu

2024-03-12 Thread Fei Wu
The harts requirements of RISC-V server platform [1] require RVA23 ISA
profile support, plus Sv48, Svadu, H, Sscofpmf etc. This patch provides
a virt CPU type (rvsp-ref) as compliant as possible.

[1] 
https://github.com/riscv-non-isa/riscv-server-platform/blob/main/server_platform_requirements.adoc

Signed-off-by: Fei Wu 
---
 target/riscv/cpu-qom.h |  1 +
 target/riscv/cpu.c | 61 ++
 2 files changed, 62 insertions(+)

diff --git a/target/riscv/cpu-qom.h b/target/riscv/cpu-qom.h
index 3670cfe6d9..adb934d19e 100644
--- a/target/riscv/cpu-qom.h
+++ b/target/riscv/cpu-qom.h
@@ -49,6 +49,7 @@
 #define TYPE_RISCV_CPU_SIFIVE_U54   RISCV_CPU_TYPE_NAME("sifive-u54")
 #define TYPE_RISCV_CPU_THEAD_C906   RISCV_CPU_TYPE_NAME("thead-c906")
 #define TYPE_RISCV_CPU_VEYRON_V1RISCV_CPU_TYPE_NAME("veyron-v1")
+#define TYPE_RISCV_CPU_RVSP_REF RISCV_CPU_TYPE_NAME("rvsp-ref")
 #define TYPE_RISCV_CPU_HOST RISCV_CPU_TYPE_NAME("host")
 
 OBJECT_DECLARE_CPU_TYPE(RISCVCPU, RISCVCPUClass, RISCV_CPU)
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index 5a48d30828..6685fe0c01 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -2312,6 +2312,66 @@ static void rva22s64_profile_cpu_init(Object *obj)
 
 RVA22S64.enabled = true;
 }
+
+static void rv64_rvsp_ref_cpu_init(Object *obj)
+{
+CPURISCVState *env = &RISCV_CPU(obj)->env;
+RISCVCPU *cpu = RISCV_CPU(obj);
+
+riscv_cpu_set_misa_ext(env, RVG | RVC | RVS | RVU | RVH | RVV);
+
+/* FIXME: change to 1.13 */
+env->priv_ver = PRIV_VERSION_1_12_0;
+
+/* RVA22S64 */
+cpu->cfg.mmu = true;
+cpu->cfg.ext_zifencei = true;
+cpu->cfg.ext_zicsr = true;
+cpu->cfg.ext_zicntr = true;
+cpu->cfg.ext_zihpm = true;
+cpu->cfg.ext_zihintpause = true;
+cpu->cfg.ext_zba = true;
+cpu->cfg.ext_zbb = true;
+cpu->cfg.ext_zbs = true;
+cpu->cfg.ext_zic64b = true;
+cpu->cfg.ext_zicbom = true;
+cpu->cfg.ext_zicbop = true;
+cpu->cfg.ext_zicboz = true;
+cpu->cfg.cbom_blocksize = 64;
+cpu->cfg.cbop_blocksize = 64;
+cpu->cfg.cboz_blocksize = 64;
+cpu->cfg.ext_zfhmin = true;
+cpu->cfg.ext_zkt = true;
+cpu->cfg.ext_svade = true;
+cpu->cfg.ext_svpbmt = true;
+cpu->cfg.ext_svinval = true;
+
+/* RVA23U64 */
+cpu->cfg.ext_zvfhmin = true;
+cpu->cfg.ext_zvbb = true;
+cpu->cfg.ext_zvkt = true;
+cpu->cfg.ext_zihintntl = true;
+cpu->cfg.ext_zicond = true;
+cpu->cfg.ext_zcb = true;
+cpu->cfg.ext_zfa = true;
+cpu->cfg.ext_zawrs = true;
+
+/* RVA23S64 */
+cpu->cfg.ext_svnapot = true;
+cpu->cfg.ext_sstc = true;
+cpu->cfg.ext_sscofpmf = true;
+cpu->cfg.ext_smstateen = true;
+
+cpu->cfg.ext_smaia = true;
+cpu->cfg.ext_ssaia = true;
+
+/* Server Platform */
+#ifndef CONFIG_USER_ONLY
+set_satp_mode_max_supported(cpu, VM_1_10_SV48);
+#endif
+cpu->cfg.ext_svadu = true;
+cpu->cfg.ext_zkr = true;
+}
 #endif
 
 static const gchar *riscv_gdb_arch_name(CPUState *cs)
@@ -2577,6 +2637,7 @@ static const TypeInfo riscv_cpu_type_infos[] = {
 DEFINE_BARE_CPU(TYPE_RISCV_CPU_RV64E,MXL_RV64,  
rv64e_bare_cpu_init),
 DEFINE_PROFILE_CPU(TYPE_RISCV_CPU_RVA22U64,  MXL_RV64,  
rva22u64_profile_cpu_init),
 DEFINE_PROFILE_CPU(TYPE_RISCV_CPU_RVA22S64,  MXL_RV64,  
rva22s64_profile_cpu_init),
+DEFINE_VENDOR_CPU(TYPE_RISCV_CPU_RVSP_REF,   MXL_RV64,  
rv64_rvsp_ref_cpu_init),
 #endif
 };
 
-- 
2.34.1




[RFC v2 0/2] Add RISC-V Server Platform Reference Board

2024-03-12 Thread Fei Wu
The RISC-V Server Platform specification[1] defines a standardized set
of hardware and software capabilities, that portable system software,
such as OS and hypervisors can rely on being present in a RISC-V server
platform. This patchset provides a RISC-V Server Platform (RVSP)
reference implementation on qemu which is in compliance with the spec
as faithfully as possible.

The reference board can be run with tag edk2-stable202308 of the
upstream edk2 repo[2].

The qemu command line used:

$QEMU -nographic -m 4G -smp 2 \
-machine rvsp-ref,pflash0=pflash0,pflash1=pflash1 \
-blockdev 
node-name=pflash0,driver=file,read-only=on,filename=RISCV_VIRT_CODE.fd \
-blockdev node-name=pflash1,driver=file,filename=RISCV_VIRT_VARS.fd \
-bios fw_dynamic.bin \
-drive file=$BRS_IMG,if=ide,format=raw

Since there is no ACPI table generated in this new machine type, a
corresponding EDK-II platform (WIP) is needed to provide related ACPI
tables.

For testing purposes only, we used a workaround to generate the ACPI
tables in Qemu with a dedicated downstream patch.

[1] https://github.com/riscv-non-isa/riscv-server-platform
[2] https://github.com/tianocore/edk2.git


v2:
- move cpu definition to the 1st patch
- replace sifive_test with power reset syscon
- add versioning scheme to fdt
- integrate recent changes in virt.c
  - use g_autofree
  - create_fdt/finalize_fdt
  - riscv_isa_write_fdt
- some cleanups


Fei Wu (2):
  target/riscv: Add server platform reference cpu
  hw/riscv: Add server platform reference machine

 configs/devices/riscv64-softmmu/default.mak |1 +
 hw/riscv/Kconfig|   12 +
 hw/riscv/meson.build|1 +
 hw/riscv/server_platform_ref.c  | 1276 +++
 target/riscv/cpu-qom.h  |1 +
 target/riscv/cpu.c  |   61 +
 6 files changed, 1352 insertions(+)
 create mode 100644 hw/riscv/server_platform_ref.c

-- 
2.34.1




[RFC 2/2] target/riscv: Add server platform reference cpu

2024-03-04 Thread Fei Wu
The harts requirements of RISC-V server platform [1] require RVA23 ISA
profile support, plus Sv48, Svadu, H, Sscofpmf etc. This patch provides
a virt CPU type (rvsp-ref) as compliant as possible.

[1] 
https://github.com/riscv-non-isa/riscv-server-platform/blob/main/server_platform_requirements.adoc

Signed-off-by: Fei Wu 
---
 hw/riscv/server_platform_ref.c |  6 +++-
 target/riscv/cpu-qom.h |  1 +
 target/riscv/cpu.c | 62 ++
 3 files changed, 68 insertions(+), 1 deletion(-)

diff --git a/hw/riscv/server_platform_ref.c b/hw/riscv/server_platform_ref.c
index ae90c4b27a..52ec607cee 100644
--- a/hw/riscv/server_platform_ref.c
+++ b/hw/riscv/server_platform_ref.c
@@ -1205,11 +1205,15 @@ static void rvsp_ref_machine_class_init(ObjectClass 
*oc, void *data)
 {
 char str[128];
 MachineClass *mc = MACHINE_CLASS(oc);
+static const char * const valid_cpu_types[] = {
+TYPE_RISCV_CPU_RVSP_REF,
+};
 
 mc->desc = "RISC-V Server SoC Reference board";
 mc->init = rvsp_ref_machine_init;
 mc->max_cpus = RVSP_CPUS_MAX;
-mc->default_cpu_type = TYPE_RISCV_CPU_BASE;
+mc->default_cpu_type = TYPE_RISCV_CPU_RVSP_REF;
+mc->valid_cpu_types = valid_cpu_types;
 mc->pci_allow_0_address = true;
 mc->default_nic = "e1000e";
 mc->possible_cpu_arch_ids = riscv_numa_possible_cpu_arch_ids;
diff --git a/target/riscv/cpu-qom.h b/target/riscv/cpu-qom.h
index 3670cfe6d9..adb934d19e 100644
--- a/target/riscv/cpu-qom.h
+++ b/target/riscv/cpu-qom.h
@@ -49,6 +49,7 @@
 #define TYPE_RISCV_CPU_SIFIVE_U54   RISCV_CPU_TYPE_NAME("sifive-u54")
 #define TYPE_RISCV_CPU_THEAD_C906   RISCV_CPU_TYPE_NAME("thead-c906")
 #define TYPE_RISCV_CPU_VEYRON_V1RISCV_CPU_TYPE_NAME("veyron-v1")
+#define TYPE_RISCV_CPU_RVSP_REF RISCV_CPU_TYPE_NAME("rvsp-ref")
 #define TYPE_RISCV_CPU_HOST RISCV_CPU_TYPE_NAME("host")
 
 OBJECT_DECLARE_CPU_TYPE(RISCVCPU, RISCVCPUClass, RISCV_CPU)
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index 5ff0192c52..bc91be702b 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -2282,6 +2282,67 @@ static void rva22s64_profile_cpu_init(Object *obj)
 
 RVA22S64.enabled = true;
 }
+
+static void rv64_rvsp_ref_cpu_init(Object *obj)
+{
+CPURISCVState *env = &RISCV_CPU(obj)->env;
+RISCVCPU *cpu = RISCV_CPU(obj);
+
+riscv_cpu_set_misa_ext(env, RVG | RVC | RVS | RVU | RVH | RVV);
+
+/* FIXME: change to 1.13 */
+env->priv_ver = PRIV_VERSION_1_12_0;
+
+/* RVA22U64 */
+cpu->cfg.mmu = true;
+cpu->cfg.ext_zifencei = true;
+cpu->cfg.ext_zicsr = true;
+cpu->cfg.ext_zicntr = true;
+cpu->cfg.ext_zihpm = true;
+cpu->cfg.ext_zihintpause = true;
+cpu->cfg.ext_zba = true;
+cpu->cfg.ext_zbb = true;
+cpu->cfg.ext_zbs = true;
+cpu->cfg.zic64b = true;
+cpu->cfg.ext_zicbom = true;
+cpu->cfg.ext_zicbop = true;
+cpu->cfg.ext_zicboz = true;
+cpu->cfg.cbom_blocksize = 64;
+cpu->cfg.cbop_blocksize = 64;
+cpu->cfg.cboz_blocksize = 64;
+cpu->cfg.ext_zfhmin = true;
+cpu->cfg.ext_zkt = true;
+
+/* RVA23U64 */
+cpu->cfg.ext_zvfhmin = true;
+cpu->cfg.ext_zvbb = true;
+cpu->cfg.ext_zvkt = true;
+cpu->cfg.ext_zihintntl = true;
+cpu->cfg.ext_zicond = true;
+cpu->cfg.ext_zcb = true;
+cpu->cfg.ext_zfa = true;
+cpu->cfg.ext_zawrs = true;
+
+/* RVA23S64 */
+cpu->cfg.ext_zifencei = true;
+cpu->cfg.svade = true;
+cpu->cfg.ext_svpbmt = true;
+cpu->cfg.ext_svinval = true;
+cpu->cfg.ext_svnapot = true;
+cpu->cfg.ext_sstc = true;
+cpu->cfg.ext_sscofpmf = true;
+cpu->cfg.ext_smstateen = true;
+
+cpu->cfg.ext_smaia = true;
+cpu->cfg.ext_ssaia = true;
+
+/* Server Platform */
+#ifndef CONFIG_USER_ONLY
+set_satp_mode_max_supported(cpu, VM_1_10_SV48);
+#endif
+cpu->cfg.ext_svadu = true;
+cpu->cfg.ext_zkr = true;
+}
 #endif
 
 static const gchar *riscv_gdb_arch_name(CPUState *cs)
@@ -2547,6 +2608,7 @@ static const TypeInfo riscv_cpu_type_infos[] = {
 DEFINE_BARE_CPU(TYPE_RISCV_CPU_RV64E,MXL_RV64,  
rv64e_bare_cpu_init),
 DEFINE_PROFILE_CPU(TYPE_RISCV_CPU_RVA22U64,  MXL_RV64,  
rva22u64_profile_cpu_init),
 DEFINE_PROFILE_CPU(TYPE_RISCV_CPU_RVA22S64,  MXL_RV64,  
rva22s64_profile_cpu_init),
+DEFINE_VENDOR_CPU(TYPE_RISCV_CPU_RVSP_REF,   MXL_RV64,  
rv64_rvsp_ref_cpu_init),
 #endif
 };
 
-- 
2.34.1




[RFC 1/2] hw/riscv: Add server platform reference machine

2024-03-04 Thread Fei Wu
The RISC-V Server Platform specification[1] defines a standardized set
of hardware and software capabilities, that portable system software,
such as OS and hypervisors can rely on being present in a RISC-V server
platform.

A corresponding Qemu RISC-V server platform reference (rvsp-ref for
short) machine type is added to provide an environment for firmware/OS
development and testing. The main features included in rvsp-ref are:

 - Based on riscv virt machine type
 - A new memory map as close to the virt machine's as possible
 - A new virt CPU type rvsp-ref-cpu for server platform compliance
 - AIA
 - PCIe AHCI
 - PCIe NIC
 - No virtio device
 - No fw_cfg device
 - No ACPI table provided
 - Only minimal device tree nodes

[1] https://github.com/riscv-non-isa/riscv-server-platform

Signed-off-by: Fei Wu 
---
 configs/devices/riscv64-softmmu/default.mak |1 +
 hw/riscv/Kconfig|   13 +
 hw/riscv/meson.build|1 +
 hw/riscv/server_platform_ref.c  | 1244 +++
 4 files changed, 1259 insertions(+)
 create mode 100644 hw/riscv/server_platform_ref.c

diff --git a/configs/devices/riscv64-softmmu/default.mak 
b/configs/devices/riscv64-softmmu/default.mak
index 3f68059448..a1d98e49ef 100644
--- a/configs/devices/riscv64-softmmu/default.mak
+++ b/configs/devices/riscv64-softmmu/default.mak
@@ -10,5 +10,6 @@ CONFIG_SPIKE=y
 CONFIG_SIFIVE_E=y
 CONFIG_SIFIVE_U=y
 CONFIG_RISCV_VIRT=y
+CONFIG_SERVER_PLATFORM_REF=y
 CONFIG_MICROCHIP_PFSOC=y
 CONFIG_SHAKTI_C=y
diff --git a/hw/riscv/Kconfig b/hw/riscv/Kconfig
index 5d644eb7b1..debac5a7f5 100644
--- a/hw/riscv/Kconfig
+++ b/hw/riscv/Kconfig
@@ -48,6 +48,19 @@ config RISCV_VIRT
 select ACPI
 select ACPI_PCI
 
+config SERVER_PLATFORM_REF
+bool
+select RISCV_NUMA
+select GOLDFISH_RTC
+select PCI
+select PCI_EXPRESS_GENERIC_BRIDGE
+select PFLASH_CFI01
+select SERIAL
+select RISCV_ACLINT
+select RISCV_APLIC
+select RISCV_IMSIC
+select SIFIVE_TEST
+
 config SHAKTI_C
 bool
 select RISCV_ACLINT
diff --git a/hw/riscv/meson.build b/hw/riscv/meson.build
index 2f7ee81be3..bb3aff91ea 100644
--- a/hw/riscv/meson.build
+++ b/hw/riscv/meson.build
@@ -4,6 +4,7 @@ riscv_ss.add(when: 'CONFIG_RISCV_NUMA', if_true: 
files('numa.c'))
 riscv_ss.add(files('riscv_hart.c'))
 riscv_ss.add(when: 'CONFIG_OPENTITAN', if_true: files('opentitan.c'))
 riscv_ss.add(when: 'CONFIG_RISCV_VIRT', if_true: files('virt.c'))
+riscv_ss.add(when: 'CONFIG_SERVER_PLATFORM_REF', if_true: 
files('server_platform_ref.c'))
 riscv_ss.add(when: 'CONFIG_SHAKTI_C', if_true: files('shakti_c.c'))
 riscv_ss.add(when: 'CONFIG_SIFIVE_E', if_true: files('sifive_e.c'))
 riscv_ss.add(when: 'CONFIG_SIFIVE_U', if_true: files('sifive_u.c'))
diff --git a/hw/riscv/server_platform_ref.c b/hw/riscv/server_platform_ref.c
new file mode 100644
index 00..ae90c4b27a
--- /dev/null
+++ b/hw/riscv/server_platform_ref.c
@@ -0,0 +1,1244 @@
+/*
+ * QEMU RISC-V Server Platform (RVSP) Reference Board
+ *
+ * Copyright (c) 2024 Intel, Inc.
+ *
+ * This board is compliant with the RISC-V Server Platform specification and
+ * leverages a lot of riscv virt code.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/units.h"
+#include "qemu/error-report.h"
+#include "qemu/guest-random.h"
+#include "qapi/error.h"
+#include "qapi/qapi-visit-common.h"
+#include "hw/boards.h"
+#include "hw/loader.h"
+#include "hw/sysbus.h"
+#include "hw/qdev-properties.h"
+#include "hw/char/serial.h"
+#include "hw/block/flash.h"
+#include "hw/ide/pci.h"
+#include "hw/ide/ahci-pci.h"
+#include "hw/pci/pci.h"
+#include "hw/pci-host/gpex.h"
+#include "hw/core/sysbus-fdt.h"
+#include "hw/riscv/riscv_hart.h"
+#include "hw/riscv/boot.h"
+#include "hw/riscv/numa.h"
+#include "hw/intc/riscv_aclint.h"
+#include "hw/intc/riscv_aplic.h"
+#include "hw/intc/riscv_imsic.h"
+#include "hw/misc/sifive_test.h"
+#include "chardev/char.h"
+#include "sysemu/device_

[RFC 0/2] Add RISC-V Server Platform Reference Board

2024-03-04 Thread Fei Wu
The RISC-V Server Platform specification[1] defines a standardized set
of hardware and software capabilities, that portable system software,
such as OS and hypervisors can rely on being present in a RISC-V server
platform. This patchset provides a RISC-V Server Platform (RVSP)
reference implementation on qemu which is in compliance with the spec
as faithfully as possible.

The reference board can be run with tag edk2-stable202308 of the
upstream edk2 repo[2].

The qemu command line used:

$QEMU -nographic -m 4G -smp 2 \
-machine rvsp-ref,pflash0=pflash0,pflash1=pflash1 \
-blockdev 
node-name=pflash0,driver=file,read-only=on,filename=RISCV_VIRT_CODE.fd \
-blockdev node-name=pflash1,driver=file,filename=RISCV_VIRT_VARS.fd \
-bios fw_dynamic.bin \
-drive file=$BRS_IMG,if=ide,format=raw

Since there is no ACPI table generated in this new machine type, a
corresponding EDK-II platform (WIP) is needed to provide related ACPI
tables.

For testing purposes only, we used a workaround to generate the ACPI
tables in Qemu with a dedicated downstream patch.

[1] https://github.com/riscv-non-isa/riscv-server-platform
[2] https://github.com/tianocore/edk2.git

Fei Wu (2):
  hw/riscv: Add server platform reference machine
  target/riscv: Add server platform reference cpu

 configs/devices/riscv64-softmmu/default.mak |1 +
 hw/riscv/Kconfig|   13 +
 hw/riscv/meson.build|1 +
 hw/riscv/server_platform_ref.c  | 1248 +++
 target/riscv/cpu-qom.h  |1 +
 target/riscv/cpu.c  |   62 +
 6 files changed, 1326 insertions(+)
 create mode 100644 hw/riscv/server_platform_ref.c

-- 
2.34.1




[PATCH] hw/riscv: split RAM into low and high memory

2023-07-30 Thread Fei Wu
riscv virt platform's memory started at 0x80000000 and
straddled the 4GiB boundary. Curiously enough, this choice
of a memory layout will prevent launching a VM with
a bit more than 2000MiB and PCIe pass-thru on an x86 host, due
to identity mapping requirements for the MSI doorbell on x86,
and these (APIC/IOAPIC) live right below 4GiB.

So just split the RAM range into two portions:
- 1 GiB range from 0x80000000 to 0xc0000000.
- The remainder at 0x100000000

...leaving a hole between the ranges.

Signed-off-by: Andrei Warkentin 
Signed-off-by: Fei Wu 
---
 hw/riscv/virt.c | 74 -
 include/hw/riscv/virt.h |  1 +
 2 files changed, 66 insertions(+), 9 deletions(-)

diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
index d90286dc46..8fbdc7220c 100644
--- a/hw/riscv/virt.c
+++ b/hw/riscv/virt.c
@@ -75,7 +75,9 @@
 #error "Can't accomodate all IMSIC groups in address space"
 #endif
 
-static const MemMapEntry virt_memmap[] = {
+#define LOW_MEM (1 * GiB)
+
+static MemMapEntry virt_memmap[] = {
 [VIRT_DEBUG] ={0x0, 0x100 },
 [VIRT_MROM] = { 0x1000,0xf000 },
 [VIRT_TEST] = {   0x10,0x1000 },
@@ -96,6 +98,7 @@ static const MemMapEntry virt_memmap[] = {
 [VIRT_PCIE_ECAM] ={ 0x3000,0x1000 },
 [VIRT_PCIE_MMIO] ={ 0x4000,0x4000 },
 [VIRT_DRAM] = { 0x8000,   0x0 },
+[VIRT_DRAM_HIGH] ={ 0x1,  0x0 },
 };
 
 /* PCIe high mmio is fixed for RV32 */
@@ -295,15 +298,12 @@ static void create_fdt_socket_cpus(RISCVVirtState *s, int 
socket,
 }
 }
 
-static void create_fdt_socket_memory(RISCVVirtState *s,
- const MemMapEntry *memmap, int socket)
+static void create_fdt_socket_mem_range(RISCVVirtState *s, uint64_t addr,
+uint64_t size, int socket)
 {
 char *mem_name;
-uint64_t addr, size;
 MachineState *ms = MACHINE(s);
 
-addr = memmap[VIRT_DRAM].base + riscv_socket_mem_offset(ms, socket);
-size = riscv_socket_mem_size(ms, socket);
 mem_name = g_strdup_printf("/memory@%lx", (long)addr);
 qemu_fdt_add_subnode(ms->fdt, mem_name);
 qemu_fdt_setprop_cells(ms->fdt, mem_name, "reg",
@@ -313,6 +313,34 @@ static void create_fdt_socket_memory(RISCVVirtState *s,
 g_free(mem_name);
 }
 
+static void create_fdt_socket_memory(RISCVVirtState *s,
+ const MemMapEntry *memmap, int socket)
+{
+uint64_t addr, size;
+MachineState *mc = MACHINE(s);
+uint64_t sock_offset = riscv_socket_mem_offset(mc, socket);
+uint64_t sock_size = riscv_socket_mem_size(mc, socket);
+
+if (sock_offset < memmap[VIRT_DRAM].size) {
+uint64_t low_mem_end = memmap[VIRT_DRAM].base + memmap[VIRT_DRAM].size;
+
+addr = memmap[VIRT_DRAM].base + sock_offset;
+size = MIN(low_mem_end - addr, sock_size);
+create_fdt_socket_mem_range(s, addr, size, socket);
+
+size = sock_size - size;
+if (size > 0) {
+create_fdt_socket_mem_range(s, memmap[VIRT_DRAM_HIGH].base,
+size, socket);
+}
+} else {
+addr = memmap[VIRT_DRAM_HIGH].base +
+   sock_offset - memmap[VIRT_DRAM].size;
+
+create_fdt_socket_mem_range(s, addr, sock_size, socket);
+}
+}
+
 static void create_fdt_socket_clint(RISCVVirtState *s,
 const MemMapEntry *memmap, int socket,
 uint32_t *intc_phandles)
@@ -1334,10 +1362,12 @@ static void virt_machine_done(Notifier *notifier, void 
*data)
 
 static void virt_machine_init(MachineState *machine)
 {
-const MemMapEntry *memmap = virt_memmap;
+MemMapEntry *memmap = virt_memmap;
 RISCVVirtState *s = RISCV_VIRT_MACHINE(machine);
 MemoryRegion *system_memory = get_system_memory();
 MemoryRegion *mask_rom = g_new(MemoryRegion, 1);
+MemoryRegion *ram_below_4g, *ram_above_4g;
+uint64_t ram_size_low, ram_size_high;
 char *soc_name;
 DeviceState *mmio_irqchip, *virtio_irqchip, *pcie_irqchip;
 int i, base_hartid, hart_count;
@@ -1448,6 +1478,17 @@ static void virt_machine_init(MachineState *machine)
 }
 }
 
+if (machine->ram_size > LOW_MEM) {
+ram_size_high = machine->ram_size - LOW_MEM;
+ram_size_low = LOW_MEM;
+} else {
+ram_size_high = 0;
+ram_size_low = machine->ram_size;
+}
+
+memmap[VIRT_DRAM].size = ram_size_low;
+memmap[VIRT_DRAM_HIGH].size = ram_size_high;
+
 if (riscv_is_32bit(&s->soc[0])) {
 #if HOST_LONG_BITS == 64
 /* limit RAM size in a 32-bit system */
@@ -1460,7 +1501,8 @@ static void virt_machine_init(MachineState *machine)
 virt_high_pcie_memmap.size = VIRT32_HIGH_PCIE_MMIO_SIZE;
 } 

[PATCH v16 5/9] tb-stats: reset the tracked TBs on a tb_flush

2023-06-28 Thread Fei Wu
From: Alex Bennée 

We keep track of translations but can only do so up until the
translation cache is flushed. At that point we really have no idea if
we can re-create a translation because all the active tracking
information has been reset.

Signed-off-by: Alex Bennée 
Reviewed-by: Richard Henderson 
Signed-off-by: Fei Wu 
---
 accel/tcg/tb-maint.c|  1 +
 accel/tcg/tb-stats.c| 18 ++
 include/exec/tb-stats.h |  8 
 3 files changed, 27 insertions(+)

diff --git a/accel/tcg/tb-maint.c b/accel/tcg/tb-maint.c
index 264bdd84b3..1ebe6fc60e 100644
--- a/accel/tcg/tb-maint.c
+++ b/accel/tcg/tb-maint.c
@@ -762,6 +762,7 @@ static void do_tb_flush(CPUState *cpu, run_on_cpu_data 
tb_flush_count)
 qht_reset_size(&tb_ctx.htable, CODE_GEN_HTABLE_SIZE);
 tb_remove_all();
 
+tbstats_reset_tbs();
 tcg_region_reset_all();
 /* XXX: flush processor icache at this point if cache flush is expensive */
 qatomic_inc(&tb_ctx.tb_flush_count);
diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c
index c90dde37d0..7c7f700c89 100644
--- a/accel/tcg/tb-stats.c
+++ b/accel/tcg/tb-stats.c
@@ -103,6 +103,24 @@ void clean_tbstats(void)
 qht_destroy(&tb_ctx.tb_stats);
 }
 
+/*
+ * We have to reset the tbs array on a tb_flush as those
+ * TranslationBlocks no longer exist.
+ */
+
+static void reset_tbs_array(void *p, uint32_t hash, void *userp)
+{
+TBStatistics *tbs = p;
+g_ptr_array_set_size(tbs->tbs, 0);
+}
+
+void tbstats_reset_tbs(void)
+{
+if (tb_ctx.tb_stats.map) {
+qht_iter(&tb_ctx.tb_stats, reset_tbs_array, NULL);
+}
+}
+
 void init_tb_stats_htable(void)
 {
 if (!tb_ctx.tb_stats.map && tb_stats_collection_enabled()) {
diff --git a/include/exec/tb-stats.h b/include/exec/tb-stats.h
index ef6e8b6388..cef177bc69 100644
--- a/include/exec/tb-stats.h
+++ b/include/exec/tb-stats.h
@@ -94,4 +94,12 @@ void dump_jit_profile_info(GString *buf);
 
 void clean_tbstats(void);
 
+/**
+ * tbstats_reset_tbs: reset the linked array of TBs
+ *
+ * Reset the list of tbs for a given array. Should be called from
+ * safe work during tb_flush.
+ */
+void tbstats_reset_tbs(void);
+
 #endif
-- 
2.25.1




[PATCH v16 6/9] tb-stats: Adding info [tb-list|tb] commands to HMP

2023-06-28 Thread Fei Wu
From: "Vanderson M. do Rosario" 

These commands allow the exploration of TBs generated by the TCG.
Understand which ones are hotter, with more guest/host instructions... and
examine their guest code.

The goal of this command is to allow the dynamic exploration of TCG
behavior and code quality. Therefore, for now, a corresponding QMP
command is not worthwhile.

Example of output:

--

TB id:0 | phys:0xa21f562e virt:0x flags:0x00028010 0 inv/1
| exec:6171503732/0 guest inst cov:94.77%
| trans:1 ints: g:8 op:28 op_opt:24 spills:0
| h/g (host bytes / guest insts): 37.00

0xa21f562e:  2797  auipc a5,8192   # 0xa21f762e
0xa21f5632:  a2278793  addi  a5,a5,-1502
0xa21f5636:  639c  lda5,0(a5)
0xa21f5638:  00178713  addi  a4,a5,1
0xa21f563c:  2797  auipc a5,8192   # 0xa21f763c
0xa21f5640:  a1478793  addi  a5,a5,-1516
0xa21f5644:  e398  sda4,0(a5)
0xa21f5646:  b7e5  j -24   # 0xa21f562e

--

Acked-by: Dr. David Alan Gilbert 
Signed-off-by: Vanderson M. do Rosario 
Message-Id: <20190829173437.5926-10-vanderson...@gmail.com>
[AJB: fix authorship, dropped coverset]
Signed-off-by: Alex Bennée 
Signed-off-by: Fei Wu 
---
 accel/tcg/monitor.c   | 112 
 accel/tcg/tb-stats.c  | 177 ++
 disas/disas-mon.c |  15 +++-
 disas/disas.c |   2 +
 hmp-commands-info.hx  |  16 
 include/disas/disas.h |   8 +-
 include/exec/tb-stats.h   |  25 ++
 include/monitor/hmp.h |   2 +
 monitor/hmp-cmds-target.c |   3 +-
 9 files changed, 356 insertions(+), 4 deletions(-)

diff --git a/accel/tcg/monitor.c b/accel/tcg/monitor.c
index 60b66f16ff..0ce5a1cb45 100644
--- a/accel/tcg/monitor.c
+++ b/accel/tcg/monitor.c
@@ -8,18 +8,22 @@
 
 #include "qemu/osdep.h"
 #include "qemu/accel.h"
+#include "qemu/log.h"
 #include "qapi/error.h"
 #include "qapi/type-helpers.h"
 #include "qapi/qapi-commands-machine.h"
 #include "qapi/qmp/qdict.h"
 #include "monitor/monitor.h"
 #include "monitor/hmp.h"
+#include "monitor/hmp-target.h"
 #include "sysemu/cpus.h"
 #include "sysemu/cpu-timers.h"
 #include "sysemu/tcg.h"
 #include "tcg/tcg.h"
 #include "exec/tb-stats.h"
 #include "exec/tb-flush.h"
+#include "disas/disas.h"
+#include "tb-context.h"
 #include "internal.h"
 #include "tb-context.h"
 
@@ -185,6 +189,114 @@ void hmp_tbstats(Monitor *mon, const QDict *qdict)
 
 }
 
+struct tblist_dump_info {
+int count;
+int sortedby;
+Monitor *mon;
+};
+
+static void do_dump_tblist_info_safe(CPUState *cpu, run_on_cpu_data info)
+{
+struct tblist_dump_info *tbdi = info.host_ptr;
+g_autoptr(GString) buf = g_string_new("");
+
+dump_tblist_info(buf, tbdi->count, tbdi->sortedby);
+monitor_printf(tbdi->mon, "%s", buf->str);
+
+g_free(tbdi);
+}
+
+void hmp_info_tblist(Monitor *mon, const QDict *qdict)
+{
+int number_int;
+const char *sortedby_str = NULL;
+
+if (!tcg_enabled()) {
+monitor_printf(mon, "Only available with accel=tcg\n");
+return;
+}
+if (!tb_ctx.tb_stats.map) {
+monitor_printf(mon, "no TB information recorded\n");
+return;
+}
+
+number_int = qdict_get_try_int(qdict, "number", 10);
+sortedby_str = qdict_get_try_str(qdict, "sortedby");
+
+int sortedby = SORT_BY_HOTNESS;
+if (sortedby_str == NULL || strcmp(sortedby_str, "hotness") == 0) {
+sortedby = SORT_BY_HOTNESS;
+} else if (strcmp(sortedby_str, "hg") == 0) {
+sortedby = SORT_BY_HG;
+} else if (strcmp(sortedby_str, "spills") == 0) {
+sortedby = SORT_BY_SPILLS;
+} else {
+monitor_printf(mon, "valid sort options are: hotness hg spills\n");
+return;
+}
+
+struct tblist_dump_info *tbdi = g_new(struct tblist_dump_info, 1);
+tbdi->count = number_int;
+tbdi->sortedby = sortedby;
+tbdi->mon = mon;
+async_safe_run_on_cpu(first_cpu, do_dump_tblist_info_safe,
+  RUN_ON_CPU_HOST_PTR(tbdi));
+}
+
+struct tb_dump_info {
+int id;
+Monitor *mon;
+};
+
+static void do_dump_tb_info_safe(CPUState *cpu, run_on_cpu_data info)
+{
+struct tb_dump_info *tbdi = info.host_ptr;
+int id = tbdi->id;
+Monitor *mon = tbdi->mon;
+g_autoptr(GString) buf = g_string_new("");
+
+TBStatistics *tbs = get_tbstats_by_id(id);
+if (tbs == NULL) {
+monitor_printf(mon, "TB %d information is not recorded\n", 

[PATCH v16 2/9] accel: collecting TB execution count

2023-06-28 Thread Fei Wu
From: "Vanderson M. do Rosario" 

If a TB has a TBS (TBStatistics) with the TB_EXEC_STATS
enabled, then we instrument the start code of this TB
to atomically count the number of times it is executed.
We count both the number of "normal" executions and atomic
executions of a TB.

The execution count of the TB is stored in its respective
TBS.

All TBStatistics are created by default with the flags from
default_tbstats_flag.

[Richard Henderson created the inline gen_tb_exec_count]

Signed-off-by: Vanderson M. do Rosario 
Message-Id: <20190829173437.5926-3-vanderson...@gmail.com>
[AJB: Fix author]
Signed-off-by: Alex Bennée 
Signed-off-by: Fei Wu 
---
 accel/tcg/cpu-exec.c|  6 ++
 accel/tcg/tb-stats.c| 18 ++
 accel/tcg/tcg-runtime.c |  1 +
 accel/tcg/translate-all.c   |  6 --
 accel/tcg/translator.c  | 26 ++
 include/exec/tb-stats-flags.h   |  6 ++
 include/exec/tb-stats.h |  7 +++
 include/tcg/tcg-temp-internal.h |  2 ++
 8 files changed, 70 insertions(+), 2 deletions(-)

diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
index ba1890a373..ba8ac61069 100644
--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -25,6 +25,7 @@
 #include "trace.h"
 #include "disas/disas.h"
 #include "exec/exec-all.h"
+#include "exec/tb-stats.h"
 #include "tcg/tcg.h"
 #include "qemu/atomic.h"
 #include "qemu/rcu.h"
@@ -562,7 +563,12 @@ void cpu_exec_step_atomic(CPUState *cpu)
 mmap_unlock();
 }
 
+if (tb_stats_enabled(tb, TB_EXEC_STATS)) {
+tb->tb_stats->executions.atomic++;
+}
+
 cpu_exec_enter(cpu);
+
 /* execute the generated code */
 trace_exec_tb(tb, pc);
 cpu_tb_exec(cpu, tb, &tb_exit);
diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c
index 2b6be943ef..2b7321b548 100644
--- a/accel/tcg/tb-stats.c
+++ b/accel/tcg/tb-stats.c
@@ -20,6 +20,7 @@ enum TBStatsStatus {
 };
 
 static enum TBStatsStatus tcg_collect_tb_stats;
+static uint32_t tbstats_flag;
 
 void init_tb_stats_htable(void)
 {
@@ -44,3 +45,20 @@ bool tb_stats_collection_enabled(void)
 {
 return tcg_collect_tb_stats == TB_STATS_RUNNING;
 }
+
+uint32_t get_tbstats_flag(void)
+{
+return tbstats_flag;
+}
+
+void set_tbstats_flag(uint32_t flag)
+{
+tbstats_flag = flag;
+}
+
+bool tb_stats_enabled(TranslationBlock *tb, uint32_t flag)
+{
+return tb_stats_collection_enabled() &&
+   tb->tb_stats &&
+   (tbstats_flag & flag);
+}
diff --git a/accel/tcg/tcg-runtime.c b/accel/tcg/tcg-runtime.c
index 9fa539ad3d..14d684c2ee 100644
--- a/accel/tcg/tcg-runtime.c
+++ b/accel/tcg/tcg-runtime.c
@@ -27,6 +27,7 @@
 #include "exec/helper-proto-common.h"
 #include "exec/cpu_ldst.h"
 #include "exec/exec-all.h"
+#include "exec/tb-stats.h"
 #include "disas/disas.h"
 #include "exec/log.h"
 #include "tcg/tcg.h"
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index 5f47b862d8..d7a35e0ea5 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -388,9 +388,11 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
 /*
  * We want to fetch the stats structure before we start code
  * generation so we can count interesting things about this
- * generation.
+ * generation. If dfilter is in effect we will only collect stats
+ * for the specified range.
  */
-if (tb_stats_collection_enabled()) {
+if (tb_stats_collection_enabled() &&
+qemu_log_in_addr_range(tb->pc)) {
 tb->tb_stats = tb_get_stats(phys_pc, cflags & CF_PCREL ? 0 : pc,
 cs_base, flags);
 } else {
diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c
index 0fd9efceba..4743fa7d01 100644
--- a/accel/tcg/translator.c
+++ b/accel/tcg/translator.c
@@ -10,10 +10,12 @@
 #include "qemu/osdep.h"
 #include "qemu/log.h"
 #include "qemu/error-report.h"
+#include "tcg/tcg-temp-internal.h"
 #include "exec/exec-all.h"
 #include "exec/translator.h"
 #include "exec/translate-all.h"
 #include "exec/plugin-gen.h"
+#include "exec/tb-stats.h"
 #include "tcg/tcg-op-common.h"
 
 static void gen_io_start(void)
@@ -117,6 +119,29 @@ static void gen_tb_end(const TranslationBlock *tb, 
uint32_t cflags,
 }
 }
 
+static void gen_tb_exec_count(TranslationBlock *tb)
+{
+if (tb_stats_enabled(tb, TB_EXEC_STATS)) {
+TCGv_ptr ptr = tcg_temp_ebb_new_ptr();
+
+tcg_gen_movi_ptr(ptr, (intptr_t)&tb->tb_stats->executions.normal);
+if (sizeof(tb->tb_stats->executions.normal) == 4) {
+TCGv_i32 t = tcg_temp_ebb_new_i32();
+  

[PATCH v16 4/9] monitor: adding tb_stats hmp command

2023-06-28 Thread Fei Wu
From: "Vanderson M. do Rosario" 

Adding tb_stats [start|stop|status] command to hmp.
This allows controlling the collection of statistics.
It is also possible to set the flag of collection:
all, jit, or exec.

The goal of this command is to allow the dynamic exploration
of the TCG behavior and quality. Therefore, for now, a
corresponding QMP command is not worthwhile.

Acked-by: Dr. David Alan Gilbert 
Signed-off-by: Vanderson M. do Rosario 
Message-Id: <20190829173437.5926-8-vanderson...@gmail.com>
Message-Id: <20190829173437.5926-9-vanderson...@gmail.com>
[AJB: fix authorship]
Signed-off-by: Alex Bennée 
Signed-off-by: Fei Wu 
---
 accel/tcg/monitor.c   | 98 +++
 accel/tcg/tb-stats.c  | 17 ++
 hmp-commands.hx   | 16 ++
 include/exec/tb-stats-flags.h |  1 +
 include/exec/tb-stats.h   |  4 ++
 include/monitor/hmp.h |  1 +
 6 files changed, 137 insertions(+)

diff --git a/accel/tcg/monitor.c b/accel/tcg/monitor.c
index 567950a7ed..60b66f16ff 100644
--- a/accel/tcg/monitor.c
+++ b/accel/tcg/monitor.c
@@ -11,14 +11,19 @@
 #include "qapi/error.h"
 #include "qapi/type-helpers.h"
 #include "qapi/qapi-commands-machine.h"
+#include "qapi/qmp/qdict.h"
 #include "monitor/monitor.h"
+#include "monitor/hmp.h"
 #include "sysemu/cpus.h"
 #include "sysemu/cpu-timers.h"
 #include "sysemu/tcg.h"
 #include "tcg/tcg.h"
 #include "exec/tb-stats.h"
+#include "exec/tb-flush.h"
 #include "internal.h"
+#include "tb-context.h"
 
+enum TbstatsCmd { TBS_CMD_START, TBS_CMD_STOP, TBS_CMD_STATUS };
 
 static void dump_drift_info(GString *buf)
 {
@@ -87,6 +92,99 @@ HumanReadableText *qmp_x_query_opcount(Error **errp)
 return human_readable_text_from_str(buf);
 }
 
+struct TbstatsCommand {
+enum TbstatsCmd cmd;
+uint32_t flag;
+Monitor *mon;
+};
+
+static void do_hmp_tbstats_safe(CPUState *cpu, run_on_cpu_data icmd)
+{
+struct TbstatsCommand *cmdinfo = icmd.host_ptr;
+int cmd = cmdinfo->cmd;
+uint32_t flag = cmdinfo->flag;
+Monitor *mon = cmdinfo->mon;
+
+switch (cmd) {
+case TBS_CMD_START:
+if (tb_stats_collection_enabled()) {
+monitor_printf(mon, "TB information already being recorded\n");
+break;
+}
+
+set_tbstats_flag(flag);
+enable_collect_tb_stats();
+tb_flush(cpu);
+break;
+case TBS_CMD_STOP:
+if (tb_stats_collection_disabled()) {
+monitor_printf(mon, "TB information not being recorded\n");
+break;
+}
+
+/* Dissalloc all TBStatistics structures and stop creating new ones */
+disable_collect_tb_stats();
+clean_tbstats();
+tb_flush(cpu);
+break;
+case TBS_CMD_STATUS:
+if (tb_stats_collection_enabled()) {
+uint32_t flag = get_tbstats_flag();
+monitor_printf(mon, "tb_stats is enabled with flag:\n");
+monitor_printf(mon, "EXEC: %d\n", !!(flag & TB_EXEC_STATS));
+monitor_printf(mon, " JIT: %d\n", !!(flag & TB_JIT_STATS));
+} else {
+monitor_printf(mon, "tb_stats is disabled\n");
+}
+break;
+default: /* INVALID */
+g_assert_not_reached();
+break;
+}
+
+g_free(cmdinfo);
+}
+
+void hmp_tbstats(Monitor *mon, const QDict *qdict)
+{
+if (!tcg_enabled()) {
+monitor_printf(mon, "Only available with accel=tcg\n");
+return;
+}
+
+char *cmd = (char *) qdict_get_try_str(qdict, "command");
+enum TbstatsCmd icmd = -1;
+
+if (strcmp(cmd, "start") == 0) {
+icmd = TBS_CMD_START;
+} else if (strcmp(cmd, "stop") == 0) {
+icmd = TBS_CMD_STOP;
+} else if (strcmp(cmd, "status") == 0) {
+icmd = TBS_CMD_STATUS;
+} else {
+monitor_printf(mon, "Invalid command\n");
+return;
+}
+
+char *sflag = (char *) qdict_get_try_str(qdict, "flag");
+uint32_t flag = TB_EXEC_STATS | TB_JIT_STATS;
+if (sflag) {
+if (strcmp(sflag, "jit") == 0) {
+flag = TB_JIT_STATS;
+} else if (strcmp(sflag, "exec") == 0) {
+flag = TB_EXEC_STATS;
+}
+}
+
+struct TbstatsCommand *tbscommand = g_new0(struct TbstatsCommand, 1);
+tbscommand->cmd = icmd;
+tbscommand->flag = flag;
+tbscommand->mon = mon;
+async_safe_run_on_cpu(first_cpu, do_hmp_tbstats_safe,
+  RUN_ON_CPU_HOST_PTR(tbscommand));
+
+}
+
 static void hmp_tcg_register(void)
 {
 monitor_register_hmp_info_hrt("jit", qmp_x_query_jit);
diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c
in

[PATCH v16 9/9] docs: add tb-stats how to

2023-06-28 Thread Fei Wu
Signed-off-by: Fei Wu 
---
 docs/devel/index-tcg.rst   |   1 +
 docs/devel/tcg-tbstats.rst | 126 +
 2 files changed, 127 insertions(+)
 create mode 100644 docs/devel/tcg-tbstats.rst

diff --git a/docs/devel/index-tcg.rst b/docs/devel/index-tcg.rst
index b44ff8b5a4..8d2832737f 100644
--- a/docs/devel/index-tcg.rst
+++ b/docs/devel/index-tcg.rst
@@ -14,4 +14,5 @@ are only implementing things for HW accelerated hypervisors.
multi-thread-tcg
tcg-icount
tcg-plugins
+   tcg-tbstats
replay
diff --git a/docs/devel/tcg-tbstats.rst b/docs/devel/tcg-tbstats.rst
new file mode 100644
index 00..3940a3cb9d
--- /dev/null
+++ b/docs/devel/tcg-tbstats.rst
@@ -0,0 +1,126 @@
+============
+TBStatistics
+============
+
+What is TBStatistics
+====================
+
+TBStatistics (tb_stats) is a tool to gather various internal information of TCG
+during binary translation. This allows us to identify things such as the
+hottest TBs, the guest to host instruction translation ratio, the number of
+spills during register allocation and more.
+
+What does TBStatistics collect
+==============================
+
+TBStatistics mainly collects the following stats:
+
+* TB exec stats, e.g. the execution count of each TB
+* TB jit stats, e.g. guest insn count, tcg ops, tcg spill etc.
+* opcount of each instruction, use 'info opcount' to show it
+
+
+How to use TBStatistics
+=======================
+
+1. HMP interface
+----------------
+
+TBStatistics provides an HMP interface; you can try the following examples
+after connecting to the monitor.
+
+* First check the help info::
+
+(qemu) help tb_stats
+tb_stats command [flag] -- Control tb statistics collection:tb_stats 
(start|stop|status) [all|jit|exec]
+
+(qemu) help info tb-list
+info tb-list [number sortedby] -- show a [number] translated blocks sorted 
by [sortedby]sortedby opts: hotness hg spills
+
+(qemu) help info tb
+info tb id [flag1,flag2,...] -- show information about one translated 
block by id.dump flags can be used to set dump code level: out_asm in_asm op
+
+* Enable TBStatistics::
+
+(qemu) tb_stats start all
+(qemu)
+
+* Get the list of TBs of interest::
+
+(qemu) info tb-list 2
+TB id:0 | phys:0x79bca0 virt:0x8059bca0 flags:0x01024001 0 inv/1
+| exec:1464084/0 guest inst cov:0.15%
+| trans:1 inst: g:3 op:16 op_opt:15 spills:0
+| h/g (host bytes / guest insts): 64.00
+
+TB id:1 | phys:0x2adf0c virt:0x800adf0c flags:0x01024001 0 inv/1
+| exec:1033298/0 guest inst cov:0.28%
+| trans:1 inst: g:8 op:35 op_opt:33 spills:0
+| h/g (host bytes / guest insts): 86.00
+
+* Dive into the specific TB::
+
+(qemu) info tb 0
+--
+
+TB id:0 | phys:0x63474e virt:0x flags:0x01028001 0 inv/1
+| exec:131719290/0 guest inst cov:8.44%
+| trans:1 ints: g:9 op:36 op_opt:34 spills:0
+| h/g (host bytes / guest insts): 51.57
+
+0x0063474e:  00194a83  lbu s5,1(s2)
+0x00634752:  00094803  lbu a6,0(s2)
+0x00634756:  0b09  addis6,s6,2
+0x00634758:  008a9a9b  slliw   s5,s5,8
+0x0063475c:  01586833  or  a6,a6,s5
+0x00634760:  ff0b1f23  sh  a6,-2(s6)
+0x00634764:  1c7d  addis8,s8,-1
+0x00634766:  0909  addis2,s2,2
+0x00634768:  fe0c13e3  bnezs8,-26  
# 0x63474e
+
+--
+
+* Stop TBStatistics after investigation; this disables it completely::
+
+(qemu) tb_stats stop
+(qemu)
+
+* TBStatistics can be restarted later for another round of investigation::
+
+(qemu) tb_stats start all
+(qemu)
+
+
+2. Start TBStatistics with command line
+---------------------------------------
+
+If you don't want to miss anything during guest startup, a command line
+option is provided to enable TBStatistics from the start::
+
+-d tb_stats_{all,jit,exec}
+
+
+3. Dump hottest at exit
+-----------------------
+
+TBStatistics is able to dump the hottest TB information at exit as follows::
+
+-d tb_stats_{all,jit,exec}[:dump_num_at_exit]
+
+e.g. starting qemu like this::
+
+-d tb_stats_all:2
+
+QEMU prints the following at exit::
+
+TB id:0 | phys:0x242b8 virt:0x000242b8 flags:0x00024078 0 inv/0
+| exec:1161/0 guest inst cov:10.36%
+| trans:1 ints: g:2 op:20 op_opt:19 spills:0
+| h/g (host bytes / guest insts): 61.50
+
+TB id:1 | phys:0x242be virt:0x000242be flags:0x00024078 0 inv/0
+| exec:1161/0 guest inst cov:10.36%
+| trans:1 ints: g:2 op:20 op_opt:18 spills:0
+| h/g (host byte

[PATCH v16 8/9] tb-stats: dump hot TBs at the end of the execution

2023-06-28 Thread Fei Wu
Dump the hottest TBs at exit if -d tb_stats_{all,jit,exec}[:dump_num_at_exit] is given.
---
 accel/tcg/tb-stats.c  | 21 +
 include/exec/tb-stats-dump.h  | 21 +
 include/exec/tb-stats-flags.h |  1 +
 linux-user/exit.c |  2 ++
 softmmu/runstate.c|  2 ++
 stubs/tb-stats.c  |  4 
 util/log.c| 12 ++--
 7 files changed, 61 insertions(+), 2 deletions(-)
 create mode 100644 include/exec/tb-stats-dump.h

diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c
index 73f0a25781..1b038b7b85 100644
--- a/accel/tcg/tb-stats.c
+++ b/accel/tcg/tb-stats.c
@@ -17,6 +17,7 @@
 #include "qemu/log.h"
 
 #include "exec/tb-stats.h"
+#include "exec/tb-stats-dump.h"
 #include "tb-context.h"
 
 #include "internal.h"
@@ -29,6 +30,7 @@ enum TBStatsStatus {
 
 static enum TBStatsStatus tcg_collect_tb_stats;
 static uint32_t tbstats_flag;
+static int max_dump_tbs;
 
 static GPtrArray *last_search;
 
@@ -300,6 +302,20 @@ void dump_tblist_info(GString *buf, int total, int sort_by)
 }
 }
 
+/*
+ * Dump the final stats
+ */
+void tb_stats_dump(void)
+{
+if (!tb_stats_collection_enabled()) {
+return;
+}
+
+g_autoptr(GString) buf = g_string_new("");
+dump_tblist_info(buf, max_dump_tbs, SORT_BY_HOTNESS);
+qemu_printf("%s", buf->str);
+}
+
 void enable_collect_tb_stats(void)
 {
 tcg_collect_tb_stats = TB_STATS_RUNNING;
@@ -343,3 +359,8 @@ bool tbs_stats_enabled(struct TBStatistics *tbs, uint32_t 
flag)
 return tb_stats_collection_enabled() &&
(tbstats_flag & flag);
 }
+
+void set_tbstats_max_tbs(int max)
+{
+max_dump_tbs = max;
+}
diff --git a/include/exec/tb-stats-dump.h b/include/exec/tb-stats-dump.h
new file mode 100644
index 00..197c6148e9
--- /dev/null
+++ b/include/exec/tb-stats-dump.h
@@ -0,0 +1,21 @@
+/*
+ * TB Stats common dump functions across sysemu/linux-user
+ *
+ * Copyright (c) 2019 Linaro
+ *
+ * SPDX-License-Identifier: GPL-3.0-or-later
+ */
+
+#ifndef _TB_STATS_DUMP_H_
+#define _TB_STATS_DUMP_H_
+
+/**
+ * tb_stats_dump: dump final tb_stats at end of execution
+ */
+#ifdef CONFIG_TCG
+void tb_stats_dump(void);
+#else
+static inline void tb_stats_dump(void) { /* do nothing */ };
+#endif
+
+#endif /* _TB_STATS_DUMP_H_ */
diff --git a/include/exec/tb-stats-flags.h b/include/exec/tb-stats-flags.h
index 936bd35707..52d0429eeb 100644
--- a/include/exec/tb-stats-flags.h
+++ b/include/exec/tb-stats-flags.h
@@ -24,5 +24,6 @@ bool tb_stats_collection_disabled(void);
 
 uint32_t get_tbstats_flag(void);
 void set_tbstats_flag(uint32_t flag);
+void set_tbstats_max_tbs(int max);
 
 #endif
diff --git a/linux-user/exit.c b/linux-user/exit.c
index 3017d28a3c..4fd23bcf60 100644
--- a/linux-user/exit.c
+++ b/linux-user/exit.c
@@ -25,6 +25,7 @@
 #ifdef CONFIG_GPROF
 #include 
 #endif
+#include "exec/tb-stats-dump.h"
 
 #ifdef CONFIG_GCOV
 extern void __gcov_dump(void);
@@ -41,4 +42,5 @@ void preexit_cleanup(CPUArchState *env, int code)
 gdb_exit(code);
 qemu_plugin_user_exit();
 perf_exit();
+tb_stats_dump();
 }
diff --git a/softmmu/runstate.c b/softmmu/runstate.c
index a9fbcf4862..74cc7430c6 100644
--- a/softmmu/runstate.c
+++ b/softmmu/runstate.c
@@ -30,6 +30,7 @@
 #include "crypto/cipher.h"
 #include "crypto/init.h"
 #include "exec/cpu-common.h"
+#include "exec/tb-stats-dump.h"
 #include "gdbstub/syscalls.h"
 #include "hw/boards.h"
 #include "migration/misc.h"
@@ -817,6 +818,7 @@ void qemu_cleanup(void)
 vm_shutdown();
 replay_finish();
 
+tb_stats_dump();
 job_cancel_sync_all();
 bdrv_close_all();
 
diff --git a/stubs/tb-stats.c b/stubs/tb-stats.c
index 38ee12313e..78ae1aa217 100644
--- a/stubs/tb-stats.c
+++ b/stubs/tb-stats.c
@@ -30,3 +30,7 @@ void set_tbstats_flag(uint32_t flag)
 {
 return;
 }
+
+void set_tbstats_max_tbs(int max) {
+return;
+}
diff --git a/util/log.c b/util/log.c
index 59c781de36..3c15fcac01 100644
--- a/util/log.c
+++ b/util/log.c
@@ -498,8 +498,9 @@ const QEMULogItem qemu_log_items[] = {
   "open a separate log file per thread; filename must contain '%d'" },
 { CPU_LOG_TB_VPU, "vpu",
   "include VPU registers in the 'cpu' logging" },
-{ CPU_LOG_TB_STATS, "tb_stats_{all,jit,exec}",
-  "enable collection of TBs statistics at startup" },
+{ CPU_LOG_TB_STATS, "tb_stats_{all,jit,exec}[:dump_num_at_exit]",
+  "enable collection of TBs statistics at startup "
+  "and dump at exit until given a limit" },
 { 0, NULL, NULL },
 };
 
@@ -526,16 +527,23 @@ int qemu_str_to_log_mask(const char *str)
 uint32_t flag = TB_NONE_STATS;
 if (g_str_has_prefix(p, "all")) {
 flag = TB_ALL_STATS;
+p += 3;
 } else if (g_str_has_prefix(p, "jit")) {
 flag = TB_JIT_STATS;
+p += 3;
 } else if (g_str_has_prefix(p, "exec")) {
 flag = TB_EXEC_STATS;
+p += 4;

[PATCH v16 7/9] debug: add -d tb_stats to control TBStatistics

2023-06-28 Thread Fei Wu
Capture TBS at startup instead of an explicit 'tb_stats start' command.

Signed-off-by: Vanderson M. do Rosario 
Signed-off-by: Alex Bennée 
Signed-off-by: Fei Wu 
---
 include/exec/tb-stats-flags.h |  1 +
 include/qemu/log.h|  1 +
 stubs/meson.build |  1 +
 stubs/tb-stats.c  | 32 
 util/log.c| 18 ++
 5 files changed, 53 insertions(+)
 create mode 100644 stubs/tb-stats.c

diff --git a/include/exec/tb-stats-flags.h b/include/exec/tb-stats-flags.h
index d8b844be99..936bd35707 100644
--- a/include/exec/tb-stats-flags.h
+++ b/include/exec/tb-stats-flags.h
@@ -14,6 +14,7 @@
 #define TB_NONE_STATS (0)  /* no stats */
 #define TB_EXEC_STATS (1 << 0)
 #define TB_JIT_STATS  (1 << 1)
+#define TB_ALL_STATS  (TB_EXEC_STATS | TB_JIT_STATS)
 
 /* TBStatistic collection controls */
 void enable_collect_tb_stats(void);
diff --git a/include/qemu/log.h b/include/qemu/log.h
index df59bfabcd..eca10f02a3 100644
--- a/include/qemu/log.h
+++ b/include/qemu/log.h
@@ -36,6 +36,7 @@ bool qemu_log_separate(void);
 #define LOG_STRACE (1 << 19)
 #define LOG_PER_THREAD (1 << 20)
 #define CPU_LOG_TB_VPU (1 << 21)
+#define CPU_LOG_TB_STATS   (1 << 22)
 
 /* Lock/unlock output. */
 
diff --git a/stubs/meson.build b/stubs/meson.build
index a56645e2f7..e926649d40 100644
--- a/stubs/meson.build
+++ b/stubs/meson.build
@@ -65,3 +65,4 @@ else
 endif
 stub_ss.add(files('semihost-all.c'))
 stub_ss.add(when: 'CONFIG_VFIO_USER_SERVER', if_false: 
files('vfio-user-obj.c'))
+stub_ss.add(files('tb-stats.c'))
diff --git a/stubs/tb-stats.c b/stubs/tb-stats.c
new file mode 100644
index 00..38ee12313e
--- /dev/null
+++ b/stubs/tb-stats.c
@@ -0,0 +1,32 @@
+/*
+ * TB Stats Stubs
+ *
+ * Copyright (c) 2019
+ * Written by Alex Bennée 
+ *
+ * This code is licensed under the GNU GPL v2, or later.
+ */
+
+
+#include "qemu/osdep.h"
+#include "exec/tb-stats-flags.h"
+
+void enable_collect_tb_stats(void)
+{
+return;
+}
+
+void disable_collect_tb_stats(void)
+{
+return;
+}
+
+bool tb_stats_collection_enabled(void)
+{
+return false;
+}
+
+void set_tbstats_flag(uint32_t flag)
+{
+return;
+}
diff --git a/util/log.c b/util/log.c
index def88a9402..59c781de36 100644
--- a/util/log.c
+++ b/util/log.c
@@ -27,6 +27,7 @@
 #include "qemu/thread.h"
 #include "qemu/lockable.h"
 #include "qemu/rcu.h"
+#include "exec/tb-stats-flags.h"
 #ifdef CONFIG_LINUX
 #include 
 #endif
@@ -497,6 +498,8 @@ const QEMULogItem qemu_log_items[] = {
   "open a separate log file per thread; filename must contain '%d'" },
 { CPU_LOG_TB_VPU, "vpu",
   "include VPU registers in the 'cpu' logging" },
+{ CPU_LOG_TB_STATS, "tb_stats_{all,jit,exec}",
+  "enable collection of TBs statistics at startup" },
 { 0, NULL, NULL },
 };
 
@@ -518,6 +521,21 @@ int qemu_str_to_log_mask(const char *str)
 trace_enable_events((*tmp) + 6);
 mask |= LOG_TRACE;
 #endif
+} else if (g_str_has_prefix(*tmp, "tb_stats_")) {
+char *p = *tmp + 9;
+uint32_t flag = TB_NONE_STATS;
+if (g_str_has_prefix(p, "all")) {
+flag = TB_ALL_STATS;
+} else if (g_str_has_prefix(p, "jit")) {
+flag = TB_JIT_STATS;
+} else if (g_str_has_prefix(p, "exec")) {
+flag = TB_EXEC_STATS;
+}
+if (flag != TB_NONE_STATS) {
+mask |= CPU_LOG_TB_STATS;
+set_tbstats_flag(flag);
+enable_collect_tb_stats();
+}
 } else {
 for (item = qemu_log_items; item->mask != 0; item++) {
 if (g_str_equal(*tmp, item->name)) {
-- 
2.25.1




[PATCH v16 3/9] accel/tcg: add jit stats to TBStatistics

2023-06-28 Thread Fei Wu
This collects all the statistics for TBStatistics, not only for the
whole emulation but for each TB.

Signed-off-by: Vanderson M. do Rosario 
Signed-off-by: Alex Bennée 
Signed-off-by: Fei Wu 
---
 accel/tcg/monitor.c   |  6 +++
 accel/tcg/tb-stats.c  | 69 +++
 accel/tcg/translate-all.c | 31 ++--
 accel/tcg/translator.c|  5 +++
 include/exec/tb-stats-flags.h |  1 +
 include/exec/tb-stats.h   | 30 +++
 include/tcg/tcg.h | 10 +
 tcg/tcg.c | 47 ++--
 8 files changed, 193 insertions(+), 6 deletions(-)

diff --git a/accel/tcg/monitor.c b/accel/tcg/monitor.c
index d48de23999..567950a7ed 100644
--- a/accel/tcg/monitor.c
+++ b/accel/tcg/monitor.c
@@ -16,6 +16,7 @@
 #include "sysemu/cpu-timers.h"
 #include "sysemu/tcg.h"
 #include "tcg/tcg.h"
+#include "exec/tb-stats.h"
 #include "internal.h"
 
 
@@ -70,6 +71,11 @@ HumanReadableText *qmp_x_query_opcount(Error **errp)
 {
 g_autoptr(GString) buf = g_string_new("");
 
+if (!tb_stats_collection_enabled()) {
+error_setg(errp, "TB information not being recorded");
+return NULL;
+}
+
 if (!tcg_enabled()) {
 error_setg(errp,
"Opcode count information is only available with 
accel=tcg");
diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c
index 2b7321b548..11322359c7 100644
--- a/accel/tcg/tb-stats.c
+++ b/accel/tcg/tb-stats.c
@@ -9,6 +9,10 @@
 #include "qemu/osdep.h"
 
 #include "disas/disas.h"
+#include "exec/exec-all.h"
+#include "tcg/tcg.h"
+
+#include "qemu/qemu-print.h"
 
 #include "exec/tb-stats.h"
 #include "tb-context.h"
@@ -22,6 +26,71 @@ enum TBStatsStatus {
 static enum TBStatsStatus tcg_collect_tb_stats;
 static uint32_t tbstats_flag;
 
+struct jit_profile_info {
+uint64_t translations;
+uint64_t aborted;
+uint64_t ops;
+unsigned ops_max;
+uint64_t del_ops;
+uint64_t temps;
+unsigned temps_max;
+uint64_t host;
+uint64_t guest;
+uint64_t search_data;
+};
+
+#define stat_per_translation(stat, name) \
+(stat->translations.total ? stat->name / stat->translations.total : 0)
+
+/* accumulate the statistics from all TBs */
+static void collect_jit_profile_info(void *p, uint32_t hash, void *userp)
+{
+struct jit_profile_info *jpi = userp;
+TBStatistics *tbs = p;
+
+jpi->translations += tbs->translations.total;
+jpi->ops += tbs->code.num_tcg_ops;
+if (stat_per_translation(tbs, code.num_tcg_ops) > jpi->ops_max) {
+jpi->ops_max = stat_per_translation(tbs, code.num_tcg_ops);
+}
+jpi->del_ops += tbs->code.deleted_ops;
+jpi->temps += tbs->code.temps;
+if (stat_per_translation(tbs, code.temps) > jpi->temps_max) {
+jpi->temps_max = stat_per_translation(tbs, code.temps);
+}
+jpi->host += tbs->code.out_len;
+jpi->guest += tbs->code.in_len;
+jpi->search_data += tbs->code.search_out_len;
+}
+
+/* dump JIT statistics using TBStats */
+void dump_jit_profile_info(GString *buf)
+{
+if (!tb_stats_collection_enabled()) {
+return;
+}
+
+struct jit_profile_info *jpi = g_new0(struct jit_profile_info, 1);
+
+qht_iter(&tb_ctx.tb_stats, collect_jit_profile_info, jpi);
+
+if (jpi->translations) {
+g_string_append_printf(buf, "translated TBs  %" PRId64 "\n",
+jpi->translations);
+g_string_append_printf(buf, "avg ops/TB  %0.1f max=%d\n",
+jpi->ops / (double) jpi->translations, jpi->ops_max);
+g_string_append_printf(buf, "deleted ops/TB  %0.2f\n",
+jpi->del_ops / (double) jpi->translations);
+g_string_append_printf(buf, "avg temps/TB%0.2f max=%d\n",
+jpi->temps / (double) jpi->translations, jpi->temps_max);
+g_string_append_printf(buf, "avg host code/TB%0.1f\n",
+jpi->host / (double) jpi->translations);
+g_string_append_printf(buf, "avg search data/TB  %0.1f\n",
+jpi->search_data / (double) jpi->translations);
+}
+g_free(jpi);
+}
+
 void init_tb_stats_htable(void)
 {
 if (!tb_ctx.tb_stats.map && tb_stats_collection_enabled()) {
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index d7a35e0ea5..4f1d3b4612 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -266,7 +266,7 @@ void page_init(void)
  */
 static int setjmp_gen_code(CPUArchState *env, TranslationBlock *tb,
vaddr pc, void *host_pc,
-   int *max_insns, int64_t *ti)
+   

[PATCH v16 1/9] accel/tcg: introduce TBStatistics structure

2023-06-28 Thread Fei Wu
From: "Vanderson M. do Rosario" 

To store statistics for each TB, we created a TBStatistics structure
which is linked with the TBs. TBStatistics can stay alive after
tb_flush and be relinked to a regenerated TB. So the statistics can
be accumulated even through flushes.

The goal is to have all present and future qemu/tcg statistics and
meta-data stored in this new structure.

Reviewed-by: Alex Bennée 
Signed-off-by: Vanderson M. do Rosario 
Message-Id: <20190829173437.5926-2-vanderson...@gmail.com>
[AJB: fix git author, review comments]
Signed-off-by: Alex Bennée 
Signed-off-by: Fei Wu 
---
 MAINTAINERS  |  1 +
 accel/tcg/meson.build|  1 +
 accel/tcg/tb-context.h   |  1 +
 accel/tcg/tb-hash.h  |  7 
 accel/tcg/tb-maint.c | 19 +++
 accel/tcg/tb-stats.c | 46 ++
 accel/tcg/translate-all.c| 44 +
 include/exec/tb-stats-flags.h| 19 +++
 include/exec/tb-stats.h  | 56 
 include/exec/translation-block.h |  3 ++
 10 files changed, 197 insertions(+)
 create mode 100644 accel/tcg/tb-stats.c
 create mode 100644 include/exec/tb-stats-flags.h
 create mode 100644 include/exec/tb-stats.h

diff --git a/MAINTAINERS b/MAINTAINERS
index e07746ac7d..f11c58f371 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -153,6 +153,7 @@ F: include/exec/cpu*.h
 F: include/exec/exec-all.h
 F: include/exec/tb-flush.h
 F: include/exec/target_long.h
+F: include/exec/tb-stats*.h
 F: include/exec/helper*.h
 F: include/exec/helper*.h.inc
 F: include/exec/helper-info.c.inc
diff --git a/accel/tcg/meson.build b/accel/tcg/meson.build
index 166bef173b..239120c933 100644
--- a/accel/tcg/meson.build
+++ b/accel/tcg/meson.build
@@ -4,6 +4,7 @@ tcg_ss.add(files(
   'cpu-exec-common.c',
   'cpu-exec.c',
   'tb-maint.c',
+  'tb-stats.c',
   'tcg-runtime-gvec.c',
   'tcg-runtime.c',
   'translate-all.c',
diff --git a/accel/tcg/tb-context.h b/accel/tcg/tb-context.h
index cac62d9749..d7910d586b 100644
--- a/accel/tcg/tb-context.h
+++ b/accel/tcg/tb-context.h
@@ -35,6 +35,7 @@ struct TBContext {
 /* statistics */
 unsigned tb_flush_count;
 unsigned tb_phys_invalidate_count;
+struct qht tb_stats;
 };
 
 extern TBContext tb_ctx;
diff --git a/accel/tcg/tb-hash.h b/accel/tcg/tb-hash.h
index a0c61f25cd..638fe58270 100644
--- a/accel/tcg/tb-hash.h
+++ b/accel/tcg/tb-hash.h
@@ -67,4 +67,11 @@ uint32_t tb_hash_func(tb_page_addr_t phys_pc, vaddr pc,
 return qemu_xxhash8(phys_pc, pc, flags2, flags, cf_mask);
 }
 
+static inline
+uint32_t tb_stats_hash_func(tb_page_addr_t phys_pc, target_ulong pc,
+uint32_t flags)
+{
+return qemu_xxhash5(phys_pc, pc, flags);
+}
+
 #endif
diff --git a/accel/tcg/tb-maint.c b/accel/tcg/tb-maint.c
index 3541419845..264bdd84b3 100644
--- a/accel/tcg/tb-maint.c
+++ b/accel/tcg/tb-maint.c
@@ -24,6 +24,7 @@
 #include "exec/log.h"
 #include "exec/exec-all.h"
 #include "exec/tb-flush.h"
+#include "exec/tb-stats.h"
 #include "exec/translate-all.h"
 #include "sysemu/tcg.h"
 #include "tcg/tcg.h"
@@ -41,6 +42,23 @@
 #define TB_FOR_EACH_JMP(head_tb, tb, n) \
 TB_FOR_EACH_TAGGED((head_tb)->jmp_list_head, tb, n, jmp_list_next)
 
+/*
+ * This is more or less the same comparison as tb_cmp(), but the
+ * data persists over tb_flush. We also aggregate the various
+ * variations of cflags under one record and ignore the details of
+ * page overlap (although we can count it).
+ */
+bool tb_stats_cmp(const void *ap, const void *bp)
+{
+const TBStatistics *a = ap;
+const TBStatistics *b = bp;
+
+return a->phys_pc == b->phys_pc &&
+a->pc == b->pc &&
+a->cs_base == b->cs_base &&
+a->flags == b->flags;
+}
+
 static bool tb_cmp(const void *ap, const void *bp)
 {
 const TranslationBlock *a = ap;
@@ -59,6 +77,7 @@ void tb_htable_init(void)
 unsigned int mode = QHT_MODE_AUTO_RESIZE;
 
 qht_init(&tb_ctx.htable, tb_cmp, CODE_GEN_HTABLE_SIZE, mode);
+init_tb_stats_htable();
 }
 
 typedef struct PageDesc PageDesc;
diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c
new file mode 100644
index 00..2b6be943ef
--- /dev/null
+++ b/accel/tcg/tb-stats.c
@@ -0,0 +1,46 @@
+/*
+ * QEMU System Emulator, Code Quality Monitor System
+ *
+ * Copyright (c) 2019 Vanderson M. do Rosario 
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+
+#include "disas/disas.h"
+
+#include "exec/tb-stats.h"
+#include "tb-context.h"
+
+/* TBStatistic collection controls */
+enum TBStatsStatus {
+TB_STATS_STOPPED = 0,
+TB_STATS_RUNNING
+};
+
+static enum TBStatsStatus tcg_

[PATCH v16 0/9] TCG code quality tracking

2023-06-28 Thread Fei Wu
v16
--
* rebase to latest commit 4329d049d (Jun 26)
* original patch 1 (remove CONFIG_PROFILER) has already upstreamed so is
  removed from here
* add async_safe_run_on_cpu back for info tb-list/tb
* add ram_addr support to monitor_disas and use it to disassemble guest code
* update the commit logs and documents
* small change to do_hmp_tbstats_safe() to avoid a memory leak


Alex Bennée (1):
  tb-stats: reset the tracked TBs on a tb_flush

Fei Wu (4):
  accel/tcg: add jit stats to TBStatistics
  debug: add -d tb_stats to control TBStatistics
  tb-stats: dump hot TBs at the end of the execution
  docs: add tb-stats how to

Vanderson M. do Rosario (4):
  accel/tcg: introduce TBStatistics structure
  accel: collecting TB execution count
  monitor: adding tb_stats hmp command
  tb-stats: Adding info [tb-list|tb] commands to HMP

 MAINTAINERS  |   1 +
 accel/tcg/cpu-exec.c |   6 +
 accel/tcg/meson.build|   1 +
 accel/tcg/monitor.c  | 216 ++
 accel/tcg/tb-context.h   |   1 +
 accel/tcg/tb-hash.h  |   7 +
 accel/tcg/tb-maint.c |  20 ++
 accel/tcg/tb-stats.c | 366 +++
 accel/tcg/tcg-runtime.c  |   1 +
 accel/tcg/translate-all.c|  77 ++-
 accel/tcg/translator.c   |  31 +++
 disas/disas-mon.c|  15 +-
 disas/disas.c|   2 +
 docs/devel/index-tcg.rst |   1 +
 docs/devel/tcg-tbstats.rst   | 126 +++
 hmp-commands-info.hx |  16 ++
 hmp-commands.hx  |  16 ++
 include/disas/disas.h|   8 +-
 include/exec/tb-stats-dump.h |  21 ++
 include/exec/tb-stats-flags.h|  29 +++
 include/exec/tb-stats.h  | 130 +++
 include/exec/translation-block.h |   3 +
 include/monitor/hmp.h|   3 +
 include/qemu/log.h   |   1 +
 include/tcg/tcg-temp-internal.h  |   2 +
 include/tcg/tcg.h|  10 +
 linux-user/exit.c|   2 +
 monitor/hmp-cmds-target.c|   3 +-
 softmmu/runstate.c   |   2 +
 stubs/meson.build|   1 +
 stubs/tb-stats.c |  36 +++
 tcg/tcg.c|  47 +++-
 util/log.c   |  26 +++
 33 files changed, 1217 insertions(+), 10 deletions(-)
 create mode 100644 accel/tcg/tb-stats.c
 create mode 100644 docs/devel/tcg-tbstats.rst
 create mode 100644 include/exec/tb-stats-dump.h
 create mode 100644 include/exec/tb-stats-flags.h
 create mode 100644 include/exec/tb-stats.h
 create mode 100644 stubs/tb-stats.c

-- 
2.25.1




[PATCH v15 06/10] tb-stats: reset the tracked TBs on a tb_flush

2023-06-07 Thread Fei Wu
From: Alex Bennée 

We keep track of translations but can only do so up until the
translation cache is flushed. At that point we really have no idea if
we can re-create a translation because all the active tracking
information has been reset.

Signed-off-by: Alex Bennée 
Reviewed-by: Richard Henderson 
Signed-off-by: Fei Wu 
---
 accel/tcg/tb-maint.c|  1 +
 accel/tcg/tb-stats.c| 18 ++
 include/exec/tb-stats.h |  8 
 3 files changed, 27 insertions(+)

diff --git a/accel/tcg/tb-maint.c b/accel/tcg/tb-maint.c
index 0980fca358..11ff0ddd90 100644
--- a/accel/tcg/tb-maint.c
+++ b/accel/tcg/tb-maint.c
@@ -763,6 +763,7 @@ static void do_tb_flush(CPUState *cpu, run_on_cpu_data 
tb_flush_count)
 qht_reset_size(&tb_ctx.htable, CODE_GEN_HTABLE_SIZE);
 tb_remove_all();
 
+tbstats_reset_tbs();
 tcg_region_reset_all();
 /* XXX: flush processor icache at this point if cache flush is expensive */
 qatomic_inc(&tb_ctx.tb_flush_count);
diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c
index c90dde37d0..7c7f700c89 100644
--- a/accel/tcg/tb-stats.c
+++ b/accel/tcg/tb-stats.c
@@ -103,6 +103,24 @@ void clean_tbstats(void)
 qht_destroy(&tb_ctx.tb_stats);
 }
 
+/*
+ * We have to reset the tbs array on a tb_flush as those
+ * TranslationBlocks no longer exist.
+ */
+
+static void reset_tbs_array(void *p, uint32_t hash, void *userp)
+{
+TBStatistics *tbs = p;
+g_ptr_array_set_size(tbs->tbs, 0);
+}
+
+void tbstats_reset_tbs(void)
+{
+if (tb_ctx.tb_stats.map) {
+qht_iter(&tb_ctx.tb_stats, reset_tbs_array, NULL);
+}
+}
+
 void init_tb_stats_htable(void)
 {
 if (!tb_ctx.tb_stats.map && tb_stats_collection_enabled()) {
diff --git a/include/exec/tb-stats.h b/include/exec/tb-stats.h
index e16f947410..77974b4b7f 100644
--- a/include/exec/tb-stats.h
+++ b/include/exec/tb-stats.h
@@ -94,4 +94,12 @@ void dump_jit_profile_info(GString *buf);
 
 void clean_tbstats(void);
 
+/**
+ * tbstats_reset_tbs: reset the linked array of TBs
+ *
+ * Reset the array of TBs linked to every TBStatistics entry. Should be
+ * called from safe work during tb_flush.
+ */
+void tbstats_reset_tbs(void);
+
 #endif
-- 
2.25.1




[PATCH v15 05/10] monitor: adding tb_stats hmp command

2023-06-07 Thread Fei Wu
From: "Vanderson M. do Rosario" 

Add the tb_stats [start|stop|status] command to HMP.
This allows controlling the collection of statistics.
It is also possible to select which statistics are collected:
all, jit, or exec.

The goal of this command is to allow the dynamic exploration
of the TCG behavior and quality. Therefore, for now, a
corresponding QMP command is not worthwhile.

Acked-by: Dr. David Alan Gilbert 
Signed-off-by: Vanderson M. do Rosario 
Message-Id: <20190829173437.5926-8-vanderson...@gmail.com>
Message-Id: <20190829173437.5926-9-vanderson...@gmail.com>
[AJB: fix authorship]
Signed-off-by: Alex Bennée 
Signed-off-by: Fei Wu 
---
 accel/tcg/monitor.c   | 100 ++
 accel/tcg/tb-stats.c  |  17 ++
 hmp-commands.hx   |  16 ++
 include/exec/tb-stats-flags.h |   1 +
 include/exec/tb-stats.h   |   4 ++
 include/monitor/hmp.h |   1 +
 6 files changed, 139 insertions(+)

diff --git a/accel/tcg/monitor.c b/accel/tcg/monitor.c
index 111ccbc4da..094150e4af 100644
--- a/accel/tcg/monitor.c
+++ b/accel/tcg/monitor.c
@@ -11,13 +11,18 @@
 #include "qapi/error.h"
 #include "qapi/type-helpers.h"
 #include "qapi/qapi-commands-machine.h"
+#include "qapi/qmp/qdict.h"
 #include "monitor/monitor.h"
+#include "monitor/hmp.h"
 #include "sysemu/cpus.h"
 #include "sysemu/cpu-timers.h"
 #include "sysemu/tcg.h"
 #include "exec/tb-stats.h"
+#include "exec/tb-flush.h"
 #include "internal.h"
+#include "tb-context.h"
 
+enum TbstatsCmd { TBS_CMD_START, TBS_CMD_STOP, TBS_CMD_STATUS };
 
 static void dump_drift_info(GString *buf)
 {
@@ -86,6 +91,101 @@ HumanReadableText *qmp_x_query_opcount(Error **errp)
 return human_readable_text_from_str(buf);
 }
 
+struct TbstatsCommand {
+enum TbstatsCmd cmd;
+uint32_t flag;
+Monitor *mon;
+};
+
+static void do_hmp_tbstats_safe(CPUState *cpu, run_on_cpu_data icmd)
+{
+struct TbstatsCommand *cmdinfo = icmd.host_ptr;
+int cmd = cmdinfo->cmd;
+uint32_t flag = cmdinfo->flag;
+Monitor *mon = cmdinfo->mon;
+
+switch (cmd) {
+case TBS_CMD_START:
+if (tb_stats_collection_enabled()) {
+monitor_printf(mon, "TB information already being recorded\n");
+return;
+}
+
+qht_init(&tb_ctx.tb_stats, tb_stats_cmp, CODE_GEN_HTABLE_SIZE,
+ QHT_MODE_AUTO_RESIZE);
+set_tbstats_flag(flag);
+enable_collect_tb_stats();
+tb_flush(cpu);
+break;
+case TBS_CMD_STOP:
+if (tb_stats_collection_disabled()) {
+monitor_printf(mon, "TB information not being recorded\n");
+return;
+}
+
+/* Deallocate all TBStatistics structures and stop creating new ones */
+disable_collect_tb_stats();
+clean_tbstats();
+tb_flush(cpu);
+break;
+case TBS_CMD_STATUS:
+if (tb_stats_collection_enabled()) {
+uint32_t flag = get_tbstats_flag();
+monitor_printf(mon, "tb_stats is enabled with flag:\n");
+monitor_printf(mon, "EXEC: %d\n", !!(flag & TB_EXEC_STATS));
+monitor_printf(mon, " JIT: %d\n", !!(flag & TB_JIT_STATS));
+} else {
+monitor_printf(mon, "tb_stats is disabled\n");
+}
+break;
+default: /* INVALID */
+g_assert_not_reached();
+break;
+}
+
+g_free(cmdinfo);
+}
+
+void hmp_tbstats(Monitor *mon, const QDict *qdict)
+{
+if (!tcg_enabled()) {
+monitor_printf(mon, "Only available with accel=tcg\n");
+return;
+}
+
+char *cmd = (char *) qdict_get_try_str(qdict, "command");
+enum TbstatsCmd icmd = -1;
+
+if (strcmp(cmd, "start") == 0) {
+icmd = TBS_CMD_START;
+} else if (strcmp(cmd, "stop") == 0) {
+icmd = TBS_CMD_STOP;
+} else if (strcmp(cmd, "status") == 0) {
+icmd = TBS_CMD_STATUS;
+} else {
+monitor_printf(mon, "Invalid command\n");
+return;
+}
+
+char *sflag = (char *) qdict_get_try_str(qdict, "flag");
+uint32_t flag = TB_EXEC_STATS | TB_JIT_STATS;
+if (sflag) {
+if (strcmp(sflag, "jit") == 0) {
+flag = TB_JIT_STATS;
+} else if (strcmp(sflag, "exec") == 0) {
+flag = TB_EXEC_STATS;
+}
+}
+
+struct TbstatsCommand *tbscommand = g_new0(struct TbstatsCommand, 1);
+tbscommand->cmd = icmd;
+tbscommand->flag = flag;
+tbscommand->mon = mon;
+async_safe_run_on_cpu(first_cpu, do_hmp_tbstats_safe,
+  RUN_ON_CPU_HOST_PTR(tbscommand));
+
+}
+
 static void hmp_tcg_register(void)
 {
 monitor_register_hmp_info_hrt(&q

[PATCH v15 04/10] accel/tcg: add jit stats to TBStatistics

2023-06-07 Thread Fei Wu
This collects all the statistics for TBStatistics, not only for the
whole emulation but for each TB.

Signed-off-by: Vanderson M. do Rosario 
Signed-off-by: Alex Bennée 
Signed-off-by: Fei Wu 
---
 accel/tcg/monitor.c   |  6 +++
 accel/tcg/tb-stats.c  | 69 +++
 accel/tcg/translate-all.c | 31 ++--
 accel/tcg/translator.c|  5 +++
 include/exec/tb-stats-flags.h |  1 +
 include/exec/tb-stats.h   | 29 +++
 include/tcg/tcg.h | 10 +
 tcg/tcg.c | 47 ++--
 8 files changed, 192 insertions(+), 6 deletions(-)

diff --git a/accel/tcg/monitor.c b/accel/tcg/monitor.c
index 39d8260428..111ccbc4da 100644
--- a/accel/tcg/monitor.c
+++ b/accel/tcg/monitor.c
@@ -15,6 +15,7 @@
 #include "sysemu/cpus.h"
 #include "sysemu/cpu-timers.h"
 #include "sysemu/tcg.h"
+#include "exec/tb-stats.h"
 #include "internal.h"
 
 
@@ -69,6 +70,11 @@ HumanReadableText *qmp_x_query_opcount(Error **errp)
 {
 g_autoptr(GString) buf = g_string_new("");
 
+if (!tb_stats_collection_enabled()) {
+error_setg(errp, "TB information not being recorded");
+return NULL;
+}
+
 if (!tcg_enabled()) {
 error_setg(errp,
"Opcode count information is only available with 
accel=tcg");
diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c
index 2b7321b548..11322359c7 100644
--- a/accel/tcg/tb-stats.c
+++ b/accel/tcg/tb-stats.c
@@ -9,6 +9,10 @@
 #include "qemu/osdep.h"
 
 #include "disas/disas.h"
+#include "exec/exec-all.h"
+#include "tcg/tcg.h"
+
+#include "qemu/qemu-print.h"
 
 #include "exec/tb-stats.h"
 #include "tb-context.h"
@@ -22,6 +26,71 @@ enum TBStatsStatus {
 static enum TBStatsStatus tcg_collect_tb_stats;
 static uint32_t tbstats_flag;
 
+struct jit_profile_info {
+uint64_t translations;
+uint64_t aborted;
+uint64_t ops;
+unsigned ops_max;
+uint64_t del_ops;
+uint64_t temps;
+unsigned temps_max;
+uint64_t host;
+uint64_t guest;
+uint64_t search_data;
+};
+
+#define stat_per_translation(stat, name) \
+(stat->translations.total ? stat->name / stat->translations.total : 0)
+
+/* accumulate the statistics from all TBs */
+static void collect_jit_profile_info(void *p, uint32_t hash, void *userp)
+{
+struct jit_profile_info *jpi = userp;
+TBStatistics *tbs = p;
+
+jpi->translations += tbs->translations.total;
+jpi->ops += tbs->code.num_tcg_ops;
+if (stat_per_translation(tbs, code.num_tcg_ops) > jpi->ops_max) {
+jpi->ops_max = stat_per_translation(tbs, code.num_tcg_ops);
+}
+jpi->del_ops += tbs->code.deleted_ops;
+jpi->temps += tbs->code.temps;
+if (stat_per_translation(tbs, code.temps) > jpi->temps_max) {
+jpi->temps_max = stat_per_translation(tbs, code.temps);
+}
+jpi->host += tbs->code.out_len;
+jpi->guest += tbs->code.in_len;
+jpi->search_data += tbs->code.search_out_len;
+}
+
+/* dump JIT statistics using TBStats */
+void dump_jit_profile_info(GString *buf)
+{
+if (!tb_stats_collection_enabled()) {
+return;
+}
+
+struct jit_profile_info *jpi = g_new0(struct jit_profile_info, 1);
+
+qht_iter(&tb_ctx.tb_stats, collect_jit_profile_info, jpi);
+
+if (jpi->translations) {
+g_string_append_printf(buf, "translated TBs  %" PRId64 "\n",
+jpi->translations);
+g_string_append_printf(buf, "avg ops/TB  %0.1f max=%d\n",
+jpi->ops / (double) jpi->translations, jpi->ops_max);
+g_string_append_printf(buf, "deleted ops/TB  %0.2f\n",
+jpi->del_ops / (double) jpi->translations);
+g_string_append_printf(buf, "avg temps/TB%0.2f max=%d\n",
+jpi->temps / (double) jpi->translations, jpi->temps_max);
+g_string_append_printf(buf, "avg host code/TB%0.1f\n",
+jpi->host / (double) jpi->translations);
+g_string_append_printf(buf, "avg search data/TB  %0.1f\n",
+jpi->search_data / (double) jpi->translations);
+}
+g_free(jpi);
+}
+
 void init_tb_stats_htable(void)
 {
 if (!tb_ctx.tb_stats.map && tb_stats_collection_enabled()) {
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index 5963cd6fe4..f795385aac 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -267,7 +267,7 @@ void page_init(void)
  */
 static int setjmp_gen_code(CPUArchState *env, TranslationBlock *tb,
target_ulong pc, void *host_pc,
-   int *max_insns, int64_t *ti)
+   

[PATCH v15 10/10] docs: add tb-stats how to

2023-06-07 Thread Fei Wu
Signed-off-by: Fei Wu 
---
 docs/devel/tcg-tbstats.rst | 97 ++
 1 file changed, 97 insertions(+)
 create mode 100644 docs/devel/tcg-tbstats.rst

diff --git a/docs/devel/tcg-tbstats.rst b/docs/devel/tcg-tbstats.rst
new file mode 100644
index 00..dd611e41f3
--- /dev/null
+++ b/docs/devel/tcg-tbstats.rst
@@ -0,0 +1,97 @@
+============
+TBStatistics
+============
+
+What is TBStatistics
+====================
+
+TBStatistics (tb_stats) is a tool to gather various internal information of TCG
+during binary translation. This allows us to identify things such as the hottest
+TBs, the guest-to-host instruction translation ratio, the number of spills
+during register allocation, and more.
+
+
+How to use TBStatistics
+=======================
+
+1. HMP interface
+----------------
+
+TBStatistics provides an HMP interface; you can try the following examples
+after connecting to the monitor.
+
+* First check the help info::
+
+(qemu) help tb_stats
+tb_stats command [flag] -- Control tb statistics collection:tb_stats 
(start|stop|status) [all|jit|exec]
+
+(qemu) help info tb-list
+info tb-list [number sortedby] -- show a [number] translated blocks sorted 
by [sortedby]sortedby opts: hotness hg spills
+
+(qemu) help info tb
+info tb id [flag1,flag2,...] -- show information about one translated 
block by id.dump flags can be used to set dump code level: out_asm in_asm op
+
+* Enable TBStatistics::
+
+(qemu) tb_stats start all
+(qemu)
+
+* List the TBs of interest::
+
+(qemu) info tb-list 2
+TB id:0 | phys:0x79bca0 virt:0x8059bca0 flags:0x01024001 0 inv/1
+| exec:1464084/0 guest inst cov:0.15%
+| trans:1 ints: g:3 op:16 op_opt:15 spills:0
+| h/g (host bytes / guest insts): 64.00
+
+TB id:1 | phys:0x2adf0c virt:0x800adf0c flags:0x01024001 0 inv/1
+| exec:1033298/0 guest inst cov:0.28%
+| trans:1 ints: g:8 op:35 op_opt:33 spills:0
+| h/g (host bytes / guest insts): 86.00
+
+* Dive into the specific TB::
+
+(qemu) info tb 1
+--
+
+TB id:0 | phys:0x79bca0 virt:0x8059bca0 flags:0x01024001 7 inv/19
+| exec:2038349/0 guest inst cov:0.15%
+| trans:19 ints: g:3 op:16 op_opt:15 spills:0
+| h/g (host bytes / guest insts): 64.00
+
+--
+
+* Stop TBStatistics after investigation; this disables TBStatistics
completely::
+
+(qemu) tb_stats stop
+(qemu)
+
+* TBStatistics can of course be restarted for another round of
investigation::
+
+(qemu) tb_stats start all
+(qemu)
+
+
+2. Dump at exit
+---------------
+
+New command line options have been added to enable dumping TB information at
exit::
+
+-d tb_stats_{all,jit,exec}[:dump_num_at_exit]
+
+e.g. starting QEMU like this::
+
+-d tb_stats_all:2
+
+QEMU prints the following at exit::
+
+QEMU: Terminated
+TB id:1 | phys:0x61be02 virt:0x8041be02 flags:0x01024001 0 inv/1
+| exec:72739176/0 guest inst cov:20.22%
+| trans:1 ints: g:9 op:35 op_opt:33 spills:0
+| h/g (host bytes / guest insts): 51.57
+
+TB id:2 | phys:0x61bc66 virt:0x8041bc66 flags:0x01024001 0 inv/1
+| exec:25069507/0 guest inst cov:0.77%
+| trans:1 ints: g:1 op:15 op_opt:14 spills:0
+| h/g (host bytes / guest insts): 128.00
-- 
2.25.1




[PATCH v15 01/10] accel/tcg: remove CONFIG_PROFILER

2023-06-07 Thread Fei Wu
TBStats will be introduced to replace CONFIG_PROFILER entirely; as a first
step, remove all CONFIG_PROFILER-related code.

Signed-off-by: Vanderson M. do Rosario 
Signed-off-by: Alex Bennée 
Signed-off-by: Fei Wu 
Reviewed-by: Richard Henderson 
---
 accel/tcg/monitor.c   |  31 -
 accel/tcg/tcg-accel-ops.c |  10 --
 accel/tcg/translate-all.c |  33 --
 hmp-commands-info.hx  |  15 ---
 include/qemu/timer.h  |   9 --
 include/tcg/tcg.h |  26 -
 meson.build   |   2 -
 meson_options.txt |   2 -
 qapi/machine.json |  18 ---
 scripts/meson-buildoptions.sh |   3 -
 softmmu/runstate.c|   9 --
 tcg/tcg.c | 214 --
 tests/qtest/qmp-cmd-test.c|   3 -
 13 files changed, 375 deletions(-)

diff --git a/accel/tcg/monitor.c b/accel/tcg/monitor.c
index 92fce580f1..39d8260428 100644
--- a/accel/tcg/monitor.c
+++ b/accel/tcg/monitor.c
@@ -80,37 +80,6 @@ HumanReadableText *qmp_x_query_opcount(Error **errp)
 return human_readable_text_from_str(buf);
 }
 
-#ifdef CONFIG_PROFILER
-
-int64_t dev_time;
-
-HumanReadableText *qmp_x_query_profile(Error **errp)
-{
-g_autoptr(GString) buf = g_string_new("");
-static int64_t last_cpu_exec_time;
-int64_t cpu_exec_time;
-int64_t delta;
-
-cpu_exec_time = tcg_cpu_exec_time();
-delta = cpu_exec_time - last_cpu_exec_time;
-
-g_string_append_printf(buf, "async time  %" PRId64 " (%0.3f)\n",
-   dev_time, dev_time / 
(double)NANOSECONDS_PER_SECOND);
-g_string_append_printf(buf, "qemu time   %" PRId64 " (%0.3f)\n",
-   delta, delta / (double)NANOSECONDS_PER_SECOND);
-last_cpu_exec_time = cpu_exec_time;
-dev_time = 0;
-
-return human_readable_text_from_str(buf);
-}
-#else
-HumanReadableText *qmp_x_query_profile(Error **errp)
-{
-error_setg(errp, "Internal profiler not compiled");
-return NULL;
-}
-#endif
-
 static void hmp_tcg_register(void)
 {
 monitor_register_hmp_info_hrt("jit", qmp_x_query_jit);
diff --git a/accel/tcg/tcg-accel-ops.c b/accel/tcg/tcg-accel-ops.c
index 58c8e64096..3973591508 100644
--- a/accel/tcg/tcg-accel-ops.c
+++ b/accel/tcg/tcg-accel-ops.c
@@ -70,20 +70,10 @@ void tcg_cpus_destroy(CPUState *cpu)
 int tcg_cpus_exec(CPUState *cpu)
 {
 int ret;
-#ifdef CONFIG_PROFILER
-int64_t ti;
-#endif
 assert(tcg_enabled());
-#ifdef CONFIG_PROFILER
-ti = profile_getclock();
-#endif
 cpu_exec_start(cpu);
 ret = cpu_exec(cpu);
 cpu_exec_end(cpu);
-#ifdef CONFIG_PROFILER
-qatomic_set(&tcg_ctx->prof.cpu_exec_time,
-tcg_ctx->prof.cpu_exec_time + profile_getclock() - ti);
-#endif
 return ret;
 }
 
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index c87648b99e..4e035e0f79 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -203,10 +203,6 @@ void cpu_restore_state_from_tb(CPUState *cpu, 
TranslationBlock *tb,
uintptr_t host_pc)
 {
 uint64_t data[TARGET_INSN_START_WORDS];
-#ifdef CONFIG_PROFILER
-TCGProfile *prof = &tcg_ctx->prof;
-int64_t ti = profile_getclock();
-#endif
 int insns_left = cpu_unwind_data_from_tb(tb, host_pc, data);
 
 if (insns_left < 0) {
@@ -223,12 +219,6 @@ void cpu_restore_state_from_tb(CPUState *cpu, 
TranslationBlock *tb,
 }
 
 cpu->cc->tcg_ops->restore_state_to_opc(cpu, tb, data);
-
-#ifdef CONFIG_PROFILER
-qatomic_set(&prof->restore_time,
-prof->restore_time + profile_getclock() - ti);
-qatomic_set(&prof->restore_count, prof->restore_count + 1);
-#endif
 }
 
 bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc)
@@ -291,13 +281,6 @@ static int setjmp_gen_code(CPUArchState *env, 
TranslationBlock *tb,
 tcg_ctx->cpu = NULL;
 *max_insns = tb->icount;
 
-#ifdef CONFIG_PROFILER
-qatomic_set(&tcg_ctx->prof.tb_count, tcg_ctx->prof.tb_count + 1);
-qatomic_set(&tcg_ctx->prof.interm_time,
-tcg_ctx->prof.interm_time + profile_getclock() - *ti);
-*ti = profile_getclock();
-#endif
-
 return tcg_gen_code(tcg_ctx, tb, pc);
 }
 
@@ -311,9 +294,6 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
 tb_page_addr_t phys_pc;
 tcg_insn_unit *gen_code_buf;
 int gen_code_size, search_size, max_insns;
-#ifdef CONFIG_PROFILER
-TCGProfile *prof = &tcg_ctx->prof;
-#endif
 int64_t ti;
 void *host_pc;
 
@@ -365,12 +345,6 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
 
  tb_overflow:
 
-#ifdef CONFIG_PROFILER
-/* includes aborted translations because of exceptions */
-qatomic_set(&prof->tb_count1, prof->tb_count1 + 1);
-ti = profile_getclock();
-#endif
-
 trace_translate_block(tb, pc, tb->tc.ptr);
 
 gen_code_s

[PATCH v15 08/10] debug: add -d tb_stats to control TBStatistics

2023-06-07 Thread Fei Wu
Capture TBS at startup instead of an explicit 'tb_stats start' command.

Signed-off-by: Vanderson M. do Rosario 
Signed-off-by: Alex Bennée 
Signed-off-by: Fei Wu 
---
 include/exec/tb-stats-flags.h |  1 +
 include/qemu/log.h|  1 +
 stubs/meson.build |  1 +
 stubs/tb-stats.c  | 32 
 util/log.c| 18 ++
 5 files changed, 53 insertions(+)
 create mode 100644 stubs/tb-stats.c

diff --git a/include/exec/tb-stats-flags.h b/include/exec/tb-stats-flags.h
index d8b844be99..936bd35707 100644
--- a/include/exec/tb-stats-flags.h
+++ b/include/exec/tb-stats-flags.h
@@ -14,6 +14,7 @@
 #define TB_NONE_STATS (0)  /* no stats */
 #define TB_EXEC_STATS (1 << 0)
 #define TB_JIT_STATS  (1 << 1)
+#define TB_ALL_STATS  (TB_EXEC_STATS | TB_JIT_STATS)
 
 /* TBStatistic collection controls */
 void enable_collect_tb_stats(void);
diff --git a/include/qemu/log.h b/include/qemu/log.h
index c5643d8dd5..6f3b8091cd 100644
--- a/include/qemu/log.h
+++ b/include/qemu/log.h
@@ -35,6 +35,7 @@ bool qemu_log_separate(void);
 /* LOG_STRACE is used for user-mode strace logging. */
 #define LOG_STRACE (1 << 19)
 #define LOG_PER_THREAD (1 << 20)
+#define CPU_LOG_TB_STATS   (1 << 21)
 
 /* Lock/unlock output. */
 
diff --git a/stubs/meson.build b/stubs/meson.build
index a56645e2f7..e926649d40 100644
--- a/stubs/meson.build
+++ b/stubs/meson.build
@@ -65,3 +65,4 @@ else
 endif
 stub_ss.add(files('semihost-all.c'))
 stub_ss.add(when: 'CONFIG_VFIO_USER_SERVER', if_false: 
files('vfio-user-obj.c'))
+stub_ss.add(files('tb-stats.c'))
diff --git a/stubs/tb-stats.c b/stubs/tb-stats.c
new file mode 100644
index 00..38ee12313e
--- /dev/null
+++ b/stubs/tb-stats.c
@@ -0,0 +1,32 @@
+/*
+ * TB Stats Stubs
+ *
+ * Copyright (c) 2019
+ * Written by Alex Bennée 
+ *
+ * This code is licensed under the GNU GPL v2, or later.
+ */
+
+
+#include "qemu/osdep.h"
+#include "exec/tb-stats-flags.h"
+
+void enable_collect_tb_stats(void)
+{
+return;
+}
+
+void disable_collect_tb_stats(void)
+{
+return;
+}
+
+bool tb_stats_collection_enabled(void)
+{
+return false;
+}
+
+void set_tbstats_flag(uint32_t flag)
+{
+return;
+}
diff --git a/util/log.c b/util/log.c
index 53b4f6c58e..419f02c380 100644
--- a/util/log.c
+++ b/util/log.c
@@ -27,6 +27,7 @@
 #include "qemu/thread.h"
 #include "qemu/lockable.h"
 #include "qemu/rcu.h"
+#include "exec/tb-stats-flags.h"
 #ifdef CONFIG_LINUX
 #include 
 #endif
@@ -495,6 +496,8 @@ const QEMULogItem qemu_log_items[] = {
   "log every user-mode syscall, its input, and its result" },
 { LOG_PER_THREAD, "tid",
   "open a separate log file per thread; filename must contain '%d'" },
+{ CPU_LOG_TB_STATS, "tb_stats_{all,jit,exec}",
+  "enable collection of TBs statistics at startup" },
 { 0, NULL, NULL },
 };
 
@@ -516,6 +519,21 @@ int qemu_str_to_log_mask(const char *str)
 trace_enable_events((*tmp) + 6);
 mask |= LOG_TRACE;
 #endif
+} else if (g_str_has_prefix(*tmp, "tb_stats_")) {
+char *p = *tmp + 9;
+uint32_t flag = TB_NONE_STATS;
+if (g_str_has_prefix(p, "all")) {
+flag = TB_ALL_STATS;
+} else if (g_str_has_prefix(p, "jit")) {
+flag = TB_JIT_STATS;
+} else if (g_str_has_prefix(p, "exec")) {
+flag = TB_EXEC_STATS;
+}
+if (flag != TB_NONE_STATS) {
+mask |= CPU_LOG_TB_STATS;
+set_tbstats_flag(flag);
+enable_collect_tb_stats();
+}
 } else {
 for (item = qemu_log_items; item->mask != 0; item++) {
 if (g_str_equal(*tmp, item->name)) {
-- 
2.25.1




[PATCH v15 02/10] accel/tcg: introduce TBStatistics structure

2023-06-07 Thread Fei Wu
From: "Vanderson M. do Rosario" 

To store statistics for each TB, we created a TBStatistics structure
which is linked with the TBs. TBStatistics can stay alive after
tb_flush and be relinked to a regenerated TB. So the statistics can
be accumulated even through flushes.

The goal is to have all present and future qemu/tcg statistics and
meta-data stored in this new structure.

Reviewed-by: Alex Bennée 
Signed-off-by: Vanderson M. do Rosario 
Message-Id: <20190829173437.5926-2-vanderson...@gmail.com>
[AJB: fix git author, review comments]
Signed-off-by: Alex Bennée 
Signed-off-by: Fei Wu 
---
 MAINTAINERS   |  1 +
 accel/tcg/meson.build |  1 +
 accel/tcg/tb-context.h|  1 +
 accel/tcg/tb-hash.h   |  7 +
 accel/tcg/tb-maint.c  | 19 
 accel/tcg/tb-stats.c  | 46 
 accel/tcg/translate-all.c | 44 +++
 include/exec/exec-all.h   |  3 ++
 include/exec/tb-stats-flags.h | 19 
 include/exec/tb-stats.h   | 56 +++
 10 files changed, 197 insertions(+)
 create mode 100644 accel/tcg/tb-stats.c
 create mode 100644 include/exec/tb-stats-flags.h
 create mode 100644 include/exec/tb-stats.h

diff --git a/MAINTAINERS b/MAINTAINERS
index 1c93ab0ee5..2077889efd 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -153,6 +153,7 @@ F: include/exec/cpu*.h
 F: include/exec/exec-all.h
 F: include/exec/tb-flush.h
 F: include/exec/target_long.h
+F: include/exec/tb-stats*.h
 F: include/exec/helper*.h
 F: include/sysemu/cpus.h
 F: include/sysemu/tcg.h
diff --git a/accel/tcg/meson.build b/accel/tcg/meson.build
index aeb20a6ef0..9263bdde11 100644
--- a/accel/tcg/meson.build
+++ b/accel/tcg/meson.build
@@ -4,6 +4,7 @@ tcg_ss.add(files(
   'cpu-exec-common.c',
   'cpu-exec.c',
   'tb-maint.c',
+  'tb-stats.c',
   'tcg-runtime-gvec.c',
   'tcg-runtime.c',
   'translate-all.c',
diff --git a/accel/tcg/tb-context.h b/accel/tcg/tb-context.h
index cac62d9749..d7910d586b 100644
--- a/accel/tcg/tb-context.h
+++ b/accel/tcg/tb-context.h
@@ -35,6 +35,7 @@ struct TBContext {
 /* statistics */
 unsigned tb_flush_count;
 unsigned tb_phys_invalidate_count;
+struct qht tb_stats;
 };
 
 extern TBContext tb_ctx;
diff --git a/accel/tcg/tb-hash.h b/accel/tcg/tb-hash.h
index 83dc610e4c..87d657a1c6 100644
--- a/accel/tcg/tb-hash.h
+++ b/accel/tcg/tb-hash.h
@@ -67,4 +67,11 @@ uint32_t tb_hash_func(tb_page_addr_t phys_pc, target_ulong 
pc, uint32_t flags,
 return qemu_xxhash7(phys_pc, pc, flags, cf_mask, trace_vcpu_dstate);
 }
 
+static inline
+uint32_t tb_stats_hash_func(tb_page_addr_t phys_pc, target_ulong pc,
+uint32_t flags)
+{
+return qemu_xxhash5(phys_pc, pc, flags);
+}
+
 #endif
diff --git a/accel/tcg/tb-maint.c b/accel/tcg/tb-maint.c
index 991746f80f..0980fca358 100644
--- a/accel/tcg/tb-maint.c
+++ b/accel/tcg/tb-maint.c
@@ -24,6 +24,7 @@
 #include "exec/log.h"
 #include "exec/exec-all.h"
 #include "exec/tb-flush.h"
+#include "exec/tb-stats.h"
 #include "exec/translate-all.h"
 #include "sysemu/tcg.h"
 #include "tcg/tcg.h"
@@ -41,6 +42,23 @@
 #define TB_FOR_EACH_JMP(head_tb, tb, n) \
 TB_FOR_EACH_TAGGED((head_tb)->jmp_list_head, tb, n, jmp_list_next)
 
+/*
+ * This is more or less the same comparison as tb_cmp(), but the
+ * data persists over tb_flush. We also aggregate the various
+ * variations of cflags under one record and ignore the details of
+ * page overlap (although we can count it).
+ */
+bool tb_stats_cmp(const void *ap, const void *bp)
+{
+const TBStatistics *a = ap;
+const TBStatistics *b = bp;
+
+return a->phys_pc == b->phys_pc &&
+a->pc == b->pc &&
+a->cs_base == b->cs_base &&
+a->flags == b->flags;
+}
+
 static bool tb_cmp(const void *ap, const void *bp)
 {
 const TranslationBlock *a = ap;
@@ -60,6 +78,7 @@ void tb_htable_init(void)
 unsigned int mode = QHT_MODE_AUTO_RESIZE;
 
 qht_init(&tb_ctx.htable, tb_cmp, CODE_GEN_HTABLE_SIZE, mode);
+init_tb_stats_htable();
 }
 
 typedef struct PageDesc PageDesc;
diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c
new file mode 100644
index 00..2b6be943ef
--- /dev/null
+++ b/accel/tcg/tb-stats.c
@@ -0,0 +1,46 @@
+/*
+ * QEMU System Emulator, Code Quality Monitor System
+ *
+ * Copyright (c) 2019 Vanderson M. do Rosario 
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+
+#include "disas/disas.h"
+
+#include "exec/tb-stats.h"
+#include "tb-context.h"
+
+/* TBStatistic collection controls */
+enum TBStatsStatus {
+TB_STATS_STOPPED = 0,
+TB_STATS_RUNNING
+};
+
+static enum TBStatsStatus tcg_

[PATCH v15 09/10] tb-stats: dump hot TBs at the end of the execution

2023-06-07 Thread Fei Wu
Dump the hottest TBs at exit if -d tb_stats_{all,jit,exec}[:dump_num_at_exit] is given.
---
 accel/tcg/tb-stats.c  | 21 +
 include/exec/tb-stats-dump.h  | 21 +
 include/exec/tb-stats-flags.h |  1 +
 linux-user/exit.c |  2 ++
 softmmu/runstate.c|  2 ++
 stubs/tb-stats.c  |  4 
 util/log.c| 12 ++--
 7 files changed, 61 insertions(+), 2 deletions(-)
 create mode 100644 include/exec/tb-stats-dump.h

diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c
index 53a4f448dc..a5fc3c1c74 100644
--- a/accel/tcg/tb-stats.c
+++ b/accel/tcg/tb-stats.c
@@ -17,6 +17,7 @@
 #include "qemu/log.h"
 
 #include "exec/tb-stats.h"
+#include "exec/tb-stats-dump.h"
 #include "tb-context.h"
 
 #include "internal.h"
@@ -29,6 +30,7 @@ enum TBStatsStatus {
 
 static enum TBStatsStatus tcg_collect_tb_stats;
 static uint32_t tbstats_flag;
+static int max_dump_tbs;
 
 static GPtrArray *last_search;
 
@@ -299,6 +301,20 @@ void dump_tblist_info(GString *buf, int total, int sort_by)
 }
 }
 
+/*
+ * Dump the final stats
+ */
+void tb_stats_dump(void)
+{
+if (!tb_stats_collection_enabled()) {
+return;
+}
+
+g_autoptr(GString) buf = g_string_new("");
+dump_tblist_info(buf, max_dump_tbs, SORT_BY_HOTNESS);
+qemu_printf("%s", buf->str);
+}
+
 void enable_collect_tb_stats(void)
 {
 tcg_collect_tb_stats = TB_STATS_RUNNING;
@@ -342,3 +358,8 @@ bool tbs_stats_enabled(struct TBStatistics *tbs, uint32_t 
flag)
 return tb_stats_collection_enabled() &&
(tbstats_flag & flag);
 }
+
+void set_tbstats_max_tbs(int max)
+{
+max_dump_tbs = max;
+}
diff --git a/include/exec/tb-stats-dump.h b/include/exec/tb-stats-dump.h
new file mode 100644
index 00..197c6148e9
--- /dev/null
+++ b/include/exec/tb-stats-dump.h
@@ -0,0 +1,21 @@
+/*
+ * TB Stats common dump functions across sysemu/linux-user
+ *
+ * Copyright (c) 2019 Linaro
+ *
+ * SPDX-License-Identifier: GPL-3.0-or-later
+ */
+
+#ifndef _TB_STATS_DUMP_H_
+#define _TB_STATS_DUMP_H_
+
+/**
+ * tb_stats_dump: dump final tb_stats at end of execution
+ */
+#ifdef CONFIG_TCG
+void tb_stats_dump(void);
+#else
+static inline void tb_stats_dump(void) { /* do nothing */ };
+#endif
+
+#endif /* _TB_STATS_DUMP_H_ */
diff --git a/include/exec/tb-stats-flags.h b/include/exec/tb-stats-flags.h
index 936bd35707..52d0429eeb 100644
--- a/include/exec/tb-stats-flags.h
+++ b/include/exec/tb-stats-flags.h
@@ -24,5 +24,6 @@ bool tb_stats_collection_disabled(void);
 
 uint32_t get_tbstats_flag(void);
 void set_tbstats_flag(uint32_t flag);
+void set_tbstats_max_tbs(int max);
 
 #endif
diff --git a/linux-user/exit.c b/linux-user/exit.c
index 3017d28a3c..4fd23bcf60 100644
--- a/linux-user/exit.c
+++ b/linux-user/exit.c
@@ -25,6 +25,7 @@
 #ifdef CONFIG_GPROF
 #include 
 #endif
+#include "exec/tb-stats-dump.h"
 
 #ifdef CONFIG_GCOV
 extern void __gcov_dump(void);
@@ -41,4 +42,5 @@ void preexit_cleanup(CPUArchState *env, int code)
 gdb_exit(code);
 qemu_plugin_user_exit();
 perf_exit();
+tb_stats_dump();
 }
diff --git a/softmmu/runstate.c b/softmmu/runstate.c
index bd50062ed0..156156815b 100644
--- a/softmmu/runstate.c
+++ b/softmmu/runstate.c
@@ -30,6 +30,7 @@
 #include "crypto/cipher.h"
 #include "crypto/init.h"
 #include "exec/cpu-common.h"
+#include "exec/tb-stats-dump.h"
 #include "gdbstub/syscalls.h"
 #include "hw/boards.h"
 #include "migration/misc.h"
@@ -818,6 +819,7 @@ void qemu_cleanup(void)
 vm_shutdown();
 replay_finish();
 
+tb_stats_dump();
 job_cancel_sync_all();
 bdrv_close_all();
 
diff --git a/stubs/tb-stats.c b/stubs/tb-stats.c
index 38ee12313e..78ae1aa217 100644
--- a/stubs/tb-stats.c
+++ b/stubs/tb-stats.c
@@ -30,3 +30,7 @@ void set_tbstats_flag(uint32_t flag)
 {
 return;
 }
+
+void set_tbstats_max_tbs(int max) {
+return;
+}
diff --git a/util/log.c b/util/log.c
index 419f02c380..ae5398fb69 100644
--- a/util/log.c
+++ b/util/log.c
@@ -496,8 +496,9 @@ const QEMULogItem qemu_log_items[] = {
   "log every user-mode syscall, its input, and its result" },
 { LOG_PER_THREAD, "tid",
   "open a separate log file per thread; filename must contain '%d'" },
-{ CPU_LOG_TB_STATS, "tb_stats_{all,jit,exec}",
-  "enable collection of TBs statistics at startup" },
+{ CPU_LOG_TB_STATS, "tb_stats_{all,jit,exec}[:dump_num_at_exit]",
+  "enable collection of TBs statistics at startup "
+  "and dump at exit until given a limit" },
 { 0, NULL, NULL },
 };
 
@@ -524,16 +525,23 @@ int qemu_str_to_log_mask(const char *str)
 uint32_t flag = TB_NONE_STATS;
 if (g_str_has_prefix(p, "all")) {
 flag = TB_ALL_STATS;
+p += 3;
 } else if (g_str_has_prefix(p, "jit")) {
 flag = TB_JIT_STATS;
+p += 3;
 } else if (g_str_has_prefix(p, "exec")) {
 flag = TB_EXEC_STATS;
+   

[PATCH v15 07/10] tb-stats: Adding info [tb-list|tb] commands to HMP (WIP)

2023-06-07 Thread Fei Wu
From: "Vanderson M. do Rosario" 

These commands allow the exploration of TBs generated by TCG: find out
which ones are hotter or have more guest/host instructions, and
examine their guest, host and IR code.

The goal of this command is to allow the dynamic exploration of TCG
behavior and code quality. Therefore, for now, a corresponding QMP
command is not worthwhile.

[AJB: WIP - we still can't be safely sure a translation will succeed]

Example of output:

TB id:0 | phys:0x34d54 virt:0x00034d54 flags:0xf0
| exec:4828932/0 guest inst cov:16.38%
| trans:1 ints: g:3 op:82 op_opt:34 spills:3
| h/g (host bytes / guest insts): 90.64

TB id:1 | phys:0x34d0d virt:0x00034d0d flags:0xf0
| exec:4825842/0 guest inst cov:21.82%
| trans:1 ints: g:4 op:80 op_opt:38 spills:2
| h/g (host bytes / guest insts): 84.00



0x00034d0d:  89 de            movl     %ebx, %esi
0x00034d0f:  26 8b 0e         movl     %es:(%esi), %ecx
0x00034d12:  26 f6 46 08 80   testb    $0x80, %es:8(%esi)
0x00034d17:  75 3b            jne      0x34d54

--

Acked-by: Dr. David Alan Gilbert 
Signed-off-by: Vanderson M. do Rosario 
Message-Id: <20190829173437.5926-10-vanderson...@gmail.com>
[AJB: fix authorship, dropped coverset]
Signed-off-by: Alex Bennée 
Signed-off-by: Fei Wu 
---
 accel/tcg/monitor.c |  69 
 accel/tcg/tb-stats.c| 176 
 disas/disas.c   |   2 +
 hmp-commands-info.hx|  16 
 include/exec/tb-stats.h |  25 ++
 include/monitor/hmp.h   |   2 +
 6 files changed, 290 insertions(+)

diff --git a/accel/tcg/monitor.c b/accel/tcg/monitor.c
index 094150e4af..d43c62b248 100644
--- a/accel/tcg/monitor.c
+++ b/accel/tcg/monitor.c
@@ -8,17 +8,21 @@
 
 #include "qemu/osdep.h"
 #include "qemu/accel.h"
+#include "qemu/log.h"
 #include "qapi/error.h"
 #include "qapi/type-helpers.h"
 #include "qapi/qapi-commands-machine.h"
 #include "qapi/qmp/qdict.h"
 #include "monitor/monitor.h"
 #include "monitor/hmp.h"
+#include "monitor/hmp-target.h"
 #include "sysemu/cpus.h"
 #include "sysemu/cpu-timers.h"
 #include "sysemu/tcg.h"
 #include "exec/tb-stats.h"
 #include "exec/tb-flush.h"
+#include "disas/disas.h"
+#include "tb-context.h"
 #include "internal.h"
 #include "tb-context.h"
 
@@ -186,6 +190,71 @@ void hmp_tbstats(Monitor *mon, const QDict *qdict)
 
 }
 
+void hmp_info_tblist(Monitor *mon, const QDict *qdict)
+{
+int number_int;
+const char *sortedby_str = NULL;
+g_autoptr(GString) buf = g_string_new("");
+
+if (!tcg_enabled()) {
+monitor_printf(mon, "Only available with accel=tcg\n");
+return;
+}
+if (!tb_ctx.tb_stats.map) {
+monitor_printf(mon, "no TB information recorded\n");
+return;
+}
+
+number_int = qdict_get_try_int(qdict, "number", 10);
+sortedby_str = qdict_get_try_str(qdict, "sortedby");
+
+int sortedby = SORT_BY_HOTNESS;
+if (sortedby_str == NULL || strcmp(sortedby_str, "hotness") == 0) {
+sortedby = SORT_BY_HOTNESS;
+} else if (strcmp(sortedby_str, "hg") == 0) {
+sortedby = SORT_BY_HG;
+} else if (strcmp(sortedby_str, "spills") == 0) {
+sortedby = SORT_BY_SPILLS;
+} else {
+monitor_printf(mon, "valid sort options are: hotness hg spills\n");
+return;
+}
+
+dump_tblist_info(buf, number_int, sortedby);
+monitor_printf(mon, "%s", buf->str);
+}
+
+void hmp_info_tb(Monitor *mon, const QDict *qdict)
+{
+const int id = qdict_get_int(qdict, "id");
+g_autoptr(GString) buf = g_string_new("");
+
+if (!tcg_enabled()) {
+monitor_printf(mon, "Only available with accel=tcg\n");
+return;
+}
+
+TBStatistics *tbs = get_tbstats_by_id(id);
+if (tbs == NULL) {
+monitor_printf(mon, "TB %d information is not recorded\n", id);
+return;
+}
+
+monitor_printf(mon, "\n--\n\n");
+
+int valid_tb_num = dump_tb_info(buf, tbs, id);
+monitor_printf(mon, "%s", buf->str);
+
+if (valid_tb_num > 0) {
+unsigned num_inst = tbs->code.num_guest_inst / tbs->translations.total;
+
+monitor_printf(mon, "\nn\n");
+// FIXME: cannot disas
+monitor_disas(mon, mon_get_cpu(mon), tbs->phys_pc, num_inst, true);
+monitor_printf(mon, "\n--\n\n");
+}
+}
+
 static void hmp_tcg_register(void)
 {
 monitor_register_hmp_info_hrt("

[PATCH v15 00/10] TCG code quality tracking

2023-06-07 Thread Fei Wu
v15
---
This is a large change:
* remove all time-related code, including the 'info profile' command
* remove the per-TB flag, use global flag instead
* remove tb_stats pause/filter, but add status
* remove qemu_log changes, and use monitor_printf
* use array instead of list for sorting
* remove async_safe_run_on_cpu for cmd info tb-list & tb
* use monitor_disas instead of regenerating the TB, but this **doesn't work yet**
* other cleanups


Alex Bennée (1):
  tb-stats: reset the tracked TBs on a tb_flush

Fei Wu (5):
  accel/tcg: remove CONFIG_PROFILER
  accel/tcg: add jit stats to TBStatistics
  debug: add -d tb_stats to control TBStatistics
  tb-stats: dump hot TBs at the end of the execution
  docs: add tb-stats how to

Vanderson M. do Rosario (4):
  accel/tcg: introduce TBStatistics structure
  accel: collecting TB execution count
  monitor: adding tb_stats hmp command
  tb-stats: Adding info [tb-list|tb] commands to HMP (WIP)

 MAINTAINERS   |   1 +
 accel/tcg/cpu-exec.c  |   6 +
 accel/tcg/meson.build |   1 +
 accel/tcg/monitor.c   | 184 +++--
 accel/tcg/tb-context.h|   1 +
 accel/tcg/tb-hash.h   |   7 +
 accel/tcg/tb-maint.c  |  20 ++
 accel/tcg/tb-stats.c  | 365 ++
 accel/tcg/tcg-accel-ops.c |  10 -
 accel/tcg/tcg-runtime.c   |   1 +
 accel/tcg/translate-all.c | 110 ++
 accel/tcg/translator.c|  30 +++
 disas/disas.c |   2 +
 docs/devel/tcg-tbstats.rst|  97 +
 hmp-commands-info.hx  |  31 +--
 hmp-commands.hx   |  16 ++
 include/exec/exec-all.h   |   3 +
 include/exec/gen-icount.h |   1 +
 include/exec/tb-stats-dump.h  |  21 ++
 include/exec/tb-stats-flags.h |  29 +++
 include/exec/tb-stats.h   | 130 
 include/monitor/hmp.h |   3 +
 include/qemu/log.h|   1 +
 include/qemu/timer.h  |   9 -
 include/tcg/tcg.h |  26 +--
 linux-user/exit.c |   2 +
 meson.build   |   2 -
 meson_options.txt |   2 -
 qapi/machine.json |  18 --
 scripts/meson-buildoptions.sh |   3 -
 softmmu/runstate.c|  11 +-
 stubs/meson.build |   1 +
 stubs/tb-stats.c  |  36 
 tcg/tcg.c | 237 +++---
 tests/qtest/qmp-cmd-test.c|   3 -
 util/log.c|  26 +++
 36 files changed, 1093 insertions(+), 353 deletions(-)
 create mode 100644 accel/tcg/tb-stats.c
 create mode 100644 docs/devel/tcg-tbstats.rst
 create mode 100644 include/exec/tb-stats-dump.h
 create mode 100644 include/exec/tb-stats-flags.h
 create mode 100644 include/exec/tb-stats.h
 create mode 100644 stubs/tb-stats.c

-- 
2.25.1




[PATCH v15 03/10] accel: collecting TB execution count

2023-06-07 Thread Fei Wu
From: "Vanderson M. do Rosario" 

If a TB has a TBS (TBStatistics) with the TB_EXEC_STATS
enabled, then we instrument the start code of this TB
to atomically count the number of times it is executed.
We count both the number of "normal" executions and atomic
executions of a TB.

The execution count of the TB is stored in its respective
TBS.

All TBStatistics are created by default with the flags from
default_tbstats_flag.

[Richard Henderson created the inline gen_tb_exec_count]

Signed-off-by: Vanderson M. do Rosario 
Message-Id: <20190829173437.5926-3-vanderson...@gmail.com>
[AJB: Fix author]
Signed-off-by: Alex Bennée 
Signed-off-by: Fei Wu 
---
 accel/tcg/cpu-exec.c  |  6 ++
 accel/tcg/tb-stats.c  | 18 ++
 accel/tcg/tcg-runtime.c   |  1 +
 accel/tcg/translate-all.c |  6 --
 accel/tcg/translator.c| 25 +
 include/exec/gen-icount.h |  1 +
 include/exec/tb-stats-flags.h |  6 ++
 include/exec/tb-stats.h   |  8 
 8 files changed, 69 insertions(+), 2 deletions(-)

diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
index 0e741960da..c0d8f26237 100644
--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -25,6 +25,7 @@
 #include "trace.h"
 #include "disas/disas.h"
 #include "exec/exec-all.h"
+#include "exec/tb-stats.h"
 #include "tcg/tcg.h"
 #include "qemu/atomic.h"
 #include "qemu/rcu.h"
@@ -562,7 +563,12 @@ void cpu_exec_step_atomic(CPUState *cpu)
 mmap_unlock();
 }
 
+if (tb_stats_enabled(tb, TB_EXEC_STATS)) {
+tb->tb_stats->executions.atomic++;
+}
+
 cpu_exec_enter(cpu);
+
 /* execute the generated code */
 trace_exec_tb(tb, pc);
 cpu_tb_exec(cpu, tb, &tb_exit);
diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c
index 2b6be943ef..2b7321b548 100644
--- a/accel/tcg/tb-stats.c
+++ b/accel/tcg/tb-stats.c
@@ -20,6 +20,7 @@ enum TBStatsStatus {
 };
 
 static enum TBStatsStatus tcg_collect_tb_stats;
+static uint32_t tbstats_flag;
 
 void init_tb_stats_htable(void)
 {
@@ -44,3 +45,20 @@ bool tb_stats_collection_enabled(void)
 {
 return tcg_collect_tb_stats == TB_STATS_RUNNING;
 }
+
+uint32_t get_tbstats_flag(void)
+{
+return tbstats_flag;
+}
+
+void set_tbstats_flag(uint32_t flag)
+{
+tbstats_flag = flag;
+}
+
+bool tb_stats_enabled(TranslationBlock *tb, uint32_t flag)
+{
+return tb_stats_collection_enabled() &&
+   tb->tb_stats &&
+   (tbstats_flag & flag);
+}
diff --git a/accel/tcg/tcg-runtime.c b/accel/tcg/tcg-runtime.c
index e4e030043f..11c6192064 100644
--- a/accel/tcg/tcg-runtime.c
+++ b/accel/tcg/tcg-runtime.c
@@ -27,6 +27,7 @@
 #include "exec/helper-proto.h"
 #include "exec/cpu_ldst.h"
 #include "exec/exec-all.h"
+#include "exec/tb-stats.h"
 #include "disas/disas.h"
 #include "exec/log.h"
 #include "tcg/tcg.h"
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index 4fc41a63b5..5963cd6fe4 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -382,9 +382,11 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
 /*
  * We want to fetch the stats structure before we start code
  * generation so we can count interesting things about this
- * generation.
+ * generation. If dfilter is in effect we will only collect stats
+ * for the specified range.
  */
-if (tb_stats_collection_enabled()) {
+if (tb_stats_collection_enabled() &&
+qemu_log_in_addr_range(tb->pc)) {
 tb->tb_stats = tb_get_stats(phys_pc, cflags & CF_PCREL ? 0 : pc,
 cs_base, flags);
 } else {
diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c
index 6120ef2a92..80ffbfb455 100644
--- a/accel/tcg/translator.c
+++ b/accel/tcg/translator.c
@@ -11,6 +11,7 @@
 #include "qemu/error-report.h"
 #include "tcg/tcg.h"
 #include "tcg/tcg-op.h"
+#include "tcg/tcg-temp-internal.h"
 #include "exec/exec-all.h"
 #include "exec/gen-icount.h"
 #include "exec/log.h"
@@ -18,6 +19,29 @@
 #include "exec/plugin-gen.h"
 #include "exec/replay-core.h"
 
+static void gen_tb_exec_count(TranslationBlock *tb)
+{
+if (tb_stats_enabled(tb, TB_EXEC_STATS)) {
+TCGv_ptr ptr = tcg_temp_ebb_new_ptr();
+
+tcg_gen_movi_ptr(ptr, (intptr_t)&tb->tb_stats->executions.normal);
+if (sizeof(tb->tb_stats->executions.normal) == 4) {
+TCGv_i32 t = tcg_temp_ebb_new_i32();
+tcg_gen_ld_i32(t, ptr, 0);
+tcg_gen_addi_i32(t, t, 1);
+tcg_gen_st_i32(t, ptr, 0);
+tcg_temp_free_i32(t);
+} else {
+TCGv_i64 t = tcg_temp_ebb_new_i64()

[PATCH v14 05/10] debug: add -d tb_stats to control TBStatistics collection:

2023-05-30 Thread Fei Wu
From: "Vanderson M. do Rosario" 

 -d tb_stats[[,level=(+all+jit+exec+time)][,dump_limit=<N>]]

"dump_limit" is used to limit the number of dumped TBStats in
linux-user mode.

[all+jit+exec+time] controls the profiling level used
by the TBStats. It can be used as follows:

-d tb_stats
-d tb_stats,level=jit+time
-d tb_stats,dump_limit=15
...

Signed-off-by: Vanderson M. do Rosario 
Message-Id: <20190829173437.5926-7-vanderson...@gmail.com>
[AJB: fix authorship, reword title]
Signed-off-by: Alex Bennée 
Signed-off-by: Fei Wu 
---
 accel/tcg/tb-stats.c  |  5 +
 include/exec/gen-icount.h |  1 +
 include/exec/tb-stats-flags.h |  3 +++
 include/exec/tb-stats.h   |  2 ++
 include/qemu/log.h|  1 +
 stubs/meson.build |  1 +
 stubs/tb-stats.c  | 27 +
 util/log.c| 37 +++
 8 files changed, 77 insertions(+)
 create mode 100644 stubs/tb-stats.c

diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c
index 78a3104c7f..68ac7d3f73 100644
--- a/accel/tcg/tb-stats.c
+++ b/accel/tcg/tb-stats.c
@@ -208,3 +208,8 @@ uint32_t get_default_tbstats_flag(void)
 {
 return default_tbstats_flag;
 }
+
+void set_default_tbstats_flag(uint32_t flags)
+{
+default_tbstats_flag = flags;
+}
diff --git a/include/exec/gen-icount.h b/include/exec/gen-icount.h
index 20e7835ff0..4372817951 100644
--- a/include/exec/gen-icount.h
+++ b/include/exec/gen-icount.h
@@ -3,6 +3,7 @@
 
 #include "exec/exec-all.h"
 #include "exec/tb-stats.h"
+#include "tb-stats-flags.h"
 
 /* Helpers for instruction counting code generation.  */
 
diff --git a/include/exec/tb-stats-flags.h b/include/exec/tb-stats-flags.h
index f29eff7576..04adaee8d9 100644
--- a/include/exec/tb-stats-flags.h
+++ b/include/exec/tb-stats-flags.h
@@ -15,6 +15,7 @@
 #define TB_EXEC_STATS (1 << 1)
 #define TB_JIT_STATS  (1 << 2)
 #define TB_JIT_TIME   (1 << 3)
+#define TB_ALL_STATS  (TB_EXEC_STATS | TB_JIT_STATS | TB_JIT_TIME)
 
 /* TBStatistic collection controls */
 void enable_collect_tb_stats(void);
@@ -24,5 +25,7 @@ bool tb_stats_collection_enabled(void);
 bool tb_stats_collection_paused(void);
 
 uint32_t get_default_tbstats_flag(void);
+void set_default_tbstats_flag(uint32_t);
+void set_tbstats_flags(uint32_t flags);
 
 #endif
diff --git a/include/exec/tb-stats.h b/include/exec/tb-stats.h
index d93d42e085..72585c448a 100644
--- a/include/exec/tb-stats.h
+++ b/include/exec/tb-stats.h
@@ -31,6 +31,8 @@
 #include "exec/tb-stats-flags.h"
 #include "tcg/tcg.h"
 
+#include "exec/tb-stats-flags.h"
+
 #define tb_stats_enabled(tb, JIT_STATS) \
 (tb && tb->tb_stats && (tb->tb_stats->stats_enabled & JIT_STATS))
 
diff --git a/include/qemu/log.h b/include/qemu/log.h
index c5643d8dd5..6f3b8091cd 100644
--- a/include/qemu/log.h
+++ b/include/qemu/log.h
@@ -35,6 +35,7 @@ bool qemu_log_separate(void);
 /* LOG_STRACE is used for user-mode strace logging. */
 #define LOG_STRACE (1 << 19)
 #define LOG_PER_THREAD (1 << 20)
+#define CPU_LOG_TB_STATS   (1 << 21)
 
 /* Lock/unlock output. */
 
diff --git a/stubs/meson.build b/stubs/meson.build
index a56645e2f7..e926649d40 100644
--- a/stubs/meson.build
+++ b/stubs/meson.build
@@ -65,3 +65,4 @@ else
 endif
 stub_ss.add(files('semihost-all.c'))
 stub_ss.add(when: 'CONFIG_VFIO_USER_SERVER', if_false: 
files('vfio-user-obj.c'))
+stub_ss.add(files('tb-stats.c'))
diff --git a/stubs/tb-stats.c b/stubs/tb-stats.c
new file mode 100644
index 00..d212c2a1fa
--- /dev/null
+++ b/stubs/tb-stats.c
@@ -0,0 +1,27 @@
+/*
+ * TB Stats Stubs
+ *
+ * Copyright (c) 2019
+ * Written by Alex Bennée 
+ *
+ * This code is licensed under the GNU GPL v2, or later.
+ */
+
+
+#include "qemu/osdep.h"
+#include "exec/tb-stats-flags.h"
+
+void enable_collect_tb_stats(void)
+{
+return;
+}
+
+bool tb_stats_collection_enabled(void)
+{
+return false;
+}
+
+void set_default_tbstats_flag(uint32_t flags)
+{
+return;
+}
diff --git a/util/log.c b/util/log.c
index 53b4f6c58e..7ae471d97c 100644
--- a/util/log.c
+++ b/util/log.c
@@ -19,6 +19,7 @@
 
 #include "qemu/osdep.h"
 #include "qemu/log.h"
+#include "qemu/qemu-print.h"
 #include "qemu/range.h"
 #include "qemu/error-report.h"
 #include "qapi/error.h"
@@ -27,6 +28,7 @@
 #include "qemu/thread.h"
 #include "qemu/lockable.h"
 #include "qemu/rcu.h"
+#include "exec/tb-stats-flags.h"
 #ifdef CONFIG_LINUX
 #include 
 #endif
@@ -47,6 +49,7 @@ static __thread Notifier qemu_log_thread_cleanup_notifier;
 int qemu_loglevel;
 static bool log_per_thread;
 static GArray *debug_regions;
+int32_t max_num_hot_tbs_to_dump;
 
 /* Returns true if qemu_log() will really write somewhere. */
 bool qemu

[PATCH v14 06/10] monitor: adding tb_stats hmp command

2023-05-30 Thread Fei Wu
From: "Vanderson M. do Rosario" 

Adding tb_stats [start|pause|stop|filter] command to hmp.
This allows controlling the collection of statistics.
It is also possible to set the level of collection:
all, jit, or exec.

tb_stats filter restricts statistics collection to the TBs
in the last_search list.

The goal of this command is to allow the dynamic exploration
of the TCG behavior and quality. Therefore, for now, a
corresponding QMP command is not worthwhile.

Acked-by: Dr. David Alan Gilbert 
Signed-off-by: Vanderson M. do Rosario 
Message-Id: <20190829173437.5926-8-vanderson...@gmail.com>
Message-Id: <20190829173437.5926-9-vanderson...@gmail.com>
[AJB: fix authorship]
Signed-off-by: Alex Bennée 
Signed-off-by: Fei Wu 
---
 accel/tcg/monitor.c   |  45 +
 accel/tcg/tb-stats.c  | 121 +-
 hmp-commands.hx   |  16 +
 include/exec/tb-stats-flags.h |   2 +
 include/exec/tb-stats.h   |  10 +++
 include/monitor/hmp.h |   1 +
 6 files changed, 192 insertions(+), 3 deletions(-)

diff --git a/accel/tcg/monitor.c b/accel/tcg/monitor.c
index 2bc87f2642..2e00f10267 100644
--- a/accel/tcg/monitor.c
+++ b/accel/tcg/monitor.c
@@ -11,7 +11,9 @@
 #include "qapi/error.h"
 #include "qapi/type-helpers.h"
 #include "qapi/qapi-commands-machine.h"
+#include "qapi/qmp/qdict.h"
 #include "monitor/monitor.h"
+#include "monitor/hmp.h"
 #include "sysemu/cpus.h"
 #include "sysemu/cpu-timers.h"
 #include "sysemu/tcg.h"
@@ -87,6 +89,49 @@ HumanReadableText *qmp_x_query_opcount(Error **errp)
 }
 
 #ifdef CONFIG_TCG
+void hmp_tbstats(Monitor *mon, const QDict *qdict)
+{
+if (!tcg_enabled()) {
+error_report("TB information is only available with accel=tcg");
+return;
+}
+
+char *cmd = (char *) qdict_get_try_str(qdict, "command");
+enum TbstatsCmd icmd = -1;
+
+if (strcmp(cmd, "start") == 0) {
+icmd = START;
+} else if (strcmp(cmd, "pause") == 0) {
+icmd = PAUSE;
+} else if (strcmp(cmd, "stop") == 0) {
+icmd = STOP;
+} else if (strcmp(cmd, "filter") == 0) {
+icmd = FILTER;
+} else {
+error_report("invalid command!");
+return;
+}
+
+char *slevel = (char *) qdict_get_try_str(qdict, "level");
+uint32_t level = TB_EXEC_STATS | TB_JIT_STATS | TB_JIT_TIME;
+if (slevel) {
+if (strcmp(slevel, "jit") == 0) {
+level = TB_JIT_STATS;
+} else if (strcmp(slevel, "exec") == 0) {
+level = TB_EXEC_STATS;
+} else if (strcmp(slevel, "time") == 0) {
+level = TB_JIT_TIME;
+}
+}
+
+struct TbstatsCommand *tbscommand = g_new0(struct TbstatsCommand, 1);
+tbscommand->cmd = icmd;
+tbscommand->level = level;
+async_safe_run_on_cpu(first_cpu, do_hmp_tbstats_safe,
+  RUN_ON_CPU_HOST_PTR(tbscommand));
+
+}
+
 HumanReadableText *qmp_x_query_profile(Error **errp)
 {
 g_autoptr(GString) buf = g_string_new("");
diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c
index 68ac7d3f73..805e1fc74d 100644
--- a/accel/tcg/tb-stats.c
+++ b/accel/tcg/tb-stats.c
@@ -16,18 +16,20 @@
 #include "qemu/timer.h"
 
 #include "exec/tb-stats.h"
+#include "exec/tb-flush.h"
 #include "tb-context.h"
 
 /* TBStatistic collection controls */
 enum TBStatsStatus {
 TB_STATS_DISABLED = 0,
 TB_STATS_RUNNING,
-TB_STATS_PAUSED,
-TB_STATS_STOPPED
+TB_STATS_PAUSED
 };
 
 static enum TBStatsStatus tcg_collect_tb_stats;
 static uint32_t default_tbstats_flag;
+/* only accessed in safe work */
+static GList *last_search;
 
 uint64_t dev_time;
 
@@ -170,6 +172,101 @@ void dump_jit_profile_info(TCGProfile *s, GString *buf)
 g_free(jpi);
 }
 
+static void free_tbstats(void *p, uint32_t hash, void *userp)
+{
+g_free(p);
+}
+
+static void clean_tbstats(void)
+{
+/* remove all tb_stats */
+qht_iter(&tb_ctx.tb_stats, free_tbstats, NULL);
+qht_destroy(&tb_ctx.tb_stats);
+}
+
+void do_hmp_tbstats_safe(CPUState *cpu, run_on_cpu_data icmd)
+{
+struct TbstatsCommand *cmdinfo = icmd.host_ptr;
+int cmd = cmdinfo->cmd;
+uint32_t level = cmdinfo->level;
+
+switch (cmd) {
+case START:
+if (tb_stats_collection_enabled()) {
+qemu_printf("TB information already being recorded");
+return;
+} else if (tb_stats_collection_paused()) {
+set_tbstats_flags(level);
+} else {
+qht_init(&tb_ctx.tb_stats, tb_stats_cmp, CODE_GEN_HTABLE_SIZE,
+ QHT_MODE_AUTO_RESIZE);
+}
+
+set_default_tbstats_flag(level);
+enable_collect_tb_stats();
+  

[PATCH v14 09/10] tb-stats: dump hot TBs at the end of the execution

2023-05-30 Thread Fei Wu
From: "Vanderson M. do Rosario" 

Dump the hottest TBs if -d tb_stats,dump_limit=N is used.

Example of output for the 3 hottest TBs:

TB id:1 | phys:0x34d54 virt:0x00034d54 flags:0xf0
| exec:4828932/0 guest inst cov:16.38%
| trans:1 ints: g:3 op:82 op_opt:34 spills:3
| h/g (host bytes / guest insts): 90.64
| time to gen at 2.4GHz => code:3150.83(ns) IR:712.08(ns)
| targets: 0x00034d5e (id:11), 0x00034d0d (id:2)

TB id:2 | phys:0x34d0d virt:0x00034d0d flags:0xf0
| exec:4825842/0 guest inst cov:21.82%
| trans:1 ints: g:4 op:80 op_opt:38 spills:2
| h/g (host bytes / guest insts): 84.00
| time to gen at 2.4GHz => code:3362.92(ns) IR:793.75(ns)
| targets: 0x00034d19 (id:12), 0x00034d54 (id:1)

TB id:3 | phys:0xec1c1 virt:0x000ec1c1 flags:0xb0
| exec:872032/0 guest inst cov:1.97%
| trans:1 ints: g:2 op:56 op_opt:26 spills:1
| h/g (host bytes / guest insts): 68.00
| time to gen at 2.4GHz => code:1692.08(ns) IR:473.75(ns)
| targets: 0x000ec1c5 (id:4), 0x000ec1cb (id:13)

Signed-off-by: Vanderson M. do Rosario 
Message-Id: <20190829173437.5926-12-vanderson...@gmail.com>
[AJB: fix authorship, add softmmu support]
Signed-off-by: Alex Bennée 
Signed-off-by: Fei Wu 
---
 accel/tcg/tb-stats.c  | 21 +
 include/exec/tb-stats-dump.h  | 21 +
 include/exec/tb-stats-flags.h |  1 +
 linux-user/exit.c |  2 ++
 softmmu/runstate.c|  2 ++
 stubs/tb-stats.c  |  5 +
 util/log.c|  4 ++--
 7 files changed, 54 insertions(+), 2 deletions(-)
 create mode 100644 include/exec/tb-stats-dump.h

diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c
index 4b358cb421..d77891492a 100644
--- a/accel/tcg/tb-stats.c
+++ b/accel/tcg/tb-stats.c
@@ -19,6 +19,7 @@
 
 #include "exec/tb-stats.h"
 #include "exec/tb-flush.h"
+#include "exec/tb-stats-dump.h"
 #include "tb-context.h"
 
 #include "internal.h"
@@ -32,6 +33,7 @@ enum TBStatsStatus {
 
 static enum TBStatsStatus tcg_collect_tb_stats;
 static uint32_t default_tbstats_flag;
+static int max_dump_tbs;
 /* only accessed in safe work */
 static GList *last_search;
 
@@ -616,6 +618,20 @@ void dump_tb_info(int id, int log_mask, bool use_monitor)
 }
 
 
+/*
+ * Dump the final stats
+ */
+void tb_stats_dump(void)
+{
+if (!tb_stats_collection_enabled()) {
+return;
+}
+
+dump_tbs_info(max_dump_tbs, SORT_BY_HOTNESS, false);
+}
+
+/* TBStatistic collection controls */
+
 void enable_collect_tb_stats(void)
 {
 tcg_collect_tb_stats = TB_STATS_RUNNING;
@@ -669,3 +685,8 @@ void set_default_tbstats_flag(uint32_t flags)
 {
 default_tbstats_flag = flags;
 }
+
+void set_tbstats_max_tbs(int max)
+{
+max_dump_tbs = max;
+}
diff --git a/include/exec/tb-stats-dump.h b/include/exec/tb-stats-dump.h
new file mode 100644
index 00..197c6148e9
--- /dev/null
+++ b/include/exec/tb-stats-dump.h
@@ -0,0 +1,21 @@
+/*
+ * TB Stats common dump functions across sysemu/linux-user
+ *
+ * Copyright (c) 2019 Linaro
+ *
+ * SPDX-License-Identifier: GPL-3.0-or-later
+ */
+
+#ifndef _TB_STATS_DUMP_H_
+#define _TB_STATS_DUMP_H_
+
+/**
+ * tb_stats_dump: dump final tb_stats at end of execution
+ */
+#ifdef CONFIG_TCG
+void tb_stats_dump(void);
+#else
+static inline void tb_stats_dump(void) { /* do nothing */ };
+#endif
+
+#endif /* _TB_STATS_DUMP_H_ */
diff --git a/include/exec/tb-stats-flags.h b/include/exec/tb-stats-flags.h
index a3897c99b1..484a4c9c68 100644
--- a/include/exec/tb-stats-flags.h
+++ b/include/exec/tb-stats-flags.h
@@ -26,6 +26,7 @@ bool tb_stats_collection_enabled(void);
 bool tb_stats_collection_disabled(void);
 bool tb_stats_collection_paused(void);
 
+void set_tbstats_max_tbs(int max);
 uint32_t get_default_tbstats_flag(void);
 void set_default_tbstats_flag(uint32_t);
 void set_tbstats_flags(uint32_t flags);
diff --git a/linux-user/exit.c b/linux-user/exit.c
index 3017d28a3c..4fd23bcf60 100644
--- a/linux-user/exit.c
+++ b/linux-user/exit.c
@@ -25,6 +25,7 @@
 #ifdef CONFIG_GPROF
 #include 
 #endif
+#include "exec/tb-stats-dump.h"
 
 #ifdef CONFIG_GCOV
 extern void __gcov_dump(void);
@@ -41,4 +42,5 @@ void preexit_cleanup(CPUArchState *env, int code)
 gdb_exit(code);
 qemu_plugin_user_exit();
 perf_exit();
+tb_stats_dump();
 }
diff --git a/softmmu/runstate.c b/softmmu/runstate.c
index 37390799f1..b5ceb55ffd 100644
--- a/softmmu/runstate.c
+++ b/softmmu/runstate.c
@@ -30,6 +30,7 @@
 #include "crypto/cipher.h"
 #include "crypto/init.h"
 #include "exec/cpu-common.h"
+#include "exec/tb-stats-dump.h"
 #include "gdbstub/syscalls.h"
 #include "hw/boards.h"
 #include "mi

[PATCH v14 10/10] docs: add tb-stats how to

2023-05-30 Thread Fei Wu
Signed-off-by: Fei Wu 
---
 docs/devel/tcg-tbstats.rst | 129 +
 1 file changed, 129 insertions(+)
 create mode 100644 docs/devel/tcg-tbstats.rst

diff --git a/docs/devel/tcg-tbstats.rst b/docs/devel/tcg-tbstats.rst
new file mode 100644
index 00..bfba222e96
--- /dev/null
+++ b/docs/devel/tcg-tbstats.rst
@@ -0,0 +1,129 @@
+============
+TBStatistics
+============
+
+What is TBStatistics
+====================
+
+TBStatistics (tb_stats) is a tool that gathers various internal information
+from TCG during binary translation. This allows us to identify things such as
+the hottest TBs, the guest-to-host instruction translation ratio, the number of
+spills during register allocation, and more.
+
+
+How to use TBStatistics
+=======================
+
+1. HMP interface
+----------------
+
+TBStatistics provides HMP interface, you can try the following examples after
+connecting to the monitor.
+
+* First check the help info::
+
+(qemu) help tb_stats
+tb_stats command [stats_level] -- Control tb statistics 
collection:tb_stats (start|pause|stop|filter) [all|jit_stats|exec_stats]
+
+(qemu) help info tb-list
+info tb-list [number sortedby] -- show a [number] translated blocks sorted 
by [sortedby]sortedby opts: hotness hg spills
+
+(qemu) help info tb
+info tb id [flag1,flag2,...] -- show information about one translated 
block by id.dump flags can be used to set dump code level: out_asm in_asm op
+
+* Enable TBStatistics::
+
+(qemu) tb_stats start all
+(qemu)
+
+* List the TBs of interest::
+
+(qemu) info tb-list 2
+TB id:1 | phys:0x79bca0 virt:0x8059bca0 flags:0x01024001 0 inv/1
+| exec:1464084/0 guest inst cov:0.15%
+| trans:1 ints: g:3 op:16 op_opt:15 spills:0
+| h/g (host bytes / guest insts): 64.00
+| time to gen at 2.4GHz => code:607.08(ns) IR:250.83(ns)
+
+TB id:2 | phys:0x2adf0c virt:0x800adf0c flags:0x01024001 0 inv/1
+| exec:1033298/0 guest inst cov:0.28%
+| trans:1 ints: g:8 op:35 op_opt:33 spills:0
+| h/g (host bytes / guest insts): 86.00
+| time to gen at 2.4GHz => code:1429.58(ns) IR:545.42(ns)
+
+* Dive into the specific TB::
+
+(qemu) info tb 1 op
+--
+
+TB id:1 | phys:0x79bca0 virt:0x8059bca0 flags:0x01024001 7 inv/19
+| exec:2038349/0 guest inst cov:0.15%
+| trans:19 ints: g:3 op:16 op_opt:15 spills:0
+| h/g (host bytes / guest insts): 64.00
+| time to gen at 2.4GHz => code:133434.17(ns) IR:176988.33(ns)
+
+OP:
+ ld_i32 loc1,env,$0xfff0
+ brcond_i32 loc1,$0x0,lt,$L0
+ mov_i64 loc3,$0x7f3c70b3a4e0
+ call inc_exec_freq,$0x1,$0,loc3
+
+  8059bca0 6422
+ add_i64 loc5,x2/sp,$0x8
+ qemu_ld_i64 x8/s0,loc5,un+leq,1
+
+  8059bca2 
+ add_i64 x2/sp,x2/sp,$0x10
+
+  8059bca4 
+ mov_i64 pc,x1/ra
+ and_i64 pc,pc,$0xfffe
+ call lookup_tb_ptr,$0x6,$1,tmp9,env
+ goto_ptr tmp9
+ set_label $L0
+ exit_tb $0x7f3e887a8043
+
+--
+
+* Stop TBStatistics after investigation, this will disable TBStatistics 
completely.::
+
+(qemu) tb_stats stop
+(qemu)
+
+* Alternatively, TBStatistics can be paused: the previously collected
+  TBStatistics are not cleared, but no TBStatistics are recorded for new TBs::
+
+(qemu) tb_stats pause
+(qemu)
+
+* TBStatistics can of course be restarted for another round of investigation::
+
+(qemu) tb_stats start all
+(qemu)
+
+
+2. Dump at exit
+---------------
+
+New command line options have been added to dump TB information at exit::
+
+-d tb_stats[[,level=(+all+jit+exec+time)][,dump_limit=]]
+
+For example, starting QEMU like this::
+
+-d tb_stats,dump_limit=2
+
+QEMU prints the following at exit::
+
+QEMU: Terminated
+TB id:1 | phys:0x61be02 virt:0x8041be02 flags:0x01024001 0 inv/1
+| exec:72739176/0 guest inst cov:20.22%
+| trans:1 ints: g:9 op:35 op_opt:33 spills:0
+| h/g (host bytes / guest insts): 51.57
+| time to gen at 2.4GHz => code:1065.42(ns) IR:554.17(ns)
+
+TB id:2 | phys:0x61bc66 virt:0x8041bc66 flags:0x01024001 0 inv/1
+| exec:25069507/0 guest inst cov:0.77%
+| trans:1 ints: g:1 op:15 op_opt:14 spills:0
+| h/g (host bytes / guest insts): 128.00
+| time to gen at 2.4GHz => code:312.50(ns) IR:152.08(ns)
-- 
2.25.1




[PATCH v14 04/10] accel/tcg: add jit stats and time to TBStatistics

2023-05-30 Thread Fei Wu
This collects all the statistics for TBStatistics, not only for the
whole emulation but for each TB.

Signed-off-by: Vanderson M. do Rosario 
Signed-off-by: Alex Bennée 
Signed-off-by: Fei Wu 
---
 accel/tcg/monitor.c   |  20 -
 accel/tcg/tb-stats.c  | 146 ++
 accel/tcg/tcg-accel-ops.c |   7 ++
 accel/tcg/translate-all.c |  70 +++-
 accel/tcg/translator.c|   7 +-
 include/exec/tb-stats-flags.h |   2 +
 include/exec/tb-stats.h   |  46 +++
 include/qemu/timer.h  |   6 ++
 include/tcg/tcg.h |  28 ++-
 softmmu/runstate.c|   9 +++
 tcg/tcg.c |  88 ++--
 tests/qtest/qmp-cmd-test.c|   3 +
 12 files changed, 417 insertions(+), 15 deletions(-)

diff --git a/accel/tcg/monitor.c b/accel/tcg/monitor.c
index e903dd1d2e..2bc87f2642 100644
--- a/accel/tcg/monitor.c
+++ b/accel/tcg/monitor.c
@@ -15,6 +15,7 @@
 #include "sysemu/cpus.h"
 #include "sysemu/cpu-timers.h"
 #include "sysemu/tcg.h"
+#include "exec/tb-stats.h"
 #include "internal.h"
 
 
@@ -69,6 +70,11 @@ HumanReadableText *qmp_x_query_opcount(Error **errp)
 {
 g_autoptr(GString) buf = g_string_new("");
 
+if (!tb_stats_collection_enabled()) {
+error_setg(errp, "TB information not being recorded.");
+return NULL;
+}
+
 if (!tcg_enabled()) {
 error_setg(errp,
"Opcode count information is only available with 
accel=tcg");
@@ -80,11 +86,23 @@ HumanReadableText *qmp_x_query_opcount(Error **errp)
 return human_readable_text_from_str(buf);
 }
 
+#ifdef CONFIG_TCG
+HumanReadableText *qmp_x_query_profile(Error **errp)
+{
+g_autoptr(GString) buf = g_string_new("");
+
+dump_jit_exec_time_info(dev_time, buf);
+dev_time = 0;
+
+return human_readable_text_from_str(buf);
+}
+#else
 HumanReadableText *qmp_x_query_profile(Error **errp)
 {
-error_setg(errp, "Internal profiler not compiled");
+error_setg(errp, "TCG should be enabled!");
 return NULL;
 }
+#endif
 
 static void hmp_tcg_register(void)
 {
diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c
index 143a52ef5c..78a3104c7f 100644
--- a/accel/tcg/tb-stats.c
+++ b/accel/tcg/tb-stats.c
@@ -9,6 +9,11 @@
 #include "qemu/osdep.h"
 
 #include "disas/disas.h"
+#include "exec/exec-all.h"
+#include "tcg/tcg.h"
+
+#include "qemu/qemu-print.h"
+#include "qemu/timer.h"
 
 #include "exec/tb-stats.h"
 #include "tb-context.h"
@@ -24,6 +29,147 @@ enum TBStatsStatus {
 static enum TBStatsStatus tcg_collect_tb_stats;
 static uint32_t default_tbstats_flag;
 
+uint64_t dev_time;
+
+struct jit_profile_info {
+uint64_t translations;
+uint64_t aborted;
+uint64_t ops;
+unsigned ops_max;
+uint64_t del_ops;
+uint64_t temps;
+unsigned temps_max;
+uint64_t host;
+uint64_t guest;
+uint64_t search_data;
+
+uint64_t interm_time;
+uint64_t code_time;
+uint64_t restore_count;
+uint64_t restore_time;
+uint64_t opt_time;
+uint64_t la_time;
+};
+
+/* accumulate the statistics from all TBs */
+static void collect_jit_profile_info(void *p, uint32_t hash, void *userp)
+{
+struct jit_profile_info *jpi = userp;
+TBStatistics *tbs = p;
+
+jpi->translations += tbs->translations.total;
+jpi->ops += tbs->code.num_tcg_ops;
+if (stat_per_translation(tbs, code.num_tcg_ops) > jpi->ops_max) {
+jpi->ops_max = stat_per_translation(tbs, code.num_tcg_ops);
+}
+jpi->del_ops += tbs->code.deleted_ops;
+jpi->temps += tbs->code.temps;
+if (stat_per_translation(tbs, code.temps) > jpi->temps_max) {
+jpi->temps_max = stat_per_translation(tbs, code.temps);
+}
+jpi->host += tbs->code.out_len;
+jpi->guest += tbs->code.in_len;
+jpi->search_data += tbs->code.search_out_len;
+
+jpi->interm_time += stat_per_translation(tbs, gen_times.ir);
+jpi->opt_time += stat_per_translation(tbs, gen_times.ir_opt);
+jpi->la_time += stat_per_translation(tbs, gen_times.la);
+jpi->code_time += stat_per_translation(tbs, gen_times.code);
+
+/*
+ * The restore time covers how long we have spent restoring state
+ * from a given TB (e.g. recovering from a fault). It is therefor
+ * not related to the number of translations we have done.
+ */
+jpi->restore_time += tbs->tb_restore_time;
+jpi->restore_count += tbs->tb_restore_count;
+}
+
+void dump_jit_exec_time_info(uint64_t dev_time, GString *buf)
+{
+static uint64_t last_cpu_exec_time;
+uint64_t cpu_exec_time;
+uint64_t delta;
+
+cpu_exec_time = tcg_cpu_exec_time();
+delta = cpu_exec_time - last_cpu_exec_time;
+
+g_string_append_p

[PATCH v14 08/10] Adding info [tb-list|tb] commands to HMP (WIP)

2023-05-30 Thread Fei Wu
From: "Vanderson M. do Rosario" 

These commands allow the exploration of TBs generated by the TCG.
They help to understand which TBs are hotter or have more guest/host
instructions, and to examine their guest, host and IR code.

The goal of this command is to allow the dynamic exploration of TCG
behavior and code quality. Therefore, for now, a corresponding QMP
command is not worthwhile.

[AJB: WIP - we still can't be safely sure a translation will succeed]

Example of output:

TB id:1 | phys:0x34d54 virt:0x00034d54 flags:0xf0
| exec:4828932/0 guest inst cov:16.38%
| trans:1 ints: g:3 op:82 op_opt:34 spills:3
| h/g (host bytes / guest insts): 90.64
| time to gen at 2.4GHz => code:3150.83(ns) IR:712.08(ns)
| targets: 0x00034d5e (id:11), 0x00034d0d (id:2)

TB id:2 | phys:0x34d0d virt:0x00034d0d flags:0xf0
| exec:4825842/0 guest inst cov:21.82%
| trans:1 ints: g:4 op:80 op_opt:38 spills:2
| h/g (host bytes / guest insts): 84.00
| time to gen at 2.4GHz => code:3362.92(ns) IR:793.75(ns)
| targets: 0x00034d19 (id:12), 0x00034d54 (id:1)

TB id:2 | phys:0x34d0d virt:0x00034d0d flags:0xf0
| exec:6956495/0  guest inst cov:21.82%
| trans:2 ints: g:2 op:40 op_opt:19 spills:1
| h/g (host bytes / guest insts): 84.00
| time to gen at 2.4GHz => code:3130.83(ns) IR:722.50(ns)
| targets: 0x00034d19 (id:12), 0x00034d54 (id:1)


IN:
0x00034d0d:  89 demovl %ebx, %esi
0x00034d0f:  26 8b 0e movl %es:(%esi), %ecx
0x00034d12:  26 f6 46 08 80   testb$0x80, %es:8(%esi)
0x00034d17:  75 3bjne  0x34d54

--

TB id:1 | phys:0x34d54 virt:0x00034d54 flags:0xf0
| exec:5202686/0 guest inst cov:11.28%
| trans:1 ints: g:3 op:82 op_opt:34 spills:3
| h/g (host bytes / guest insts): 90.64
| time to gen at 2.4GHz => code:2793.75(ns) IR:614.58(ns)
| targets: 0x00034d5e (id:3), 0x00034d0d (id:2)

TB id:2 | phys:0x34d0d virt:0x00034d0d flags:0xf0
| exec:5199468/0 guest inst cov:15.03%
| trans:1 ints: g:4 op:80 op_opt:38 spills:2
| h/g (host bytes / guest insts): 84.00
| time to gen at 2.4GHz => code:2958.75(ns) IR:719.58(ns)
| targets: 0x00034d19 (id:4), 0x00034d54 (id:1)

--
2 TBs to reach 25% of guest inst exec coverage
Total of guest insts exec: 138346727

--

Acked-by: Dr. David Alan Gilbert 
Signed-off-by: Vanderson M. do Rosario 
Message-Id: <20190829173437.5926-10-vanderson...@gmail.com>
[AJB: fix authorship, dropped coverset]
Signed-off-by: Alex Bennée 
Signed-off-by: Fei Wu 
---
 accel/tcg/monitor.c  |  54 ++
 accel/tcg/tb-stats.c | 322 +++
 disas/disas.c|  26 ++-
 hmp-commands-info.hx |  16 ++
 include/exec/tb-stats.h  |  33 +++-
 include/monitor/hmp.h|   2 +
 include/qemu/log-for-trace.h |   6 +-
 include/qemu/log.h   |   2 +
 util/log.c   |  66 +--
 9 files changed, 508 insertions(+), 19 deletions(-)

diff --git a/accel/tcg/monitor.c b/accel/tcg/monitor.c
index 2e00f10267..a1780c5920 100644
--- a/accel/tcg/monitor.c
+++ b/accel/tcg/monitor.c
@@ -8,6 +8,7 @@
 
 #include "qemu/osdep.h"
 #include "qemu/accel.h"
+#include "qemu/log.h"
 #include "qapi/error.h"
 #include "qapi/type-helpers.h"
 #include "qapi/qapi-commands-machine.h"
@@ -18,6 +19,7 @@
 #include "sysemu/cpu-timers.h"
 #include "sysemu/tcg.h"
 #include "exec/tb-stats.h"
+#include "tb-context.h"
 #include "internal.h"
 
 
@@ -132,6 +134,58 @@ void hmp_tbstats(Monitor *mon, const QDict *qdict)
 
 }
 
+void hmp_info_tblist(Monitor *mon, const QDict *qdict)
+{
+int number_int;
+const char *sortedby_str = NULL;
+if (!tcg_enabled()) {
+error_report("TB information is only available with accel=tcg");
+return;
+}
+if (!tb_ctx.tb_stats.map) {
+error_report("no TB information recorded");
+return;
+}
+
+number_int = qdict_get_try_int(qdict, "number", 10);
+sortedby_str = qdict_get_try_str(qdict, "sortedby");
+
+int sortedby = SORT_BY_HOTNESS;
+if (sortedby_str == NULL || strcmp(sortedby_str, "hotness") == 0) {
+sortedby = SORT_BY_HOTNESS;
+} else if (strcmp(sortedby_str, "hg") == 0) {
+sortedby = SORT_BY_HG;
+} else if (strcmp(sortedby_str, "spills") == 0) {
+sortedby = SORT_BY_SPILLS;
+} else {
+error_report("valid sort options ar

[PATCH v14 02/10] accel/tcg: introduce TBStatistics structure

2023-05-30 Thread Fei Wu
From: "Vanderson M. do Rosario" 

To store statistics for each TB, we created a TBStatistics structure
which is linked with the TBs. TBStatistics can stay alive after
tb_flush and be relinked to a regenerated TB. So the statistics can
be accumulated even through flushes.

The goal is to have all present and future qemu/tcg statistics and
meta-data stored in this new structure.

Reviewed-by: Alex Bennée 
Signed-off-by: Vanderson M. do Rosario 
Message-Id: <20190829173437.5926-2-vanderson...@gmail.com>
[AJB: fix git author, review comments]
Signed-off-by: Alex Bennée 
Signed-off-by: Fei Wu 
---
 MAINTAINERS   |  1 +
 accel/tcg/meson.build |  1 +
 accel/tcg/tb-context.h|  1 +
 accel/tcg/tb-hash.h   |  7 +
 accel/tcg/tb-maint.c  | 19 
 accel/tcg/tb-stats.c  | 58 +++
 accel/tcg/translate-all.c | 43 ++
 include/exec/exec-all.h   |  3 ++
 include/exec/tb-stats-flags.h | 21 +
 include/exec/tb-stats.h   | 56 +
 10 files changed, 210 insertions(+)
 create mode 100644 accel/tcg/tb-stats.c
 create mode 100644 include/exec/tb-stats-flags.h
 create mode 100644 include/exec/tb-stats.h

diff --git a/MAINTAINERS b/MAINTAINERS
index 4b025a7b63..d72ecd12b4 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -153,6 +153,7 @@ F: include/exec/cpu*.h
 F: include/exec/exec-all.h
 F: include/exec/tb-flush.h
 F: include/exec/target_long.h
+F: include/exec/tb-stats*.h
 F: include/exec/helper*.h
 F: include/sysemu/cpus.h
 F: include/sysemu/tcg.h
diff --git a/accel/tcg/meson.build b/accel/tcg/meson.build
index aeb20a6ef0..9263bdde11 100644
--- a/accel/tcg/meson.build
+++ b/accel/tcg/meson.build
@@ -4,6 +4,7 @@ tcg_ss.add(files(
   'cpu-exec-common.c',
   'cpu-exec.c',
   'tb-maint.c',
+  'tb-stats.c',
   'tcg-runtime-gvec.c',
   'tcg-runtime.c',
   'translate-all.c',
diff --git a/accel/tcg/tb-context.h b/accel/tcg/tb-context.h
index cac62d9749..d7910d586b 100644
--- a/accel/tcg/tb-context.h
+++ b/accel/tcg/tb-context.h
@@ -35,6 +35,7 @@ struct TBContext {
 /* statistics */
 unsigned tb_flush_count;
 unsigned tb_phys_invalidate_count;
+struct qht tb_stats;
 };
 
 extern TBContext tb_ctx;
diff --git a/accel/tcg/tb-hash.h b/accel/tcg/tb-hash.h
index 83dc610e4c..87d657a1c6 100644
--- a/accel/tcg/tb-hash.h
+++ b/accel/tcg/tb-hash.h
@@ -67,4 +67,11 @@ uint32_t tb_hash_func(tb_page_addr_t phys_pc, target_ulong 
pc, uint32_t flags,
 return qemu_xxhash7(phys_pc, pc, flags, cf_mask, trace_vcpu_dstate);
 }
 
+static inline
+uint32_t tb_stats_hash_func(tb_page_addr_t phys_pc, target_ulong pc,
+uint32_t flags)
+{
+return qemu_xxhash5(phys_pc, pc, flags);
+}
+
 #endif
diff --git a/accel/tcg/tb-maint.c b/accel/tcg/tb-maint.c
index 991746f80f..0980fca358 100644
--- a/accel/tcg/tb-maint.c
+++ b/accel/tcg/tb-maint.c
@@ -24,6 +24,7 @@
 #include "exec/log.h"
 #include "exec/exec-all.h"
 #include "exec/tb-flush.h"
+#include "exec/tb-stats.h"
 #include "exec/translate-all.h"
 #include "sysemu/tcg.h"
 #include "tcg/tcg.h"
@@ -41,6 +42,23 @@
 #define TB_FOR_EACH_JMP(head_tb, tb, n) \
 TB_FOR_EACH_TAGGED((head_tb)->jmp_list_head, tb, n, jmp_list_next)
 
+/*
+ * This is more or less the same compare as tb_cmp(), but the
+ * data persists over tb_flush. We also aggregate the various
+ * variations of cflags under one record and ignore the details of
+ * page overlap (although we can count it).
+ */
+bool tb_stats_cmp(const void *ap, const void *bp)
+{
+const TBStatistics *a = ap;
+const TBStatistics *b = bp;
+
+return a->phys_pc == b->phys_pc &&
+a->pc == b->pc &&
+a->cs_base == b->cs_base &&
+a->flags == b->flags;
+}
+
 static bool tb_cmp(const void *ap, const void *bp)
 {
 const TranslationBlock *a = ap;
@@ -60,6 +78,7 @@ void tb_htable_init(void)
 unsigned int mode = QHT_MODE_AUTO_RESIZE;
 
 qht_init(&tb_ctx.htable, tb_cmp, CODE_GEN_HTABLE_SIZE, mode);
+init_tb_stats_htable();
 }
 
 typedef struct PageDesc PageDesc;
diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c
new file mode 100644
index 00..f988bd8a31
--- /dev/null
+++ b/accel/tcg/tb-stats.c
@@ -0,0 +1,58 @@
+/*
+ * QEMU System Emulator, Code Quality Monitor System
+ *
+ * Copyright (c) 2019 Vanderson M. do Rosario 
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+
+#include "disas/disas.h"
+
+#include "exec/tb-stats.h"
+#include "tb-context.h"
+
+/* TBStatistic collection controls */
+enum TBStatsStatus {
+TB_STATS_DISABLED = 0,
+TB_STATS_RUNNING,

[PATCH v14 01/10] accel/tcg: remove CONFIG_PROFILER

2023-05-30 Thread Fei Wu
TBStats will be introduced to replace CONFIG_PROFILER entirely, so first
remove all CONFIG_PROFILER-related code.

Signed-off-by: Vanderson M. do Rosario 
Signed-off-by: Alex Bennée 
Signed-off-by: Fei Wu 
Reviewed-by: Richard Henderson 
---
 accel/tcg/monitor.c   |  25 
 accel/tcg/tcg-accel-ops.c |  10 --
 accel/tcg/translate-all.c |  33 --
 include/qemu/timer.h  |   9 --
 include/tcg/tcg.h |  25 
 meson.build   |   2 -
 meson_options.txt |   2 -
 scripts/meson-buildoptions.sh |   3 -
 softmmu/runstate.c|   9 --
 tcg/tcg.c | 208 --
 tests/qtest/qmp-cmd-test.c|   3 -
 11 files changed, 329 deletions(-)

diff --git a/accel/tcg/monitor.c b/accel/tcg/monitor.c
index 92fce580f1..e903dd1d2e 100644
--- a/accel/tcg/monitor.c
+++ b/accel/tcg/monitor.c
@@ -80,36 +80,11 @@ HumanReadableText *qmp_x_query_opcount(Error **errp)
 return human_readable_text_from_str(buf);
 }
 
-#ifdef CONFIG_PROFILER
-
-int64_t dev_time;
-
-HumanReadableText *qmp_x_query_profile(Error **errp)
-{
-g_autoptr(GString) buf = g_string_new("");
-static int64_t last_cpu_exec_time;
-int64_t cpu_exec_time;
-int64_t delta;
-
-cpu_exec_time = tcg_cpu_exec_time();
-delta = cpu_exec_time - last_cpu_exec_time;
-
-g_string_append_printf(buf, "async time  %" PRId64 " (%0.3f)\n",
-   dev_time, dev_time / 
(double)NANOSECONDS_PER_SECOND);
-g_string_append_printf(buf, "qemu time   %" PRId64 " (%0.3f)\n",
-   delta, delta / (double)NANOSECONDS_PER_SECOND);
-last_cpu_exec_time = cpu_exec_time;
-dev_time = 0;
-
-return human_readable_text_from_str(buf);
-}
-#else
 HumanReadableText *qmp_x_query_profile(Error **errp)
 {
 error_setg(errp, "Internal profiler not compiled");
 return NULL;
 }
-#endif
 
 static void hmp_tcg_register(void)
 {
diff --git a/accel/tcg/tcg-accel-ops.c b/accel/tcg/tcg-accel-ops.c
index 58c8e64096..3973591508 100644
--- a/accel/tcg/tcg-accel-ops.c
+++ b/accel/tcg/tcg-accel-ops.c
@@ -70,20 +70,10 @@ void tcg_cpus_destroy(CPUState *cpu)
 int tcg_cpus_exec(CPUState *cpu)
 {
 int ret;
-#ifdef CONFIG_PROFILER
-int64_t ti;
-#endif
 assert(tcg_enabled());
-#ifdef CONFIG_PROFILER
-ti = profile_getclock();
-#endif
 cpu_exec_start(cpu);
 ret = cpu_exec(cpu);
 cpu_exec_end(cpu);
-#ifdef CONFIG_PROFILER
-qatomic_set(&tcg_ctx->prof.cpu_exec_time,
-tcg_ctx->prof.cpu_exec_time + profile_getclock() - ti);
-#endif
 return ret;
 }
 
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index c87648b99e..4e035e0f79 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -203,10 +203,6 @@ void cpu_restore_state_from_tb(CPUState *cpu, 
TranslationBlock *tb,
uintptr_t host_pc)
 {
 uint64_t data[TARGET_INSN_START_WORDS];
-#ifdef CONFIG_PROFILER
-TCGProfile *prof = &tcg_ctx->prof;
-int64_t ti = profile_getclock();
-#endif
 int insns_left = cpu_unwind_data_from_tb(tb, host_pc, data);
 
 if (insns_left < 0) {
@@ -223,12 +219,6 @@ void cpu_restore_state_from_tb(CPUState *cpu, 
TranslationBlock *tb,
 }
 
 cpu->cc->tcg_ops->restore_state_to_opc(cpu, tb, data);
-
-#ifdef CONFIG_PROFILER
-qatomic_set(&prof->restore_time,
-prof->restore_time + profile_getclock() - ti);
-qatomic_set(&prof->restore_count, prof->restore_count + 1);
-#endif
 }
 
 bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc)
@@ -291,13 +281,6 @@ static int setjmp_gen_code(CPUArchState *env, 
TranslationBlock *tb,
 tcg_ctx->cpu = NULL;
 *max_insns = tb->icount;
 
-#ifdef CONFIG_PROFILER
-qatomic_set(&tcg_ctx->prof.tb_count, tcg_ctx->prof.tb_count + 1);
-qatomic_set(&tcg_ctx->prof.interm_time,
-tcg_ctx->prof.interm_time + profile_getclock() - *ti);
-*ti = profile_getclock();
-#endif
-
 return tcg_gen_code(tcg_ctx, tb, pc);
 }
 
@@ -311,9 +294,6 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
 tb_page_addr_t phys_pc;
 tcg_insn_unit *gen_code_buf;
 int gen_code_size, search_size, max_insns;
-#ifdef CONFIG_PROFILER
-TCGProfile *prof = &tcg_ctx->prof;
-#endif
 int64_t ti;
 void *host_pc;
 
@@ -365,12 +345,6 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
 
  tb_overflow:
 
-#ifdef CONFIG_PROFILER
-/* includes aborted translations because of exceptions */
-qatomic_set(&prof->tb_count1, prof->tb_count1 + 1);
-ti = profile_getclock();
-#endif
-
 trace_translate_block(tb, pc, tb->tc.ptr);
 
 gen_code_size = setjmp_gen_code(env, tb, pc, host_pc, &max_insns, &ti);
@@ -425,13 +399,6 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
  */
   

[PATCH v14 07/10] tb-stats: reset the tracked TBs on a tb_flush

2023-05-30 Thread Fei Wu
From: Alex Bennée 

We keep track of translations but can only do so up until the
translation cache is flushed. At that point we really have no idea if
we can re-create a translation because all the active tracking
information has been reset.

Signed-off-by: Alex Bennée 
Reviewed-by: Richard Henderson 
Signed-off-by: Fei Wu 
---
 accel/tcg/tb-maint.c|  1 +
 accel/tcg/tb-stats.c| 19 +++
 include/exec/tb-stats.h |  8 
 3 files changed, 28 insertions(+)

diff --git a/accel/tcg/tb-maint.c b/accel/tcg/tb-maint.c
index 0980fca358..11ff0ddd90 100644
--- a/accel/tcg/tb-maint.c
+++ b/accel/tcg/tb-maint.c
@@ -763,6 +763,7 @@ static void do_tb_flush(CPUState *cpu, run_on_cpu_data 
tb_flush_count)
 qht_reset_size(&tb_ctx.htable, CODE_GEN_HTABLE_SIZE);
 tb_remove_all();
 
+tbstats_reset_tbs();
 tcg_region_reset_all();
 /* XXX: flush processor icache at this point if cache flush is expensive */
 qatomic_inc(&tb_ctx.tb_flush_count);
diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c
index 805e1fc74d..139f049ffc 100644
--- a/accel/tcg/tb-stats.c
+++ b/accel/tcg/tb-stats.c
@@ -267,6 +267,25 @@ void do_hmp_tbstats_safe(CPUState *cpu, run_on_cpu_data 
icmd)
 g_free(cmdinfo);
 }
 
+/*
+ * We have to reset the tbs array on a tb_flush as those
+ * TranslationBlocks no longer exist and we no longer know if the
+ * current mapping is still valid.
+ */
+
+static void reset_tbs_array(void *p, uint32_t hash, void *userp)
+{
+TBStatistics *tbs = p;
+g_ptr_array_set_size(tbs->tbs, 0);
+}
+
+void tbstats_reset_tbs(void)
+{
+if (tb_ctx.tb_stats.map) {
+qht_iter(&tb_ctx.tb_stats, reset_tbs_array, NULL);
+}
+}
+
 void init_tb_stats_htable(void)
 {
 if (!tb_ctx.tb_stats.map && tb_stats_collection_enabled()) {
diff --git a/include/exec/tb-stats.h b/include/exec/tb-stats.h
index 4bb343870b..30b788f7b2 100644
--- a/include/exec/tb-stats.h
+++ b/include/exec/tb-stats.h
@@ -124,4 +124,12 @@ struct TbstatsCommand {
 
 void do_hmp_tbstats_safe(CPUState *cpu, run_on_cpu_data icmd);
 
+/**
+ * tbstats_reset_tbs: reset the linked array of TBs
+ *
+ * Reset the list of tbs for a given array. Should be called from
+ * safe work during tb_flush.
+ */
+void tbstats_reset_tbs(void);
+
 #endif
-- 
2.25.1




[PATCH v14 03/10] accel: collecting TB execution count

2023-05-30 Thread Fei Wu
From: "Vanderson M. do Rosario" 

If a TB has a TBS (TBStatistics) with the TB_EXEC_STATS
enabled, then we instrument the start code of this TB
to atomically count the number of times it is executed.
We count both the number of "normal" executions and atomic
executions of a TB.

The execution count of the TB is stored in its respective
TBS.

All TBStatistics are created by default with the flags from
default_tbstats_flag.

[Richard Henderson created the inline gen_tb_exec_count]

Signed-off-by: Vanderson M. do Rosario 
Message-Id: <20190829173437.5926-3-vanderson...@gmail.com>
[AJB: Fix author]
Signed-off-by: Alex Bennée 
Signed-off-by: Fei Wu 
---
 accel/tcg/cpu-exec.c  |  6 ++
 accel/tcg/tb-stats.c  |  6 ++
 accel/tcg/tcg-runtime.c   |  1 +
 accel/tcg/translate-all.c |  7 +--
 accel/tcg/translator.c| 25 +
 include/exec/gen-icount.h |  1 +
 include/exec/tb-stats-flags.h |  5 +
 include/exec/tb-stats.h   | 13 +
 8 files changed, 62 insertions(+), 2 deletions(-)

diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
index 0e741960da..c0d8f26237 100644
--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -25,6 +25,7 @@
 #include "trace.h"
 #include "disas/disas.h"
 #include "exec/exec-all.h"
+#include "exec/tb-stats.h"
 #include "tcg/tcg.h"
 #include "qemu/atomic.h"
 #include "qemu/rcu.h"
@@ -562,7 +563,12 @@ void cpu_exec_step_atomic(CPUState *cpu)
 mmap_unlock();
 }
 
+if (tb_stats_enabled(tb, TB_EXEC_STATS)) {
+tb->tb_stats->executions.atomic++;
+}
+
 cpu_exec_enter(cpu);
+
 /* execute the generated code */
 trace_exec_tb(tb, pc);
 cpu_tb_exec(cpu, tb, &tb_exit);
diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c
index f988bd8a31..143a52ef5c 100644
--- a/accel/tcg/tb-stats.c
+++ b/accel/tcg/tb-stats.c
@@ -22,6 +22,7 @@ enum TBStatsStatus {
 };
 
 static enum TBStatsStatus tcg_collect_tb_stats;
+static uint32_t default_tbstats_flag;
 
 void init_tb_stats_htable(void)
 {
@@ -56,3 +57,8 @@ bool tb_stats_collection_paused(void)
 {
 return tcg_collect_tb_stats == TB_STATS_PAUSED;
 }
+
+uint32_t get_default_tbstats_flag(void)
+{
+return default_tbstats_flag;
+}
diff --git a/accel/tcg/tcg-runtime.c b/accel/tcg/tcg-runtime.c
index e4e030043f..11c6192064 100644
--- a/accel/tcg/tcg-runtime.c
+++ b/accel/tcg/tcg-runtime.c
@@ -27,6 +27,7 @@
 #include "exec/helper-proto.h"
 #include "exec/cpu_ldst.h"
 #include "exec/exec-all.h"
+#include "exec/tb-stats.h"
 #include "disas/disas.h"
 #include "exec/log.h"
 #include "tcg/tcg.h"
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index bb9483785e..dadf49954f 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -295,6 +295,7 @@ static TBStatistics *tb_get_stats(tb_page_addr_t phys_pc, 
target_ulong pc,
 new_stats->pc = pc;
 new_stats->cs_base = cs_base;
 new_stats->flags = flags;
+new_stats->stats_enabled = get_default_tbstats_flag();
 
 /*
  * All initialisation must be complete before we insert into qht
@@ -382,9 +383,11 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
 /*
  * We want to fetch the stats structure before we start code
  * generation so we can count interesting things about this
- * generation.
+ * generation. If dfilter is in effect we will only collect stats
+ * for the specified range.
  */
-if (tb_stats_collection_enabled()) {
+if (tb_stats_collection_enabled() &&
+qemu_log_in_addr_range(tb->pc)) {
 tb->tb_stats = tb_get_stats(phys_pc, pc, cs_base, flags);
 } else {
 tb->tb_stats = NULL;
diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c
index 6120ef2a92..80ffbfb455 100644
--- a/accel/tcg/translator.c
+++ b/accel/tcg/translator.c
@@ -11,6 +11,7 @@
 #include "qemu/error-report.h"
 #include "tcg/tcg.h"
 #include "tcg/tcg-op.h"
+#include "tcg/tcg-temp-internal.h"
 #include "exec/exec-all.h"
 #include "exec/gen-icount.h"
 #include "exec/log.h"
@@ -18,6 +19,29 @@
 #include "exec/plugin-gen.h"
 #include "exec/replay-core.h"
 
+static void gen_tb_exec_count(TranslationBlock *tb)
+{
+if (tb_stats_enabled(tb, TB_EXEC_STATS)) {
+TCGv_ptr ptr = tcg_temp_ebb_new_ptr();
+
+tcg_gen_movi_ptr(ptr, (intptr_t)&tb->tb_stats->executions.normal);
+if (sizeof(tb->tb_stats->executions.normal) == 4) {
+TCGv_i32 t = tcg_temp_ebb_new_i32();
+tcg_gen_ld_i32(t, ptr, 0);
+tcg_gen_addi_i32(t, t, 1);
+tcg_gen_st_i32(t, ptr, 0);
+tcg_temp_free_i32(t);
+} else {

[PATCH v14 00/10] TCG code quality tracking

2023-05-30 Thread Fei Wu
v14
---
* cleanup TCGProfile in patch 04


Alex Bennée (1):
  tb-stats: reset the tracked TBs on a tb_flush

Fei Wu (3):
  accel/tcg: remove CONFIG_PROFILER
  accel/tcg: add jit stats and time to TBStatistics
  docs: add tb-stats how to

Vanderson M. do Rosario (6):
  accel/tcg: introduce TBStatistics structure
  accel: collecting TB execution count
  debug: add -d tb_stats to control TBStatistics collection:
  monitor: adding tb_stats hmp command
  Adding info [tb-list|tb] commands to HMP (WIP)
  tb-stats: dump hot TBs at the end of the execution

 MAINTAINERS   |   1 +
 accel/tcg/cpu-exec.c  |   6 +
 accel/tcg/meson.build |   1 +
 accel/tcg/monitor.c   | 122 +-
 accel/tcg/tb-context.h|   1 +
 accel/tcg/tb-hash.h   |   7 +
 accel/tcg/tb-maint.c  |  20 +
 accel/tcg/tb-stats.c  | 692 ++
 accel/tcg/tcg-accel-ops.c |  15 +-
 accel/tcg/tcg-runtime.c   |   1 +
 accel/tcg/translate-all.c | 143 +--
 accel/tcg/translator.c|  30 ++
 disas/disas.c |  26 +-
 docs/devel/tcg-tbstats.rst| 129 +++
 hmp-commands-info.hx  |  16 +
 hmp-commands.hx   |  16 +
 include/exec/exec-all.h   |   3 +
 include/exec/gen-icount.h |   2 +
 include/exec/tb-stats-dump.h  |  21 ++
 include/exec/tb-stats-flags.h |  34 ++
 include/exec/tb-stats.h   | 164 
 include/monitor/hmp.h |   3 +
 include/qemu/log-for-trace.h  |   6 +-
 include/qemu/log.h|   3 +
 include/qemu/timer.h  |   5 +-
 include/tcg/tcg.h |  41 +-
 linux-user/exit.c |   2 +
 meson.build   |   2 -
 meson_options.txt |   2 -
 scripts/meson-buildoptions.sh |   3 -
 softmmu/runstate.c|  10 +-
 stubs/meson.build |   1 +
 stubs/tb-stats.c  |  32 ++
 tcg/tcg.c | 230 +++
 tests/qtest/qmp-cmd-test.c|   2 +-
 util/log.c| 103 -
 36 files changed, 1601 insertions(+), 294 deletions(-)
 create mode 100644 accel/tcg/tb-stats.c
 create mode 100644 docs/devel/tcg-tbstats.rst
 create mode 100644 include/exec/tb-stats-dump.h
 create mode 100644 include/exec/tb-stats-flags.h
 create mode 100644 include/exec/tb-stats.h
 create mode 100644 stubs/tb-stats.c

-- 
2.25.1




[PATCH v13 07/10] tb-stats: reset the tracked TBs on a tb_flush

2023-05-29 Thread Fei Wu
From: Alex Bennée 

We keep track of translations but can only do so up until the
translation cache is flushed. At that point we really have no idea if
we can re-create a translation because all the active tracking
information has been reset.

Signed-off-by: Alex Bennée 
Reviewed-by: Richard Henderson 
Signed-off-by: Fei Wu 
---
 accel/tcg/tb-maint.c|  1 +
 accel/tcg/tb-stats.c| 19 +++
 include/exec/tb-stats.h |  8 
 3 files changed, 28 insertions(+)

diff --git a/accel/tcg/tb-maint.c b/accel/tcg/tb-maint.c
index 0980fca358..11ff0ddd90 100644
--- a/accel/tcg/tb-maint.c
+++ b/accel/tcg/tb-maint.c
@@ -763,6 +763,7 @@ static void do_tb_flush(CPUState *cpu, run_on_cpu_data 
tb_flush_count)
 qht_reset_size(&tb_ctx.htable, CODE_GEN_HTABLE_SIZE);
 tb_remove_all();
 
+tbstats_reset_tbs();
 tcg_region_reset_all();
 /* XXX: flush processor icache at this point if cache flush is expensive */
 qatomic_inc(&tb_ctx.tb_flush_count);
diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c
index 805e1fc74d..139f049ffc 100644
--- a/accel/tcg/tb-stats.c
+++ b/accel/tcg/tb-stats.c
@@ -267,6 +267,25 @@ void do_hmp_tbstats_safe(CPUState *cpu, run_on_cpu_data 
icmd)
 g_free(cmdinfo);
 }
 
+/*
+ * We have to reset the tbs array on a tb_flush as those
+ * TranslationBlocks no longer exist and we no longer know if the
+ * current mapping is still valid.
+ */
+
+static void reset_tbs_array(void *p, uint32_t hash, void *userp)
+{
+TBStatistics *tbs = p;
+g_ptr_array_set_size(tbs->tbs, 0);
+}
+
+void tbstats_reset_tbs(void)
+{
+if (tb_ctx.tb_stats.map) {
+qht_iter(&tb_ctx.tb_stats, reset_tbs_array, NULL);
+}
+}
+
 void init_tb_stats_htable(void)
 {
 if (!tb_ctx.tb_stats.map && tb_stats_collection_enabled()) {
diff --git a/include/exec/tb-stats.h b/include/exec/tb-stats.h
index 4bb343870b..30b788f7b2 100644
--- a/include/exec/tb-stats.h
+++ b/include/exec/tb-stats.h
@@ -124,4 +124,12 @@ struct TbstatsCommand {
 
 void do_hmp_tbstats_safe(CPUState *cpu, run_on_cpu_data icmd);
 
+/**
+ * tbstats_reset_tbs: reset the linked array of TBs
+ *
+ * Reset the list of tbs for a given array. Should be called from
+ * safe work during tb_flush.
+ */
+void tbstats_reset_tbs(void);
+
 #endif
-- 
2.25.1




[PATCH v13 02/10] accel/tcg: introduce TBStatistics structure

2023-05-29 Thread Fei Wu
From: "Vanderson M. do Rosario" 

To store statistics for each TB, we created a TBStatistics structure
which is linked with the TBs. TBStatistics can stay alive after
tb_flush and be relinked to a regenerated TB. So the statistics can
be accumulated even through flushes.

The goal is to have all present and future qemu/tcg statistics and
meta-data stored in this new structure.

Reviewed-by: Alex Bennée 
Signed-off-by: Vanderson M. do Rosario 
Message-Id: <20190829173437.5926-2-vanderson...@gmail.com>
[AJB: fix git author, review comments]
Signed-off-by: Alex Bennée 
Signed-off-by: Fei Wu 
---
 MAINTAINERS   |  1 +
 accel/tcg/meson.build |  1 +
 accel/tcg/tb-context.h|  1 +
 accel/tcg/tb-hash.h   |  7 +
 accel/tcg/tb-maint.c  | 19 
 accel/tcg/tb-stats.c  | 58 +++
 accel/tcg/translate-all.c | 43 ++
 include/exec/exec-all.h   |  3 ++
 include/exec/tb-stats-flags.h | 21 +
 include/exec/tb-stats.h   | 56 +
 10 files changed, 210 insertions(+)
 create mode 100644 accel/tcg/tb-stats.c
 create mode 100644 include/exec/tb-stats-flags.h
 create mode 100644 include/exec/tb-stats.h

diff --git a/MAINTAINERS b/MAINTAINERS
index 4b025a7b63..d72ecd12b4 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -153,6 +153,7 @@ F: include/exec/cpu*.h
 F: include/exec/exec-all.h
 F: include/exec/tb-flush.h
 F: include/exec/target_long.h
+F: include/exec/tb-stats*.h
 F: include/exec/helper*.h
 F: include/sysemu/cpus.h
 F: include/sysemu/tcg.h
diff --git a/accel/tcg/meson.build b/accel/tcg/meson.build
index aeb20a6ef0..9263bdde11 100644
--- a/accel/tcg/meson.build
+++ b/accel/tcg/meson.build
@@ -4,6 +4,7 @@ tcg_ss.add(files(
   'cpu-exec-common.c',
   'cpu-exec.c',
   'tb-maint.c',
+  'tb-stats.c',
   'tcg-runtime-gvec.c',
   'tcg-runtime.c',
   'translate-all.c',
diff --git a/accel/tcg/tb-context.h b/accel/tcg/tb-context.h
index cac62d9749..d7910d586b 100644
--- a/accel/tcg/tb-context.h
+++ b/accel/tcg/tb-context.h
@@ -35,6 +35,7 @@ struct TBContext {
 /* statistics */
 unsigned tb_flush_count;
 unsigned tb_phys_invalidate_count;
+struct qht tb_stats;
 };
 
 extern TBContext tb_ctx;
diff --git a/accel/tcg/tb-hash.h b/accel/tcg/tb-hash.h
index 83dc610e4c..87d657a1c6 100644
--- a/accel/tcg/tb-hash.h
+++ b/accel/tcg/tb-hash.h
@@ -67,4 +67,11 @@ uint32_t tb_hash_func(tb_page_addr_t phys_pc, target_ulong 
pc, uint32_t flags,
 return qemu_xxhash7(phys_pc, pc, flags, cf_mask, trace_vcpu_dstate);
 }
 
+static inline
+uint32_t tb_stats_hash_func(tb_page_addr_t phys_pc, target_ulong pc,
+uint32_t flags)
+{
+return qemu_xxhash5(phys_pc, pc, flags);
+}
+
 #endif
diff --git a/accel/tcg/tb-maint.c b/accel/tcg/tb-maint.c
index 991746f80f..0980fca358 100644
--- a/accel/tcg/tb-maint.c
+++ b/accel/tcg/tb-maint.c
@@ -24,6 +24,7 @@
 #include "exec/log.h"
 #include "exec/exec-all.h"
 #include "exec/tb-flush.h"
+#include "exec/tb-stats.h"
 #include "exec/translate-all.h"
 #include "sysemu/tcg.h"
 #include "tcg/tcg.h"
@@ -41,6 +42,23 @@
 #define TB_FOR_EACH_JMP(head_tb, tb, n) \
 TB_FOR_EACH_TAGGED((head_tb)->jmp_list_head, tb, n, jmp_list_next)
 
+/*
+ * This is more or less the same compare as tb_cmp(), but the
+ * data persists over tb_flush. We also aggregate the various
+ * variations of cflags under one record and ignore the details of
+ * page overlap (although we can count it).
+ */
+bool tb_stats_cmp(const void *ap, const void *bp)
+{
+const TBStatistics *a = ap;
+const TBStatistics *b = bp;
+
+return a->phys_pc == b->phys_pc &&
+a->pc == b->pc &&
+a->cs_base == b->cs_base &&
+a->flags == b->flags;
+}
+
 static bool tb_cmp(const void *ap, const void *bp)
 {
 const TranslationBlock *a = ap;
@@ -60,6 +78,7 @@ void tb_htable_init(void)
 unsigned int mode = QHT_MODE_AUTO_RESIZE;
 
 qht_init(&tb_ctx.htable, tb_cmp, CODE_GEN_HTABLE_SIZE, mode);
+init_tb_stats_htable();
 }
 
 typedef struct PageDesc PageDesc;
diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c
new file mode 100644
index 00..f988bd8a31
--- /dev/null
+++ b/accel/tcg/tb-stats.c
@@ -0,0 +1,58 @@
+/*
+ * QEMU System Emulator, Code Quality Monitor System
+ *
+ * Copyright (c) 2019 Vanderson M. do Rosario 
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+
+#include "disas/disas.h"
+
+#include "exec/tb-stats.h"
+#include "tb-context.h"
+
+/* TBStatistic collection controls */
+enum TBStatsStatus {
+TB_STATS_DISABLED = 0,
+TB_STATS_RUNNING,

[PATCH v13 10/10] docs: add tb-stats how to

2023-05-29 Thread Fei Wu
Signed-off-by: Fei Wu 
---
 docs/devel/tcg-tbstats.rst | 129 +
 1 file changed, 129 insertions(+)
 create mode 100644 docs/devel/tcg-tbstats.rst

diff --git a/docs/devel/tcg-tbstats.rst b/docs/devel/tcg-tbstats.rst
new file mode 100644
index 00..bfba222e96
--- /dev/null
+++ b/docs/devel/tcg-tbstats.rst
@@ -0,0 +1,129 @@
+
+TBStatistics
+
+
+What is TBStatistics
+
+
+TBStatistics (tb_stats) is a tool to gather various internal information of TCG
+during binary translation. This allows us to identify things such as the hottest
+TBs, the guest to host instruction translation ratio, the number of spills during
+register allocation and more.
+
+
+How to use TBStatistics
+===
+
+1. HMP interface
+
+
+TBStatistics provides HMP interface, you can try the following examples after
+connecting to the monitor.
+
+* First check the help info::
+
+(qemu) help tb_stats
+tb_stats command [stats_level] -- Control tb statistics 
collection:tb_stats (start|pause|stop|filter) [all|jit_stats|exec_stats]
+
+(qemu) help info tb-list
+info tb-list [number sortedby] -- show a [number] translated blocks sorted 
by [sortedby]sortedby opts: hotness hg spills
+
+(qemu) help info tb
+info tb id [flag1,flag2,...] -- show information about one translated 
block by id.dump flags can be used to set dump code level: out_asm in_asm op
+
+* Enable TBStatistics::
+
+(qemu) tb_stats start all
+(qemu)
+
+* Get the list of TBs of interest::
+
+(qemu) info tb-list 2
+TB id:1 | phys:0x79bca0 virt:0x8059bca0 flags:0x01024001 0 inv/1
+| exec:1464084/0 guest inst cov:0.15%
+| trans:1 ints: g:3 op:16 op_opt:15 spills:0
+| h/g (host bytes / guest insts): 64.00
+| time to gen at 2.4GHz => code:607.08(ns) IR:250.83(ns)
+
+TB id:2 | phys:0x2adf0c virt:0x800adf0c flags:0x01024001 0 inv/1
+| exec:1033298/0 guest inst cov:0.28%
+| trans:1 ints: g:8 op:35 op_opt:33 spills:0
+| h/g (host bytes / guest insts): 86.00
+| time to gen at 2.4GHz => code:1429.58(ns) IR:545.42(ns)
+
+* Dive into the specific TB::
+
+(qemu) info tb 1 op
+--
+
+TB id:1 | phys:0x79bca0 virt:0x8059bca0 flags:0x01024001 7 inv/19
+| exec:2038349/0 guest inst cov:0.15%
+| trans:19 ints: g:3 op:16 op_opt:15 spills:0
+| h/g (host bytes / guest insts): 64.00
+| time to gen at 2.4GHz => code:133434.17(ns) IR:176988.33(ns)
+
+OP:
+ ld_i32 loc1,env,$0xfff0
+ brcond_i32 loc1,$0x0,lt,$L0
+ mov_i64 loc3,$0x7f3c70b3a4e0
+ call inc_exec_freq,$0x1,$0,loc3
+
+  8059bca0 6422
+ add_i64 loc5,x2/sp,$0x8
+ qemu_ld_i64 x8/s0,loc5,un+leq,1
+
+  8059bca2 
+ add_i64 x2/sp,x2/sp,$0x10
+
+  8059bca4 
+ mov_i64 pc,x1/ra
+ and_i64 pc,pc,$0xfffe
+ call lookup_tb_ptr,$0x6,$1,tmp9,env
+ goto_ptr tmp9
+ set_label $L0
+ exit_tb $0x7f3e887a8043
+
+--
+
+* Stop TBStatistics after investigation, this will disable TBStatistics 
completely.::
+
+(qemu) tb_stats stop
+(qemu)
+
+* Alternatively, TBStatistics can be paused, the previous collected 
TBStatistics
+  are not cleared but there is no TBStatistics recorded for new TBs.::
+
+(qemu) tb_stats pause
+(qemu)
+
+* Definitely, TBStatistics can be restarted for another round of 
investigation.::
+
+(qemu) tb_stats start all
+(qemu)
+
+
+2. Dump at exit
+---
+
+New command line options have been added to enable dumping TB information at 
exit:::
+
+-d tb_stats[[,level=(+all+jit+exec+time)][,dump_limit=]]
+
+e.g. starting qemu like this:::
+
+-d tb_stats,dump_limit=2
+
+Qemu prints the following at exit:::
+
+QEMU: Terminated
+TB id:1 | phys:0x61be02 virt:0x8041be02 flags:0x01024001 0 inv/1
+| exec:72739176/0 guest inst cov:20.22%
+| trans:1 ints: g:9 op:35 op_opt:33 spills:0
+| h/g (host bytes / guest insts): 51.57
+| time to gen at 2.4GHz => code:1065.42(ns) IR:554.17(ns)
+
+TB id:2 | phys:0x61bc66 virt:0x8041bc66 flags:0x01024001 0 inv/1
+| exec:25069507/0 guest inst cov:0.77%
+| trans:1 ints: g:1 op:15 op_opt:14 spills:0
+| h/g (host bytes / guest insts): 128.00
+| time to gen at 2.4GHz => code:312.50(ns) IR:152.08(ns)
-- 
2.25.1




[PATCH v13 08/10] Adding info [tb-list|tb] commands to HMP (WIP)

2023-05-29 Thread Fei Wu
From: "Vanderson M. do Rosario" 

These commands allow the exploration of TBs generated by the TCG.
Understand which ones are hotter or have more guest/host instructions, and
examine their guest, host and IR code.

The goal of this command is to allow the dynamic exploration of TCG
behavior and code quality. Therefore, for now, a corresponding QMP
command is not worthwhile.

[AJB: WIP - we still can't be safely sure a translation will succeed]

Example of output:

TB id:1 | phys:0x34d54 virt:0x00034d54 flags:0xf0
| exec:4828932/0 guest inst cov:16.38%
| trans:1 ints: g:3 op:82 op_opt:34 spills:3
| h/g (host bytes / guest insts): 90.64
| time to gen at 2.4GHz => code:3150.83(ns) IR:712.08(ns)
| targets: 0x00034d5e (id:11), 0x00034d0d (id:2)

TB id:2 | phys:0x34d0d virt:0x00034d0d flags:0xf0
| exec:4825842/0 guest inst cov:21.82%
| trans:1 ints: g:4 op:80 op_opt:38 spills:2
| h/g (host bytes / guest insts): 84.00
| time to gen at 2.4GHz => code:3362.92(ns) IR:793.75(ns)
| targets: 0x00034d19 (id:12), 0x00034d54 (id:1)

TB id:2 | phys:0x34d0d virt:0x00034d0d flags:0xf0
| exec:6956495/0  guest inst cov:21.82%
| trans:2 ints: g:2 op:40 op_opt:19 spills:1
| h/g (host bytes / guest insts): 84.00
| time to gen at 2.4GHz => code:3130.83(ns) IR:722.50(ns)
| targets: 0x00034d19 (id:12), 0x00034d54 (id:1)


IN:
0x00034d0d:  89 demovl %ebx, %esi
0x00034d0f:  26 8b 0e movl %es:(%esi), %ecx
0x00034d12:  26 f6 46 08 80   testb$0x80, %es:8(%esi)
0x00034d17:  75 3bjne  0x34d54

--

TB id:1 | phys:0x34d54 virt:0x00034d54 flags:0xf0
| exec:5202686/0 guest inst cov:11.28%
| trans:1 ints: g:3 op:82 op_opt:34 spills:3
| h/g (host bytes / guest insts): 90.64
| time to gen at 2.4GHz => code:2793.75(ns) IR:614.58(ns)
| targets: 0x00034d5e (id:3), 0x00034d0d (id:2)

TB id:2 | phys:0x34d0d virt:0x00034d0d flags:0xf0
| exec:5199468/0 guest inst cov:15.03%
| trans:1 ints: g:4 op:80 op_opt:38 spills:2
| h/g (host bytes / guest insts): 84.00
| time to gen at 2.4GHz => code:2958.75(ns) IR:719.58(ns)
| targets: 0x00034d19 (id:4), 0x00034d54 (id:1)

--
2 TBs to reach 25% of guest inst exec coverage
Total of guest insts exec: 138346727

--

Acked-by: Dr. David Alan Gilbert 
Signed-off-by: Vanderson M. do Rosario 
Message-Id: <20190829173437.5926-10-vanderson...@gmail.com>
[AJB: fix authorship, dropped coverset]
Signed-off-by: Alex Bennée 
Signed-off-by: Fei Wu 
---
 accel/tcg/monitor.c  |  54 ++
 accel/tcg/tb-stats.c | 322 +++
 disas/disas.c|  26 ++-
 hmp-commands-info.hx |  16 ++
 include/exec/tb-stats.h  |  33 +++-
 include/monitor/hmp.h|   2 +
 include/qemu/log-for-trace.h |   6 +-
 include/qemu/log.h   |   2 +
 util/log.c   |  66 +--
 9 files changed, 508 insertions(+), 19 deletions(-)

diff --git a/accel/tcg/monitor.c b/accel/tcg/monitor.c
index 2e00f10267..a1780c5920 100644
--- a/accel/tcg/monitor.c
+++ b/accel/tcg/monitor.c
@@ -8,6 +8,7 @@
 
 #include "qemu/osdep.h"
 #include "qemu/accel.h"
+#include "qemu/log.h"
 #include "qapi/error.h"
 #include "qapi/type-helpers.h"
 #include "qapi/qapi-commands-machine.h"
@@ -18,6 +19,7 @@
 #include "sysemu/cpu-timers.h"
 #include "sysemu/tcg.h"
 #include "exec/tb-stats.h"
+#include "tb-context.h"
 #include "internal.h"
 
 
@@ -132,6 +134,58 @@ void hmp_tbstats(Monitor *mon, const QDict *qdict)
 
 }
 
+void hmp_info_tblist(Monitor *mon, const QDict *qdict)
+{
+int number_int;
+const char *sortedby_str = NULL;
+if (!tcg_enabled()) {
+error_report("TB information is only available with accel=tcg");
+return;
+}
+if (!tb_ctx.tb_stats.map) {
+error_report("no TB information recorded");
+return;
+}
+
+number_int = qdict_get_try_int(qdict, "number", 10);
+sortedby_str = qdict_get_try_str(qdict, "sortedby");
+
+int sortedby = SORT_BY_HOTNESS;
+if (sortedby_str == NULL || strcmp(sortedby_str, "hotness") == 0) {
+sortedby = SORT_BY_HOTNESS;
+} else if (strcmp(sortedby_str, "hg") == 0) {
+sortedby = SORT_BY_HG;
+} else if (strcmp(sortedby_str, "spills") == 0) {
+sortedby = SORT_BY_SPILLS;
+} else {
+error_report("valid sort options ar

[PATCH v13 09/10] tb-stats: dump hot TBs at the end of the execution

2023-05-29 Thread Fei Wu
From: "Vanderson M. do Rosario" 

Dump the hottest TBs if -d tb_stats,dump_limit=N is used.

Example of output for the 3 hottest TBs:

TB id:1 | phys:0x34d54 virt:0x00034d54 flags:0xf0
| exec:4828932/0 guest inst cov:16.38%
| trans:1 ints: g:3 op:82 op_opt:34 spills:3
| h/g (host bytes / guest insts): 90.64
| time to gen at 2.4GHz => code:3150.83(ns) IR:712.08(ns)
| targets: 0x00034d5e (id:11), 0x00034d0d (id:2)

TB id:2 | phys:0x34d0d virt:0x00034d0d flags:0xf0
| exec:4825842/0 guest inst cov:21.82%
| trans:1 ints: g:4 op:80 op_opt:38 spills:2
| h/g (host bytes / guest insts): 84.00
| time to gen at 2.4GHz => code:3362.92(ns) IR:793.75(ns)
| targets: 0x00034d19 (id:12), 0x00034d54 (id:1)

TB id:3 | phys:0xec1c1 virt:0x000ec1c1 flags:0xb0
| exec:872032/0 guest inst cov:1.97%
| trans:1 ints: g:2 op:56 op_opt:26 spills:1
| h/g (host bytes / guest insts): 68.00
| time to gen at 2.4GHz => code:1692.08(ns) IR:473.75(ns)
| targets: 0x000ec1c5 (id:4), 0x000ec1cb (id:13)

Signed-off-by: Vanderson M. do Rosario 
Message-Id: <20190829173437.5926-12-vanderson...@gmail.com>
[AJB: fix authorship, add softmmu support]
Signed-off-by: Alex Bennée 
Signed-off-by: Fei Wu 
---
 accel/tcg/tb-stats.c  | 21 +
 include/exec/tb-stats-dump.h  | 21 +
 include/exec/tb-stats-flags.h |  1 +
 linux-user/exit.c |  2 ++
 softmmu/runstate.c|  2 ++
 stubs/tb-stats.c  |  5 +
 util/log.c|  4 ++--
 7 files changed, 54 insertions(+), 2 deletions(-)
 create mode 100644 include/exec/tb-stats-dump.h

diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c
index 4b358cb421..d77891492a 100644
--- a/accel/tcg/tb-stats.c
+++ b/accel/tcg/tb-stats.c
@@ -19,6 +19,7 @@
 
 #include "exec/tb-stats.h"
 #include "exec/tb-flush.h"
+#include "exec/tb-stats-dump.h"
 #include "tb-context.h"
 
 #include "internal.h"
@@ -32,6 +33,7 @@ enum TBStatsStatus {
 
 static enum TBStatsStatus tcg_collect_tb_stats;
 static uint32_t default_tbstats_flag;
+static int max_dump_tbs;
 /* only accessed in safe work */
 static GList *last_search;
 
@@ -616,6 +618,20 @@ void dump_tb_info(int id, int log_mask, bool use_monitor)
 }
 
 
+/*
+ * Dump the final stats
+ */
+void tb_stats_dump(void)
+{
+if (!tb_stats_collection_enabled()) {
+return;
+}
+
+dump_tbs_info(max_dump_tbs, SORT_BY_HOTNESS, false);
+}
+
+/* TBStatistic collection controls */
+
 void enable_collect_tb_stats(void)
 {
 tcg_collect_tb_stats = TB_STATS_RUNNING;
@@ -669,3 +685,8 @@ void set_default_tbstats_flag(uint32_t flags)
 {
 default_tbstats_flag = flags;
 }
+
+void set_tbstats_max_tbs(int max)
+{
+max_dump_tbs = max;
+}
diff --git a/include/exec/tb-stats-dump.h b/include/exec/tb-stats-dump.h
new file mode 100644
index 00..197c6148e9
--- /dev/null
+++ b/include/exec/tb-stats-dump.h
@@ -0,0 +1,21 @@
+/*
+ * TB Stats common dump functions across sysemu/linux-user
+ *
+ * Copyright (c) 2019 Linaro
+ *
+ * SPDX-License-Identifier: GPL-3.0-or-later
+ */
+
+#ifndef _TB_STATS_DUMP_H_
+#define _TB_STATS_DUMP_H_
+
+/**
+ * tb_stats_dump: dump final tb_stats at end of execution
+ */
+#ifdef CONFIG_TCG
+void tb_stats_dump(void);
+#else
+static inline void tb_stats_dump(void) { /* do nothing */ };
+#endif
+
+#endif /* _TB_STATS_DUMP_H_ */
diff --git a/include/exec/tb-stats-flags.h b/include/exec/tb-stats-flags.h
index a3897c99b1..484a4c9c68 100644
--- a/include/exec/tb-stats-flags.h
+++ b/include/exec/tb-stats-flags.h
@@ -26,6 +26,7 @@ bool tb_stats_collection_enabled(void);
 bool tb_stats_collection_disabled(void);
 bool tb_stats_collection_paused(void);
 
+void set_tbstats_max_tbs(int max);
 uint32_t get_default_tbstats_flag(void);
 void set_default_tbstats_flag(uint32_t);
 void set_tbstats_flags(uint32_t flags);
diff --git a/linux-user/exit.c b/linux-user/exit.c
index 3017d28a3c..4fd23bcf60 100644
--- a/linux-user/exit.c
+++ b/linux-user/exit.c
@@ -25,6 +25,7 @@
 #ifdef CONFIG_GPROF
 #include 
 #endif
+#include "exec/tb-stats-dump.h"
 
 #ifdef CONFIG_GCOV
 extern void __gcov_dump(void);
@@ -41,4 +42,5 @@ void preexit_cleanup(CPUArchState *env, int code)
 gdb_exit(code);
 qemu_plugin_user_exit();
 perf_exit();
+tb_stats_dump();
 }
diff --git a/softmmu/runstate.c b/softmmu/runstate.c
index 37390799f1..b5ceb55ffd 100644
--- a/softmmu/runstate.c
+++ b/softmmu/runstate.c
@@ -30,6 +30,7 @@
 #include "crypto/cipher.h"
 #include "crypto/init.h"
 #include "exec/cpu-common.h"
+#include "exec/tb-stats-dump.h"
 #include "gdbstub/syscalls.h"
 #include "hw/boards.h"
 #include "mi

[PATCH v13 05/10] debug: add -d tb_stats to control TBStatistics collection:

2023-05-29 Thread Fei Wu
From: "Vanderson M. do Rosario" 

 -d tb_stats[[,level=(+all+jit+exec+time)][,dump_limit=<number>]]

"dump_limit" is used to limit the number of dumped TBStats in
linux-user mode.

[all+jit+exec+time] controls the profiling level used
by the TBStats. It can be used as follows:

-d tb_stats
-d tb_stats,level=jit+time
-d tb_stats,dump_limit=15
...

Signed-off-by: Vanderson M. do Rosario 
Message-Id: <20190829173437.5926-7-vanderson...@gmail.com>
[AJB: fix authorship, reword title]
Signed-off-by: Alex Bennée 
Signed-off-by: Fei Wu 
---
 accel/tcg/tb-stats.c  |  5 +
 include/exec/gen-icount.h |  1 +
 include/exec/tb-stats-flags.h |  3 +++
 include/exec/tb-stats.h   |  2 ++
 include/qemu/log.h|  1 +
 stubs/meson.build |  1 +
 stubs/tb-stats.c  | 27 +
 util/log.c| 37 +++
 8 files changed, 77 insertions(+)
 create mode 100644 stubs/tb-stats.c

diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c
index 78a3104c7f..68ac7d3f73 100644
--- a/accel/tcg/tb-stats.c
+++ b/accel/tcg/tb-stats.c
@@ -208,3 +208,8 @@ uint32_t get_default_tbstats_flag(void)
 {
 return default_tbstats_flag;
 }
+
+void set_default_tbstats_flag(uint32_t flags)
+{
+default_tbstats_flag = flags;
+}
diff --git a/include/exec/gen-icount.h b/include/exec/gen-icount.h
index 20e7835ff0..4372817951 100644
--- a/include/exec/gen-icount.h
+++ b/include/exec/gen-icount.h
@@ -3,6 +3,7 @@
 
 #include "exec/exec-all.h"
 #include "exec/tb-stats.h"
+#include "tb-stats-flags.h"
 
 /* Helpers for instruction counting code generation.  */
 
diff --git a/include/exec/tb-stats-flags.h b/include/exec/tb-stats-flags.h
index f29eff7576..04adaee8d9 100644
--- a/include/exec/tb-stats-flags.h
+++ b/include/exec/tb-stats-flags.h
@@ -15,6 +15,7 @@
 #define TB_EXEC_STATS (1 << 1)
 #define TB_JIT_STATS  (1 << 2)
 #define TB_JIT_TIME   (1 << 3)
+#define TB_ALL_STATS  (TB_EXEC_STATS | TB_JIT_STATS | TB_JIT_TIME)
 
 /* TBStatistic collection controls */
 void enable_collect_tb_stats(void);
@@ -24,5 +25,7 @@ bool tb_stats_collection_enabled(void);
 bool tb_stats_collection_paused(void);
 
 uint32_t get_default_tbstats_flag(void);
+void set_default_tbstats_flag(uint32_t);
+void set_tbstats_flags(uint32_t flags);
 
 #endif
diff --git a/include/exec/tb-stats.h b/include/exec/tb-stats.h
index d93d42e085..72585c448a 100644
--- a/include/exec/tb-stats.h
+++ b/include/exec/tb-stats.h
@@ -31,6 +31,8 @@
 #include "exec/tb-stats-flags.h"
 #include "tcg/tcg.h"
 
+#include "exec/tb-stats-flags.h"
+
 #define tb_stats_enabled(tb, JIT_STATS) \
 (tb && tb->tb_stats && (tb->tb_stats->stats_enabled & JIT_STATS))
 
diff --git a/include/qemu/log.h b/include/qemu/log.h
index c5643d8dd5..6f3b8091cd 100644
--- a/include/qemu/log.h
+++ b/include/qemu/log.h
@@ -35,6 +35,7 @@ bool qemu_log_separate(void);
 /* LOG_STRACE is used for user-mode strace logging. */
 #define LOG_STRACE (1 << 19)
 #define LOG_PER_THREAD (1 << 20)
+#define CPU_LOG_TB_STATS   (1 << 21)
 
 /* Lock/unlock output. */
 
diff --git a/stubs/meson.build b/stubs/meson.build
index a56645e2f7..e926649d40 100644
--- a/stubs/meson.build
+++ b/stubs/meson.build
@@ -65,3 +65,4 @@ else
 endif
 stub_ss.add(files('semihost-all.c'))
 stub_ss.add(when: 'CONFIG_VFIO_USER_SERVER', if_false: 
files('vfio-user-obj.c'))
+stub_ss.add(files('tb-stats.c'))
diff --git a/stubs/tb-stats.c b/stubs/tb-stats.c
new file mode 100644
index 00..d212c2a1fa
--- /dev/null
+++ b/stubs/tb-stats.c
@@ -0,0 +1,27 @@
+/*
+ * TB Stats Stubs
+ *
+ * Copyright (c) 2019
+ * Written by Alex Bennée 
+ *
+ * This code is licensed under the GNU GPL v2, or later.
+ */
+
+
+#include "qemu/osdep.h"
+#include "exec/tb-stats-flags.h"
+
+void enable_collect_tb_stats(void)
+{
+return;
+}
+
+bool tb_stats_collection_enabled(void)
+{
+return false;
+}
+
+void set_default_tbstats_flag(uint32_t flags)
+{
+return;
+}
diff --git a/util/log.c b/util/log.c
index 53b4f6c58e..7ae471d97c 100644
--- a/util/log.c
+++ b/util/log.c
@@ -19,6 +19,7 @@
 
 #include "qemu/osdep.h"
 #include "qemu/log.h"
+#include "qemu/qemu-print.h"
 #include "qemu/range.h"
 #include "qemu/error-report.h"
 #include "qapi/error.h"
@@ -27,6 +28,7 @@
 #include "qemu/thread.h"
 #include "qemu/lockable.h"
 #include "qemu/rcu.h"
+#include "exec/tb-stats-flags.h"
 #ifdef CONFIG_LINUX
 #include 
 #endif
@@ -47,6 +49,7 @@ static __thread Notifier qemu_log_thread_cleanup_notifier;
 int qemu_loglevel;
 static bool log_per_thread;
 static GArray *debug_regions;
+int32_t max_num_hot_tbs_to_dump;
 
 /* Returns true if qemu_log() will really write somewhere. */
 bool qemu

[PATCH v13 06/10] monitor: adding tb_stats hmp command

2023-05-29 Thread Fei Wu
From: "Vanderson M. do Rosario" 

Adding tb_stats [start|pause|stop|filter] command to hmp.
This allows controlling the collection of statistics.
It is also possible to set the level of collection:
all, jit, or exec.

tb_stats filter allows collecting statistics only for the TBs
in the last_search list.

The goal of this command is to allow the dynamic exploration
of the TCG behavior and quality. Therefore, for now, a
corresponding QMP command is not worthwhile.

Acked-by: Dr. David Alan Gilbert 
Signed-off-by: Vanderson M. do Rosario 
Message-Id: <20190829173437.5926-8-vanderson...@gmail.com>
Message-Id: <20190829173437.5926-9-vanderson...@gmail.com>
[AJB: fix authorship]
Signed-off-by: Alex Bennée 
Signed-off-by: Fei Wu 
---
 accel/tcg/monitor.c   |  45 +
 accel/tcg/tb-stats.c  | 121 +-
 hmp-commands.hx   |  16 +
 include/exec/tb-stats-flags.h |   2 +
 include/exec/tb-stats.h   |  10 +++
 include/monitor/hmp.h |   1 +
 6 files changed, 192 insertions(+), 3 deletions(-)

diff --git a/accel/tcg/monitor.c b/accel/tcg/monitor.c
index 2bc87f2642..2e00f10267 100644
--- a/accel/tcg/monitor.c
+++ b/accel/tcg/monitor.c
@@ -11,7 +11,9 @@
 #include "qapi/error.h"
 #include "qapi/type-helpers.h"
 #include "qapi/qapi-commands-machine.h"
+#include "qapi/qmp/qdict.h"
 #include "monitor/monitor.h"
+#include "monitor/hmp.h"
 #include "sysemu/cpus.h"
 #include "sysemu/cpu-timers.h"
 #include "sysemu/tcg.h"
@@ -87,6 +89,49 @@ HumanReadableText *qmp_x_query_opcount(Error **errp)
 }
 
 #ifdef CONFIG_TCG
+void hmp_tbstats(Monitor *mon, const QDict *qdict)
+{
+if (!tcg_enabled()) {
+error_report("TB information is only available with accel=tcg");
+return;
+}
+
+char *cmd = (char *) qdict_get_try_str(qdict, "command");
+enum TbstatsCmd icmd = -1;
+
+if (strcmp(cmd, "start") == 0) {
+icmd = START;
+} else if (strcmp(cmd, "pause") == 0) {
+icmd = PAUSE;
+} else if (strcmp(cmd, "stop") == 0) {
+icmd = STOP;
+} else if (strcmp(cmd, "filter") == 0) {
+icmd = FILTER;
+} else {
+error_report("invalid command!");
+return;
+}
+
+char *slevel = (char *) qdict_get_try_str(qdict, "level");
+uint32_t level = TB_EXEC_STATS | TB_JIT_STATS | TB_JIT_TIME;
+if (slevel) {
+if (strcmp(slevel, "jit") == 0) {
+level = TB_JIT_STATS;
+} else if (strcmp(slevel, "exec") == 0) {
+level = TB_EXEC_STATS;
+} else if (strcmp(slevel, "time") == 0) {
+level = TB_JIT_TIME;
+}
+}
+
+struct TbstatsCommand *tbscommand = g_new0(struct TbstatsCommand, 1);
+tbscommand->cmd = icmd;
+tbscommand->level = level;
+async_safe_run_on_cpu(first_cpu, do_hmp_tbstats_safe,
+  RUN_ON_CPU_HOST_PTR(tbscommand));
+
+}
+
 HumanReadableText *qmp_x_query_profile(Error **errp)
 {
 g_autoptr(GString) buf = g_string_new("");
diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c
index 68ac7d3f73..805e1fc74d 100644
--- a/accel/tcg/tb-stats.c
+++ b/accel/tcg/tb-stats.c
@@ -16,18 +16,20 @@
 #include "qemu/timer.h"
 
 #include "exec/tb-stats.h"
+#include "exec/tb-flush.h"
 #include "tb-context.h"
 
 /* TBStatistic collection controls */
 enum TBStatsStatus {
 TB_STATS_DISABLED = 0,
 TB_STATS_RUNNING,
-TB_STATS_PAUSED,
-TB_STATS_STOPPED
+TB_STATS_PAUSED
 };
 
 static enum TBStatsStatus tcg_collect_tb_stats;
 static uint32_t default_tbstats_flag;
+/* only accessed in safe work */
+static GList *last_search;
 
 uint64_t dev_time;
 
@@ -170,6 +172,101 @@ void dump_jit_profile_info(TCGProfile *s, GString *buf)
 g_free(jpi);
 }
 
+static void free_tbstats(void *p, uint32_t hash, void *userp)
+{
+g_free(p);
+}
+
+static void clean_tbstats(void)
+{
+/* remove all tb_stats */
+qht_iter(&tb_ctx.tb_stats, free_tbstats, NULL);
+qht_destroy(&tb_ctx.tb_stats);
+}
+
+void do_hmp_tbstats_safe(CPUState *cpu, run_on_cpu_data icmd)
+{
+struct TbstatsCommand *cmdinfo = icmd.host_ptr;
+int cmd = cmdinfo->cmd;
+uint32_t level = cmdinfo->level;
+
+switch (cmd) {
+case START:
+if (tb_stats_collection_enabled()) {
+qemu_printf("TB information already being recorded");
+return;
+} else if (tb_stats_collection_paused()) {
+set_tbstats_flags(level);
+} else {
+qht_init(&tb_ctx.tb_stats, tb_stats_cmp, CODE_GEN_HTABLE_SIZE,
+ QHT_MODE_AUTO_RESIZE);
+}
+
+set_default_tbstats_flag(level);
+enable_collect_tb_stats();
+  

[PATCH v13 01/10] accel/tcg: remove CONFIG_PROFILER

2023-05-29 Thread Fei Wu
TBStats will be introduced to replace CONFIG_PROFILER entirely; as a
first step, remove all CONFIG_PROFILER-related code.

Signed-off-by: Vanderson M. do Rosario 
Signed-off-by: Alex Bennée 
Signed-off-by: Fei Wu 
---
 accel/tcg/monitor.c   |  25 
 accel/tcg/tcg-accel-ops.c |  10 --
 accel/tcg/translate-all.c |  33 --
 include/qemu/timer.h  |   9 --
 include/tcg/tcg.h |  25 
 meson.build   |   2 -
 meson_options.txt |   2 -
 scripts/meson-buildoptions.sh |   3 -
 softmmu/runstate.c|   9 --
 tcg/tcg.c | 208 --
 tests/qtest/qmp-cmd-test.c|   3 -
 11 files changed, 329 deletions(-)

diff --git a/accel/tcg/monitor.c b/accel/tcg/monitor.c
index 92fce580f1..e903dd1d2e 100644
--- a/accel/tcg/monitor.c
+++ b/accel/tcg/monitor.c
@@ -80,36 +80,11 @@ HumanReadableText *qmp_x_query_opcount(Error **errp)
 return human_readable_text_from_str(buf);
 }
 
-#ifdef CONFIG_PROFILER
-
-int64_t dev_time;
-
-HumanReadableText *qmp_x_query_profile(Error **errp)
-{
-g_autoptr(GString) buf = g_string_new("");
-static int64_t last_cpu_exec_time;
-int64_t cpu_exec_time;
-int64_t delta;
-
-cpu_exec_time = tcg_cpu_exec_time();
-delta = cpu_exec_time - last_cpu_exec_time;
-
-g_string_append_printf(buf, "async time  %" PRId64 " (%0.3f)\n",
-   dev_time, dev_time / 
(double)NANOSECONDS_PER_SECOND);
-g_string_append_printf(buf, "qemu time   %" PRId64 " (%0.3f)\n",
-   delta, delta / (double)NANOSECONDS_PER_SECOND);
-last_cpu_exec_time = cpu_exec_time;
-dev_time = 0;
-
-return human_readable_text_from_str(buf);
-}
-#else
 HumanReadableText *qmp_x_query_profile(Error **errp)
 {
 error_setg(errp, "Internal profiler not compiled");
 return NULL;
 }
-#endif
 
 static void hmp_tcg_register(void)
 {
diff --git a/accel/tcg/tcg-accel-ops.c b/accel/tcg/tcg-accel-ops.c
index 58c8e64096..3973591508 100644
--- a/accel/tcg/tcg-accel-ops.c
+++ b/accel/tcg/tcg-accel-ops.c
@@ -70,20 +70,10 @@ void tcg_cpus_destroy(CPUState *cpu)
 int tcg_cpus_exec(CPUState *cpu)
 {
 int ret;
-#ifdef CONFIG_PROFILER
-int64_t ti;
-#endif
 assert(tcg_enabled());
-#ifdef CONFIG_PROFILER
-ti = profile_getclock();
-#endif
 cpu_exec_start(cpu);
 ret = cpu_exec(cpu);
 cpu_exec_end(cpu);
-#ifdef CONFIG_PROFILER
-qatomic_set(&tcg_ctx->prof.cpu_exec_time,
-tcg_ctx->prof.cpu_exec_time + profile_getclock() - ti);
-#endif
 return ret;
 }
 
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index c87648b99e..4e035e0f79 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -203,10 +203,6 @@ void cpu_restore_state_from_tb(CPUState *cpu, 
TranslationBlock *tb,
uintptr_t host_pc)
 {
 uint64_t data[TARGET_INSN_START_WORDS];
-#ifdef CONFIG_PROFILER
-TCGProfile *prof = &tcg_ctx->prof;
-int64_t ti = profile_getclock();
-#endif
 int insns_left = cpu_unwind_data_from_tb(tb, host_pc, data);
 
 if (insns_left < 0) {
@@ -223,12 +219,6 @@ void cpu_restore_state_from_tb(CPUState *cpu, 
TranslationBlock *tb,
 }
 
 cpu->cc->tcg_ops->restore_state_to_opc(cpu, tb, data);
-
-#ifdef CONFIG_PROFILER
-qatomic_set(&prof->restore_time,
-prof->restore_time + profile_getclock() - ti);
-qatomic_set(&prof->restore_count, prof->restore_count + 1);
-#endif
 }
 
 bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc)
@@ -291,13 +281,6 @@ static int setjmp_gen_code(CPUArchState *env, 
TranslationBlock *tb,
 tcg_ctx->cpu = NULL;
 *max_insns = tb->icount;
 
-#ifdef CONFIG_PROFILER
-qatomic_set(&tcg_ctx->prof.tb_count, tcg_ctx->prof.tb_count + 1);
-qatomic_set(&tcg_ctx->prof.interm_time,
-tcg_ctx->prof.interm_time + profile_getclock() - *ti);
-*ti = profile_getclock();
-#endif
-
 return tcg_gen_code(tcg_ctx, tb, pc);
 }
 
@@ -311,9 +294,6 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
 tb_page_addr_t phys_pc;
 tcg_insn_unit *gen_code_buf;
 int gen_code_size, search_size, max_insns;
-#ifdef CONFIG_PROFILER
-TCGProfile *prof = &tcg_ctx->prof;
-#endif
 int64_t ti;
 void *host_pc;
 
@@ -365,12 +345,6 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
 
  tb_overflow:
 
-#ifdef CONFIG_PROFILER
-/* includes aborted translations because of exceptions */
-qatomic_set(&prof->tb_count1, prof->tb_count1 + 1);
-ti = profile_getclock();
-#endif
-
 trace_translate_block(tb, pc, tb->tc.ptr);
 
 gen_code_size = setjmp_gen_code(env, tb, pc, host_pc, &max_insns, &ti);
@@ -425,13 +399,6 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
  */
 perf_report_code(pc, tb, tcg_splitwx_to_rx

[PATCH v13 00/10] TCG code quality tracking

2023-05-29 Thread Fei Wu
v12
---
* remove CONFIG_PROFILER completely in the first patch
* squash original patches 3-8 into one
* use Richard's inline version gen_tb_exec_count
* convert how-to to rst format as suggested by Thomas
* small change to patch 6 of hmp command


Alex Bennée (1):
  tb-stats: reset the tracked TBs on a tb_flush

Fei Wu (3):
  accel/tcg: remove CONFIG_PROFILER
  accel/tcg: add jit stats and time to TBStatistics
  docs: add tb-stats how to

Vanderson M. do Rosario (6):
  accel/tcg: introduce TBStatistics structure
  accel: collecting TB execution count
  debug: add -d tb_stats to control TBStatistics collection:
  monitor: adding tb_stats hmp command
  Adding info [tb-list|tb] commands to HMP (WIP)
  tb-stats: dump hot TBs at the end of the execution

 MAINTAINERS   |   1 +
 accel/tcg/cpu-exec.c  |   6 +
 accel/tcg/meson.build |   1 +
 accel/tcg/monitor.c   | 122 +-
 accel/tcg/tb-context.h|   1 +
 accel/tcg/tb-hash.h   |   7 +
 accel/tcg/tb-maint.c  |  20 +
 accel/tcg/tb-stats.c  | 692 ++
 accel/tcg/tcg-accel-ops.c |  15 +-
 accel/tcg/tcg-runtime.c   |   1 +
 accel/tcg/translate-all.c | 147 ++--
 accel/tcg/translator.c|  28 ++
 disas/disas.c |  26 +-
 docs/devel/tcg-tbstats.rst| 129 +++
 hmp-commands-info.hx  |  16 +
 hmp-commands.hx   |  16 +
 include/exec/exec-all.h   |   3 +
 include/exec/gen-icount.h |   2 +
 include/exec/tb-stats-dump.h  |  21 ++
 include/exec/tb-stats-flags.h |  34 ++
 include/exec/tb-stats.h   | 164 
 include/monitor/hmp.h |   3 +
 include/qemu/log-for-trace.h  |   6 +-
 include/qemu/log.h|   3 +
 include/qemu/timer.h  |   5 +-
 include/tcg/tcg.h |  50 ++-
 linux-user/exit.c |   2 +
 meson.build   |   2 -
 meson_options.txt |   2 -
 scripts/meson-buildoptions.sh |   3 -
 softmmu/runstate.c|  10 +-
 stubs/meson.build |   1 +
 stubs/tb-stats.c  |  32 ++
 tcg/tcg.c | 224 +++
 tests/qtest/qmp-cmd-test.c|   2 +-
 util/log.c| 103 -
 36 files changed, 1614 insertions(+), 286 deletions(-)
 create mode 100644 accel/tcg/tb-stats.c
 create mode 100644 docs/devel/tcg-tbstats.rst
 create mode 100644 include/exec/tb-stats-dump.h
 create mode 100644 include/exec/tb-stats-flags.h
 create mode 100644 include/exec/tb-stats.h
 create mode 100644 stubs/tb-stats.c

-- 
2.25.1




[PATCH v13 03/10] accel: collecting TB execution count

2023-05-29 Thread Fei Wu
From: "Vanderson M. do Rosario" 

If a TB has a TBS (TBStatistics) with the TB_EXEC_STATS
enabled, then we instrument the start code of this TB
to atomically count the number of times it is executed.
We count both the number of "normal" executions and atomic
executions of a TB.

The execution count of the TB is stored in its respective
TBS.

All TBStatistics are created by default with the flags from
default_tbstats_flag.

[Richard Henderson created the inline gen_tb_exec_count]

Signed-off-by: Vanderson M. do Rosario 
Message-Id: <20190829173437.5926-3-vanderson...@gmail.com>
[AJB: Fix author]
Signed-off-by: Alex Bennée 
Signed-off-by: Fei Wu 
---
 accel/tcg/cpu-exec.c  |  6 ++
 accel/tcg/tb-stats.c  |  6 ++
 accel/tcg/tcg-runtime.c   |  1 +
 accel/tcg/translate-all.c |  7 +--
 accel/tcg/translator.c| 25 +
 include/exec/gen-icount.h |  1 +
 include/exec/tb-stats-flags.h |  5 +
 include/exec/tb-stats.h   | 13 +
 8 files changed, 62 insertions(+), 2 deletions(-)

diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
index 0e741960da..c0d8f26237 100644
--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -25,6 +25,7 @@
 #include "trace.h"
 #include "disas/disas.h"
 #include "exec/exec-all.h"
+#include "exec/tb-stats.h"
 #include "tcg/tcg.h"
 #include "qemu/atomic.h"
 #include "qemu/rcu.h"
@@ -562,7 +563,12 @@ void cpu_exec_step_atomic(CPUState *cpu)
 mmap_unlock();
 }
 
+if (tb_stats_enabled(tb, TB_EXEC_STATS)) {
+tb->tb_stats->executions.atomic++;
+}
+
 cpu_exec_enter(cpu);
+
 /* execute the generated code */
 trace_exec_tb(tb, pc);
 cpu_tb_exec(cpu, tb, &tb_exit);
diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c
index f988bd8a31..143a52ef5c 100644
--- a/accel/tcg/tb-stats.c
+++ b/accel/tcg/tb-stats.c
@@ -22,6 +22,7 @@ enum TBStatsStatus {
 };
 
 static enum TBStatsStatus tcg_collect_tb_stats;
+static uint32_t default_tbstats_flag;
 
 void init_tb_stats_htable(void)
 {
@@ -56,3 +57,8 @@ bool tb_stats_collection_paused(void)
 {
 return tcg_collect_tb_stats == TB_STATS_PAUSED;
 }
+
+uint32_t get_default_tbstats_flag(void)
+{
+return default_tbstats_flag;
+}
diff --git a/accel/tcg/tcg-runtime.c b/accel/tcg/tcg-runtime.c
index e4e030043f..11c6192064 100644
--- a/accel/tcg/tcg-runtime.c
+++ b/accel/tcg/tcg-runtime.c
@@ -27,6 +27,7 @@
 #include "exec/helper-proto.h"
 #include "exec/cpu_ldst.h"
 #include "exec/exec-all.h"
+#include "exec/tb-stats.h"
 #include "disas/disas.h"
 #include "exec/log.h"
 #include "tcg/tcg.h"
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index bb9483785e..dadf49954f 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -295,6 +295,7 @@ static TBStatistics *tb_get_stats(tb_page_addr_t phys_pc, 
target_ulong pc,
 new_stats->pc = pc;
 new_stats->cs_base = cs_base;
 new_stats->flags = flags;
+new_stats->stats_enabled = get_default_tbstats_flag();
 
 /*
  * All initialisation must be complete before we insert into qht
@@ -382,9 +383,11 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
 /*
  * We want to fetch the stats structure before we start code
  * generation so we can count interesting things about this
- * generation.
+ * generation. If dfilter is in effect we will only collect stats
+ * for the specified range.
  */
-if (tb_stats_collection_enabled()) {
+if (tb_stats_collection_enabled() &&
+qemu_log_in_addr_range(tb->pc)) {
 tb->tb_stats = tb_get_stats(phys_pc, pc, cs_base, flags);
 } else {
 tb->tb_stats = NULL;
diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c
index 6120ef2a92..80ffbfb455 100644
--- a/accel/tcg/translator.c
+++ b/accel/tcg/translator.c
@@ -11,6 +11,7 @@
 #include "qemu/error-report.h"
 #include "tcg/tcg.h"
 #include "tcg/tcg-op.h"
+#include "tcg/tcg-temp-internal.h"
 #include "exec/exec-all.h"
 #include "exec/gen-icount.h"
 #include "exec/log.h"
@@ -18,6 +19,29 @@
 #include "exec/plugin-gen.h"
 #include "exec/replay-core.h"
 
+static void gen_tb_exec_count(TranslationBlock *tb)
+{
+if (tb_stats_enabled(tb, TB_EXEC_STATS)) {
+TCGv_ptr ptr = tcg_temp_ebb_new_ptr();
+
+tcg_gen_movi_ptr(ptr, (intptr_t)&tb->tb_stats->executions.normal);
+if (sizeof(tb->tb_stats->executions.normal) == 4) {
+TCGv_i32 t = tcg_temp_ebb_new_i32();
+tcg_gen_ld_i32(t, ptr, 0);
+tcg_gen_addi_i32(t, t, 1);
+tcg_gen_st_i32(t, ptr, 0);
+tcg_temp_free_i32(t);
+} else {

[PATCH v13 04/10] accel/tcg: add jit stats and time to TBStatistics

2023-05-29 Thread Fei Wu
This collects all the statistics for TBStatistics, not only for the
whole emulation but for each TB.

Signed-off-by: Vanderson M. do Rosario 
Signed-off-by: Alex Bennée 
Signed-off-by: Fei Wu 
---
 accel/tcg/monitor.c   |  20 -
 accel/tcg/tb-stats.c  | 146 ++
 accel/tcg/tcg-accel-ops.c |   7 ++
 accel/tcg/translate-all.c |  76 +-
 accel/tcg/translator.c|   5 +-
 include/exec/tb-stats-flags.h |   2 +
 include/exec/tb-stats.h   |  46 +++
 include/qemu/timer.h  |   6 ++
 include/tcg/tcg.h |  45 ++-
 softmmu/runstate.c|   9 +++
 tcg/tcg.c |  82 +--
 tests/qtest/qmp-cmd-test.c|   3 +
 12 files changed, 435 insertions(+), 12 deletions(-)

diff --git a/accel/tcg/monitor.c b/accel/tcg/monitor.c
index e903dd1d2e..2bc87f2642 100644
--- a/accel/tcg/monitor.c
+++ b/accel/tcg/monitor.c
@@ -15,6 +15,7 @@
 #include "sysemu/cpus.h"
 #include "sysemu/cpu-timers.h"
 #include "sysemu/tcg.h"
+#include "exec/tb-stats.h"
 #include "internal.h"
 
 
@@ -69,6 +70,11 @@ HumanReadableText *qmp_x_query_opcount(Error **errp)
 {
 g_autoptr(GString) buf = g_string_new("");
 
+if (!tb_stats_collection_enabled()) {
+error_setg(errp, "TB information not being recorded.");
+return NULL;
+}
+
 if (!tcg_enabled()) {
 error_setg(errp,
"Opcode count information is only available with 
accel=tcg");
@@ -80,11 +86,23 @@ HumanReadableText *qmp_x_query_opcount(Error **errp)
 return human_readable_text_from_str(buf);
 }
 
+#ifdef CONFIG_TCG
+HumanReadableText *qmp_x_query_profile(Error **errp)
+{
+g_autoptr(GString) buf = g_string_new("");
+
+dump_jit_exec_time_info(dev_time, buf);
+dev_time = 0;
+
+return human_readable_text_from_str(buf);
+}
+#else
 HumanReadableText *qmp_x_query_profile(Error **errp)
 {
-error_setg(errp, "Internal profiler not compiled");
+error_setg(errp, "TCG should be enabled!");
 return NULL;
 }
+#endif
 
 static void hmp_tcg_register(void)
 {
diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c
index 143a52ef5c..78a3104c7f 100644
--- a/accel/tcg/tb-stats.c
+++ b/accel/tcg/tb-stats.c
@@ -9,6 +9,11 @@
 #include "qemu/osdep.h"
 
 #include "disas/disas.h"
+#include "exec/exec-all.h"
+#include "tcg/tcg.h"
+
+#include "qemu/qemu-print.h"
+#include "qemu/timer.h"
 
 #include "exec/tb-stats.h"
 #include "tb-context.h"
@@ -24,6 +29,147 @@ enum TBStatsStatus {
 static enum TBStatsStatus tcg_collect_tb_stats;
 static uint32_t default_tbstats_flag;
 
+uint64_t dev_time;
+
+struct jit_profile_info {
+uint64_t translations;
+uint64_t aborted;
+uint64_t ops;
+unsigned ops_max;
+uint64_t del_ops;
+uint64_t temps;
+unsigned temps_max;
+uint64_t host;
+uint64_t guest;
+uint64_t search_data;
+
+uint64_t interm_time;
+uint64_t code_time;
+uint64_t restore_count;
+uint64_t restore_time;
+uint64_t opt_time;
+uint64_t la_time;
+};
+
+/* accumulate the statistics from all TBs */
+static void collect_jit_profile_info(void *p, uint32_t hash, void *userp)
+{
+struct jit_profile_info *jpi = userp;
+TBStatistics *tbs = p;
+
+jpi->translations += tbs->translations.total;
+jpi->ops += tbs->code.num_tcg_ops;
+if (stat_per_translation(tbs, code.num_tcg_ops) > jpi->ops_max) {
+jpi->ops_max = stat_per_translation(tbs, code.num_tcg_ops);
+}
+jpi->del_ops += tbs->code.deleted_ops;
+jpi->temps += tbs->code.temps;
+if (stat_per_translation(tbs, code.temps) > jpi->temps_max) {
+jpi->temps_max = stat_per_translation(tbs, code.temps);
+}
+jpi->host += tbs->code.out_len;
+jpi->guest += tbs->code.in_len;
+jpi->search_data += tbs->code.search_out_len;
+
+jpi->interm_time += stat_per_translation(tbs, gen_times.ir);
+jpi->opt_time += stat_per_translation(tbs, gen_times.ir_opt);
+jpi->la_time += stat_per_translation(tbs, gen_times.la);
+jpi->code_time += stat_per_translation(tbs, gen_times.code);
+
+/*
+ * The restore time covers how long we have spent restoring state
+ * from a given TB (e.g. recovering from a fault). It is therefor
+ * not related to the number of translations we have done.
+ */
+jpi->restore_time += tbs->tb_restore_time;
+jpi->restore_count += tbs->tb_restore_count;
+}
+
+void dump_jit_exec_time_info(uint64_t dev_time, GString *buf)
+{
+static uint64_t last_cpu_exec_time;
+uint64_t cpu_exec_time;
+uint64_t delta;
+
+cpu_exec_time = tcg_cpu_exec_time();
+delta = cpu_exec_time - last_cpu_exec_time;
+
+g_string_append_p

[PATCH] Makefile: add file entry to ctags

2023-05-18 Thread Fei Wu
It's more convenient to jump among files with --extra=+fq.

Signed-off-by: Fei Wu 
---
 Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Makefile b/Makefile
index 3c7d67142f..ffb3bcd4f4 100644
--- a/Makefile
+++ b/Makefile
@@ -239,8 +239,8 @@ ctags:
rm -f "$(SRC_PATH)/"tags,   \
"CTAGS", "Remove old tags")
$(call quiet-command, \
-   $(find-src-path) -exec ctags\
-   -f "$(SRC_PATH)/"tags --append {} +,\
+   $(find-src-path) -exec ctags --extra=+fq\
+   -f "$(SRC_PATH)/"tags --append {} +,\
"CTAGS", "Re-index $(SRC_PATH)")
 
 .PHONY: gtags
-- 
2.25.1




[PATCH v12 14/15] configure: remove the final bits of --profiler support

2023-05-18 Thread Fei Wu
From: Alex Bennée 

Signed-off-by: Alex Bennée 
Signed-off-by: Fei Wu 
---
 meson.build   | 2 --
 meson_options.txt | 2 --
 scripts/meson-buildoptions.sh | 3 ---
 3 files changed, 7 deletions(-)

diff --git a/meson.build b/meson.build
index 4dddccb890..4be21c9d57 100644
--- a/meson.build
+++ b/meson.build
@@ -1888,7 +1888,6 @@ if numa.found()
dependencies: numa))
 endif
 config_host_data.set('CONFIG_OPENGL', opengl.found())
-config_host_data.set('CONFIG_PROFILER', get_option('profiler'))
 config_host_data.set('CONFIG_RBD', rbd.found())
 config_host_data.set('CONFIG_RDMA', rdma.found())
 config_host_data.set('CONFIG_SDL', sdl.found())
@@ -3859,7 +3858,6 @@ if 'objc' in all_languages
   summary_info += {'QEMU_OBJCFLAGS':' '.join(qemu_objcflags)}
 endif
 summary_info += {'QEMU_LDFLAGS':  ' '.join(qemu_ldflags)}
-summary_info += {'profiler':  get_option('profiler')}
 summary_info += {'link-time optimization (LTO)': get_option('b_lto')}
 summary_info += {'PIE':   get_option('b_pie')}
 summary_info += {'static build':  config_host.has_key('CONFIG_STATIC')}
diff --git a/meson_options.txt b/meson_options.txt
index 11aec2a441..9692fa7cdc 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -332,8 +332,6 @@ option('qom_cast_debug', type: 'boolean', value: true,
 option('gprof', type: 'boolean', value: false,
description: 'QEMU profiling with gprof',
deprecated: true)
-option('profiler', type: 'boolean', value: false,
-   description: 'profiler support')
 option('slirp_smbd', type : 'feature', value : 'auto',
description: 'use smbd (at path --smbd=*) in slirp networking')
 
diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh
index 52fb079a60..28c7d21a15 100644
--- a/scripts/meson-buildoptions.sh
+++ b/scripts/meson-buildoptions.sh
@@ -38,7 +38,6 @@ meson_options_help() {
   printf "%s\n" '   jemalloc/system/tcmalloc)'
   printf "%s\n" '  --enable-module-upgrades try to load modules from alternate 
paths for'
   printf "%s\n" '   upgrades'
-  printf "%s\n" '  --enable-profilerprofiler support'
   printf "%s\n" '  --enable-rng-nonedummy RNG, avoid using 
/dev/(u)random and'
   printf "%s\n" '   getrandom()'
   printf "%s\n" '  --enable-strip   Strip targets on install'
@@ -386,8 +385,6 @@ _meson_option_parse() {
 --with-pkgversion=*) quote_sh "-Dpkgversion=$2" ;;
 --enable-png) printf "%s" -Dpng=enabled ;;
 --disable-png) printf "%s" -Dpng=disabled ;;
---enable-profiler) printf "%s" -Dprofiler=true ;;
---disable-profiler) printf "%s" -Dprofiler=false ;;
 --enable-pvrdma) printf "%s" -Dpvrdma=enabled ;;
 --disable-pvrdma) printf "%s" -Dpvrdma=disabled ;;
 --enable-qcow1) printf "%s" -Dqcow1=enabled ;;
-- 
2.25.1




[PATCH v12 10/15] monitor: adding tb_stats hmp command

2023-05-18 Thread Fei Wu
From: "Vanderson M. do Rosario" 

Adding tb_stats [start|pause|stop|filter] command to hmp.
This allows controlling the collection of statistics.
It is also possible to set the level of collection:
all, jit, or exec.

tb_stats filter allows collecting statistics only for the TBs
in the last_search list.

The goal of this command is to allow the dynamic exploration
of the TCG behavior and quality. Therefore, for now, a
corresponding QMP command is not worthwhile.

Acked-by: Dr. David Alan Gilbert 
Signed-off-by: Vanderson M. do Rosario 
Message-Id: <20190829173437.5926-8-vanderson...@gmail.com>
Message-Id: <20190829173437.5926-9-vanderson...@gmail.com>
[AJB: fix authorship]
Signed-off-by: Alex Bennée 
Signed-off-by: Fei Wu 
---
 accel/tcg/monitor.c   |  45 ++
 accel/tcg/tb-stats.c  | 112 ++
 hmp-commands.hx   |  16 +
 include/exec/tb-stats-flags.h |   1 +
 include/exec/tb-stats.h   |  10 +++
 include/monitor/hmp.h |   1 +
 softmmu/runstate.c|   6 ++
 7 files changed, 191 insertions(+)

diff --git a/accel/tcg/monitor.c b/accel/tcg/monitor.c
index 2bc87f2642..2e00f10267 100644
--- a/accel/tcg/monitor.c
+++ b/accel/tcg/monitor.c
@@ -11,7 +11,9 @@
 #include "qapi/error.h"
 #include "qapi/type-helpers.h"
 #include "qapi/qapi-commands-machine.h"
+#include "qapi/qmp/qdict.h"
 #include "monitor/monitor.h"
+#include "monitor/hmp.h"
 #include "sysemu/cpus.h"
 #include "sysemu/cpu-timers.h"
 #include "sysemu/tcg.h"
@@ -87,6 +89,49 @@ HumanReadableText *qmp_x_query_opcount(Error **errp)
 }
 
 #ifdef CONFIG_TCG
+void hmp_tbstats(Monitor *mon, const QDict *qdict)
+{
+if (!tcg_enabled()) {
+error_report("TB information is only available with accel=tcg");
+return;
+}
+
+char *cmd = (char *) qdict_get_try_str(qdict, "command");
+enum TbstatsCmd icmd = -1;
+
+if (strcmp(cmd, "start") == 0) {
+icmd = START;
+} else if (strcmp(cmd, "pause") == 0) {
+icmd = PAUSE;
+} else if (strcmp(cmd, "stop") == 0) {
+icmd = STOP;
+} else if (strcmp(cmd, "filter") == 0) {
+icmd = FILTER;
+} else {
+error_report("invalid command!");
+return;
+}
+
+char *slevel = (char *) qdict_get_try_str(qdict, "level");
+uint32_t level = TB_EXEC_STATS | TB_JIT_STATS | TB_JIT_TIME;
+if (slevel) {
+if (strcmp(slevel, "jit") == 0) {
+level = TB_JIT_STATS;
+} else if (strcmp(slevel, "exec") == 0) {
+level = TB_EXEC_STATS;
+} else if (strcmp(slevel, "time") == 0) {
+level = TB_JIT_TIME;
+}
+}
+
+struct TbstatsCommand *tbscommand = g_new0(struct TbstatsCommand, 1);
+tbscommand->cmd = icmd;
+tbscommand->level = level;
+async_safe_run_on_cpu(first_cpu, do_hmp_tbstats_safe,
+  RUN_ON_CPU_HOST_PTR(tbscommand));
+
+}
+
 HumanReadableText *qmp_x_query_profile(Error **errp)
 {
 g_autoptr(GString) buf = g_string_new("");
diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c
index 68ac7d3f73..55afe6e489 100644
--- a/accel/tcg/tb-stats.c
+++ b/accel/tcg/tb-stats.c
@@ -16,6 +16,7 @@
 #include "qemu/timer.h"
 
 #include "exec/tb-stats.h"
+#include "exec/tb-flush.h"
 #include "tb-context.h"
 
 /* TBStatistic collection controls */
@@ -28,6 +29,8 @@ enum TBStatsStatus {
 
 static enum TBStatsStatus tcg_collect_tb_stats;
 static uint32_t default_tbstats_flag;
+/* only accessed in safe work */
+static GList *last_search;
 
 uint64_t dev_time;
 
@@ -170,6 +173,102 @@ void dump_jit_profile_info(TCGProfile *s, GString *buf)
 g_free(jpi);
 }
 
+static void free_tbstats(void *p, uint32_t hash, void *userp)
+{
+g_free(p);
+}
+
+static void clean_tbstats(void)
+{
+/* remove all tb_stats */
+qht_iter(&tb_ctx.tb_stats, free_tbstats, NULL);
+qht_destroy(&tb_ctx.tb_stats);
+}
+
+void do_hmp_tbstats_safe(CPUState *cpu, run_on_cpu_data icmd)
+{
+struct TbstatsCommand *cmdinfo = icmd.host_ptr;
+int cmd = cmdinfo->cmd;
+uint32_t level = cmdinfo->level;
+
+switch (cmd) {
+case START:
+if (tb_stats_collection_paused()) {
+set_tbstats_flags(level);
+} else {
+if (tb_stats_collection_enabled()) {
+qemu_printf("TB information already being recorded");
+return;
+}
+qht_init(&tb_ctx.tb_stats, tb_stats_cmp, CODE_GEN_HTABLE_SIZE,
+QHT_MODE_AUTO_RESIZE);
+}
+
+set_default_tbstats_flag(level);
+enable_collect_tb_stats();
+tb_flush(cpu);
+break;
+cas

[PATCH v12 13/15] tb-stats: dump hot TBs at the end of the execution

2023-05-18 Thread Fei Wu
From: "Vanderson M. do Rosario" 

Dump the hottest TBs if -d tb_stats,dump_limit=N is used.

Example of output for the 3 hottest TBs:

TB id:1 | phys:0x34d54 virt:0x00034d54 flags:0xf0
| exec:4828932/0 guest inst cov:16.38%
| trans:1 ints: g:3 op:82 op_opt:34 spills:3
| h/g (host bytes / guest insts): 90.64
| time to gen at 2.4GHz => code:3150.83(ns) IR:712.08(ns)
| targets: 0x00034d5e (id:11), 0x00034d0d (id:2)

TB id:2 | phys:0x34d0d virt:0x00034d0d flags:0xf0
| exec:4825842/0 guest inst cov:21.82%
| trans:1 ints: g:4 op:80 op_opt:38 spills:2
| h/g (host bytes / guest insts): 84.00
| time to gen at 2.4GHz => code:3362.92(ns) IR:793.75(ns)
| targets: 0x00034d19 (id:12), 0x00034d54 (id:1)

TB id:3 | phys:0xec1c1 virt:0x000ec1c1 flags:0xb0
| exec:872032/0 guest inst cov:1.97%
| trans:1 ints: g:2 op:56 op_opt:26 spills:1
| h/g (host bytes / guest insts): 68.00
| time to gen at 2.4GHz => code:1692.08(ns) IR:473.75(ns)
| targets: 0x000ec1c5 (id:4), 0x000ec1cb (id:13)

Signed-off-by: Vanderson M. do Rosario 
Message-Id: <20190829173437.5926-12-vanderson...@gmail.com>
[AJB: fix authorship, add softmmu support]
Signed-off-by: Alex Bennée 
Signed-off-by: Fei Wu 
---
 accel/tcg/tb-stats.c  | 21 +
 include/exec/tb-stats-dump.h  | 21 +
 include/exec/tb-stats-flags.h |  1 +
 linux-user/exit.c |  2 ++
 softmmu/runstate.c|  2 ++
 stubs/tb-stats.c  |  5 +
 util/log.c|  4 ++--
 7 files changed, 54 insertions(+), 2 deletions(-)
 create mode 100644 include/exec/tb-stats-dump.h

diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c
index cb28879a45..3f7159b896 100644
--- a/accel/tcg/tb-stats.c
+++ b/accel/tcg/tb-stats.c
@@ -19,6 +19,7 @@
 
 #include "exec/tb-stats.h"
 #include "exec/tb-flush.h"
+#include "exec/tb-stats-dump.h"
 #include "tb-context.h"
 
 #include "internal.h"
@@ -33,6 +34,7 @@ enum TBStatsStatus {
 
 static enum TBStatsStatus tcg_collect_tb_stats;
 static uint32_t default_tbstats_flag;
+static int max_dump_tbs;
 /* only accessed in safe work */
 static GList *last_search;
 
@@ -618,6 +620,20 @@ void dump_tb_info(int id, int log_mask, bool use_monitor)
 }
 
 
+/*
+ * Dump the final stats
+ */
+void tb_stats_dump(void)
+{
+if (!tb_stats_collection_enabled()) {
+return;
+}
+
+dump_tbs_info(max_dump_tbs, SORT_BY_HOTNESS, false);
+}
+
+/* TBStatistic collection controls */
+
 void enable_collect_tb_stats(void)
 {
 tcg_collect_tb_stats = TB_STATS_RUNNING;
@@ -666,3 +682,8 @@ void set_default_tbstats_flag(uint32_t flags)
 {
 default_tbstats_flag = flags;
 }
+
+void set_tbstats_max_tbs(int max)
+{
+max_dump_tbs = max;
+}
diff --git a/include/exec/tb-stats-dump.h b/include/exec/tb-stats-dump.h
new file mode 100644
index 00..197c6148e9
--- /dev/null
+++ b/include/exec/tb-stats-dump.h
@@ -0,0 +1,21 @@
+/*
+ * TB Stats common dump functions across sysemu/linux-user
+ *
+ * Copyright (c) 2019 Linaro
+ *
+ * SPDX-License-Identifier: GPL-3.0-or-later
+ */
+
+#ifndef _TB_STATS_DUMP_H_
+#define _TB_STATS_DUMP_H_
+
+/**
+ * tb_stats_dump: dump final tb_stats at end of execution
+ */
+#ifdef CONFIG_TCG
+void tb_stats_dump(void);
+#else
+static inline void tb_stats_dump(void) { /* do nothing */ };
+#endif
+
+#endif /* _TB_STATS_DUMP_H_ */
diff --git a/include/exec/tb-stats-flags.h b/include/exec/tb-stats-flags.h
index 8c7abb62e5..ec5a215417 100644
--- a/include/exec/tb-stats-flags.h
+++ b/include/exec/tb-stats-flags.h
@@ -25,6 +25,7 @@ void pause_collect_tb_stats(void);
 bool tb_stats_collection_enabled(void);
 bool tb_stats_collection_paused(void);
 
+void set_tbstats_max_tbs(int max);
 uint32_t get_default_tbstats_flag(void);
 void set_default_tbstats_flag(uint32_t);
 void set_tbstats_flags(uint32_t flags);
diff --git a/linux-user/exit.c b/linux-user/exit.c
index 3017d28a3c..4fd23bcf60 100644
--- a/linux-user/exit.c
+++ b/linux-user/exit.c
@@ -25,6 +25,7 @@
 #ifdef CONFIG_GPROF
 #include 
 #endif
+#include "exec/tb-stats-dump.h"
 
 #ifdef CONFIG_GCOV
 extern void __gcov_dump(void);
@@ -41,4 +42,5 @@ void preexit_cleanup(CPUArchState *env, int code)
 gdb_exit(code);
 qemu_plugin_user_exit();
 perf_exit();
+tb_stats_dump();
 }
diff --git a/softmmu/runstate.c b/softmmu/runstate.c
index 37390799f1..b5ceb55ffd 100644
--- a/softmmu/runstate.c
+++ b/softmmu/runstate.c
@@ -30,6 +30,7 @@
 #include "crypto/cipher.h"
 #include "crypto/init.h"
 #include "exec/cpu-common.h"
+#include "exec/tb-stats-dump.h"
 #include "gdbstub/syscalls.h"
 #include "hw/boards.h"
 #include "migration/misc.h"
@@

[PATCH v12 01/15] accel/tcg: introduce TBStatistics structure

2023-05-18 Thread Fei Wu
From: "Vanderson M. do Rosario" 

To store statistics for each TB, we created a TBStatistics structure
which is linked with the TBs. TBStatistics can stay alive after
tb_flush and be relinked to a regenerated TB. So the statistics can
be accumulated even through flushes.

The goal is to have all present and future qemu/tcg statistics and
meta-data stored in this new structure.

Reviewed-by: Alex Bennée 
Signed-off-by: Vanderson M. do Rosario 
Message-Id: <20190829173437.5926-2-vanderson...@gmail.com>
[AJB: fix git author, review comments]
Signed-off-by: Alex Bennée 
Signed-off-by: Fei Wu 
---
 MAINTAINERS   |  1 +
 accel/tcg/meson.build |  1 +
 accel/tcg/tb-context.h|  1 +
 accel/tcg/tb-hash.h   |  7 +
 accel/tcg/tb-maint.c  | 19 
 accel/tcg/tb-stats.c  | 58 +++
 accel/tcg/translate-all.c | 43 ++
 include/exec/exec-all.h   |  3 ++
 include/exec/tb-stats-flags.h | 21 +
 include/exec/tb-stats.h   | 56 +
 10 files changed, 210 insertions(+)
 create mode 100644 accel/tcg/tb-stats.c
 create mode 100644 include/exec/tb-stats-flags.h
 create mode 100644 include/exec/tb-stats.h

diff --git a/MAINTAINERS b/MAINTAINERS
index 50585117a0..1e5a972be6 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -153,6 +153,7 @@ F: include/exec/cpu*.h
 F: include/exec/exec-all.h
 F: include/exec/tb-flush.h
 F: include/exec/target_long.h
+F: include/exec/tb-stats*.h
 F: include/exec/helper*.h
 F: include/sysemu/cpus.h
 F: include/sysemu/tcg.h
diff --git a/accel/tcg/meson.build b/accel/tcg/meson.build
index aeb20a6ef0..9263bdde11 100644
--- a/accel/tcg/meson.build
+++ b/accel/tcg/meson.build
@@ -4,6 +4,7 @@ tcg_ss.add(files(
   'cpu-exec-common.c',
   'cpu-exec.c',
   'tb-maint.c',
+  'tb-stats.c',
   'tcg-runtime-gvec.c',
   'tcg-runtime.c',
   'translate-all.c',
diff --git a/accel/tcg/tb-context.h b/accel/tcg/tb-context.h
index cac62d9749..d7910d586b 100644
--- a/accel/tcg/tb-context.h
+++ b/accel/tcg/tb-context.h
@@ -35,6 +35,7 @@ struct TBContext {
 /* statistics */
 unsigned tb_flush_count;
 unsigned tb_phys_invalidate_count;
+struct qht tb_stats;
 };
 
 extern TBContext tb_ctx;
diff --git a/accel/tcg/tb-hash.h b/accel/tcg/tb-hash.h
index 83dc610e4c..87d657a1c6 100644
--- a/accel/tcg/tb-hash.h
+++ b/accel/tcg/tb-hash.h
@@ -67,4 +67,11 @@ uint32_t tb_hash_func(tb_page_addr_t phys_pc, target_ulong 
pc, uint32_t flags,
 return qemu_xxhash7(phys_pc, pc, flags, cf_mask, trace_vcpu_dstate);
 }
 
+static inline
+uint32_t tb_stats_hash_func(tb_page_addr_t phys_pc, target_ulong pc,
+uint32_t flags)
+{
+return qemu_xxhash5(phys_pc, pc, flags);
+}
+
 #endif
diff --git a/accel/tcg/tb-maint.c b/accel/tcg/tb-maint.c
index 991746f80f..0980fca358 100644
--- a/accel/tcg/tb-maint.c
+++ b/accel/tcg/tb-maint.c
@@ -24,6 +24,7 @@
 #include "exec/log.h"
 #include "exec/exec-all.h"
 #include "exec/tb-flush.h"
+#include "exec/tb-stats.h"
 #include "exec/translate-all.h"
 #include "sysemu/tcg.h"
 #include "tcg/tcg.h"
@@ -41,6 +42,23 @@
 #define TB_FOR_EACH_JMP(head_tb, tb, n) \
 TB_FOR_EACH_TAGGED((head_tb)->jmp_list_head, tb, n, jmp_list_next)
 
+/*
+ * This is more or less the same comparison as tb_cmp(), but the
+ * data persists over tb_flush. We also aggregate the various
+ * variations of cflags under one record and ignore the details of
+ * page overlap (although we can count it).
+ */
+bool tb_stats_cmp(const void *ap, const void *bp)
+{
+const TBStatistics *a = ap;
+const TBStatistics *b = bp;
+
+return a->phys_pc == b->phys_pc &&
+a->pc == b->pc &&
+a->cs_base == b->cs_base &&
+a->flags == b->flags;
+}
+
 static bool tb_cmp(const void *ap, const void *bp)
 {
 const TranslationBlock *a = ap;
@@ -60,6 +78,7 @@ void tb_htable_init(void)
 unsigned int mode = QHT_MODE_AUTO_RESIZE;
 
 qht_init(&tb_ctx.htable, tb_cmp, CODE_GEN_HTABLE_SIZE, mode);
+init_tb_stats_htable();
 }
 
 typedef struct PageDesc PageDesc;
diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c
new file mode 100644
index 00..f988bd8a31
--- /dev/null
+++ b/accel/tcg/tb-stats.c
@@ -0,0 +1,58 @@
+/*
+ * QEMU System Emulator, Code Quality Monitor System
+ *
+ * Copyright (c) 2019 Vanderson M. do Rosario 
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+
+#include "disas/disas.h"
+
+#include "exec/tb-stats.h"
+#include "tb-context.h"
+
+/* TBStatistic collection controls */
+enum TBStatsStatus {
+TB_STATS_DISABLED = 0,
+TB_STATS_RUNNING,

[PATCH v12 04/15] accel: replacing part of CONFIG_PROFILER with TBStats

2023-05-18 Thread Fei Wu
From: "Vanderson M. do Rosario" 

We add some of the statistics collected in the TCGProfiler into the
TBStats, having the statistics not only for the whole emulation but
for each TB. Then, we removed these stats from TCGProfiler and
reconstruct the information for the "info jit" using the sum of all
TBStats statistics.

The goal is to have a single, better way of collecting emulation
statistics. Moreover, dynamically checking whether profiling is enabled
was shown to have an insignificant impact on performance:
https://wiki.qemu.org/Internships/ProjectIdeas/TCGCodeQuality#Overheads.

Reviewed-by: Alex Bennée 
Signed-off-by: Vanderson M. do Rosario 
Message-Id: <20190829173437.5926-5-vanderson...@gmail.com>
[AJB: fix authorship, use prof structure]
Signed-off-by: Alex Bennée 
Signed-off-by: Fei Wu 
---
 accel/tcg/tb-stats.c  |  99 +
 accel/tcg/translate-all.c |  10 ++--
 include/exec/tb-stats.h   |  11 
 include/tcg/tcg.h |   9 +--
 tcg/tcg.c | 112 --
 5 files changed, 129 insertions(+), 112 deletions(-)

diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c
index 143a52ef5c..74708d1f40 100644
--- a/accel/tcg/tb-stats.c
+++ b/accel/tcg/tb-stats.c
@@ -9,6 +9,10 @@
 #include "qemu/osdep.h"
 
 #include "disas/disas.h"
+#include "exec/exec-all.h"
+#include "tcg/tcg.h"
+
+#include "qemu/qemu-print.h"
 
 #include "exec/tb-stats.h"
 #include "tb-context.h"
@@ -24,6 +28,101 @@ enum TBStatsStatus {
 static enum TBStatsStatus tcg_collect_tb_stats;
 static uint32_t default_tbstats_flag;
 
+struct jit_profile_info {
+uint64_t translations;
+uint64_t aborted;
+uint64_t ops;
+unsigned ops_max;
+uint64_t del_ops;
+uint64_t temps;
+unsigned temps_max;
+uint64_t host;
+uint64_t guest;
+uint64_t search_data;
+};
+
+/* accumulate the statistics from all TBs */
+static void collect_jit_profile_info(void *p, uint32_t hash, void *userp)
+{
+struct jit_profile_info *jpi = userp;
+TBStatistics *tbs = p;
+
+jpi->translations += tbs->translations.total;
+jpi->ops += tbs->code.num_tcg_ops;
+if (stat_per_translation(tbs, code.num_tcg_ops) > jpi->ops_max) {
+jpi->ops_max = stat_per_translation(tbs, code.num_tcg_ops);
+}
+jpi->del_ops += tbs->code.deleted_ops;
+jpi->temps += tbs->code.temps;
+if (stat_per_translation(tbs, code.temps) > jpi->temps_max) {
+jpi->temps_max = stat_per_translation(tbs, code.temps);
+}
+jpi->host += tbs->code.out_len;
+jpi->guest += tbs->code.in_len;
+jpi->search_data += tbs->code.search_out_len;
+}
+
+/* dump JIT statistics using TCGProfile and TBStats */
+void dump_jit_profile_info(TCGProfile *s, GString *buf)
+{
+if (!tb_stats_collection_enabled()) {
+return;
+}
+
+struct jit_profile_info *jpi = g_new0(struct jit_profile_info, 1);
+
+qht_iter(&tb_ctx.tb_stats, collect_jit_profile_info, jpi);
+
+if (jpi->translations) {
+g_string_append_printf(buf, "translated TBs  %" PRId64 "\n",
+jpi->translations);
+g_string_append_printf(buf, "avg ops/TB  %0.1f max=%d\n",
+jpi->ops / (double) jpi->translations, jpi->ops_max);
+g_string_append_printf(buf, "deleted ops/TB  %0.2f\n",
+jpi->del_ops / (double) jpi->translations);
+g_string_append_printf(buf, "avg temps/TB%0.2f max=%d\n",
+jpi->temps / (double) jpi->translations, jpi->temps_max);
+g_string_append_printf(buf, "avg host code/TB%0.1f\n",
+jpi->host / (double) jpi->translations);
+g_string_append_printf(buf, "avg search data/TB  %0.1f\n",
+jpi->search_data / (double) jpi->translations);
+
+if (s) {
+int64_t tot = s->interm_time + s->code_time;
+g_string_append_printf(buf, "JIT cycles  %" PRId64
+" (%0.3f s at 2.4 GHz)\n",
+tot, tot / 2.4e9);
+g_string_append_printf(buf, "cycles/op   %0.1f\n",
+jpi->ops ? (double)tot / jpi->ops : 0);
+g_string_append_printf(buf, "cycles/in byte  %0.1f\n",
+jpi->guest ? (double)tot / jpi->guest : 0);
+g_string_append_printf(buf, "cycles/out byte %0.1f\n",
+jpi->host ? (double)tot / jpi->host : 0);
+g_string_append_printf(buf, "cycles/search byte %0.1f\n",
+jpi->search_data ? (double)tot / jpi->search_data : 0);
+if (tot == 0) {
+  

[PATCH v12 15/15] docs/tb-stats: add how to

2023-05-18 Thread Fei Wu
Signed-off-by: Fei Wu 
---
 docs/tb-stats.txt | 116 ++
 1 file changed, 116 insertions(+)
 create mode 100644 docs/tb-stats.txt

diff --git a/docs/tb-stats.txt b/docs/tb-stats.txt
new file mode 100644
index 00..2f96ebb741
--- /dev/null
+++ b/docs/tb-stats.txt
@@ -0,0 +1,116 @@
+TBStatistics
+
+
+What is TBStatistics
+
+TBStatistics (tb_stats) is a tool to gather various internal information of TCG
+during binary translation. This allows us to identify things such as the
+hottest TBs, the guest to host instruction translation ratio, the number of
+spills during register allocation and more.
+
+
+How to use TBStatistics
+---
+
+1. HMP interface
+---
+
+TBStatistics provides HMP interface, you can try the following examples after
+connecting to the monitor.
+
+* First check the help info
+(qemu) help tb_stats
+tb_stats command [stats_level] -- Control tb statistics 
collection:tb_stats (start|pause|stop|filter) [all|jit_stats|exec_stats]
+
+(qemu) help info tb-list
+info tb-list [number sortedby] -- show a [number] translated blocks sorted 
by [sortedby]sortedby opts: hotness hg spills
+
+(qemu) help info tb
+info tb id [flag1,flag2,...] -- show information about one translated 
block by id.dump flags can be used to set dump code level: out_asm in_asm op
+
+* Enable TBStatistics
+(qemu) tb_stats start all
+(qemu)
+
+* Get the list of TBs of interest
+(qemu) info tb-list 2
+TB id:1 | phys:0x79bca0 virt:0x8059bca0 flags:0x01024001 0 inv/1
+| exec:1464084/0 guest inst cov:0.15%
+| trans:1 ints: g:3 op:16 op_opt:15 spills:0
+| h/g (host bytes / guest insts): 64.00
+| time to gen at 2.4GHz => code:607.08(ns) IR:250.83(ns)
+
+TB id:2 | phys:0x2adf0c virt:0x800adf0c flags:0x01024001 0 inv/1
+| exec:1033298/0 guest inst cov:0.28%
+| trans:1 ints: g:8 op:35 op_opt:33 spills:0
+| h/g (host bytes / guest insts): 86.00
+| time to gen at 2.4GHz => code:1429.58(ns) IR:545.42(ns)
+
+* Dive into the specific TB
+(qemu) info tb 1 op
+--
+
+TB id:1 | phys:0x79bca0 virt:0x8059bca0 flags:0x01024001 7 inv/19
+| exec:2038349/0 guest inst cov:0.15%
+| trans:19 ints: g:3 op:16 op_opt:15 spills:0
+| h/g (host bytes / guest insts): 64.00
+| time to gen at 2.4GHz => code:133434.17(ns) IR:176988.33(ns)
+
+OP:
+ ld_i32 loc1,env,$0xfff0
+ brcond_i32 loc1,$0x0,lt,$L0
+ mov_i64 loc3,$0x7f3c70b3a4e0
+ call inc_exec_freq,$0x1,$0,loc3
+
+  8059bca0 6422
+ add_i64 loc5,x2/sp,$0x8
+ qemu_ld_i64 x8/s0,loc5,un+leq,1
+
+  8059bca2 
+ add_i64 x2/sp,x2/sp,$0x10
+
+  8059bca4 
+ mov_i64 pc,x1/ra
+ and_i64 pc,pc,$0xfffe
+ call lookup_tb_ptr,$0x6,$1,tmp9,env
+ goto_ptr tmp9
+ set_label $L0
+ exit_tb $0x7f3e887a8043
+
+--
+
+* Stop TBStatistics after investigation, this will disable TBStatistics 
completely.
+(qemu) tb_stats stop
+(qemu)
+
+* Alternatively, TBStatistics can be paused, the previous collected 
TBStatistics
+  are not cleared but there is no TBStatistics recorded for new TBs.
+(qemu) tb_stats pause
+(qemu)
+
+* Of course, TBStatistics can be restarted for another round of investigation.
+(qemu) tb_stats start all
+(qemu)
+
+
+2. Dump at exit
+---
+New command line options have been added to dump TB information at exit:
+-d tb_stats[[,level=(+all+jit+exec+time)][,dump_limit=]]
+
+e.g. starting qemu like this:
+-d tb_stats,dump_limit=2
+
+Qemu prints the following at exit:
+QEMU: Terminated
+TB id:1 | phys:0x61be02 virt:0x8041be02 flags:0x01024001 0 inv/1
+| exec:72739176/0 guest inst cov:20.22%
+| trans:1 ints: g:9 op:35 op_opt:33 spills:0
+| h/g (host bytes / guest insts): 51.57
+| time to gen at 2.4GHz => code:1065.42(ns) IR:554.17(ns)
+
+TB id:2 | phys:0x61bc66 virt:0x8041bc66 flags:0x01024001 0 inv/1
+| exec:25069507/0 guest inst cov:0.77%
+| trans:1 ints: g:1 op:15 op_opt:14 spills:0
+| h/g (host bytes / guest insts): 128.00
+| time to gen at 2.4GHz => code:312.50(ns) IR:152.08(ns)
-- 
2.25.1




[PATCH v12 11/15] tb-stats: reset the tracked TBs on a tb_flush

2023-05-18 Thread Fei Wu
From: Alex Bennée 

We keep track of translations but can only do so up until the
translation cache is flushed. At that point we really have no idea if
we can re-create a translation because all the active tracking
information has been reset.

Signed-off-by: Alex Bennée 
Reviewed-by: Richard Henderson 
Signed-off-by: Fei Wu 
---
 accel/tcg/tb-maint.c|  1 +
 accel/tcg/tb-stats.c| 19 +++
 include/exec/tb-stats.h |  8 
 3 files changed, 28 insertions(+)

diff --git a/accel/tcg/tb-maint.c b/accel/tcg/tb-maint.c
index 0980fca358..11ff0ddd90 100644
--- a/accel/tcg/tb-maint.c
+++ b/accel/tcg/tb-maint.c
@@ -763,6 +763,7 @@ static void do_tb_flush(CPUState *cpu, run_on_cpu_data 
tb_flush_count)
 qht_reset_size(&tb_ctx.htable, CODE_GEN_HTABLE_SIZE);
 tb_remove_all();
 
+tbstats_reset_tbs();
 tcg_region_reset_all();
 /* XXX: flush processor icache at this point if cache flush is expensive */
 qatomic_inc(&tb_ctx.tb_flush_count);
diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c
index 55afe6e489..469e3e024b 100644
--- a/accel/tcg/tb-stats.c
+++ b/accel/tcg/tb-stats.c
@@ -269,6 +269,25 @@ void do_hmp_tbstats_safe(CPUState *cpu, run_on_cpu_data 
icmd)
 g_free(cmdinfo);
 }
 
+/*
+ * We have to reset the tbs array on a tb_flush as those
+ * TranslationBlocks no longer exist and we no longer know if the
+ * current mapping is still valid.
+ */
+
+static void reset_tbs_array(void *p, uint32_t hash, void *userp)
+{
+TBStatistics *tbs = p;
+g_ptr_array_set_size(tbs->tbs, 0);
+}
+
+void tbstats_reset_tbs(void)
+{
+if (tb_ctx.tb_stats.map) {
+qht_iter(&tb_ctx.tb_stats, reset_tbs_array, NULL);
+}
+}
+
 void init_tb_stats_htable(void)
 {
 if (!tb_ctx.tb_stats.map && tb_stats_collection_enabled()) {
diff --git a/include/exec/tb-stats.h b/include/exec/tb-stats.h
index 4bb343870b..30b788f7b2 100644
--- a/include/exec/tb-stats.h
+++ b/include/exec/tb-stats.h
@@ -124,4 +124,12 @@ struct TbstatsCommand {
 
 void do_hmp_tbstats_safe(CPUState *cpu, run_on_cpu_data icmd);
 
+/**
+ * tbstats_reset_tbs: reset the linked array of TBs
+ *
+ * Reset the list of tbs for a given array. Should be called from
+ * safe work during tb_flush.
+ */
+void tbstats_reset_tbs(void);
+
 #endif
-- 
2.25.1




[PATCH v12 03/15] accel: collecting JIT statistics

2023-05-18 Thread Fei Wu
From: "Vanderson M. do Rosario" 

If a TB has a TBS (TBStatistics) with the TB_JIT_STATS enabled then we
collect statistics of its translation processes and code translation.

To help with collection we include the TCGProfile structure
unconditionally. It will have further alterations in future commits.

Collecting the number of host instructions is not simple, as
it would require modifying several target source files. So,
for now, we only collect the size of the generated host code.

Signed-off-by: Vanderson M. do Rosario 
Message-Id: <20190829173437.5926-4-vanderson...@gmail.com>
Signed-off-by: Alex Bennée 
Signed-off-by: Fei Wu 
---
 accel/tcg/translate-all.c | 29 +++--
 accel/tcg/translator.c|  3 +++
 include/exec/tb-stats-flags.h |  1 +
 include/exec/tb-stats.h   | 23 +++
 include/tcg/tcg.h | 22 --
 tcg/tcg.c |  9 +++--
 6 files changed, 81 insertions(+), 6 deletions(-)

diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index 31d4ddf4ab..e36a780a70 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -313,6 +313,8 @@ static TBStatistics *tb_get_stats(tb_page_addr_t phys_pc, 
target_ulong pc,
 new_stats->cs_base = cs_base;
 new_stats->flags = flags;
 new_stats->stats_enabled = get_default_tbstats_flag();
+new_stats->tbs = g_ptr_array_sized_new(4);
+qemu_mutex_init(&new_stats->jit_stats_lock);
 
 /*
  * All initialisation must be complete before we insert into qht
@@ -326,6 +328,7 @@ static TBStatistics *tb_get_stats(tb_page_addr_t phys_pc, 
target_ulong pc,
  * If there is already a TBStatistic for this TB from a previous flush
  * then just make the new TB point to the older TBStatistic
  */
+g_ptr_array_free(new_stats->tbs, true);
 g_free(new_stats);
 return existing_stats;
 } else {
@@ -344,9 +347,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
 tb_page_addr_t phys_pc;
 tcg_insn_unit *gen_code_buf;
 int gen_code_size, search_size, max_insns;
-#ifdef CONFIG_PROFILER
 TCGProfile *prof = &tcg_ctx->prof;
-#endif
 int64_t ti;
 void *host_pc;
 
@@ -582,6 +583,30 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
 return tb;
 }
 
+/*
+ * Collect JIT stats when enabled. We batch them all up here to
+ * avoid spamming the cache with atomic accesses
+ */
+if (tb_stats_enabled(tb, TB_JIT_STATS)) {
+TBStatistics *ts = tb->tb_stats;
+qemu_mutex_lock(&ts->jit_stats_lock);
+
+ts->code.num_guest_inst += prof->translation.nb_guest_insns;
+ts->code.num_tcg_ops += prof->translation.nb_ops_pre_opt;
+ts->code.num_tcg_ops_opt += tcg_ctx->nb_ops;
+ts->code.spills += prof->translation.nb_spills;
+ts->code.out_len += tb->tc.size;
+
+ts->translations.total++;
+if (tb_page_addr1(tb) != -1) {
+ts->translations.spanning++;
+}
+
+g_ptr_array_add(ts->tbs, tb);
+
+qemu_mutex_unlock(&ts->jit_stats_lock);
+}
+
 /*
  * Insert TB into the corresponding region tree before publishing it
  * through QHT. Otherwise rewinding happened in the TB might fail to
diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c
index 165072c8c3..40a56962b1 100644
--- a/accel/tcg/translator.c
+++ b/accel/tcg/translator.c
@@ -132,6 +132,9 @@ void translator_loop(CPUState *cpu, TranslationBlock *tb, 
int *max_insns,
 tb->size = db->pc_next - db->pc_first;
 tb->icount = db->num_insns;
 
+/* Save number of guest instructions for TB_JIT_STATS */
+tcg_ctx->prof.translation.nb_guest_insns = db->num_insns;
+
 #ifdef DEBUG_DISAS
 if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)
 && qemu_log_in_addr_range(db->pc_first)) {
diff --git a/include/exec/tb-stats-flags.h b/include/exec/tb-stats-flags.h
index fa71eb6f0c..b62eaaca50 100644
--- a/include/exec/tb-stats-flags.h
+++ b/include/exec/tb-stats-flags.h
@@ -13,6 +13,7 @@
 
 #define TB_NOTHING(1 << 0)
 #define TB_EXEC_STATS (1 << 1)
+#define TB_JIT_STATS  (1 << 2)
 
 /* TBStatistic collection controls */
 void enable_collect_tb_stats(void);
diff --git a/include/exec/tb-stats.h b/include/exec/tb-stats.h
index eb1fa92a4e..26d4ecae59 100644
--- a/include/exec/tb-stats.h
+++ b/include/exec/tb-stats.h
@@ -60,6 +60,29 @@ struct TBStatistics {
 unsigned long atomic;
 } executions;
 
+/* JIT Stats - protected by lock */
+QemuMutex jit_stats_lock;
+
+/* Sum of all operations for all translations */
+struct {
+unsigned num_guest_inst;
+unsigned num_tcg_ops;
+unsigned num_tcg_ops_opt;
+unsigned spills;
+unsigned out_len;
+} code;
+
+struct {
+ 

[PATCH v12 09/15] debug: add -d tb_stats to control TBStatistics collection:

2023-05-18 Thread Fei Wu
From: "Vanderson M. do Rosario" 

 -d tb_stats[[,level=(+all+jit+exec+time)][,dump_limit=]]

"dump_limit" is used to limit the number of dumped TBStats in
linux-user mode.

[all+jit+exec+time] controls the profiling level used
by the TBStats. It can be used as follows:

-d tb_stats
-d tb_stats,level=jit+time
-d tb_stats,dump_limit=15
...

Signed-off-by: Vanderson M. do Rosario 
Message-Id: <20190829173437.5926-7-vanderson...@gmail.com>
[AJB: fix authorship, reword title]
Signed-off-by: Alex Bennée 
Signed-off-by: Fei Wu 
---
 accel/tcg/tb-stats.c  |  5 +
 include/exec/gen-icount.h |  1 +
 include/exec/tb-stats-flags.h |  3 +++
 include/exec/tb-stats.h   |  2 ++
 include/qemu/log.h|  1 +
 stubs/meson.build |  1 +
 stubs/tb-stats.c  | 27 +
 util/log.c| 37 +++
 8 files changed, 77 insertions(+)
 create mode 100644 stubs/tb-stats.c

diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c
index 78a3104c7f..68ac7d3f73 100644
--- a/accel/tcg/tb-stats.c
+++ b/accel/tcg/tb-stats.c
@@ -208,3 +208,8 @@ uint32_t get_default_tbstats_flag(void)
 {
 return default_tbstats_flag;
 }
+
+void set_default_tbstats_flag(uint32_t flags)
+{
+default_tbstats_flag = flags;
+}
diff --git a/include/exec/gen-icount.h b/include/exec/gen-icount.h
index 20e7835ff0..4372817951 100644
--- a/include/exec/gen-icount.h
+++ b/include/exec/gen-icount.h
@@ -3,6 +3,7 @@
 
 #include "exec/exec-all.h"
 #include "exec/tb-stats.h"
+#include "tb-stats-flags.h"
 
 /* Helpers for instruction counting code generation.  */
 
diff --git a/include/exec/tb-stats-flags.h b/include/exec/tb-stats-flags.h
index f29eff7576..04adaee8d9 100644
--- a/include/exec/tb-stats-flags.h
+++ b/include/exec/tb-stats-flags.h
@@ -15,6 +15,7 @@
 #define TB_EXEC_STATS (1 << 1)
 #define TB_JIT_STATS  (1 << 2)
 #define TB_JIT_TIME   (1 << 3)
+#define TB_ALL_STATS  (TB_EXEC_STATS | TB_JIT_STATS | TB_JIT_TIME)
 
 /* TBStatistic collection controls */
 void enable_collect_tb_stats(void);
@@ -24,5 +25,7 @@ bool tb_stats_collection_enabled(void);
 bool tb_stats_collection_paused(void);
 
 uint32_t get_default_tbstats_flag(void);
+void set_default_tbstats_flag(uint32_t);
+void set_tbstats_flags(uint32_t flags);
 
 #endif
diff --git a/include/exec/tb-stats.h b/include/exec/tb-stats.h
index d93d42e085..72585c448a 100644
--- a/include/exec/tb-stats.h
+++ b/include/exec/tb-stats.h
@@ -31,6 +31,8 @@
 #include "exec/tb-stats-flags.h"
 #include "tcg/tcg.h"
 
+#include "exec/tb-stats-flags.h"
+
 #define tb_stats_enabled(tb, JIT_STATS) \
 (tb && tb->tb_stats && (tb->tb_stats->stats_enabled & JIT_STATS))
 
diff --git a/include/qemu/log.h b/include/qemu/log.h
index c5643d8dd5..6f3b8091cd 100644
--- a/include/qemu/log.h
+++ b/include/qemu/log.h
@@ -35,6 +35,7 @@ bool qemu_log_separate(void);
 /* LOG_STRACE is used for user-mode strace logging. */
 #define LOG_STRACE (1 << 19)
 #define LOG_PER_THREAD (1 << 20)
+#define CPU_LOG_TB_STATS   (1 << 21)
 
 /* Lock/unlock output. */
 
diff --git a/stubs/meson.build b/stubs/meson.build
index 8412cad15f..393adb9e47 100644
--- a/stubs/meson.build
+++ b/stubs/meson.build
@@ -64,3 +64,4 @@ else
 endif
 stub_ss.add(files('semihost-all.c'))
 stub_ss.add(when: 'CONFIG_VFIO_USER_SERVER', if_false: 
files('vfio-user-obj.c'))
+stub_ss.add(files('tb-stats.c'))
diff --git a/stubs/tb-stats.c b/stubs/tb-stats.c
new file mode 100644
index 00..d212c2a1fa
--- /dev/null
+++ b/stubs/tb-stats.c
@@ -0,0 +1,27 @@
+/*
+ * TB Stats Stubs
+ *
+ * Copyright (c) 2019
+ * Written by Alex Bennée 
+ *
+ * This code is licensed under the GNU GPL v2, or later.
+ */
+
+
+#include "qemu/osdep.h"
+#include "exec/tb-stats-flags.h"
+
+void enable_collect_tb_stats(void)
+{
+return;
+}
+
+bool tb_stats_collection_enabled(void)
+{
+return false;
+}
+
+void set_default_tbstats_flag(uint32_t flags)
+{
+return;
+}
diff --git a/util/log.c b/util/log.c
index 53b4f6c58e..7ae471d97c 100644
--- a/util/log.c
+++ b/util/log.c
@@ -19,6 +19,7 @@
 
 #include "qemu/osdep.h"
 #include "qemu/log.h"
+#include "qemu/qemu-print.h"
 #include "qemu/range.h"
 #include "qemu/error-report.h"
 #include "qapi/error.h"
@@ -27,6 +28,7 @@
 #include "qemu/thread.h"
 #include "qemu/lockable.h"
 #include "qemu/rcu.h"
+#include "exec/tb-stats-flags.h"
 #ifdef CONFIG_LINUX
 #include 
 #endif
@@ -47,6 +49,7 @@ static __thread Notifier qemu_log_thread_cleanup_notifier;
 int qemu_loglevel;
 static bool log_per_thread;
 static GArray *debug_regions;
+int32_t max_num_hot_tbs_to_dump;
 
 /* Returns true if qemu_log() will really write somewhere. */
 bool qemu

[PATCH v12 07/15] accel/tcg: convert profiling of code generation to TBStats

2023-05-18 Thread Fei Wu
From: Alex Bennée 

We continue the conversion of CONFIG_PROFILER data to TBStats by
measuring the time it takes to generate code. Instead of calculating
elapsed time as we go, we simply store key timestamps in the profiler
structure, then calculate the totals and add them to TBStats under
lock.

Signed-off-by: Alex Bennée 
Signed-off-by: Fei Wu 
---
 accel/tcg/tb-stats.c  | 33 ---
 accel/tcg/translate-all.c | 69 ++-
 include/exec/tb-stats.h   |  7 
 include/tcg/tcg.h | 14 
 tcg/tcg.c | 17 --
 5 files changed, 59 insertions(+), 81 deletions(-)

diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c
index 7deb617446..c40c9a748e 100644
--- a/accel/tcg/tb-stats.c
+++ b/accel/tcg/tb-stats.c
@@ -88,39 +88,6 @@ void dump_jit_profile_info(TCGProfile *s, GString *buf)
 jpi->host / (double) jpi->translations);
 g_string_append_printf(buf, "avg search data/TB  %0.1f\n",
 jpi->search_data / (double) jpi->translations);
-
-if (s) {
-int64_t tot = s->interm_time + s->code_time;
-g_string_append_printf(buf, "JIT cycles  %" PRId64
-" (%0.3f s at 2.4 GHz)\n",
-tot, tot / 2.4e9);
-g_string_append_printf(buf, "cycles/op   %0.1f\n",
-jpi->ops ? (double)tot / jpi->ops : 0);
-g_string_append_printf(buf, "cycles/in byte  %0.1f\n",
-jpi->guest ? (double)tot / jpi->guest : 0);
-g_string_append_printf(buf, "cycles/out byte %0.1f\n",
-jpi->host ? (double)tot / jpi->host : 0);
-g_string_append_printf(buf, "cycles/search byte %0.1f\n",
-jpi->search_data ? (double)tot / jpi->search_data : 0);
-if (tot == 0) {
-tot = 1;
-}
-g_string_append_printf(buf, "  gen_interm time   %0.1f%%\n",
-(double)s->interm_time / tot * 100.0);
-g_string_append_printf(buf, "  gen_code time %0.1f%%\n",
-(double)s->code_time / tot * 100.0);
-g_string_append_printf(buf, "optim./code time%0.1f%%\n",
-(double)s->opt_time / (s->code_time ? s->code_time : 1)
-* 100.0);
-g_string_append_printf(buf, "liveness/code time  %0.1f%%\n",
-(double)s->la_time / (s->code_time ? s->code_time : 1)
-* 100.0);
-g_string_append_printf(buf, "cpu_restore count   %" PRId64 "\n",
-s->restore_count);
-g_string_append_printf(buf, "  avg cycles%0.1f\n",
-s->restore_count ?
-(double)s->restore_time / s->restore_count : 0);
-}
 }
 g_free(jpi);
 }
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index beaef03902..ea2b648ffd 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -282,8 +282,9 @@ void page_init(void)
  */
 static int setjmp_gen_code(CPUArchState *env, TranslationBlock *tb,
target_ulong pc, void *host_pc,
-   int *max_insns, int64_t *ti)
+   int *max_insns)
 {
+TCGProfile *prof = &tcg_ctx->prof;
 int ret = sigsetjmp(tcg_ctx->jmp_trans, 0);
 if (unlikely(ret != 0)) {
 return ret;
@@ -297,11 +298,9 @@ static int setjmp_gen_code(CPUArchState *env, 
TranslationBlock *tb,
 tcg_ctx->cpu = NULL;
 *max_insns = tb->icount;
 
-#ifdef CONFIG_PROFILER
-qatomic_set(&tcg_ctx->prof.interm_time,
-tcg_ctx->prof.interm_time + profile_getclock() - *ti);
-*ti = profile_getclock();
-#endif
+if (tb_stats_enabled(tb, TB_JIT_TIME)) {
+prof->gen_ir_done_time = profile_getclock();
+}
 
 return tcg_gen_code(tcg_ctx, tb, pc);
 }
@@ -352,7 +351,6 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
 tcg_insn_unit *gen_code_buf;
 int gen_code_size, search_size, max_insns;
 TCGProfile *prof = &tcg_ctx->prof;
-int64_t ti;
 void *host_pc;
 
 assert_memory_lock();
@@ -403,10 +401,6 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
 
  tb_overflow:
 
-#ifdef CONFIG_PROFILER
-ti = profile_getclock();
-#endif
-
 trace_translate_block(tb, pc, tb->tc.ptr);
 
 /*
@@ -418,11 +412,14 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
 if (tb_stats_collection_enabled() &&
 qemu_log_in_addr_range(tb->pc)) {
 tb->tb_stats = tb_get_stats(phys_pc, pc, cs_base, flags);
+if (tb_stats_enabled(tb, TB_JIT_TIME)) {
+pr

[PATCH v12 08/15] accel: adding TB_JIT_TIME and full replacing CONFIG_PROFILER

2023-05-18 Thread Fei Wu
From: "Vanderson M. do Rosario" 

Replace all other CONFIG_PROFILER statistics and migrate them to the
TBStatistics system. However, TCGProfile still exists and can
be used to store global statistics and times. All TB-related
statistics go to TBStatistics.

Signed-off-by: Vanderson M. do Rosario 
Message-Id: <20190829173437.5926-6-vanderson...@gmail.com>
[AJB: fix authorship]
Signed-off-by: Alex Bennée 
Signed-off-by: Fei Wu 
---
 accel/tcg/monitor.c|  25 --
 accel/tcg/tb-stats.c   |  78 +
 accel/tcg/tcg-accel-ops.c  |  15 +++---
 include/exec/tb-stats.h|   2 +-
 include/tcg/tcg.h  |   5 +-
 softmmu/runstate.c |   8 +--
 tcg/tcg.c  | 100 ++---
 tests/qtest/qmp-cmd-test.c |   2 +-
 8 files changed, 127 insertions(+), 108 deletions(-)

diff --git a/accel/tcg/monitor.c b/accel/tcg/monitor.c
index d4e044f7f5..2bc87f2642 100644
--- a/accel/tcg/monitor.c
+++ b/accel/tcg/monitor.c
@@ -15,6 +15,7 @@
 #include "sysemu/cpus.h"
 #include "sysemu/cpu-timers.h"
 #include "sysemu/tcg.h"
+#include "exec/tb-stats.h"
 #include "internal.h"
 
 
@@ -69,6 +70,11 @@ HumanReadableText *qmp_x_query_opcount(Error **errp)
 {
 g_autoptr(GString) buf = g_string_new("");
 
+if (!tb_stats_collection_enabled()) {
+error_setg(errp, "TB information not being recorded.");
+return NULL;
+}
+
 if (!tcg_enabled()) {
 error_setg(errp,
"Opcode count information is only available with 
accel=tcg");
@@ -80,23 +86,12 @@ HumanReadableText *qmp_x_query_opcount(Error **errp)
 return human_readable_text_from_str(buf);
 }
 
-#ifdef CONFIG_PROFILER
-
+#ifdef CONFIG_TCG
 HumanReadableText *qmp_x_query_profile(Error **errp)
 {
 g_autoptr(GString) buf = g_string_new("");
-static int64_t last_cpu_exec_time;
-int64_t cpu_exec_time;
-int64_t delta;
-
-cpu_exec_time = tcg_cpu_exec_time();
-delta = cpu_exec_time - last_cpu_exec_time;
-
-g_string_append_printf(buf, "async time  %" PRId64 " (%0.3f)\n",
-   dev_time, dev_time / 
(double)NANOSECONDS_PER_SECOND);
-g_string_append_printf(buf, "qemu time   %" PRId64 " (%0.3f)\n",
-   delta, delta / (double)NANOSECONDS_PER_SECOND);
-last_cpu_exec_time = cpu_exec_time;
+
+dump_jit_exec_time_info(dev_time, buf);
 dev_time = 0;
 
 return human_readable_text_from_str(buf);
@@ -104,7 +99,7 @@ HumanReadableText *qmp_x_query_profile(Error **errp)
 #else
 HumanReadableText *qmp_x_query_profile(Error **errp)
 {
-error_setg(errp, "Internal profiler not compiled");
+error_setg(errp, "TCG should be enabled!");
 return NULL;
 }
 #endif
diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c
index c40c9a748e..78a3104c7f 100644
--- a/accel/tcg/tb-stats.c
+++ b/accel/tcg/tb-stats.c
@@ -13,6 +13,7 @@
 #include "tcg/tcg.h"
 
 #include "qemu/qemu-print.h"
+#include "qemu/timer.h"
 
 #include "exec/tb-stats.h"
 #include "tb-context.h"
@@ -41,6 +42,13 @@ struct jit_profile_info {
 uint64_t host;
 uint64_t guest;
 uint64_t search_data;
+
+uint64_t interm_time;
+uint64_t code_time;
+uint64_t restore_count;
+uint64_t restore_time;
+uint64_t opt_time;
+uint64_t la_time;
 };
 
 /* accumulate the statistics from all TBs */
@@ -62,6 +70,35 @@ static void collect_jit_profile_info(void *p, uint32_t hash, 
void *userp)
 jpi->host += tbs->code.out_len;
 jpi->guest += tbs->code.in_len;
 jpi->search_data += tbs->code.search_out_len;
+
+jpi->interm_time += stat_per_translation(tbs, gen_times.ir);
+jpi->opt_time += stat_per_translation(tbs, gen_times.ir_opt);
+jpi->la_time += stat_per_translation(tbs, gen_times.la);
+jpi->code_time += stat_per_translation(tbs, gen_times.code);
+
+/*
+ * The restore time covers how long we have spent restoring state
+ * from a given TB (e.g. recovering from a fault). It is therefore
+ * not related to the number of translations we have done.
+ */
+jpi->restore_time += tbs->tb_restore_time;
+jpi->restore_count += tbs->tb_restore_count;
+}
+
+void dump_jit_exec_time_info(uint64_t dev_time, GString *buf)
+{
+static uint64_t last_cpu_exec_time;
+uint64_t cpu_exec_time;
+uint64_t delta;
+
+cpu_exec_time = tcg_cpu_exec_time();
+delta = cpu_exec_time - last_cpu_exec_time;
+
+g_string_append_printf(buf, "async time  %" PRId64 " (%0.3f)\n",
+   dev_time, dev_time / 
(double)NANOSECONDS_PER_SECOND);
+g_string_append_printf(buf, "qemu time   %" PRId64 " (%0.3f)\n",
+   delta, delta / (double

[PATCH v12 12/15] Adding info [tb-list|tb] commands to HMP (WIP)

2023-05-18 Thread Fei Wu
From: "Vanderson M. do Rosario" 

These commands allow the exploration of TBs generated by the TCG:
finding out which ones are hotter or have more guest/host instructions,
and examining their guest, host and IR code.

The goal of this command is to allow the dynamic exploration of TCG
behavior and code quality. Therefore, for now, a corresponding QMP
command is not worthwhile.

[AJB: WIP - we still can't be safely sure a translation will succeed]

Example of output:

TB id:1 | phys:0x34d54 virt:0x00034d54 flags:0xf0
| exec:4828932/0 guest inst cov:16.38%
| trans:1 ints: g:3 op:82 op_opt:34 spills:3
| h/g (host bytes / guest insts): 90.64
| time to gen at 2.4GHz => code:3150.83(ns) IR:712.08(ns)
| targets: 0x00034d5e (id:11), 0x00034d0d (id:2)

TB id:2 | phys:0x34d0d virt:0x00034d0d flags:0xf0
| exec:4825842/0 guest inst cov:21.82%
| trans:1 ints: g:4 op:80 op_opt:38 spills:2
| h/g (host bytes / guest insts): 84.00
| time to gen at 2.4GHz => code:3362.92(ns) IR:793.75(ns)
| targets: 0x00034d19 (id:12), 0x00034d54 (id:1)

TB id:2 | phys:0x34d0d virt:0x00034d0d flags:0xf0
| exec:6956495/0  guest inst cov:21.82%
| trans:2 ints: g:2 op:40 op_opt:19 spills:1
| h/g (host bytes / guest insts): 84.00
| time to gen at 2.4GHz => code:3130.83(ns) IR:722.50(ns)
| targets: 0x00034d19 (id:12), 0x00034d54 (id:1)


IN:
0x00034d0d:  89 demovl %ebx, %esi
0x00034d0f:  26 8b 0e movl %es:(%esi), %ecx
0x00034d12:  26 f6 46 08 80   testb$0x80, %es:8(%esi)
0x00034d17:  75 3bjne  0x34d54

--

TB id:1 | phys:0x34d54 virt:0x00034d54 flags:0xf0
| exec:5202686/0 guest inst cov:11.28%
| trans:1 ints: g:3 op:82 op_opt:34 spills:3
| h/g (host bytes / guest insts): 90.64
| time to gen at 2.4GHz => code:2793.75(ns) IR:614.58(ns)
| targets: 0x00034d5e (id:3), 0x00034d0d (id:2)

TB id:2 | phys:0x34d0d virt:0x00034d0d flags:0xf0
| exec:5199468/0 guest inst cov:15.03%
| trans:1 ints: g:4 op:80 op_opt:38 spills:2
| h/g (host bytes / guest insts): 84.00
| time to gen at 2.4GHz => code:2958.75(ns) IR:719.58(ns)
| targets: 0x00034d19 (id:4), 0x00034d54 (id:1)

--
2 TBs to reach 25% of guest inst exec coverage
Total of guest insts exec: 138346727

--

Acked-by: Dr. David Alan Gilbert 
Signed-off-by: Vanderson M. do Rosario 
Message-Id: <20190829173437.5926-10-vanderson...@gmail.com>
[AJB: fix authorship, dropped coverset]
Signed-off-by: Alex Bennée 
Signed-off-by: Fei Wu 
---
 accel/tcg/monitor.c  |  54 ++
 accel/tcg/tb-stats.c | 322 +++
 disas/disas.c|  24 ++-
 hmp-commands-info.hx |  16 ++
 include/exec/tb-stats.h  |  33 +++-
 include/monitor/hmp.h|   2 +
 include/qemu/log-for-trace.h |   6 +-
 include/qemu/log.h   |   2 +
 util/log.c   |  66 +--
 9 files changed, 507 insertions(+), 18 deletions(-)

diff --git a/accel/tcg/monitor.c b/accel/tcg/monitor.c
index 2e00f10267..a1780c5920 100644
--- a/accel/tcg/monitor.c
+++ b/accel/tcg/monitor.c
@@ -8,6 +8,7 @@
 
 #include "qemu/osdep.h"
 #include "qemu/accel.h"
+#include "qemu/log.h"
 #include "qapi/error.h"
 #include "qapi/type-helpers.h"
 #include "qapi/qapi-commands-machine.h"
@@ -18,6 +19,7 @@
 #include "sysemu/cpu-timers.h"
 #include "sysemu/tcg.h"
 #include "exec/tb-stats.h"
+#include "tb-context.h"
 #include "internal.h"
 
 
@@ -132,6 +134,58 @@ void hmp_tbstats(Monitor *mon, const QDict *qdict)
 
 }
 
+void hmp_info_tblist(Monitor *mon, const QDict *qdict)
+{
+int number_int;
+const char *sortedby_str = NULL;
+if (!tcg_enabled()) {
+error_report("TB information is only available with accel=tcg");
+return;
+}
+if (!tb_ctx.tb_stats.map) {
+error_report("no TB information recorded");
+return;
+}
+
+number_int = qdict_get_try_int(qdict, "number", 10);
+sortedby_str = qdict_get_try_str(qdict, "sortedby");
+
+int sortedby = SORT_BY_HOTNESS;
+if (sortedby_str == NULL || strcmp(sortedby_str, "hotness") == 0) {
+sortedby = SORT_BY_HOTNESS;
+} else if (strcmp(sortedby_str, "hg") == 0) {
+sortedby = SORT_BY_HG;
+} else if (strcmp(sortedby_str, "spills") == 0) {
+sortedby = SORT_BY_SPILLS;
+} else {
+error_report("valid sort options ar

[PATCH v12 06/15] accel/tcg: convert profiling of restore operations to TBStats

2023-05-18 Thread Fei Wu
From: Alex Bennée 

This starts the conversion of CONFIG_PROFILER data collection over to
the TBStats system. We introduce a new flag TB_JIT_TIME and start
tracking how much time is spent restoring execution state from a given
TB.

Signed-off-by: Alex Bennée 
Signed-off-by: Fei Wu 
---
 accel/tcg/translate-all.c | 23 ++-
 include/exec/tb-stats-flags.h |  1 +
 include/exec/tb-stats.h   |  5 +
 include/qemu/timer.h  |  3 ---
 4 files changed, 20 insertions(+), 12 deletions(-)

diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index 48ce7df121..beaef03902 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -204,10 +204,12 @@ void cpu_restore_state_from_tb(CPUState *cpu, 
TranslationBlock *tb,
uintptr_t host_pc)
 {
 uint64_t data[TARGET_INSN_START_WORDS];
-#ifdef CONFIG_PROFILER
-TCGProfile *prof = &tcg_ctx->prof;
-int64_t ti = profile_getclock();
-#endif
+uint64_t ti = 0;
+
+if (tb_stats_enabled(tb, TB_JIT_TIME)) {
+ti = profile_getclock();
+}
+
 int insns_left = cpu_unwind_data_from_tb(tb, host_pc, data);
 
 if (insns_left < 0) {
@@ -225,11 +227,14 @@ void cpu_restore_state_from_tb(CPUState *cpu, 
TranslationBlock *tb,
 
 cpu->cc->tcg_ops->restore_state_to_opc(cpu, tb, data);
 
-#ifdef CONFIG_PROFILER
-qatomic_set(&prof->restore_time,
-prof->restore_time + profile_getclock() - ti);
-qatomic_set(&prof->restore_count, prof->restore_count + 1);
-#endif
+if (tb_stats_enabled(tb, TB_JIT_TIME)) {
+TBStatistics *ts = tb->tb_stats;
+uint64_t elapsed = profile_getclock() - ti;
+qemu_mutex_lock(&ts->jit_stats_lock);
+ts->tb_restore_time += elapsed;
+ts->tb_restore_count++;
+qemu_mutex_unlock(&ts->jit_stats_lock);
+}
 }
 
 bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc)
diff --git a/include/exec/tb-stats-flags.h b/include/exec/tb-stats-flags.h
index b62eaaca50..f29eff7576 100644
--- a/include/exec/tb-stats-flags.h
+++ b/include/exec/tb-stats-flags.h
@@ -14,6 +14,7 @@
 #define TB_NOTHING(1 << 0)
 #define TB_EXEC_STATS (1 << 1)
 #define TB_JIT_STATS  (1 << 2)
+#define TB_JIT_TIME   (1 << 3)
 
 /* TBStatistic collection controls */
 void enable_collect_tb_stats(void);
diff --git a/include/exec/tb-stats.h b/include/exec/tb-stats.h
index 43722ff59d..cc9ab686b8 100644
--- a/include/exec/tb-stats.h
+++ b/include/exec/tb-stats.h
@@ -92,6 +92,11 @@ struct TBStatistics {
  * this TBStats structure. Has to be reset on a tb_flush.
  */
 GPtrArray *tbs;
+
+/* Recover state from TB */
+uint64_t tb_restore_time;
+uint64_t tb_restore_count;
+
 };
 
 bool tb_stats_cmp(const void *ap, const void *bp);
diff --git a/include/qemu/timer.h b/include/qemu/timer.h
index d86fc73a17..ad0da18a5f 100644
--- a/include/qemu/timer.h
+++ b/include/qemu/timer.h
@@ -989,7 +989,6 @@ static inline int64_t cpu_get_host_ticks(void)
 }
 #endif
 
-#ifdef CONFIG_PROFILER
 static inline int64_t profile_getclock(void)
 {
 return get_clock();
@@ -997,5 +996,3 @@ static inline int64_t profile_getclock(void)
 
 extern uint64_t dev_time;
 #endif
-
-#endif
-- 
2.25.1




[PATCH v12 05/15] accel/tcg: move profiler dev_time to tb_stats

2023-05-18 Thread Fei Wu
From: Alex Bennée 

This shouldn't live in the monitor code anyway. While we are at it,
make it a uint64_t as we won't be dealing with negative numbers.

Signed-off-by: Alex Bennée 
Signed-off-by: Fei Wu 
---
 accel/tcg/monitor.c  | 2 --
 accel/tcg/tb-stats.c | 2 ++
 include/qemu/timer.h | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/accel/tcg/monitor.c b/accel/tcg/monitor.c
index 92fce580f1..d4e044f7f5 100644
--- a/accel/tcg/monitor.c
+++ b/accel/tcg/monitor.c
@@ -82,8 +82,6 @@ HumanReadableText *qmp_x_query_opcount(Error **errp)
 
 #ifdef CONFIG_PROFILER
 
-int64_t dev_time;
-
 HumanReadableText *qmp_x_query_profile(Error **errp)
 {
 g_autoptr(GString) buf = g_string_new("");
diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c
index 74708d1f40..7deb617446 100644
--- a/accel/tcg/tb-stats.c
+++ b/accel/tcg/tb-stats.c
@@ -28,6 +28,8 @@ enum TBStatsStatus {
 static enum TBStatsStatus tcg_collect_tb_stats;
 static uint32_t default_tbstats_flag;
 
+uint64_t dev_time;
+
 struct jit_profile_info {
 uint64_t translations;
 uint64_t aborted;
diff --git a/include/qemu/timer.h b/include/qemu/timer.h
index ee071e07d1..d86fc73a17 100644
--- a/include/qemu/timer.h
+++ b/include/qemu/timer.h
@@ -995,7 +995,7 @@ static inline int64_t profile_getclock(void)
 return get_clock();
 }
 
-extern int64_t dev_time;
+extern uint64_t dev_time;
 #endif
 
 #endif
-- 
2.25.1




[PATCH v12 02/15] accel: collecting TB execution count

2023-05-18 Thread Fei Wu
From: "Vanderson M. do Rosario" 

If a TB has a TBS (TBStatistics) with the TB_EXEC_STATS
enabled, then we instrument the start code of this TB
to atomically count the number of times it is executed.
We count both the number of "normal" executions and atomic
executions of a TB.

The execution count of the TB is stored in its respective
TBS.

All TBStatistics are created by default with the flags from
default_tbstats_flag.

Signed-off-by: Vanderson M. do Rosario 
Message-Id: <20190829173437.5926-3-vanderson...@gmail.com>
[AJB: Fix author]
Signed-off-by: Alex Bennée 
Signed-off-by: Fei Wu 
---
 accel/tcg/cpu-exec.c  |  6 ++
 accel/tcg/tb-stats.c  |  6 ++
 accel/tcg/tcg-runtime.c   |  8 
 accel/tcg/tcg-runtime.h   |  1 +
 accel/tcg/translate-all.c |  7 +--
 accel/tcg/translator.c| 10 ++
 include/exec/gen-icount.h |  1 +
 include/exec/tb-stats-flags.h |  5 +
 include/exec/tb-stats.h   | 13 +
 9 files changed, 55 insertions(+), 2 deletions(-)

diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
index bc0e1c3299..a3f24c62a0 100644
--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -25,6 +25,7 @@
 #include "trace.h"
 #include "disas/disas.h"
 #include "exec/exec-all.h"
+#include "exec/tb-stats.h"
 #include "tcg/tcg.h"
 #include "qemu/atomic.h"
 #include "qemu/rcu.h"
@@ -564,7 +565,12 @@ void cpu_exec_step_atomic(CPUState *cpu)
 mmap_unlock();
 }
 
+if (tb_stats_enabled(tb, TB_EXEC_STATS)) {
+tb->tb_stats->executions.atomic++;
+}
+
 cpu_exec_enter(cpu);
+
 /* execute the generated code */
 trace_exec_tb(tb, pc);
 cpu_tb_exec(cpu, tb, &tb_exit);
diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c
index f988bd8a31..143a52ef5c 100644
--- a/accel/tcg/tb-stats.c
+++ b/accel/tcg/tb-stats.c
@@ -22,6 +22,7 @@ enum TBStatsStatus {
 };
 
 static enum TBStatsStatus tcg_collect_tb_stats;
+static uint32_t default_tbstats_flag;
 
 void init_tb_stats_htable(void)
 {
@@ -56,3 +57,8 @@ bool tb_stats_collection_paused(void)
 {
 return tcg_collect_tb_stats == TB_STATS_PAUSED;
 }
+
+uint32_t get_default_tbstats_flag(void)
+{
+return default_tbstats_flag;
+}
diff --git a/accel/tcg/tcg-runtime.c b/accel/tcg/tcg-runtime.c
index e4e030043f..85cb795732 100644
--- a/accel/tcg/tcg-runtime.c
+++ b/accel/tcg/tcg-runtime.c
@@ -27,6 +27,7 @@
 #include "exec/helper-proto.h"
 #include "exec/cpu_ldst.h"
 #include "exec/exec-all.h"
+#include "exec/tb-stats.h"
 #include "disas/disas.h"
 #include "exec/log.h"
 #include "tcg/tcg.h"
@@ -148,3 +149,10 @@ void HELPER(exit_atomic)(CPUArchState *env)
 {
 cpu_loop_exit_atomic(env_cpu(env), GETPC());
 }
+
+void HELPER(inc_exec_freq)(void *ptr)
+{
+TBStatistics *stats = (TBStatistics *) ptr;
+tcg_debug_assert(stats);
+++stats->executions.normal;
+}
diff --git a/accel/tcg/tcg-runtime.h b/accel/tcg/tcg-runtime.h
index 6f8c2061d0..8d310e39c0 100644
--- a/accel/tcg/tcg-runtime.h
+++ b/accel/tcg/tcg-runtime.h
@@ -41,6 +41,7 @@ DEF_HELPER_FLAGS_3(memset, TCG_CALL_NO_RWG, ptr, ptr, int, 
ptr)
 
 DEF_HELPER_FLAGS_3(ld_i128, TCG_CALL_NO_WG, i128, env, i64, i32)
 DEF_HELPER_FLAGS_4(st_i128, TCG_CALL_NO_WG, void, env, i64, i128, i32)
+DEF_HELPER_FLAGS_1(inc_exec_freq, TCG_CALL_NO_RWG, void, ptr)
 
 DEF_HELPER_FLAGS_5(atomic_cmpxchgb, TCG_CALL_NO_WG,
i32, env, i64, i32, i32, i32)
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index a5ebc5e9e2..31d4ddf4ab 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -312,6 +312,7 @@ static TBStatistics *tb_get_stats(tb_page_addr_t phys_pc, 
target_ulong pc,
 new_stats->pc = pc;
 new_stats->cs_base = cs_base;
 new_stats->flags = flags;
+new_stats->stats_enabled = get_default_tbstats_flag();
 
 /*
  * All initialisation must be complete before we insert into qht
@@ -408,9 +409,11 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
 /*
  * We want to fetch the stats structure before we start code
  * generation so we can count interesting things about this
- * generation.
+ * generation. If dfilter is in effect we will only collect stats
+ * for the specified range.
  */
-if (tb_stats_collection_enabled()) {
+if (tb_stats_collection_enabled() &&
+qemu_log_in_addr_range(tb->pc)) {
 tb->tb_stats = tb_get_stats(phys_pc, pc, cs_base, flags);
 } else {
 tb->tb_stats = NULL;
diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c
index 7bda43ff61..165072c8c3 100644
--- a/accel/tcg/translator.c
+++ b/accel/tcg/translator.c
@@ -18,6 +18,15 @@
 #include "exec/plugin-gen.h"
 #include "exec/re

[PATCH v12 00/15] TCG code quality tracking

2023-05-18 Thread Fei Wu
v12
---
* fix some CI issues, including make do-meson-check and build
* move part of patch 9 to 1
* fix inverse_sort_tbs
* use normal ++ instead of qatomic_inc for stats->executions.normal
* add how to for tb-stats
* append Fei Wu to Signed-off-by


Alex Bennée (5):
  accel/tcg: move profiler dev_time to tb_stats
  accel/tcg: convert profiling of restore operations to TBStats
  accel/tcg: convert profiling of code generation to TBStats
  tb-stats: reset the tracked TBs on a tb_flush
  configure: remove the final bits of --profiler support

Fei Wu (1):
  docs/tb-stats: add how to

Vanderson M. do Rosario (9):
  accel/tcg: introduce TBStatistics structure
  accel: collecting TB execution count
  accel: collecting JIT statistics
  accel: replacing part of CONFIG_PROFILER with TBStats
  accel: adding TB_JIT_TIME and full replacing CONFIG_PROFILER
  debug: add -d tb_stats to control TBStatistics collection:
  monitor: adding tb_stats hmp command
  Adding info [tb-list|tb] commands to HMP (WIP)
  tb-stats: dump hot TBs at the end of the execution

 MAINTAINERS   |   1 +
 accel/tcg/cpu-exec.c  |   6 +
 accel/tcg/meson.build |   1 +
 accel/tcg/monitor.c   | 122 +-
 accel/tcg/tb-context.h|   1 +
 accel/tcg/tb-hash.h   |   7 +
 accel/tcg/tb-maint.c  |  20 +
 accel/tcg/tb-stats.c  | 689 ++
 accel/tcg/tcg-accel-ops.c |  15 +-
 accel/tcg/tcg-runtime.c   |   8 +
 accel/tcg/tcg-runtime.h   |   1 +
 accel/tcg/translate-all.c | 147 ++--
 accel/tcg/translator.c|  13 +
 disas/disas.c |  24 +-
 docs/tb-stats.txt | 116 ++
 hmp-commands-info.hx  |  16 +
 hmp-commands.hx   |  16 +
 include/exec/exec-all.h   |   3 +
 include/exec/gen-icount.h |   2 +
 include/exec/tb-stats-dump.h  |  21 ++
 include/exec/tb-stats-flags.h |  33 ++
 include/exec/tb-stats.h   | 164 
 include/monitor/hmp.h |   3 +
 include/qemu/log-for-trace.h  |   6 +-
 include/qemu/log.h|   3 +
 include/qemu/timer.h  |   5 +-
 include/tcg/tcg.h |  50 ++-
 linux-user/exit.c |   2 +
 meson.build   |   2 -
 meson_options.txt |   2 -
 scripts/meson-buildoptions.sh |   3 -
 softmmu/runstate.c|  10 +-
 stubs/meson.build |   1 +
 stubs/tb-stats.c  |  32 ++
 tcg/tcg.c | 224 +++
 tests/qtest/qmp-cmd-test.c|   2 +-
 util/log.c| 103 -
 37 files changed, 1590 insertions(+), 284 deletions(-)
 create mode 100644 accel/tcg/tb-stats.c
 create mode 100644 docs/tb-stats.txt
 create mode 100644 include/exec/tb-stats-dump.h
 create mode 100644 include/exec/tb-stats-flags.h
 create mode 100644 include/exec/tb-stats.h
 create mode 100644 stubs/tb-stats.c

-- 
2.25.1




[PATCH v11 14/14] configure: remove the final bits of --profiler support

2023-04-21 Thread Fei Wu
From: Alex Bennée 

Signed-off-by: Alex Bennée 
---
 meson.build   | 2 --
 meson_options.txt | 2 --
 scripts/meson-buildoptions.sh | 3 ---
 3 files changed, 7 deletions(-)

diff --git a/meson.build b/meson.build
index 29f8644d6d..27627199d7 100644
--- a/meson.build
+++ b/meson.build
@@ -1872,7 +1872,6 @@ if numa.found()
dependencies: numa))
 endif
 config_host_data.set('CONFIG_OPENGL', opengl.found())
-config_host_data.set('CONFIG_PROFILER', get_option('profiler'))
 config_host_data.set('CONFIG_RBD', rbd.found())
 config_host_data.set('CONFIG_RDMA', rdma.found())
 config_host_data.set('CONFIG_SDL', sdl.found())
@@ -3823,7 +3822,6 @@ if 'objc' in all_languages
   summary_info += {'QEMU_OBJCFLAGS':' '.join(qemu_objcflags)}
 endif
 summary_info += {'QEMU_LDFLAGS':  ' '.join(qemu_ldflags)}
-summary_info += {'profiler':  get_option('profiler')}
 summary_info += {'link-time optimization (LTO)': get_option('b_lto')}
 summary_info += {'PIE':   get_option('b_pie')}
 summary_info += {'static build':  config_host.has_key('CONFIG_STATIC')}
diff --git a/meson_options.txt b/meson_options.txt
index fc9447d267..163233fda6 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -320,8 +320,6 @@ option('qom_cast_debug', type: 'boolean', value: false,
 option('gprof', type: 'boolean', value: false,
description: 'QEMU profiling with gprof',
deprecated: true)
-option('profiler', type: 'boolean', value: false,
-   description: 'profiler support')
 option('slirp_smbd', type : 'feature', value : 'auto',
description: 'use smbd (at path --smbd=*) in slirp networking')
 
diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh
index 009fab1515..8a6d61ed90 100644
--- a/scripts/meson-buildoptions.sh
+++ b/scripts/meson-buildoptions.sh
@@ -34,7 +34,6 @@ meson_options_help() {
   printf "%s\n" '   jemalloc/system/tcmalloc)'
   printf "%s\n" '  --enable-module-upgrades try to load modules from alternate 
paths for'
   printf "%s\n" '   upgrades'
-  printf "%s\n" '  --enable-profilerprofiler support'
   printf "%s\n" '  --enable-qom-cast-debug  cast debugging support'
   printf "%s\n" '  --enable-rng-nonedummy RNG, avoid using 
/dev/(u)random and'
   printf "%s\n" '   getrandom()'
@@ -373,8 +372,6 @@ _meson_option_parse() {
 --with-pkgversion=*) quote_sh "-Dpkgversion=$2" ;;
 --enable-png) printf "%s" -Dpng=enabled ;;
 --disable-png) printf "%s" -Dpng=disabled ;;
---enable-profiler) printf "%s" -Dprofiler=true ;;
---disable-profiler) printf "%s" -Dprofiler=false ;;
 --enable-pvrdma) printf "%s" -Dpvrdma=enabled ;;
 --disable-pvrdma) printf "%s" -Dpvrdma=disabled ;;
 --enable-qcow1) printf "%s" -Dqcow1=enabled ;;
-- 
2.25.1




[PATCH v11 12/14] Adding info [tb-list|tb] commands to HMP (WIP)

2023-04-21 Thread Fei Wu
From: "Vanderson M. do Rosario" 

These commands allow the exploration of TBs generated by the TCG:
finding out which ones are hotter or have more guest/host instructions,
and examining their guest, host and IR code.

The goal of this command is to allow the dynamic exploration of TCG
behavior and code quality. Therefore, for now, a corresponding QMP
command is not worthwhile.

[AJB: WIP - we still can't be safely sure a translation will succeed]

Example of output:

TB id:1 | phys:0x34d54 virt:0x00034d54 flags:0xf0
| exec:4828932/0 guest inst cov:16.38%
| trans:1 ints: g:3 op:82 op_opt:34 spills:3
| h/g (host bytes / guest insts): 90.64
| time to gen at 2.4GHz => code:3150.83(ns) IR:712.08(ns)
| targets: 0x00034d5e (id:11), 0x00034d0d (id:2)

TB id:2 | phys:0x34d0d virt:0x00034d0d flags:0xf0
| exec:4825842/0 guest inst cov:21.82%
| trans:1 ints: g:4 op:80 op_opt:38 spills:2
| h/g (host bytes / guest insts): 84.00
| time to gen at 2.4GHz => code:3362.92(ns) IR:793.75(ns)
| targets: 0x00034d19 (id:12), 0x00034d54 (id:1)

TB id:2 | phys:0x34d0d virt:0x00034d0d flags:0xf0
| exec:6956495/0  guest inst cov:21.82%
| trans:2 ints: g:2 op:40 op_opt:19 spills:1
| h/g (host bytes / guest insts): 84.00
| time to gen at 2.4GHz => code:3130.83(ns) IR:722.50(ns)
| targets: 0x00034d19 (id:12), 0x00034d54 (id:1)


IN:
0x00034d0d:  89 demovl %ebx, %esi
0x00034d0f:  26 8b 0e movl %es:(%esi), %ecx
0x00034d12:  26 f6 46 08 80   testb$0x80, %es:8(%esi)
0x00034d17:  75 3bjne  0x34d54

--

TB id:1 | phys:0x34d54 virt:0x00034d54 flags:0xf0
| exec:5202686/0 guest inst cov:11.28%
| trans:1 ints: g:3 op:82 op_opt:34 spills:3
| h/g (host bytes / guest insts): 90.64
| time to gen at 2.4GHz => code:2793.75(ns) IR:614.58(ns)
| targets: 0x00034d5e (id:3), 0x00034d0d (id:2)

TB id:2 | phys:0x34d0d virt:0x00034d0d flags:0xf0
| exec:5199468/0 guest inst cov:15.03%
| trans:1 ints: g:4 op:80 op_opt:38 spills:2
| h/g (host bytes / guest insts): 84.00
| time to gen at 2.4GHz => code:2958.75(ns) IR:719.58(ns)
| targets: 0x00034d19 (id:4), 0x00034d54 (id:1)

--
2 TBs to reach 25% of guest inst exec coverage
Total of guest insts exec: 138346727

--

Acked-by: Dr. David Alan Gilbert 
Signed-off-by: Vanderson M. do Rosario 
Message-Id: <20190829173437.5926-10-vanderson...@gmail.com>
[AJB: fix authorship, dropped coverset]
Signed-off-by: Alex Bennée 
---
 accel/tcg/monitor.c  |  55 ++
 accel/tcg/tb-stats.c | 322 +++
 disas.c  |  31 +++-
 hmp-commands-info.hx |  16 ++
 include/exec/tb-stats.h  |  33 +++-
 include/monitor/hmp.h|   2 +
 include/qemu/log-for-trace.h |   6 +-
 include/qemu/log.h   |   2 +
 util/log.c   |  66 +--
 9 files changed, 512 insertions(+), 21 deletions(-)

diff --git a/accel/tcg/monitor.c b/accel/tcg/monitor.c
index b342727262..6bfefd24f6 100644
--- a/accel/tcg/monitor.c
+++ b/accel/tcg/monitor.c
@@ -7,6 +7,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/log.h"
 #include "qapi/error.h"
 #include "qapi/type-helpers.h"
 #include "qapi/qapi-commands-machine.h"
@@ -17,6 +18,7 @@
 #include "sysemu/cpu-timers.h"
 #include "sysemu/tcg.h"
 #include "exec/tb-stats.h"
+#include "tb-context.h"
 #include "internal.h"
 
 
@@ -117,6 +119,59 @@ void hmp_tbstats(Monitor *mon, const QDict *qdict)
   RUN_ON_CPU_HOST_PTR(tbscommand));
 
 }
+
+void hmp_info_tblist(Monitor *mon, const QDict *qdict)
+{
+int number_int;
+const char *sortedby_str = NULL;
+if (!tcg_enabled()) {
+error_report("TB information is only available with accel=tcg");
+return;
+}
+if (!tb_ctx.tb_stats.map) {
+error_report("no TB information recorded");
+return;
+}
+
+number_int = qdict_get_try_int(qdict, "number", 10);
+sortedby_str = qdict_get_try_str(qdict, "sortedby");
+
+int sortedby = SORT_BY_HOTNESS;
+if (sortedby_str == NULL || strcmp(sortedby_str, "hotness") == 0) {
+sortedby = SORT_BY_HOTNESS;
+} else if (strcmp(sortedby_str, "hg") == 0) {
+sortedby = SORT_BY_HG;
+} else if (strcmp(sortedby_str, "spills") == 0) {
+sortedby = SORT_BY_SPILLS;
+} else {
+error_report("valid sort options are: hotness hg spills");
+return;
+}
+
+dump_tbs_info(number_int, sortedby, true);
+}
+
+void hmp_info_tb(Monitor *mon, const QDict *qdict)
+{
+const int id = qdict_get_int(qdict, "id");
+

[PATCH v11 09/14] debug: add -d tb_stats to control TBStatistics collection:

2023-04-21 Thread Fei Wu
From: "Vanderson M. do Rosario" 

 -d tb_stats[[,level=(+all+jit+exec+time)][,dump_limit=<number>]]

"dump_limit" is used to limit the number of dumped TBStats in
linux-user mode.

[all+jit+exec+time] controls the profiling level used
by the TBStats. It can be used as follows:

-d tb_stats
-d tb_stats,level=jit+time
-d tb_stats,dump_limit=15
...

Signed-off-by: Vanderson M. do Rosario 
Message-Id: <20190829173437.5926-7-vanderson...@gmail.com>
[AJB: fix authorship, reword title]
Signed-off-by: Alex Bennée 
---
 accel/tcg/tb-stats.c  |  5 +
 include/exec/gen-icount.h |  1 +
 include/exec/tb-stats-flags.h | 30 
 include/exec/tb-stats.h   | 15 +++---
 include/qemu/log.h|  1 +
 stubs/meson.build |  1 +
 stubs/tb-stats.c  | 27 +
 util/log.c| 37 +++
 8 files changed, 105 insertions(+), 12 deletions(-)
 create mode 100644 include/exec/tb-stats-flags.h
 create mode 100644 stubs/tb-stats.c

diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c
index 3cd7f6fc06..4d24ac2472 100644
--- a/accel/tcg/tb-stats.c
+++ b/accel/tcg/tb-stats.c
@@ -208,3 +208,8 @@ uint32_t get_default_tbstats_flag(void)
 {
 return default_tbstats_flag;
 }
+
+void set_default_tbstats_flag(uint32_t flags)
+{
+default_tbstats_flag = flags;
+}
diff --git a/include/exec/gen-icount.h b/include/exec/gen-icount.h
index 20e7835ff0..4372817951 100644
--- a/include/exec/gen-icount.h
+++ b/include/exec/gen-icount.h
@@ -3,6 +3,7 @@
 
 #include "exec/exec-all.h"
 #include "exec/tb-stats.h"
+#include "tb-stats-flags.h"
 
 /* Helpers for instruction counting code generation.  */
 
diff --git a/include/exec/tb-stats-flags.h b/include/exec/tb-stats-flags.h
new file mode 100644
index 00..559d819b6b
--- /dev/null
+++ b/include/exec/tb-stats-flags.h
@@ -0,0 +1,30 @@
+/*
+ * QEMU System Emulator, Code Quality Monitor System
+ *
+ * We define the flags and control bits here to avoid complications of
+ * including TCG/CPU information in common code.
+ *
+ * Copyright (c) 2019 Vanderson M. do Rosario 
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+#ifndef TB_STATS_FLAGS
+#define TB_STATS_FLAGS
+
+#define TB_NOTHING(1 << 0)
+#define TB_EXEC_STATS (1 << 1)
+#define TB_JIT_STATS  (1 << 2)
+#define TB_JIT_TIME   (1 << 3)
+#define TB_ALL_STATS  (TB_EXEC_STATS | TB_JIT_STATS | TB_JIT_TIME)
+
+/* TBStatistic collection controls */
+void enable_collect_tb_stats(void);
+void disable_collect_tb_stats(void);
+void pause_collect_tb_stats(void);
+bool tb_stats_collection_enabled(void);
+bool tb_stats_collection_paused(void);
+
+uint32_t get_default_tbstats_flag(void);
+void set_default_tbstats_flag(uint32_t);
+
+#endif
diff --git a/include/exec/tb-stats.h b/include/exec/tb-stats.h
index a23b6320bd..ad60662f78 100644
--- a/include/exec/tb-stats.h
+++ b/include/exec/tb-stats.h
@@ -30,6 +30,8 @@
 #include "exec/exec-all.h"
 #include "tcg/tcg.h"
 
+#include "exec/tb-stats-flags.h"
+
 #define tb_stats_enabled(tb, JIT_STATS) \
 (tb && tb->tb_stats && (tb->tb_stats->stats_enabled & JIT_STATS))
 
@@ -111,17 +113,6 @@ void init_tb_stats_htable(void);
 void dump_jit_profile_info(TCGProfile *s, GString *buf);
 void dump_jit_exec_time_info(uint64_t dev_time);
 
-#define TB_NOTHING(1 << 0)
-#define TB_EXEC_STATS (1 << 1)
-#define TB_JIT_STATS  (1 << 2)
-#define TB_JIT_TIME   (1 << 3)
-
-void enable_collect_tb_stats(void);
-void disable_collect_tb_stats(void);
-void pause_collect_tb_stats(void);
-bool tb_stats_collection_enabled(void);
-bool tb_stats_collection_paused(void);
-
-uint32_t get_default_tbstats_flag(void);
+void set_tbstats_flags(uint32_t flags);
 
 #endif
diff --git a/include/qemu/log.h b/include/qemu/log.h
index c5643d8dd5..6f3b8091cd 100644
--- a/include/qemu/log.h
+++ b/include/qemu/log.h
@@ -35,6 +35,7 @@ bool qemu_log_separate(void);
 /* LOG_STRACE is used for user-mode strace logging. */
 #define LOG_STRACE (1 << 19)
 #define LOG_PER_THREAD (1 << 20)
+#define CPU_LOG_TB_STATS   (1 << 21)
 
 /* Lock/unlock output. */
 
diff --git a/stubs/meson.build b/stubs/meson.build
index b2b5956d97..ecce77a01f 100644
--- a/stubs/meson.build
+++ b/stubs/meson.build
@@ -63,3 +63,4 @@ else
 endif
 stub_ss.add(files('semihost-all.c'))
 stub_ss.add(when: 'CONFIG_VFIO_USER_SERVER', if_false: 
files('vfio-user-obj.c'))
+stub_ss.add(files('tb-stats.c'))
diff --git a/stubs/tb-stats.c b/stubs/tb-stats.c
new file mode 100644
index 00..d212c2a1fa
--- /dev/null
+++ b/stubs/tb-stats.c
@@ -0,0 +1,27 @@
+/*
+ * TB Stats Stubs
+ *
+ * Copyright (c) 2019
+ * Written by Alex Bennée 
+ *
+ * This code is licensed under the GNU GPL v2, or later.
+ */
+
+
+#include "qemu/osdep.h"
+#include "exec/tb-stats-flags.h"
+
+void enable_collect_tb_stats(void)
+{
+return;
+}
+
+bool tb_stats_collection_enabled(void)
+{
+return false;
+}
+
+void set_default_tbstats_flag(uint32_t flags)
+{
+return;
+

[PATCH v11 11/14] tb-stats: reset the tracked TBs on a tb_flush

2023-04-21 Thread Fei Wu
From: Alex Bennée 

We keep track of translations but can only do so up until the
translation cache is flushed. At that point we really have no idea if
we can re-create a translation because all the active tracking
information has been reset.

Signed-off-by: Alex Bennée 
Reviewed-by: Richard Henderson 
---
 accel/tcg/tb-maint.c|  1 +
 accel/tcg/tb-stats.c| 19 +++
 include/exec/tb-stats.h |  8 
 3 files changed, 28 insertions(+)

diff --git a/accel/tcg/tb-maint.c b/accel/tcg/tb-maint.c
index ba1635aa4b..5f946e0285 100644
--- a/accel/tcg/tb-maint.c
+++ b/accel/tcg/tb-maint.c
@@ -762,6 +762,7 @@ static void do_tb_flush(CPUState *cpu, run_on_cpu_data 
tb_flush_count)
 qht_reset_size(&tb_ctx.htable, CODE_GEN_HTABLE_SIZE);
 tb_remove_all();
 
+tbstats_reset_tbs();
 tcg_region_reset_all();
 /* XXX: flush processor icache at this point if cache flush is expensive */
 qatomic_mb_set(&tb_ctx.tb_flush_count, tb_ctx.tb_flush_count + 1);
diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c
index 61bfbe96fc..56e944b225 100644
--- a/accel/tcg/tb-stats.c
+++ b/accel/tcg/tb-stats.c
@@ -269,6 +269,25 @@ void do_hmp_tbstats_safe(CPUState *cpu, run_on_cpu_data 
icmd)
 g_free(cmdinfo);
 }
 
+/*
+ * We have to reset the tbs array on a tb_flush as those
+ * TranslationBlocks no longer exist and we no longer know if the
+ * current mapping is still valid.
+ */
+
+static void reset_tbs_array(void *p, uint32_t hash, void *userp)
+{
+TBStatistics *tbs = p;
+g_ptr_array_set_size(tbs->tbs, 0);
+}
+
+void tbstats_reset_tbs(void)
+{
+if (tb_ctx.tb_stats.map) {
+qht_iter(&tb_ctx.tb_stats, reset_tbs_array, NULL);
+}
+}
+
 void init_tb_stats_htable(void)
 {
 if (!tb_ctx.tb_stats.map && tb_stats_collection_enabled()) {
diff --git a/include/exec/tb-stats.h b/include/exec/tb-stats.h
index 33eed8d385..ec47cbecc2 100644
--- a/include/exec/tb-stats.h
+++ b/include/exec/tb-stats.h
@@ -125,4 +125,12 @@ struct TbstatsCommand {
 
 void do_hmp_tbstats_safe(CPUState *cpu, run_on_cpu_data icmd);
 
+/**
+ * tbstats_reset_tbs: reset the linked array of TBs
+ *
+ * Reset the list of tbs for a given array. Should be called from
+ * safe work during tb_flush.
+ */
+void tbstats_reset_tbs(void);
+
 #endif
-- 
2.25.1




[PATCH v11 04/14] accel: replacing part of CONFIG_PROFILER with TBStats

2023-04-21 Thread Fei Wu
From: "Vanderson M. do Rosario" 

We add some of the statistics collected in the TCGProfiler into the
TBStats, having the statistics not only for the whole emulation but
for each TB. Then, we remove these stats from TCGProfiler and
reconstruct the information for the "info jit" using the sum of all
TBStats statistics.

The goal is to have one unique and better way of collecting emulation
statistics. Moreover, dynamically checking whether profiling is enabled
was shown to have an insignificant impact on performance:
https://wiki.qemu.org/Internships/ProjectIdeas/TCGCodeQuality#Overheads.

Reviewed-by: Alex Bennée 
Signed-off-by: Vanderson M. do Rosario 
Message-Id: <20190829173437.5926-5-vanderson...@gmail.com>
[AJB: fix authorship, use prof structure]
Signed-off-by: Alex Bennée 
---
 accel/tcg/tb-stats.c  |  99 +
 accel/tcg/translate-all.c |  10 ++--
 include/exec/tb-stats.h   |  11 
 include/tcg/tcg.h |   9 +--
 tcg/tcg.c | 112 --
 5 files changed, 129 insertions(+), 112 deletions(-)

diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c
index b1d4341b6f..434b235edb 100644
--- a/accel/tcg/tb-stats.c
+++ b/accel/tcg/tb-stats.c
@@ -9,6 +9,10 @@
 #include "qemu/osdep.h"
 
 #include "disas/disas.h"
+#include "exec/exec-all.h"
+#include "tcg/tcg.h"
+
+#include "qemu/qemu-print.h"
 
 #include "exec/tb-stats.h"
 #include "tb-context.h"
@@ -24,6 +28,101 @@ enum TBStatsStatus {
 static enum TBStatsStatus tcg_collect_tb_stats;
 static uint32_t default_tbstats_flag;
 
+struct jit_profile_info {
+uint64_t translations;
+uint64_t aborted;
+uint64_t ops;
+unsigned ops_max;
+uint64_t del_ops;
+uint64_t temps;
+unsigned temps_max;
+uint64_t host;
+uint64_t guest;
+uint64_t search_data;
+};
+
+/* accumulate the statistics from all TBs */
+static void collect_jit_profile_info(void *p, uint32_t hash, void *userp)
+{
+struct jit_profile_info *jpi = userp;
+TBStatistics *tbs = p;
+
+jpi->translations += tbs->translations.total;
+jpi->ops += tbs->code.num_tcg_ops;
+if (stat_per_translation(tbs, code.num_tcg_ops) > jpi->ops_max) {
+jpi->ops_max = stat_per_translation(tbs, code.num_tcg_ops);
+}
+jpi->del_ops += tbs->code.deleted_ops;
+jpi->temps += tbs->code.temps;
+if (stat_per_translation(tbs, code.temps) > jpi->temps_max) {
+jpi->temps_max = stat_per_translation(tbs, code.temps);
+}
+jpi->host += tbs->code.out_len;
+jpi->guest += tbs->code.in_len;
+jpi->search_data += tbs->code.search_out_len;
+}
+
+/* dump JIT statisticis using TCGProfile and TBStats */
+void dump_jit_profile_info(TCGProfile *s, GString *buf)
+{
+if (!tb_stats_collection_enabled()) {
+return;
+}
+
+struct jit_profile_info *jpi = g_new0(struct jit_profile_info, 1);
+
+qht_iter(&tb_ctx.tb_stats, collect_jit_profile_info, jpi);
+
+if (jpi->translations) {
+g_string_append_printf(buf, "translated TBs  %" PRId64 "\n",
+jpi->translations);
+g_string_append_printf(buf, "avg ops/TB  %0.1f max=%d\n",
+jpi->ops / (double) jpi->translations, jpi->ops_max);
+g_string_append_printf(buf, "deleted ops/TB  %0.2f\n",
+jpi->del_ops / (double) jpi->translations);
+g_string_append_printf(buf, "avg temps/TB%0.2f max=%d\n",
+jpi->temps / (double) jpi->translations, jpi->temps_max);
+g_string_append_printf(buf, "avg host code/TB%0.1f\n",
+jpi->host / (double) jpi->translations);
+g_string_append_printf(buf, "avg search data/TB  %0.1f\n",
+jpi->search_data / (double) jpi->translations);
+
+if (s) {
+int64_t tot = s->interm_time + s->code_time;
+g_string_append_printf(buf, "JIT cycles  %" PRId64
+" (%0.3f s at 2.4 GHz)\n",
+tot, tot / 2.4e9);
+g_string_append_printf(buf, "cycles/op   %0.1f\n",
+jpi->ops ? (double)tot / jpi->ops : 0);
+g_string_append_printf(buf, "cycles/in byte  %0.1f\n",
+jpi->guest ? (double)tot / jpi->guest : 0);
+g_string_append_printf(buf, "cycles/out byte %0.1f\n",
+jpi->host ? (double)tot / jpi->host : 0);
+g_string_append_printf(buf, "cycles/search byte %0.1f\n",
+jpi->search_data ? (double)tot / jpi->search_data : 0);
+if (tot == 0) {
+tot = 1;
+}
+g_string_append_printf(buf, "  gen_interm time   %0.1f%%\n",
+(double)s->interm_time / tot * 100.0);
+g_string_append_printf(buf, "  gen_code time %0.1f%%\n",
+(double)s->code_time / tot * 100.0);
+g_string_append_printf(buf, "optim./code time%0.1f%%\n",
+ 

[PATCH v11 10/14] monitor: adding tb_stats hmp command

2023-04-21 Thread Fei Wu
From: "Vanderson M. do Rosario" 

Adding tb_stats [start|pause|stop|filter] command to hmp.
This allows controlling the collection of statistics.
It is also possible to set the level of collection:
all, jit, or exec.

tb_stats filter allows collecting statistics only for the TBs
in the last_search list.

The goal of this command is to allow the dynamic exploration
of the TCG behavior and quality. Therefore, for now, a
corresponding QMP command is not worthwhile.

Acked-by: Dr. David Alan Gilbert 
Signed-off-by: Vanderson M. do Rosario 
Message-Id: <20190829173437.5926-8-vanderson...@gmail.com>
Message-Id: <20190829173437.5926-9-vanderson...@gmail.com>
[AJB: fix authorship]
Signed-off-by: Alex Bennée 
---
 accel/tcg/monitor.c   |  51 
 accel/tcg/tb-stats.c  | 112 ++
 hmp-commands.hx   |  16 +
 include/exec/tb-stats-flags.h |   1 +
 include/exec/tb-stats.h   |  10 +++
 include/monitor/hmp.h |   1 +
 softmmu/runstate.c|   6 ++
 7 files changed, 197 insertions(+)

diff --git a/accel/tcg/monitor.c b/accel/tcg/monitor.c
index 5f9dba56e3..b342727262 100644
--- a/accel/tcg/monitor.c
+++ b/accel/tcg/monitor.c
@@ -10,7 +10,9 @@
 #include "qapi/error.h"
 #include "qapi/type-helpers.h"
 #include "qapi/qapi-commands-machine.h"
+#include "qapi/qmp/qdict.h"
 #include "monitor/monitor.h"
+#include "monitor/hmp.h"
 #include "sysemu/cpus.h"
 #include "sysemu/cpu-timers.h"
 #include "sysemu/tcg.h"
@@ -72,12 +74,61 @@ HumanReadableText *qmp_x_query_opcount(Error **errp)
 return human_readable_text_from_str(buf);
 }
 
+#ifdef CONFIG_TCG
+void hmp_tbstats(Monitor *mon, const QDict *qdict)
+{
+if (!tcg_enabled()) {
+error_report("TB information is only available with accel=tcg");
+return;
+}
+
+char *cmd = (char *) qdict_get_try_str(qdict, "command");
+enum TbstatsCmd icmd = -1;
+
+if (strcmp(cmd, "start") == 0) {
+icmd = START;
+} else if (strcmp(cmd, "pause") == 0) {
+icmd = PAUSE;
+} else if (strcmp(cmd, "stop") == 0) {
+icmd = STOP;
+} else if (strcmp(cmd, "filter") == 0) {
+icmd = FILTER;
+} else {
+error_report("invalid command!");
+return;
+}
+
+char *slevel = (char *) qdict_get_try_str(qdict, "level");
+uint32_t level = TB_EXEC_STATS | TB_JIT_STATS | TB_JIT_TIME;
+if (slevel) {
+if (strcmp(slevel, "jit") == 0) {
+level = TB_JIT_STATS;
+} else if (strcmp(slevel, "exec") == 0) {
+level = TB_EXEC_STATS;
+} else if (strcmp(slevel, "time") == 0) {
+level = TB_JIT_TIME;
+}
+}
+
+struct TbstatsCommand *tbscommand = g_new0(struct TbstatsCommand, 1);
+tbscommand->cmd = icmd;
+tbscommand->level = level;
+async_safe_run_on_cpu(first_cpu, do_hmp_tbstats_safe,
+  RUN_ON_CPU_HOST_PTR(tbscommand));
+
+}
+#endif
+
 HumanReadableText *qmp_x_query_profile(Error **errp)
 {
 g_autoptr(GString) buf = g_string_new("");
 
+#ifdef CONFIG_TCG
 dump_jit_exec_time_info(dev_time);
 dev_time = 0;
+#else
+error_report("TCG should be enabled!");
+#endif
 
 return human_readable_text_from_str(buf);
 }
diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c
index 4d24ac2472..61bfbe96fc 100644
--- a/accel/tcg/tb-stats.c
+++ b/accel/tcg/tb-stats.c
@@ -16,6 +16,7 @@
 #include "qemu/timer.h"
 
 #include "exec/tb-stats.h"
+#include "exec/tb-flush.h"
 #include "tb-context.h"
 
 /* TBStatistic collection controls */
@@ -28,6 +29,8 @@ enum TBStatsStatus {
 
 static enum TBStatsStatus tcg_collect_tb_stats;
 static uint32_t default_tbstats_flag;
+/* only accessed in safe work */
+static GList *last_search;
 
 uint64_t dev_time;
 
@@ -170,6 +173,102 @@ void dump_jit_profile_info(TCGProfile *s, GString *buf)
 g_free(jpi);
 }
 
+static void free_tbstats(void *p, uint32_t hash, void *userp)
+{
+g_free(p);
+}
+
+static void clean_tbstats(void)
+{
+/* remove all tb_stats */
+qht_iter(&tb_ctx.tb_stats, free_tbstats, NULL);
+qht_destroy(&tb_ctx.tb_stats);
+}
+
+void do_hmp_tbstats_safe(CPUState *cpu, run_on_cpu_data icmd)
+{
+struct TbstatsCommand *cmdinfo = icmd.host_ptr;
+int cmd = cmdinfo->cmd;
+uint32_t level = cmdinfo->level;
+
+switch (cmd) {
+case START:
+if (tb_stats_collection_paused()) {
+set_tbstats_flags(level);
+} else {
+if (tb_stats_collection_enabled()) {
+qemu_printf("TB information already being recorded");
+return;
+}
+qht_init(&tb_ctx.tb_stats, tb_stats_cmp, CODE_GEN_HTABLE_SIZE,
+QHT_MODE_AUTO_RESIZE);
+}
+
+set_default_tbstats_flag(level);
+enable_collect_tb_stats();
+tb_flush(cpu);
+break;
+case PAUSE:
+if (!tb_stats_collection_enabled()) {
+qemu_printf("TB information not being r

[PATCH v11 13/14] tb-stats: dump hot TBs at the end of the execution

2023-04-21 Thread Fei Wu
From: "Vanderson M. do Rosario" 

Dump the hottest TBs if -d tb_stats,dump_limit=N is used.

Example of output for the 3 hottest TBs:

TB id:1 | phys:0x34d54 virt:0x00034d54 flags:0xf0
| exec:4828932/0 guest inst cov:16.38%
| trans:1 ints: g:3 op:82 op_opt:34 spills:3
| h/g (host bytes / guest insts): 90.64
| time to gen at 2.4GHz => code:3150.83(ns) IR:712.08(ns)
| targets: 0x00034d5e (id:11), 0x00034d0d (id:2)

TB id:2 | phys:0x34d0d virt:0x00034d0d flags:0xf0
| exec:4825842/0 guest inst cov:21.82%
| trans:1 ints: g:4 op:80 op_opt:38 spills:2
| h/g (host bytes / guest insts): 84.00
| time to gen at 2.4GHz => code:3362.92(ns) IR:793.75(ns)
| targets: 0x00034d19 (id:12), 0x00034d54 (id:1)

TB id:3 | phys:0xec1c1 virt:0x000ec1c1 flags:0xb0
| exec:872032/0 guest inst cov:1.97%
| trans:1 ints: g:2 op:56 op_opt:26 spills:1
| h/g (host bytes / guest insts): 68.00
| time to gen at 2.4GHz => code:1692.08(ns) IR:473.75(ns)
| targets: 0x000ec1c5 (id:4), 0x000ec1cb (id:13)

Signed-off-by: Vanderson M. do Rosario 
Message-Id: <20190829173437.5926-12-vanderson...@gmail.com>
[AJB: fix authorship, add softmmu support]
Signed-off-by: Alex Bennée 
---
 accel/tcg/tb-stats.c  | 21 +
 include/exec/tb-stats-dump.h  | 21 +
 include/exec/tb-stats-flags.h |  1 +
 linux-user/exit.c |  2 ++
 softmmu/runstate.c|  2 ++
 stubs/tb-stats.c  |  5 +
 util/log.c|  4 ++--
 7 files changed, 54 insertions(+), 2 deletions(-)
 create mode 100644 include/exec/tb-stats-dump.h

diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c
index 69ac3d25cf..da85f397fd 100644
--- a/accel/tcg/tb-stats.c
+++ b/accel/tcg/tb-stats.c
@@ -19,6 +19,7 @@
 
 #include "exec/tb-stats.h"
 #include "exec/tb-flush.h"
+#include "exec/tb-stats-dump.h"
 #include "tb-context.h"
 
 #include "internal.h"
@@ -33,6 +34,7 @@ enum TBStatsStatus {
 
 static enum TBStatsStatus tcg_collect_tb_stats;
 static uint32_t default_tbstats_flag;
+static int max_dump_tbs;
 /* only accessed in safe work */
 static GList *last_search;
 
@@ -618,6 +620,20 @@ void dump_tb_info(int id, int log_mask, bool use_monitor)
 }
 
 
+/*
+ * Dump the final stats
+ */
+void tb_stats_dump(void)
+{
+if (!tb_stats_collection_enabled()) {
+return;
+}
+
+dump_tbs_info(max_dump_tbs, SORT_BY_HOTNESS, false);
+}
+
+/* TBStatistic collection controls */
+
 void enable_collect_tb_stats(void)
 {
 tcg_collect_tb_stats = TB_STATS_RUNNING;
@@ -666,3 +682,8 @@ void set_default_tbstats_flag(uint32_t flags)
 {
 default_tbstats_flag = flags;
 }
+
+void set_tbstats_max_tbs(int max)
+{
+max_dump_tbs = max;
+}
diff --git a/include/exec/tb-stats-dump.h b/include/exec/tb-stats-dump.h
new file mode 100644
index 00..197c6148e9
--- /dev/null
+++ b/include/exec/tb-stats-dump.h
@@ -0,0 +1,21 @@
+/*
+ * TB Stats common dump functions across sysemu/linux-user
+ *
+ * Copyright (c) 2019 Linaro
+ *
+ * SPDX-License-Identifier: GPL-3.0-or-later
+ */
+
+#ifndef _TB_STATS_DUMP_H_
+#define _TB_STATS_DUMP_H_
+
+/**
+ * tb_stats_dump: dump final tb_stats at end of execution
+ */
+#ifdef CONFIG_TCG
+void tb_stats_dump(void);
+#else
+static inline void tb_stats_dump(void) { /* do nothing */ };
+#endif
+
+#endif /* _TB_STATS_DUMP_H_ */
diff --git a/include/exec/tb-stats-flags.h b/include/exec/tb-stats-flags.h
index e64fe8caf1..6fd1f8ce33 100644
--- a/include/exec/tb-stats-flags.h
+++ b/include/exec/tb-stats-flags.h
@@ -25,6 +25,7 @@ void pause_collect_tb_stats(void);
 bool tb_stats_collection_enabled(void);
 bool tb_stats_collection_paused(void);
 
+void set_tbstats_max_tbs(int max);
 uint32_t get_default_tbstats_flag(void);
 void set_default_tbstats_flag(uint32_t);
 
diff --git a/linux-user/exit.c b/linux-user/exit.c
index 3017d28a3c..4fd23bcf60 100644
--- a/linux-user/exit.c
+++ b/linux-user/exit.c
@@ -25,6 +25,7 @@
 #ifdef CONFIG_GPROF
 #include 
 #endif
+#include "exec/tb-stats-dump.h"
 
 #ifdef CONFIG_GCOV
 extern void __gcov_dump(void);
@@ -41,4 +42,5 @@ void preexit_cleanup(CPUArchState *env, int code)
 gdb_exit(code);
 qemu_plugin_user_exit();
 perf_exit();
+tb_stats_dump();
 }
diff --git a/softmmu/runstate.c b/softmmu/runstate.c
index d168b82469..6c18b19d2b 100644
--- a/softmmu/runstate.c
+++ b/softmmu/runstate.c
@@ -30,6 +30,7 @@
 #include "crypto/cipher.h"
 #include "crypto/init.h"
 #include "exec/cpu-common.h"
+#include "exec/tb-stats-dump.h"
 #include "gdbstub/syscalls.h"
 #include "hw/boards.h"
 #include "migration/misc.h"
@@ -819,6 +820,7 @@ void qemu_cleanup(void)
 vm_shutdown();
 replay_finish();
 
+tb_stats_dump();
 job_cancel_sync_all();
 bdrv_close_all();
 
diff --git a/stubs/tb-stats.c b/stubs/tb-stats.c
index d212c2

[PATCH v11 07/14] accel/tcg: convert profiling of code generation to TBStats

2023-04-21 Thread Fei Wu
From: Alex Bennée 

We continue the conversion of CONFIG_PROFILER data to TBStats by
measuring the time it takes to generate code. Instead of calculating
elapsed time as we do, we simply store key timestamps in the profiler
structure and then calculate the totals and add them to TBStats under
lock.

Signed-off-by: Alex Bennée 
---
 accel/tcg/tb-stats.c  | 33 ---
 accel/tcg/translate-all.c | 69 ++-
 include/exec/tb-stats.h   |  7 
 include/tcg/tcg.h | 14 
 tcg/tcg.c | 17 --
 5 files changed, 59 insertions(+), 81 deletions(-)

diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c
index a2438a1f51..01adbac2a0 100644
--- a/accel/tcg/tb-stats.c
+++ b/accel/tcg/tb-stats.c
@@ -88,39 +88,6 @@ void dump_jit_profile_info(TCGProfile *s, GString *buf)
 jpi->host / (double) jpi->translations);
 g_string_append_printf(buf, "avg search data/TB  %0.1f\n",
 jpi->search_data / (double) jpi->translations);
-
-if (s) {
-int64_t tot = s->interm_time + s->code_time;
-g_string_append_printf(buf, "JIT cycles  %" PRId64
-" (%0.3f s at 2.4 GHz)\n",
-tot, tot / 2.4e9);
-g_string_append_printf(buf, "cycles/op   %0.1f\n",
-jpi->ops ? (double)tot / jpi->ops : 0);
-g_string_append_printf(buf, "cycles/in byte  %0.1f\n",
-jpi->guest ? (double)tot / jpi->guest : 0);
-g_string_append_printf(buf, "cycles/out byte %0.1f\n",
-jpi->host ? (double)tot / jpi->host : 0);
-g_string_append_printf(buf, "cycles/search byte %0.1f\n",
-jpi->search_data ? (double)tot / jpi->search_data : 0);
-if (tot == 0) {
-tot = 1;
-}
-g_string_append_printf(buf, "  gen_interm time   %0.1f%%\n",
-(double)s->interm_time / tot * 100.0);
-g_string_append_printf(buf, "  gen_code time %0.1f%%\n",
-(double)s->code_time / tot * 100.0);
-g_string_append_printf(buf, "optim./code time%0.1f%%\n",
-(double)s->opt_time / (s->code_time ? s->code_time : 1)
-* 100.0);
-g_string_append_printf(buf, "liveness/code time  %0.1f%%\n",
-(double)s->la_time / (s->code_time ? s->code_time : 1)
-* 100.0);
-g_string_append_printf(buf, "cpu_restore count   %" PRId64 "\n",
-s->restore_count);
-g_string_append_printf(buf, "  avg cycles%0.1f\n",
-s->restore_count ?
-(double)s->restore_time / s->restore_count : 0);
-}
 }
 g_free(jpi);
 }
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index bf10987450..92285d0add 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -278,8 +278,9 @@ void page_init(void)
  */
 static int setjmp_gen_code(CPUArchState *env, TranslationBlock *tb,
target_ulong pc, void *host_pc,
-   int *max_insns, int64_t *ti)
+   int *max_insns)
 {
+TCGProfile *prof = &tcg_ctx->prof;
 int ret = sigsetjmp(tcg_ctx->jmp_trans, 0);
 if (unlikely(ret != 0)) {
 return ret;
@@ -293,11 +294,9 @@ static int setjmp_gen_code(CPUArchState *env, 
TranslationBlock *tb,
 tcg_ctx->cpu = NULL;
 *max_insns = tb->icount;
 
-#ifdef CONFIG_PROFILER
-qatomic_set(&tcg_ctx->prof.interm_time,
-tcg_ctx->prof.interm_time + profile_getclock() - *ti);
-*ti = profile_getclock();
-#endif
+if (tb_stats_enabled(tb, TB_JIT_TIME)) {
+prof->gen_ir_done_time = profile_getclock();
+}
 
 return tcg_gen_code(tcg_ctx, tb, pc);
 }
@@ -348,7 +347,6 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
 tcg_insn_unit *gen_code_buf;
 int gen_code_size, search_size, max_insns;
 TCGProfile *prof = &tcg_ctx->prof;
-int64_t ti;
 void *host_pc;
 
 assert_memory_lock();
@@ -392,10 +390,6 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
 tcg_ctx->gen_tb = tb;
  tb_overflow:
 
-#ifdef CONFIG_PROFILER
-ti = profile_getclock();
-#endif
-
 trace_translate_block(tb, pc, tb->tc.ptr);
 
 /*
@@ -407,11 +401,14 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
 if (tb_stats_collection_enabled() &&
 qemu_log_in_addr_range(tb->pc)) {
 tb->tb_stats = tb_get_stats(phys_pc, pc, cs_base, flags);
+if (tb_stats_enabled(tb, TB_JIT_TIME)) {
+prof->gen_start_time = profile_getclock();
+}
 } else {
 tb->tb_stats = NULL;
 }
 
-gen_code_size = setjmp_gen_code(env, tb, pc, host_pc, &max_insns, &ti);
+gen_code_size = setjmp_gen_code(env, tb, pc, host_pc, &max_insns);
 if (unlikely(

[PATCH v11 06/14] accel/tcg: convert profiling of restore operations to TBStats

2023-04-21 Thread Fei Wu
From: Alex Bennée 

This starts moving the CONFIG_PROFILER data collection under
the TBStats system. We introduce a new flag TB_JIT_TIME and start
tracking how much time is spent restoring execution state from a given
TB.

Signed-off-by: Alex Bennée 
---
 accel/tcg/translate-all.c | 23 ++-
 include/exec/tb-stats.h   |  6 ++
 include/qemu/timer.h  |  3 ---
 3 files changed, 20 insertions(+), 12 deletions(-)

diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index 8a51c291d8..bf10987450 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -200,10 +200,12 @@ void cpu_restore_state_from_tb(CPUState *cpu, 
TranslationBlock *tb,
uintptr_t host_pc)
 {
 uint64_t data[TARGET_INSN_START_WORDS];
-#ifdef CONFIG_PROFILER
-TCGProfile *prof = &tcg_ctx->prof;
-int64_t ti = profile_getclock();
-#endif
+uint64_t ti = 0;
+
+if (tb_stats_enabled(tb, TB_JIT_TIME)) {
+ti = profile_getclock();
+}
+
 int insns_left = cpu_unwind_data_from_tb(tb, host_pc, data);
 
 if (insns_left < 0) {
@@ -221,11 +223,14 @@ void cpu_restore_state_from_tb(CPUState *cpu, 
TranslationBlock *tb,
 
 cpu->cc->tcg_ops->restore_state_to_opc(cpu, tb, data);
 
-#ifdef CONFIG_PROFILER
-qatomic_set(&prof->restore_time,
-prof->restore_time + profile_getclock() - ti);
-qatomic_set(&prof->restore_count, prof->restore_count + 1);
-#endif
+if (tb_stats_enabled(tb, TB_JIT_TIME)) {
+TBStatistics *ts = tb->tb_stats;
+uint64_t elapsed = profile_getclock() - ti;
+qemu_mutex_lock(&ts->jit_stats_lock);
+ts->tb_restore_time += elapsed;
+ts->tb_restore_count++;
+qemu_mutex_unlock(&ts->jit_stats_lock);
+}
 }
 
 bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc)
diff --git a/include/exec/tb-stats.h b/include/exec/tb-stats.h
index 0ccf025ae4..80314c50f9 100644
--- a/include/exec/tb-stats.h
+++ b/include/exec/tb-stats.h
@@ -91,6 +91,11 @@ struct TBStatistics {
  * this TBStats structure. Has to be reset on a tb_flush.
  */
 GPtrArray *tbs;
+
+/* Recover state from TB */
+uint64_t tb_restore_time;
+uint64_t tb_restore_count;
+
 };
 
 bool tb_stats_cmp(const void *ap, const void *bp);
@@ -102,6 +107,7 @@ void dump_jit_profile_info(TCGProfile *s, GString *buf);
 #define TB_NOTHING(1 << 0)
 #define TB_EXEC_STATS (1 << 1)
 #define TB_JIT_STATS  (1 << 2)
+#define TB_JIT_TIME   (1 << 3)
 
 void enable_collect_tb_stats(void);
 void disable_collect_tb_stats(void);
diff --git a/include/qemu/timer.h b/include/qemu/timer.h
index d86fc73a17..ad0da18a5f 100644
--- a/include/qemu/timer.h
+++ b/include/qemu/timer.h
@@ -989,7 +989,6 @@ static inline int64_t cpu_get_host_ticks(void)
 }
 #endif
 
-#ifdef CONFIG_PROFILER
 static inline int64_t profile_getclock(void)
 {
 return get_clock();
@@ -997,5 +996,3 @@ static inline int64_t profile_getclock(void)
 
 extern uint64_t dev_time;
 #endif
-
-#endif
-- 
2.25.1




[PATCH v11 08/14] accel: adding TB_JIT_TIME and full replacing CONFIG_PROFILER

2023-04-21 Thread Fei Wu
From: "Vanderson M. do Rosario" 

Replace all other CONFIG_PROFILER statistics and migrate them to the
TBStatistics system. However, TCGProfiler still exists and can
be used to store global statistics and times. All TB-related
statistics go to TBStatistics.

Signed-off-by: Vanderson M. do Rosario 
Message-Id: <20190829173437.5926-6-vanderson...@gmail.com>
[AJB: fix authorship]
Signed-off-by: Alex Bennée 
---
 accel/tcg/monitor.c   |  29 +++
 accel/tcg/tb-stats.c  |  78 +
 accel/tcg/tcg-accel-ops.c |  15 +++---
 include/tcg/tcg.h |   5 +-
 softmmu/runstate.c|   8 +--
 tcg/tcg.c | 100 ++
 6 files changed, 123 insertions(+), 112 deletions(-)

diff --git a/accel/tcg/monitor.c b/accel/tcg/monitor.c
index c9eec426ff..5f9dba56e3 100644
--- a/accel/tcg/monitor.c
+++ b/accel/tcg/monitor.c
@@ -14,6 +14,7 @@
 #include "sysemu/cpus.h"
 #include "sysemu/cpu-timers.h"
 #include "sysemu/tcg.h"
+#include "exec/tb-stats.h"
 #include "internal.h"
 
 
@@ -55,6 +56,11 @@ HumanReadableText *qmp_x_query_opcount(Error **errp)
 {
 g_autoptr(GString) buf = g_string_new("");
 
+if (!tb_stats_collection_enabled()) {
+error_report("TB information not being recorded.");
+return NULL;
+}
+
 if (!tcg_enabled()) {
 error_setg(errp,
"Opcode count information is only available with 
accel=tcg");
@@ -66,34 +72,15 @@ HumanReadableText *qmp_x_query_opcount(Error **errp)
 return human_readable_text_from_str(buf);
 }
 
-#ifdef CONFIG_PROFILER
-
 HumanReadableText *qmp_x_query_profile(Error **errp)
 {
 g_autoptr(GString) buf = g_string_new("");
-static int64_t last_cpu_exec_time;
-int64_t cpu_exec_time;
-int64_t delta;
-
-cpu_exec_time = tcg_cpu_exec_time();
-delta = cpu_exec_time - last_cpu_exec_time;
-
-g_string_append_printf(buf, "async time  %" PRId64 " (%0.3f)\n",
-   dev_time, dev_time / 
(double)NANOSECONDS_PER_SECOND);
-g_string_append_printf(buf, "qemu time   %" PRId64 " (%0.3f)\n",
-   delta, delta / (double)NANOSECONDS_PER_SECOND);
-last_cpu_exec_time = cpu_exec_time;
+
+dump_jit_exec_time_info(dev_time);
 dev_time = 0;
 
 return human_readable_text_from_str(buf);
 }
-#else
-HumanReadableText *qmp_x_query_profile(Error **errp)
-{
-error_setg(errp, "Internal profiler not compiled");
-return NULL;
-}
-#endif
 
 static void hmp_tcg_register(void)
 {
diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c
index 01adbac2a0..3cd7f6fc06 100644
--- a/accel/tcg/tb-stats.c
+++ b/accel/tcg/tb-stats.c
@@ -13,6 +13,7 @@
 #include "tcg/tcg.h"
 
 #include "qemu/qemu-print.h"
+#include "qemu/timer.h"
 
 #include "exec/tb-stats.h"
 #include "tb-context.h"
@@ -41,6 +42,13 @@ struct jit_profile_info {
 uint64_t host;
 uint64_t guest;
 uint64_t search_data;
+
+uint64_t interm_time;
+uint64_t code_time;
+uint64_t restore_count;
+uint64_t restore_time;
+uint64_t opt_time;
+uint64_t la_time;
 };
 
 /* accumulate the statistics from all TBs */
@@ -62,6 +70,35 @@ static void collect_jit_profile_info(void *p, uint32_t hash, 
void *userp)
 jpi->host += tbs->code.out_len;
 jpi->guest += tbs->code.in_len;
 jpi->search_data += tbs->code.search_out_len;
+
+jpi->interm_time += stat_per_translation(tbs, gen_times.ir);
+jpi->opt_time += stat_per_translation(tbs, gen_times.ir_opt);
+jpi->la_time += stat_per_translation(tbs, gen_times.la);
+jpi->code_time += stat_per_translation(tbs, gen_times.code);
+
+/*
+ * The restore time covers how long we have spent restoring state
+ * from a given TB (e.g. recovering from a fault). It is therefore
+ * not related to the number of translations we have done.
+ */
+jpi->restore_time += tbs->tb_restore_time;
+jpi->restore_count += tbs->tb_restore_count;
+}
+
+void dump_jit_exec_time_info(uint64_t dev_time)
+{
+static uint64_t last_cpu_exec_time;
+uint64_t cpu_exec_time;
+uint64_t delta;
+
+cpu_exec_time = tcg_cpu_exec_time();
+delta = cpu_exec_time - last_cpu_exec_time;
+
+qemu_printf("async time  %" PRId64 " (%0.3f)\n",
+   dev_time, dev_time / (double) NANOSECONDS_PER_SECOND);
+qemu_printf("qemu time   %" PRId64 " (%0.3f)\n",
+   delta, delta / (double) NANOSECONDS_PER_SECOND);
+last_cpu_exec_time = cpu_exec_time;
 }
 
 /* dump JIT statisticis using TCGProfile and TBStats */
@@ -88,6 +125,47 @@ void dump_jit_profile_info(TCGProfile *s, GString *buf)
 jpi->host / (double) jpi->translations);
 g_string_append_printf(buf, "avg search data/TB  %0.1f\n",
 jpi->search_data / (double) jpi->translations);
+
+uint64_t tot = jpi->interm_time + jpi->code_time;
+
+g_string_append_printf(buf, "JIT cycles  %" PRId64
+" (%0.3fs at 2.4 GHz)\n",

[PATCH v11 05/14] accel/tcg: move profiler dev_time to tb_stats

2023-04-21 Thread Fei Wu
From: Alex Bennée 

This shouldn't live in the monitor code anyway. While we are at it,
make it a uint64_t, as we won't be dealing with negative numbers.

Signed-off-by: Alex Bennée 
---
 accel/tcg/monitor.c  | 2 --
 accel/tcg/tb-stats.c | 2 ++
 include/qemu/timer.h | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/accel/tcg/monitor.c b/accel/tcg/monitor.c
index 1450e160e9..c9eec426ff 100644
--- a/accel/tcg/monitor.c
+++ b/accel/tcg/monitor.c
@@ -68,8 +68,6 @@ HumanReadableText *qmp_x_query_opcount(Error **errp)
 
 #ifdef CONFIG_PROFILER
 
-int64_t dev_time;
-
 HumanReadableText *qmp_x_query_profile(Error **errp)
 {
 g_autoptr(GString) buf = g_string_new("");
diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c
index 434b235edb..a2438a1f51 100644
--- a/accel/tcg/tb-stats.c
+++ b/accel/tcg/tb-stats.c
@@ -28,6 +28,8 @@ enum TBStatsStatus {
 static enum TBStatsStatus tcg_collect_tb_stats;
 static uint32_t default_tbstats_flag;
 
+uint64_t dev_time;
+
 struct jit_profile_info {
 uint64_t translations;
 uint64_t aborted;
diff --git a/include/qemu/timer.h b/include/qemu/timer.h
index ee071e07d1..d86fc73a17 100644
--- a/include/qemu/timer.h
+++ b/include/qemu/timer.h
@@ -995,7 +995,7 @@ static inline int64_t profile_getclock(void)
 return get_clock();
 }
 
-extern int64_t dev_time;
+extern uint64_t dev_time;
 #endif
 
 #endif
-- 
2.25.1




[PATCH v11 03/14] accel: collecting JIT statistics

2023-04-21 Thread Fei Wu
From: "Vanderson M. do Rosario" 

If a TB has a TBS (TBStatistics) with the TB_JIT_STATS enabled then we
collect statistics of its translation processes and code translation.

To help with collection we include the TCGProfile structure
unconditionally. It will have further alterations in future commits.

Collecting the number of host instructions is not simple, as it would
require modifying several target source files. So, for now, we only
collect the size of the generated host code.

Signed-off-by: Vanderson M. do Rosario 
Message-Id: <20190829173437.5926-4-vanderson...@gmail.com>
Signed-off-by: Alex Bennée 
---
 accel/tcg/translate-all.c | 29 +++--
 accel/tcg/translator.c|  3 +++
 include/exec/tb-stats.h   | 24 
 include/tcg/tcg.h | 22 --
 tcg/tcg.c |  9 +++--
 5 files changed, 81 insertions(+), 6 deletions(-)

diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index afc89d5692..1cd051ca7c 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -309,6 +309,8 @@ static TBStatistics *tb_get_stats(tb_page_addr_t phys_pc, 
target_ulong pc,
 new_stats->cs_base = cs_base;
 new_stats->flags = flags;
 new_stats->stats_enabled = get_default_tbstats_flag();
+new_stats->tbs = g_ptr_array_sized_new(4);
+qemu_mutex_init(&new_stats->jit_stats_lock);
 
 /*
  * All initialisation must be complete before we insert into qht
@@ -322,6 +324,7 @@ static TBStatistics *tb_get_stats(tb_page_addr_t phys_pc, 
target_ulong pc,
  * If there is already a TBStatistic for this TB from a previous flush
  * then just make the new TB point to the older TBStatistic
  */
+g_ptr_array_free(new_stats->tbs, true);
 g_free(new_stats);
 return existing_stats;
 } else {
@@ -340,9 +343,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
 tb_page_addr_t phys_pc;
 tcg_insn_unit *gen_code_buf;
 int gen_code_size, search_size, max_insns;
-#ifdef CONFIG_PROFILER
 TCGProfile *prof = &tcg_ctx->prof;
-#endif
 int64_t ti;
 void *host_pc;
 
@@ -571,6 +572,30 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
 return tb;
 }
 
+/*
+ * Collect JIT stats when enabled. We batch them all up here to
+ * avoid spamming the cache with atomic accesses
+ */
+if (tb_stats_enabled(tb, TB_JIT_STATS)) {
+TBStatistics *ts = tb->tb_stats;
+qemu_mutex_lock(&ts->jit_stats_lock);
+
+ts->code.num_guest_inst += prof->translation.nb_guest_insns;
+ts->code.num_tcg_ops += prof->translation.nb_ops_pre_opt;
+ts->code.num_tcg_ops_opt += tcg_ctx->nb_ops;
+ts->code.spills += prof->translation.nb_spills;
+ts->code.out_len += tb->tc.size;
+
+ts->translations.total++;
+if (tb_page_addr1(tb) != -1) {
+ts->translations.spanning++;
+}
+
+g_ptr_array_add(ts->tbs, tb);
+
+qemu_mutex_unlock(&ts->jit_stats_lock);
+}
+
 /*
  * Insert TB into the corresponding region tree before publishing it
  * through QHT. Otherwise rewinding happened in the TB might fail to
diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c
index 165072c8c3..40a56962b1 100644
--- a/accel/tcg/translator.c
+++ b/accel/tcg/translator.c
@@ -132,6 +132,9 @@ void translator_loop(CPUState *cpu, TranslationBlock *tb, 
int *max_insns,
 tb->size = db->pc_next - db->pc_first;
 tb->icount = db->num_insns;
 
+/* Save number of guest instructions for TB_JIT_STATS */
+tcg_ctx->prof.translation.nb_guest_insns = db->num_insns;
+
 #ifdef DEBUG_DISAS
 if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)
 && qemu_log_in_addr_range(db->pc_first)) {
diff --git a/include/exec/tb-stats.h b/include/exec/tb-stats.h
index 27a233b7f8..3506c8d4f7 100644
--- a/include/exec/tb-stats.h
+++ b/include/exec/tb-stats.h
@@ -59,6 +59,29 @@ struct TBStatistics {
 unsigned long atomic;
 } executions;
 
+/* JIT Stats - protected by lock */
+QemuMutex jit_stats_lock;
+
+/* Sum of all operations for all translations */
+struct {
+unsigned num_guest_inst;
+unsigned num_tcg_ops;
+unsigned num_tcg_ops_opt;
+unsigned spills;
+unsigned out_len;
+} code;
+
+struct {
+unsigned long total;
+unsigned long uncached;
+unsigned long spanning;
+} translations;
+
+/*
+ * All persistent (cached) TranslationBlocks using
+ * this TBStats structure. Has to be reset on a tb_flush.
+ */
+GPtrArray *tbs;
 };
 
 bool tb_stats_cmp(const void *ap, const void *bp);
@@ -67,6 +90,7 @@ void init_tb_stats_htable(void);
 
 #define TB_NOTHING(1 << 0)
 #define TB_EXEC_STATS (1 << 1)
+#define TB_JIT_STATS  (1 << 2)
 
 void enable_collect_tb_stats(void);
 void disable_collect_tb_stats(void);
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
index 5cfa

[PATCH v11 01/14] accel/tcg: introduce TBStatistics structure

2023-04-21 Thread Fei Wu
From: "Vanderson M. do Rosario" 

To store statistics for each TB, we created a TBStatistics structure
which is linked with the TBs. TBStatistics can stay alive after
tb_flush and be relinked to a regenerated TB. So the statistics can
be accumulated even through flushes.

The goal is to have all present and future qemu/tcg statistics and
meta-data stored in this new structure.

Reviewed-by: Alex Bennée 
Signed-off-by: Vanderson M. do Rosario 
Message-Id: <20190829173437.5926-2-vanderson...@gmail.com>
[AJB: fix git author, review comments]
Signed-off-by: Alex Bennée 
---
 accel/tcg/meson.build |  1 +
 accel/tcg/tb-context.h|  1 +
 accel/tcg/tb-hash.h   |  7 +
 accel/tcg/tb-maint.c  | 19 
 accel/tcg/tb-stats.c  | 58 +
 accel/tcg/translate-all.c | 43 +++
 include/exec/exec-all.h   |  3 ++
 include/exec/tb-stats.h   | 61 +++
 8 files changed, 193 insertions(+)
 create mode 100644 accel/tcg/tb-stats.c
 create mode 100644 include/exec/tb-stats.h

diff --git a/accel/tcg/meson.build b/accel/tcg/meson.build
index aeb20a6ef0..9263bdde11 100644
--- a/accel/tcg/meson.build
+++ b/accel/tcg/meson.build
@@ -4,6 +4,7 @@ tcg_ss.add(files(
   'cpu-exec-common.c',
   'cpu-exec.c',
   'tb-maint.c',
+  'tb-stats.c',
   'tcg-runtime-gvec.c',
   'tcg-runtime.c',
   'translate-all.c',
diff --git a/accel/tcg/tb-context.h b/accel/tcg/tb-context.h
index cac62d9749..d7910d586b 100644
--- a/accel/tcg/tb-context.h
+++ b/accel/tcg/tb-context.h
@@ -35,6 +35,7 @@ struct TBContext {
 /* statistics */
 unsigned tb_flush_count;
 unsigned tb_phys_invalidate_count;
+struct qht tb_stats;
 };
 
 extern TBContext tb_ctx;
diff --git a/accel/tcg/tb-hash.h b/accel/tcg/tb-hash.h
index 83dc610e4c..87d657a1c6 100644
--- a/accel/tcg/tb-hash.h
+++ b/accel/tcg/tb-hash.h
@@ -67,4 +67,11 @@ uint32_t tb_hash_func(tb_page_addr_t phys_pc, target_ulong 
pc, uint32_t flags,
 return qemu_xxhash7(phys_pc, pc, flags, cf_mask, trace_vcpu_dstate);
 }
 
+static inline
+uint32_t tb_stats_hash_func(tb_page_addr_t phys_pc, target_ulong pc,
+uint32_t flags)
+{
+return qemu_xxhash5(phys_pc, pc, flags);
+}
+
 #endif
diff --git a/accel/tcg/tb-maint.c b/accel/tcg/tb-maint.c
index 7246c1c46b..ba1635aa4b 100644
--- a/accel/tcg/tb-maint.c
+++ b/accel/tcg/tb-maint.c
@@ -23,6 +23,7 @@
 #include "exec/log.h"
 #include "exec/exec-all.h"
 #include "exec/tb-flush.h"
+#include "exec/tb-stats.h"
 #include "exec/translate-all.h"
 #include "sysemu/tcg.h"
 #include "tcg/tcg.h"
@@ -40,6 +41,23 @@
 #define TB_FOR_EACH_JMP(head_tb, tb, n) \
 TB_FOR_EACH_TAGGED((head_tb)->jmp_list_head, tb, n, jmp_list_next)
 
+/*
+ * This is more or less the same comparison as tb_cmp(), but the
+ * data persists over tb_flush. We also aggregate the various
+ * variations of cflags under one record and ignore the details of
+ * page overlap (although we can count it).
+ */
+bool tb_stats_cmp(const void *ap, const void *bp)
+{
+const TBStatistics *a = ap;
+const TBStatistics *b = bp;
+
+return a->phys_pc == b->phys_pc &&
+a->pc == b->pc &&
+a->cs_base == b->cs_base &&
+a->flags == b->flags;
+}
+
 static bool tb_cmp(const void *ap, const void *bp)
 {
 const TranslationBlock *a = ap;
@@ -59,6 +77,7 @@ void tb_htable_init(void)
 unsigned int mode = QHT_MODE_AUTO_RESIZE;
 
 qht_init(&tb_ctx.htable, tb_cmp, CODE_GEN_HTABLE_SIZE, mode);
+init_tb_stats_htable();
 }
 
 typedef struct PageDesc PageDesc;
diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c
new file mode 100644
index 00..f78a21f522
--- /dev/null
+++ b/accel/tcg/tb-stats.c
@@ -0,0 +1,58 @@
+/*
+ * QEMU System Emulator, Code Quality Monitor System
+ *
+ * Copyright (c) 2019 Vanderson M. do Rosario 
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+
+#include "disas/disas.h"
+
+#include "exec/tb-stats.h"
+#include "tb-context.h"
+
+/* TBStatistic collection controls */
+enum TBStatsStatus {
+TB_STATS_DISABLED = 0,
+TB_STATS_RUNNING,
+TB_STATS_PAUSED,
+TB_STATS_STOPPED
+};
+
+static enum TBStatsStatus tcg_collect_tb_stats;
+
+void init_tb_stats_htable(void)
+{
+if (!tb_ctx.tb_stats.map && tb_stats_collection_enabled()) {
+qht_init(&tb_ctx.tb_stats, tb_stats_cmp,
+CODE_GEN_HTABLE_SIZE, QHT_MODE_AUTO_RESIZE);
+}
+}
+
+void enable_collect_tb_stats(void)
+{
+tcg_collect_tb_stats = TB_STATS_RUNNING;
+init_tb_stats_htable();
+}
+
+void disable_collect_tb_stats(void)
+{
+tcg_collect_tb_stats = TB_STATS_PAUSED;
+}
+
+void pause_collect_tb_stats(void)
+{
+tcg_collect_tb_stats = TB_STATS_STOPPED;
+}
+
+bool tb_stats_collection_enabled(void)
+{
+return tcg_collect_tb_stats == TB_STATS_RUNNING;
+}
+
+bool tb_stats_collection_paused(void)
+{
+return tcg_collect_tb_stats == TB_STATS_PAUSED;

[PATCH v11 02/14] accel: collecting TB execution count

2023-04-21 Thread Fei Wu
From: "Vanderson M. do Rosario" 

If a TB has a TBS (TBStatistics) with the TB_EXEC_STATS
enabled, then we instrument the start code of this TB
to atomically count the number of times it is executed.
We count both the number of "normal" executions and atomic
executions of a TB.

The execution count of the TB is stored in its respective
TBS.

All TBStatistics are created by default with the flags from
default_tbstats_flag.

Signed-off-by: Vanderson M. do Rosario 
Message-Id: <20190829173437.5926-3-vanderson...@gmail.com>
[AJB: Fix author]
Signed-off-by: Alex Bennée 
---
 accel/tcg/cpu-exec.c  |  6 ++
 accel/tcg/tb-stats.c  |  6 ++
 accel/tcg/tcg-runtime.c   |  8 
 accel/tcg/tcg-runtime.h   |  2 ++
 accel/tcg/translate-all.c |  7 +--
 accel/tcg/translator.c| 10 ++
 include/exec/gen-icount.h |  1 +
 include/exec/tb-stats.h   | 18 ++
 8 files changed, 56 insertions(+), 2 deletions(-)

diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
index c815f2dbfd..d89f9fe493 100644
--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -25,6 +25,7 @@
 #include "trace.h"
 #include "disas/disas.h"
 #include "exec/exec-all.h"
+#include "exec/tb-stats.h"
 #include "tcg/tcg.h"
 #include "qemu/atomic.h"
 #include "qemu/rcu.h"
@@ -564,7 +565,12 @@ void cpu_exec_step_atomic(CPUState *cpu)
 mmap_unlock();
 }
 
+if (tb_stats_enabled(tb, TB_EXEC_STATS)) {
+tb->tb_stats->executions.atomic++;
+}
+
 cpu_exec_enter(cpu);
+
 /* execute the generated code */
 trace_exec_tb(tb, pc);
 cpu_tb_exec(cpu, tb, &tb_exit);
diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c
index f78a21f522..b1d4341b6f 100644
--- a/accel/tcg/tb-stats.c
+++ b/accel/tcg/tb-stats.c
@@ -22,6 +22,7 @@ enum TBStatsStatus {
 };
 
 static enum TBStatsStatus tcg_collect_tb_stats;
+static uint32_t default_tbstats_flag;
 
 void init_tb_stats_htable(void)
 {
@@ -56,3 +57,8 @@ bool tb_stats_collection_paused(void)
 {
 return tcg_collect_tb_stats == TB_STATS_PAUSED;
 }
+
+uint32_t get_default_tbstats_flag(void)
+{
+return default_tbstats_flag;
+}
diff --git a/accel/tcg/tcg-runtime.c b/accel/tcg/tcg-runtime.c
index e4e030043f..44a349b334 100644
--- a/accel/tcg/tcg-runtime.c
+++ b/accel/tcg/tcg-runtime.c
@@ -27,6 +27,7 @@
 #include "exec/helper-proto.h"
 #include "exec/cpu_ldst.h"
 #include "exec/exec-all.h"
+#include "exec/tb-stats.h"
 #include "disas/disas.h"
 #include "exec/log.h"
 #include "tcg/tcg.h"
@@ -148,3 +149,10 @@ void HELPER(exit_atomic)(CPUArchState *env)
 {
 cpu_loop_exit_atomic(env_cpu(env), GETPC());
 }
+
+void HELPER(inc_exec_freq)(void *ptr)
+{
+TBStatistics *stats = (TBStatistics *) ptr;
+tcg_debug_assert(stats);
+qatomic_inc(&stats->executions.normal);
+}
diff --git a/accel/tcg/tcg-runtime.h b/accel/tcg/tcg-runtime.h
index e141a6ab24..66954aa210 100644
--- a/accel/tcg/tcg-runtime.h
+++ b/accel/tcg/tcg-runtime.h
@@ -39,6 +39,8 @@ DEF_HELPER_FLAGS_1(exit_atomic, TCG_CALL_NO_WG, noreturn, env)
 DEF_HELPER_FLAGS_3(memset, TCG_CALL_NO_RWG, ptr, ptr, int, ptr)
 #endif /* IN_HELPER_PROTO */
 
+DEF_HELPER_FLAGS_1(inc_exec_freq, TCG_CALL_NO_RWG, void, ptr)
+
 DEF_HELPER_FLAGS_5(atomic_cmpxchgb, TCG_CALL_NO_WG,
i32, env, tl, i32, i32, i32)
 DEF_HELPER_FLAGS_5(atomic_cmpxchgw_be, TCG_CALL_NO_WG,
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index 0fcde2e84a..afc89d5692 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -308,6 +308,7 @@ static TBStatistics *tb_get_stats(tb_page_addr_t phys_pc, 
target_ulong pc,
 new_stats->pc = pc;
 new_stats->cs_base = cs_base;
 new_stats->flags = flags;
+new_stats->stats_enabled = get_default_tbstats_flag();
 
 /*
  * All initialisation must be complete before we insert into qht
@@ -397,9 +398,11 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
 /*
  * We want to fetch the stats structure before we start code
  * generation so we can count interesting things about this
- * generation.
+ * generation. If dfilter is in effect we will only collect stats
+ * for the specified range.
  */
-if (tb_stats_collection_enabled()) {
+if (tb_stats_collection_enabled() &&
+qemu_log_in_addr_range(tb->pc)) {
 tb->tb_stats = tb_get_stats(phys_pc, pc, cs_base, flags);
 } else {
 tb->tb_stats = NULL;
diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c
index 7bda43ff61..165072c8c3 100644
--- a/accel/tcg/translator.c
+++ b/accel/tcg/translator.c
@@ -18,6 +18,15 @@
 #include "exec/plugin-gen.h"
 #include "exec/replay-core.h"
 
+static inline void gen_tb_exec_count(TranslationBlock *tb)
+{
+if (tb_stats_enabled(tb, TB_EXEC_STATS)) {
+TCGv_ptr ptr = tcg_temp_new_ptr();
+tcg_gen_movi_ptr(ptr, (intptr_t)tb->tb_stats);
+gen_helper_inc_exec_freq(ptr);
+}
+}
+
 bool translator_use_goto_tb(DisasContextBase 

[PATCH v11 00/14] TCG code quality tracking

2023-04-21 Thread Fei Wu
This patch series was originally written by Vanderson and Alex in 2019. I
(Fei Wu) rebased it onto the latest upstream from:
https://github.com/stsquad/qemu/tree/tcg/tbstats-and-perf-v10
and am sending it out for review at Alex's request. I will continue to
address any future review comments here. As a long time has passed and
there were many conflicts during the rebase, any problems introduced in
the process are my fault.

Alex in his v10 series has addressed part of v9 comments:
   https://www.mail-archive.com/qemu-devel@nongnu.org/msg650258.html
and I tried to resolve some of the others. It looks to me as though not
all of the v9 comments reached full agreement, and I did not take part in
that discussion at the time, so please comment on this series and I will
fix things up if necessary.


Alex Bennée (5):
  accel/tcg: move profiler dev_time to tb_stats
  accel/tcg: convert profiling of restore operations to TBStats
  accel/tcg: convert profiling of code generation to TBStats
  tb-stats: reset the tracked TBs on a tb_flush
  configure: remove the final bits of --profiler support

Vanderson M. do Rosario (9):
  accel/tcg: introduce TBStatistics structure
  accel: collecting TB execution count
  accel: collecting JIT statistics
  accel: replacing part of CONFIG_PROFILER with TBStats
  accel: adding TB_JIT_TIME and full replacing CONFIG_PROFILER
  debug: add -d tb_stats to control TBStatistics collection:
  monitor: adding tb_stats hmp command
  Adding info [tb-list|tb] commands to HMP (WIP)
  tb-stats: dump hot TBs at the end of the execution

 accel/tcg/cpu-exec.c  |   6 +
 accel/tcg/meson.build |   1 +
 accel/tcg/monitor.c   | 133 +--
 accel/tcg/tb-context.h|   1 +
 accel/tcg/tb-hash.h   |   7 +
 accel/tcg/tb-maint.c  |  20 +
 accel/tcg/tb-stats.c  | 689 ++
 accel/tcg/tcg-accel-ops.c |  15 +-
 accel/tcg/tcg-runtime.c   |   8 +
 accel/tcg/tcg-runtime.h   |   2 +
 accel/tcg/translate-all.c | 147 ++--
 accel/tcg/translator.c|  13 +
 disas.c   |  31 +-
 hmp-commands-info.hx  |  16 +
 hmp-commands.hx   |  16 +
 include/exec/exec-all.h   |   3 +
 include/exec/gen-icount.h |   2 +
 include/exec/tb-stats-dump.h  |  21 ++
 include/exec/tb-stats-flags.h |  32 ++
 include/exec/tb-stats.h   | 165 
 include/monitor/hmp.h |   3 +
 include/qemu/log-for-trace.h  |   6 +-
 include/qemu/log.h|   3 +
 include/qemu/timer.h  |   5 +-
 include/tcg/tcg.h |  50 ++-
 linux-user/exit.c |   2 +
 meson.build   |   2 -
 meson_options.txt |   2 -
 scripts/meson-buildoptions.sh |   3 -
 softmmu/runstate.c|  10 +-
 stubs/meson.build |   1 +
 stubs/tb-stats.c  |  32 ++
 tcg/tcg.c | 224 +++
 util/log.c| 103 -
 34 files changed, 1482 insertions(+), 292 deletions(-)
 create mode 100644 accel/tcg/tb-stats.c
 create mode 100644 include/exec/tb-stats-dump.h
 create mode 100644 include/exec/tb-stats-flags.h
 create mode 100644 include/exec/tb-stats.h
 create mode 100644 stubs/tb-stats.c

-- 
2.25.1




[PATCH 1/2] accel/tcg/plugin: export host insn size

2023-04-05 Thread Fei Wu
The translation ratio of host to guest instruction count is one of the
key performance factors of binary translation. TCG does not currently
collect the host instruction count; it collects the host instruction size
instead. The two are not the same thing, since instruction size might not
be fixed, but instruction size is still a valid estimate.

Signed-off-by: Fei Wu 
---
 accel/tcg/plugin-gen.c   | 1 +
 include/qemu/plugin.h| 2 ++
 include/qemu/qemu-plugin.h   | 8 
 plugins/api.c| 5 +
 plugins/qemu-plugins.symbols | 1 +
 5 files changed, 17 insertions(+)

diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c
index 5efb8db258..4a3ca8fa2f 100644
--- a/accel/tcg/plugin-gen.c
+++ b/accel/tcg/plugin-gen.c
@@ -881,6 +881,7 @@ bool plugin_gen_tb_start(CPUState *cpu, const 
DisasContextBase *db,
 ptb->haddr2 = NULL;
 ptb->mem_only = mem_only;
 ptb->mem_helper = false;
+ptb->host_insn_size = &db->tb->tc.size;
 
 plugin_gen_empty_callback(PLUGIN_GEN_FROM_TB);
 }
diff --git a/include/qemu/plugin.h b/include/qemu/plugin.h
index bc0781cab8..b38fd139e1 100644
--- a/include/qemu/plugin.h
+++ b/include/qemu/plugin.h
@@ -151,6 +151,8 @@ struct qemu_plugin_tb {
 /* if set, the TB calls helpers that might access guest memory */
 bool mem_helper;
 
+uint64_t *host_insn_size;
+
 GArray *cbs[PLUGIN_N_CB_SUBTYPES];
 };
 
diff --git a/include/qemu/qemu-plugin.h b/include/qemu/qemu-plugin.h
index 50a9957279..2397574a21 100644
--- a/include/qemu/qemu-plugin.h
+++ b/include/qemu/qemu-plugin.h
@@ -336,6 +336,14 @@ void qemu_plugin_register_vcpu_insn_exec_inline(struct 
qemu_plugin_insn *insn,
  */
 size_t qemu_plugin_tb_n_insns(const struct qemu_plugin_tb *tb);
 
+/**
+ * qemu_plugin_tb_host_insn_size() - query helper for host insns size in TB
+ * @tb: opaque handle to TB passed to callback
+ *
+ * Returns: address of host insns size of this block
+ */
+void *qemu_plugin_tb_host_insn_size(const struct qemu_plugin_tb *tb);
+
 /**
  * qemu_plugin_tb_vaddr() - query helper for vaddr of TB start
  * @tb: opaque handle to TB passed to callback
diff --git a/plugins/api.c b/plugins/api.c
index 2078b16edb..0d70cb1f0f 100644
--- a/plugins/api.c
+++ b/plugins/api.c
@@ -188,6 +188,11 @@ size_t qemu_plugin_tb_n_insns(const struct qemu_plugin_tb 
*tb)
 return tb->n;
 }
 
+void *qemu_plugin_tb_host_insn_size(const struct qemu_plugin_tb *tb)
+{
+return tb->host_insn_size;
+}
+
 uint64_t qemu_plugin_tb_vaddr(const struct qemu_plugin_tb *tb)
 {
 return tb->vaddr;
diff --git a/plugins/qemu-plugins.symbols b/plugins/qemu-plugins.symbols
index 71f6c90549..3e92c3b8ba 100644
--- a/plugins/qemu-plugins.symbols
+++ b/plugins/qemu-plugins.symbols
@@ -39,6 +39,7 @@
   qemu_plugin_start_code;
   qemu_plugin_tb_get_insn;
   qemu_plugin_tb_n_insns;
+  qemu_plugin_tb_host_insn_size;
   qemu_plugin_tb_vaddr;
   qemu_plugin_uninstall;
   qemu_plugin_vcpu_for_each;
-- 
2.25.1




[PATCH 2/2] plugins/hotblocks: add host insn size

2023-04-05 Thread Fei Wu
It's only valid when inline=false; otherwise it defaults to 0.

Signed-off-by: Fei Wu 
---
 contrib/plugins/hotblocks.c | 24 +++-
 1 file changed, 15 insertions(+), 9 deletions(-)

diff --git a/contrib/plugins/hotblocks.c b/contrib/plugins/hotblocks.c
index 062200a7a4..c9716da7fe 100644
--- a/contrib/plugins/hotblocks.c
+++ b/contrib/plugins/hotblocks.c
@@ -37,6 +37,8 @@ typedef struct {
 uint64_t exec_count;
 int  trans_count;
 unsigned long insns;
+void*p_host_insn_size;
+uint64_t host_insn_size;
 } ExecCount;
 
 static gint cmp_exec_count(gconstpointer a, gconstpointer b)
@@ -59,13 +61,17 @@ static void plugin_exit(qemu_plugin_id_t id, void *p)
 it = g_list_sort(counts, cmp_exec_count);
 
 if (it) {
-g_string_append_printf(report, "pc, tcount, icount, ecount\n");
+g_string_append_printf(report,
+   "host isize is only valid when inline=false\n"
+   "pc, tcount, icount, ecount, host isize\n");
 
 for (i = 0; i < limit && it->next; i++, it = it->next) {
 ExecCount *rec = (ExecCount *) it->data;
-g_string_append_printf(report, "0x%016"PRIx64", %d, %ld, 
%"PRId64"\n",
+g_string_append_printf(report, "0x%016"PRIx64", %d, %ld, %"PRId64
+   ", %"PRIu64"\n",
rec->start_addr, rec->trans_count,
-   rec->insns, rec->exec_count);
+   rec->insns, rec->exec_count,
+   rec->host_insn_size);
 }
 
 g_list_free(it);
@@ -82,14 +88,13 @@ static void plugin_init(void)
 
 static void vcpu_tb_exec(unsigned int cpu_index, void *udata)
 {
-ExecCount *cnt;
-uint64_t hash = (uint64_t) udata;
+ExecCount *cnt = (ExecCount *) udata;
 
 g_mutex_lock(&lock);
-cnt = (ExecCount *) g_hash_table_lookup(hotblocks, (gconstpointer) hash);
-/* should always succeed */
-g_assert(cnt);
 cnt->exec_count++;
+if (cnt->host_insn_size == 0) {
+cnt->host_insn_size = *((uint64_t *)cnt->p_host_insn_size);
+}
 g_mutex_unlock(&lock);
 }
 
@@ -114,6 +119,7 @@ static void vcpu_tb_trans(qemu_plugin_id_t id, struct 
qemu_plugin_tb *tb)
 cnt->start_addr = pc;
 cnt->trans_count = 1;
 cnt->insns = insns;
+cnt->p_host_insn_size = qemu_plugin_tb_host_insn_size(tb);
 g_hash_table_insert(hotblocks, (gpointer) hash, (gpointer) cnt);
 }
 
@@ -125,7 +131,7 @@ static void vcpu_tb_trans(qemu_plugin_id_t id, struct 
qemu_plugin_tb *tb)
 } else {
 qemu_plugin_register_vcpu_tb_exec_cb(tb, vcpu_tb_exec,
  QEMU_PLUGIN_CB_NO_REGS,
- (void *)hash);
+ (void *)cnt);
 }
 }
 
-- 
2.25.1




[PATCH 0/2] accel/tcg/plugin: host insn size for plugin

2023-04-05 Thread Fei Wu
The translation ratio of host to guest instruction count is one of the
key performance factors of binary translation. It's better to have this
kind of information exported to plugins for analysis. As the host insn
size is not determined at guest->IR time, its address is recorded for
later dereference, and plugin inline mode is not supported.

Here is an example of the output with modified plugin hotblocks:

pc, tcount, icount, ecount, host isize
0x8041ad6c, 1, 9, 130450345, 456
0x800084f0, 1, 9, 88273714, 264
0x800084e4, 1, 3, 88264146, 135
0x8041abd0, 1, 1, 46032689, 123
0x8041ab3c, 1, 1, 46021650, 123
0x8045ffe8, 1, 5, 40927215, 328

Fei Wu (2):
  accel/tcg/plugin: export host insn size
  plugins/hotblocks: add host insn size

 accel/tcg/plugin-gen.c   |  1 +
 contrib/plugins/hotblocks.c  | 24 +++-
 include/qemu/plugin.h|  2 ++
 include/qemu/qemu-plugin.h   |  8 
 plugins/api.c|  5 +
 plugins/qemu-plugins.symbols |  1 +
 6 files changed, 32 insertions(+), 9 deletions(-)

-- 
2.25.1




[PATCH v5 2/2] target/riscv: reduce overhead of MSTATUS_SUM change

2023-03-24 Thread Fei Wu
The kernel needs to access user-mode memory, e.g. during syscalls. The
window is usually opened up for a very limited time through MSTATUS.SUM, and
the overhead is too high if tlb_flush() gets called for every SUM change.

This patch creates a separate MMU index for S+SUM, so that it's not
necessary to flush tlb anymore when SUM changes. This is similar to how
ARM handles Privileged Access Never (PAN).

Result of 'pipe 10' from unixbench boosts from 223656 to 1705006. Many
other syscalls benefit a lot from this too.

Signed-off-by: Fei Wu 
---
 target/riscv/cpu.h  |  1 -
 target/riscv/cpu_helper.c   | 17 +++--
 target/riscv/csr.c  |  3 +--
 target/riscv/insn_trans/trans_rvh.c.inc |  4 ++--
 target/riscv/internals.h| 14 ++
 target/riscv/op_helper.c|  5 +++--
 6 files changed, 35 insertions(+), 9 deletions(-)

diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index ac3eb9abca..09d3b0a083 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -623,7 +623,6 @@ G_NORETURN void riscv_raise_exception(CPURISCVState *env,
 target_ulong riscv_cpu_get_fflags(CPURISCVState *env);
 void riscv_cpu_set_fflags(CPURISCVState *env, target_ulong);
 
-#define TB_FLAGS_PRIV_HYP_ACCESS_MASK   (1 << 2)
 #define TB_FLAGS_MSTATUS_FS MSTATUS_FS
 #define TB_FLAGS_MSTATUS_VS MSTATUS_VS
 
diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
index 4e275b904a..4a6097f133 100644
--- a/target/riscv/cpu_helper.c
+++ b/target/riscv/cpu_helper.c
@@ -21,6 +21,7 @@
 #include "qemu/log.h"
 #include "qemu/main-loop.h"
 #include "cpu.h"
+#include "internals.h"
 #include "pmu.h"
 #include "exec/exec-all.h"
 #include "instmap.h"
@@ -36,7 +37,19 @@ int riscv_cpu_mmu_index(CPURISCVState *env, bool ifetch)
 #ifdef CONFIG_USER_ONLY
 return 0;
 #else
-return env->priv;
+if (ifetch) {
+return env->priv;
+}
+
+/* All priv -> mmu_idx mappings are here */
+int mode = env->priv;
+if (mode == PRV_M && get_field(env->mstatus, MSTATUS_MPRV)) {
+mode = get_field(env->mstatus, MSTATUS_MPP);
+}
+if (mode == PRV_S && get_field(env->mstatus, MSTATUS_SUM)) {
+return MMUIdx_S_SUM;
+}
+return mode;
 #endif
 }
 
@@ -598,7 +611,7 @@ void riscv_cpu_set_virt_enabled(CPURISCVState *env, bool 
enable)
 
 bool riscv_cpu_two_stage_lookup(int mmu_idx)
 {
-return mmu_idx & TB_FLAGS_PRIV_HYP_ACCESS_MASK;
+return mmu_idx & MMU_HYP_ACCESS_BIT;
 }
 
 int riscv_cpu_claim_interrupts(RISCVCPU *cpu, uint64_t interrupts)
diff --git a/target/riscv/csr.c b/target/riscv/csr.c
index d522efc0b6..f74e40e66d 100644
--- a/target/riscv/csr.c
+++ b/target/riscv/csr.c
@@ -1246,8 +1246,7 @@ static RISCVException write_mstatus(CPURISCVState *env, 
int csrno,
 RISCVMXL xl = riscv_cpu_mxl(env);
 
 /* flush tlb on mstatus fields that affect VM */
-if ((val ^ mstatus) & (MSTATUS_MXR | MSTATUS_MPP | MSTATUS_MPV |
-MSTATUS_MPRV | MSTATUS_SUM)) {
+if ((val ^ mstatus) & (MSTATUS_MXR | MSTATUS_MPV)) {
 tlb_flush(env_cpu(env));
 }
 mask = MSTATUS_SIE | MSTATUS_SPIE | MSTATUS_MIE | MSTATUS_MPIE |
diff --git a/target/riscv/insn_trans/trans_rvh.c.inc 
b/target/riscv/insn_trans/trans_rvh.c.inc
index 9248b48c36..15842f4282 100644
--- a/target/riscv/insn_trans/trans_rvh.c.inc
+++ b/target/riscv/insn_trans/trans_rvh.c.inc
@@ -40,7 +40,7 @@ static bool do_hlv(DisasContext *ctx, arg_r2 *a, MemOp mop)
 if (check_access(ctx)) {
 TCGv dest = dest_gpr(ctx, a->rd);
 TCGv addr = get_gpr(ctx, a->rs1, EXT_NONE);
-int mem_idx = ctx->mem_idx | TB_FLAGS_PRIV_HYP_ACCESS_MASK;
+int mem_idx = ctx->mem_idx | MMU_HYP_ACCESS_BIT;
 tcg_gen_qemu_ld_tl(dest, addr, mem_idx, mop);
 gen_set_gpr(ctx, a->rd, dest);
 }
@@ -87,7 +87,7 @@ static bool do_hsv(DisasContext *ctx, arg_r2_s *a, MemOp mop)
 if (check_access(ctx)) {
 TCGv addr = get_gpr(ctx, a->rs1, EXT_NONE);
 TCGv data = get_gpr(ctx, a->rs2, EXT_NONE);
-int mem_idx = ctx->mem_idx | TB_FLAGS_PRIV_HYP_ACCESS_MASK;
+int mem_idx = ctx->mem_idx | MMU_HYP_ACCESS_BIT;
 tcg_gen_qemu_st_tl(data, addr, mem_idx, mop);
 }
 return true;
diff --git a/target/riscv/internals.h b/target/riscv/internals.h
index 5620fbffb6..b55152a7dc 100644
--- a/target/riscv/internals.h
+++ b/target/riscv/internals.h
@@ -21,6 +21,20 @@
 
 #include "hw/registerfields.h"
 
+/*
+ * The current MMU Modes are:
+ *  - U 0b000
+ *  - S 0b001
+ *  - S+SUM 0b010
+ *  - M 0b011
+ *  - HLV/HLVX/HSV adds 0b100
+ */
+#define MMUIdx_U0
+#define MMUIdx_S1
+#define MMUIdx_S_SUM2
+#define MMUIdx_M3
+#define MMU

[PATCH v5 0/2] target/riscv: reduce MSTATUS_SUM overhead

2023-03-24 Thread Fei Wu
v4 -> v5:
* add priv to tb_flags

v3 -> v4:
* separate priv from mmu_idx
* use index 2 for S+SUM mmu_idx
* no tlb_flush for MPRV / MPP changes


Fei Wu (2):
  target/riscv: separate priv from mmu_idx
  target/riscv: reduce overhead of MSTATUS_SUM change

 target/riscv/cpu.h|  3 +--
 target/riscv/cpu_helper.c | 21 ---
 target/riscv/csr.c|  3 +--
 .../riscv/insn_trans/trans_privileged.c.inc   |  2 +-
 target/riscv/insn_trans/trans_rvh.c.inc   |  4 ++--
 target/riscv/insn_trans/trans_xthead.c.inc|  7 +--
 target/riscv/internals.h  | 14 +
 target/riscv/op_helper.c  |  5 +++--
 target/riscv/translate.c  |  3 +++
 9 files changed, 44 insertions(+), 18 deletions(-)

-- 
2.25.1




[PATCH v5 1/2] target/riscv: separate priv from mmu_idx

2023-03-24 Thread Fei Wu
Currently it's assumed that the 2 low bits of mmu_idx map to the privilege
mode. This assumption won't hold, as we are about to add more mmu_idx
values. Here a separate priv field is added to TB_FLAGS.

Signed-off-by: Fei Wu 
---
 target/riscv/cpu.h | 2 +-
 target/riscv/cpu_helper.c  | 4 +++-
 target/riscv/insn_trans/trans_privileged.c.inc | 2 +-
 target/riscv/insn_trans/trans_xthead.c.inc | 7 +--
 target/riscv/translate.c   | 3 +++
 5 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index 638e47c75a..ac3eb9abca 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -623,7 +623,6 @@ G_NORETURN void riscv_raise_exception(CPURISCVState *env,
 target_ulong riscv_cpu_get_fflags(CPURISCVState *env);
 void riscv_cpu_set_fflags(CPURISCVState *env, target_ulong);
 
-#define TB_FLAGS_PRIV_MMU_MASK3
 #define TB_FLAGS_PRIV_HYP_ACCESS_MASK   (1 << 2)
 #define TB_FLAGS_MSTATUS_FS MSTATUS_FS
 #define TB_FLAGS_MSTATUS_VS MSTATUS_VS
@@ -650,6 +649,7 @@ FIELD(TB_FLAGS, VTA, 24, 1)
 FIELD(TB_FLAGS, VMA, 25, 1)
 /* Native debug itrigger */
 FIELD(TB_FLAGS, ITRIGGER, 26, 1)
+FIELD(TB_FLAGS, PRIV, 27, 2)
 
 #ifdef TARGET_RISCV32
 #define riscv_cpu_mxl(env)  ((void)(env), MXL_RV32)
diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
index f88c503cf4..4e275b904a 100644
--- a/target/riscv/cpu_helper.c
+++ b/target/riscv/cpu_helper.c
@@ -82,6 +82,8 @@ void cpu_get_tb_cpu_state(CPURISCVState *env, target_ulong 
*pc,
 flags |= TB_FLAGS_MSTATUS_FS;
 flags |= TB_FLAGS_MSTATUS_VS;
 #else
+flags = FIELD_DP32(flags, TB_FLAGS, PRIV, env->priv);
+
 flags |= cpu_mmu_index(env, 0);
 if (riscv_cpu_fp_enabled(env)) {
 flags |= env->mstatus & MSTATUS_FS;
@@ -762,7 +764,7 @@ static int get_physical_address(CPURISCVState *env, hwaddr 
*physical,
  * (riscv_cpu_do_interrupt) is correct */
 MemTxResult res;
 MemTxAttrs attrs = MEMTXATTRS_UNSPECIFIED;
-int mode = mmu_idx & TB_FLAGS_PRIV_MMU_MASK;
+int mode = env->priv;
 bool use_background = false;
 hwaddr ppn;
 RISCVCPU *cpu = env_archcpu(env);
diff --git a/target/riscv/insn_trans/trans_privileged.c.inc 
b/target/riscv/insn_trans/trans_privileged.c.inc
index 59501b2780..9305b18299 100644
--- a/target/riscv/insn_trans/trans_privileged.c.inc
+++ b/target/riscv/insn_trans/trans_privileged.c.inc
@@ -52,7 +52,7 @@ static bool trans_ebreak(DisasContext *ctx, arg_ebreak *a)
  * that no exception will be raised when fetching them.
  */
 
-if (semihosting_enabled(ctx->mem_idx < PRV_S) &&
+if (semihosting_enabled(ctx->priv < PRV_S) &&
 (pre_addr & TARGET_PAGE_MASK) == (post_addr & TARGET_PAGE_MASK)) {
 pre= opcode_at(&ctx->base, pre_addr);
 ebreak = opcode_at(&ctx->base, ebreak_addr);
diff --git a/target/riscv/insn_trans/trans_xthead.c.inc 
b/target/riscv/insn_trans/trans_xthead.c.inc
index df504c3f2c..adfb53cb4c 100644
--- a/target/riscv/insn_trans/trans_xthead.c.inc
+++ b/target/riscv/insn_trans/trans_xthead.c.inc
@@ -265,12 +265,7 @@ static bool trans_th_tst(DisasContext *ctx, arg_th_tst *a)
 
 static inline int priv_level(DisasContext *ctx)
 {
-#ifdef CONFIG_USER_ONLY
-return PRV_U;
-#else
- /* Priv level is part of mem_idx. */
-return ctx->mem_idx & TB_FLAGS_PRIV_MMU_MASK;
-#endif
+return ctx->priv;
 }
 
 /* Test if priv level is M, S, or U (cannot fail). */
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index 0ee8ee147d..b215d18250 100644
--- a/target/riscv/translate.c
+++ b/target/riscv/translate.c
@@ -69,6 +69,7 @@ typedef struct DisasContext {
 uint32_t mstatus_hs_fs;
 uint32_t mstatus_hs_vs;
 uint32_t mem_idx;
+uint32_t priv;
 /* Remember the rounding mode encoded in the previous fp instruction,
which we have already installed into env->fp_status.  Or -1 for
no previous fp instruction.  Note that we exit the TB when writing
@@ -1162,8 +1163,10 @@ static void riscv_tr_init_disas_context(DisasContextBase 
*dcbase, CPUState *cs)
 } else {
 ctx->virt_enabled = false;
 }
+ctx->priv = FIELD_EX32(tb_flags, TB_FLAGS, PRIV);
 #else
 ctx->virt_enabled = false;
+ctx->priv = PRV_U;
 #endif
 ctx->misa_ext = env->misa_ext;
 ctx->frm = -1;  /* unknown rounding mode */
-- 
2.25.1




[PATCH v4 0/2] target/riscv: reduce MSTATUS_SUM overhead

2023-03-22 Thread Fei Wu
v3 -> v4:
* separate priv from mmu_idx
* use index 2 for S+SUM mmu_idx
* no tlb_flush for MPRV / MPP changes

Fei Wu (2):
  target/riscv: separate priv from mmu_idx
  target/riscv: reduce overhead of MSTATUS_SUM change

 target/riscv/cpu.h|  2 --
 target/riscv/cpu_helper.c | 19 ---
 target/riscv/csr.c|  3 +--
 .../riscv/insn_trans/trans_privileged.c.inc   |  2 +-
 target/riscv/insn_trans/trans_rvh.c.inc   |  4 ++--
 target/riscv/insn_trans/trans_xthead.c.inc|  7 +--
 target/riscv/internals.h  | 14 ++
 target/riscv/op_helper.c  |  5 +++--
 target/riscv/translate.c  |  3 +++
 9 files changed, 41 insertions(+), 18 deletions(-)

-- 
2.25.1




[PATCH v4 2/2] target/riscv: reduce overhead of MSTATUS_SUM change

2023-03-22 Thread Fei Wu
The kernel needs to access user mode memory, e.g. during syscalls. The window
is usually opened up for a very limited time through MSTATUS.SUM, so the
overhead is too much if tlb_flush() gets called for every SUM change.

This patch creates a separate MMU index for S+SUM, so that it's not
necessary to flush tlb anymore when SUM changes. This is similar to how
ARM handles Privileged Access Never (PAN).

Result of 'pipe 10' from unixbench boosts from 223656 to 1705006. Many
other syscalls benefit a lot from this too.

Signed-off-by: Fei Wu 
---
 target/riscv/cpu.h  |  1 -
 target/riscv/cpu_helper.c   | 17 +++--
 target/riscv/csr.c  |  3 +--
 target/riscv/insn_trans/trans_rvh.c.inc |  4 ++--
 target/riscv/internals.h| 14 ++
 target/riscv/op_helper.c|  5 +++--
 6 files changed, 35 insertions(+), 9 deletions(-)

diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index 66f7e3d1ba..d65eeb3c85 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -623,7 +623,6 @@ G_NORETURN void riscv_raise_exception(CPURISCVState *env,
 target_ulong riscv_cpu_get_fflags(CPURISCVState *env);
 void riscv_cpu_set_fflags(CPURISCVState *env, target_ulong);
 
-#define TB_FLAGS_PRIV_HYP_ACCESS_MASK   (1 << 2)
 #define TB_FLAGS_MSTATUS_FS MSTATUS_FS
 #define TB_FLAGS_MSTATUS_VS MSTATUS_VS
 
diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
index 76e1b0100e..bbc612badf 100644
--- a/target/riscv/cpu_helper.c
+++ b/target/riscv/cpu_helper.c
@@ -21,6 +21,7 @@
 #include "qemu/log.h"
 #include "qemu/main-loop.h"
 #include "cpu.h"
+#include "internals.h"
 #include "pmu.h"
 #include "exec/exec-all.h"
 #include "instmap.h"
@@ -36,7 +37,19 @@ int riscv_cpu_mmu_index(CPURISCVState *env, bool ifetch)
 #ifdef CONFIG_USER_ONLY
 return 0;
 #else
-return env->priv;
+if (ifetch) {
+return env->priv;
+}
+
+/* All priv -> mmu_idx mapping are here */
+int mode = env->priv;
+if (mode == PRV_M && get_field(env->mstatus, MSTATUS_MPRV)) {
+mode = get_field(env->mstatus, MSTATUS_MPP);
+}
+if (mode == PRV_S && get_field(env->mstatus, MSTATUS_SUM)) {
+return MMUIdx_S_SUM;
+}
+return mode;
 #endif
 }
 
@@ -596,7 +609,7 @@ void riscv_cpu_set_virt_enabled(CPURISCVState *env, bool 
enable)
 
 bool riscv_cpu_two_stage_lookup(int mmu_idx)
 {
-return mmu_idx & TB_FLAGS_PRIV_HYP_ACCESS_MASK;
+return mmu_idx & MMU_HYP_ACCESS_BIT;
 }
 
 int riscv_cpu_claim_interrupts(RISCVCPU *cpu, uint64_t interrupts)
diff --git a/target/riscv/csr.c b/target/riscv/csr.c
index d522efc0b6..f74e40e66d 100644
--- a/target/riscv/csr.c
+++ b/target/riscv/csr.c
@@ -1246,8 +1246,7 @@ static RISCVException write_mstatus(CPURISCVState *env, 
int csrno,
 RISCVMXL xl = riscv_cpu_mxl(env);
 
 /* flush tlb on mstatus fields that affect VM */
-if ((val ^ mstatus) & (MSTATUS_MXR | MSTATUS_MPP | MSTATUS_MPV |
-MSTATUS_MPRV | MSTATUS_SUM)) {
+if ((val ^ mstatus) & (MSTATUS_MXR | MSTATUS_MPV)) {
 tlb_flush(env_cpu(env));
 }
 mask = MSTATUS_SIE | MSTATUS_SPIE | MSTATUS_MIE | MSTATUS_MPIE |
diff --git a/target/riscv/insn_trans/trans_rvh.c.inc 
b/target/riscv/insn_trans/trans_rvh.c.inc
index 9248b48c36..15842f4282 100644
--- a/target/riscv/insn_trans/trans_rvh.c.inc
+++ b/target/riscv/insn_trans/trans_rvh.c.inc
@@ -40,7 +40,7 @@ static bool do_hlv(DisasContext *ctx, arg_r2 *a, MemOp mop)
 if (check_access(ctx)) {
 TCGv dest = dest_gpr(ctx, a->rd);
 TCGv addr = get_gpr(ctx, a->rs1, EXT_NONE);
-int mem_idx = ctx->mem_idx | TB_FLAGS_PRIV_HYP_ACCESS_MASK;
+int mem_idx = ctx->mem_idx | MMU_HYP_ACCESS_BIT;
 tcg_gen_qemu_ld_tl(dest, addr, mem_idx, mop);
 gen_set_gpr(ctx, a->rd, dest);
 }
@@ -87,7 +87,7 @@ static bool do_hsv(DisasContext *ctx, arg_r2_s *a, MemOp mop)
 if (check_access(ctx)) {
 TCGv addr = get_gpr(ctx, a->rs1, EXT_NONE);
 TCGv data = get_gpr(ctx, a->rs2, EXT_NONE);
-int mem_idx = ctx->mem_idx | TB_FLAGS_PRIV_HYP_ACCESS_MASK;
+int mem_idx = ctx->mem_idx | MMU_HYP_ACCESS_BIT;
 tcg_gen_qemu_st_tl(data, addr, mem_idx, mop);
 }
 return true;
diff --git a/target/riscv/internals.h b/target/riscv/internals.h
index 5620fbffb6..b55152a7dc 100644
--- a/target/riscv/internals.h
+++ b/target/riscv/internals.h
@@ -21,6 +21,20 @@
 
 #include "hw/registerfields.h"
 
+/*
+ * The current MMU Modes are:
+ *  - U 0b000
+ *  - S 0b001
+ *  - S+SUM 0b010
+ *  - M 0b011
+ *  - HLV/HLVX/HSV adds 0b100
+ */
+#define MMUIdx_U0
+#define MMUIdx_S1
+#define MMUIdx_S_SUM2
+#define MMUIdx_M3
+#define MMU

[PATCH v4 1/2] target/riscv: separate priv from mmu_idx

2023-03-22 Thread Fei Wu
Currently it's assumed that the 2 low bits of mmu_idx map to the privilege
mode. This assumption won't hold as we are about to add more mmu_idx values.

Signed-off-by: Fei Wu 
---
 target/riscv/cpu.h | 1 -
 target/riscv/cpu_helper.c  | 2 +-
 target/riscv/insn_trans/trans_privileged.c.inc | 2 +-
 target/riscv/insn_trans/trans_xthead.c.inc | 7 +--
 target/riscv/translate.c   | 3 +++
 5 files changed, 6 insertions(+), 9 deletions(-)

diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index 638e47c75a..66f7e3d1ba 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -623,7 +623,6 @@ G_NORETURN void riscv_raise_exception(CPURISCVState *env,
 target_ulong riscv_cpu_get_fflags(CPURISCVState *env);
 void riscv_cpu_set_fflags(CPURISCVState *env, target_ulong);
 
-#define TB_FLAGS_PRIV_MMU_MASK3
 #define TB_FLAGS_PRIV_HYP_ACCESS_MASK   (1 << 2)
 #define TB_FLAGS_MSTATUS_FS MSTATUS_FS
 #define TB_FLAGS_MSTATUS_VS MSTATUS_VS
diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
index f88c503cf4..76e1b0100e 100644
--- a/target/riscv/cpu_helper.c
+++ b/target/riscv/cpu_helper.c
@@ -762,7 +762,7 @@ static int get_physical_address(CPURISCVState *env, hwaddr 
*physical,
  * (riscv_cpu_do_interrupt) is correct */
 MemTxResult res;
 MemTxAttrs attrs = MEMTXATTRS_UNSPECIFIED;
-int mode = mmu_idx & TB_FLAGS_PRIV_MMU_MASK;
+int mode = env->priv;
 bool use_background = false;
 hwaddr ppn;
 RISCVCPU *cpu = env_archcpu(env);
diff --git a/target/riscv/insn_trans/trans_privileged.c.inc 
b/target/riscv/insn_trans/trans_privileged.c.inc
index 59501b2780..9305b18299 100644
--- a/target/riscv/insn_trans/trans_privileged.c.inc
+++ b/target/riscv/insn_trans/trans_privileged.c.inc
@@ -52,7 +52,7 @@ static bool trans_ebreak(DisasContext *ctx, arg_ebreak *a)
  * that no exception will be raised when fetching them.
  */
 
-if (semihosting_enabled(ctx->mem_idx < PRV_S) &&
+if (semihosting_enabled(ctx->priv < PRV_S) &&
 (pre_addr & TARGET_PAGE_MASK) == (post_addr & TARGET_PAGE_MASK)) {
 pre= opcode_at(&ctx->base, pre_addr);
 ebreak = opcode_at(&ctx->base, ebreak_addr);
diff --git a/target/riscv/insn_trans/trans_xthead.c.inc 
b/target/riscv/insn_trans/trans_xthead.c.inc
index df504c3f2c..adfb53cb4c 100644
--- a/target/riscv/insn_trans/trans_xthead.c.inc
+++ b/target/riscv/insn_trans/trans_xthead.c.inc
@@ -265,12 +265,7 @@ static bool trans_th_tst(DisasContext *ctx, arg_th_tst *a)
 
 static inline int priv_level(DisasContext *ctx)
 {
-#ifdef CONFIG_USER_ONLY
-return PRV_U;
-#else
- /* Priv level is part of mem_idx. */
-return ctx->mem_idx & TB_FLAGS_PRIV_MMU_MASK;
-#endif
+return ctx->priv;
 }
 
 /* Test if priv level is M, S, or U (cannot fail). */
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index 0ee8ee147d..e8880f9423 100644
--- a/target/riscv/translate.c
+++ b/target/riscv/translate.c
@@ -69,6 +69,7 @@ typedef struct DisasContext {
 uint32_t mstatus_hs_fs;
 uint32_t mstatus_hs_vs;
 uint32_t mem_idx;
+uint32_t priv;
 /* Remember the rounding mode encoded in the previous fp instruction,
which we have already installed into env->fp_status.  Or -1 for
no previous fp instruction.  Note that we exit the TB when writing
@@ -1162,8 +1163,10 @@ static void riscv_tr_init_disas_context(DisasContextBase 
*dcbase, CPUState *cs)
 } else {
 ctx->virt_enabled = false;
 }
+ctx->priv = env->priv;
 #else
 ctx->virt_enabled = false;
+ctx->priv = PRV_U;
 #endif
 ctx->misa_ext = env->misa_ext;
 ctx->frm = -1;  /* unknown rounding mode */
-- 
2.25.1




[PATCH v3] target/riscv: reduce overhead of MSTATUS_SUM change

2023-03-22 Thread Fei Wu
The kernel needs to access user mode memory, e.g. during syscalls. The window
is usually opened up for a very limited time through MSTATUS.SUM, so the
overhead is too much if tlb_flush() gets called for every SUM change.

This patch creates a separate MMU index for S+SUM, so that it's not
necessary to flush tlb anymore when SUM changes. This is similar to how
ARM handles Privileged Access Never (PAN).

Result of 'pipe 10' from unixbench boosts from 223656 to 1705006. Many
other syscalls benefit a lot from this too.

Signed-off-by: Fei Wu 
---
 target/riscv/cpu-param.h  |  2 +-
 target/riscv/cpu.h|  2 +-
 target/riscv/cpu_bits.h   |  1 +
 target/riscv/cpu_helper.c | 11 +++
 target/riscv/csr.c|  2 +-
 5 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/target/riscv/cpu-param.h b/target/riscv/cpu-param.h
index ebaf26d26d..9e21b943f9 100644
--- a/target/riscv/cpu-param.h
+++ b/target/riscv/cpu-param.h
@@ -27,6 +27,6 @@
  *  - S mode HLV/HLVX/HSV 0b101
  *  - M mode HLV/HLVX/HSV 0b111
  */
-#define NB_MMU_MODES 8
+#define NB_MMU_MODES 16
 
 #endif
diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index 638e47c75a..ac8bee11a7 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -624,7 +624,7 @@ target_ulong riscv_cpu_get_fflags(CPURISCVState *env);
 void riscv_cpu_set_fflags(CPURISCVState *env, target_ulong);
 
 #define TB_FLAGS_PRIV_MMU_MASK3
-#define TB_FLAGS_PRIV_HYP_ACCESS_MASK   (1 << 2)
+#define TB_FLAGS_PRIV_HYP_ACCESS_MASK   (1 << 3)
 #define TB_FLAGS_MSTATUS_FS MSTATUS_FS
 #define TB_FLAGS_MSTATUS_VS MSTATUS_VS
 
diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h
index fca7ef0cef..dd9e62b6e4 100644
--- a/target/riscv/cpu_bits.h
+++ b/target/riscv/cpu_bits.h
@@ -606,6 +606,7 @@ typedef enum {
 #define PRV_S 1
 #define PRV_H 2 /* Reserved */
 #define PRV_M 3
+#define MMUIdx_S_SUM 5
 
 /* Virtulisation Register Fields */
 #define VIRT_ONOFF  1
diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
index f88c503cf4..e52e9765d0 100644
--- a/target/riscv/cpu_helper.c
+++ b/target/riscv/cpu_helper.c
@@ -36,6 +36,17 @@ int riscv_cpu_mmu_index(CPURISCVState *env, bool ifetch)
 #ifdef CONFIG_USER_ONLY
 return 0;
 #else
+if (ifetch) {
+return env->priv;
+}
+
+int mode = env->priv;
+if (mode == PRV_M && get_field(env->mstatus, MSTATUS_MPRV)) {
+mode = get_field(env->mstatus, MSTATUS_MPP);
+}
+if (mode == PRV_S && get_field(env->mstatus, MSTATUS_SUM)) {
+return MMUIdx_S_SUM;
+}
 return env->priv;
 #endif
 }
diff --git a/target/riscv/csr.c b/target/riscv/csr.c
index ab566639e5..eacc40e912 100644
--- a/target/riscv/csr.c
+++ b/target/riscv/csr.c
@@ -1246,7 +1246,7 @@ static RISCVException write_mstatus(CPURISCVState *env, 
int csrno,
 
 /* flush tlb on mstatus fields that affect VM */
 if ((val ^ mstatus) & (MSTATUS_MXR | MSTATUS_MPP | MSTATUS_MPV |
-MSTATUS_MPRV | MSTATUS_SUM)) {
+MSTATUS_MPRV)) {
 tlb_flush(env_cpu(env));
 }
 mask = MSTATUS_SIE | MSTATUS_SPIE | MSTATUS_MIE | MSTATUS_MPIE |
-- 
2.25.1




[PATCH v2] target/riscv: reduce overhead of MSTATUS_SUM change

2023-03-21 Thread Fei Wu
The kernel needs to access user mode memory, e.g. during syscalls. The window
is usually opened up for a very limited time through MSTATUS.SUM, so the
overhead is too much if tlb_flush() gets called for every SUM change.
This patch saves the addresses accessed when SUM=1 and flushes only these
pages when SUM changes to 0. If the buffer is not large enough to save
all the pages accessed during SUM=1, it falls back to tlb_flush when
necessary.

The buffer size is set to 4 because, within this MSTATUS.SUM window, the
kernel accesses 1 or 2 pages most of the time; it is very rare to see
more than 4 pages accessed.

It's not necessary to save/restore this newly added state, as
tlb_flush() is always called after restore.

Result of 'pipe 10' from unixbench boosts from 223656 to 1327407. Many
other syscalls benefit a lot from this one too.

Signed-off-by: Fei Wu 
Reviewed-by: LIU Zhiwei 
Reviewed-by: Weiwei Li 
---
 target/riscv/cpu.h|  4 
 target/riscv/cpu_helper.c |  7 +++
 target/riscv/csr.c| 14 +-
 3 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index 638e47c75a..926dbce59f 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -383,6 +383,10 @@ struct CPUArchState {
 uint64_t kvm_timer_compare;
 uint64_t kvm_timer_state;
 uint64_t kvm_timer_frequency;
+
+#define MAX_CACHED_SUM_U_ADDR_NUM 4
+uint64_t sum_u_count;
+uint64_t sum_u_addr[MAX_CACHED_SUM_U_ADDR_NUM];
 };
 
 OBJECT_DECLARE_CPU_TYPE(RISCVCPU, RISCVCPUClass, RISCV_CPU)
diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
index f88c503cf4..d701017a60 100644
--- a/target/riscv/cpu_helper.c
+++ b/target/riscv/cpu_helper.c
@@ -1068,6 +1068,13 @@ restart:
 (access_type == MMU_DATA_STORE || (pte & PTE_D))) {
 *prot |= PAGE_WRITE;
 }
+if ((pte & PTE_U) && (mode == PRV_S) &&
+get_field(env->mstatus, MSTATUS_SUM)) {
+if (env->sum_u_count < MAX_CACHED_SUM_U_ADDR_NUM) {
+env->sum_u_addr[env->sum_u_count] = addr;
+}
+++env->sum_u_count;
+}
 return TRANSLATE_SUCCESS;
 }
 }
diff --git a/target/riscv/csr.c b/target/riscv/csr.c
index ab566639e5..e7dfdc6a93 100644
--- a/target/riscv/csr.c
+++ b/target/riscv/csr.c
@@ -1246,9 +1246,21 @@ static RISCVException write_mstatus(CPURISCVState *env, 
int csrno,
 
 /* flush tlb on mstatus fields that affect VM */
 if ((val ^ mstatus) & (MSTATUS_MXR | MSTATUS_MPP | MSTATUS_MPV |
-MSTATUS_MPRV | MSTATUS_SUM)) {
+MSTATUS_MPRV)) {
 tlb_flush(env_cpu(env));
+env->sum_u_count = 0;
+} else if ((mstatus & MSTATUS_SUM) && !(val & MSTATUS_SUM)) {
+if (env->sum_u_count > MAX_CACHED_SUM_U_ADDR_NUM) {
+tlb_flush_by_mmuidx(env_cpu(env), 1 << PRV_S | 1 << PRV_M);
+} else {
+for (int i = 0; i < env->sum_u_count; ++i) {
+tlb_flush_page_by_mmuidx(env_cpu(env), env->sum_u_addr[i],
+ 1 << PRV_S | 1 << PRV_M);
+}
+}
+env->sum_u_count = 0;
 }
+
 mask = MSTATUS_SIE | MSTATUS_SPIE | MSTATUS_MIE | MSTATUS_MPIE |
 MSTATUS_SPP | MSTATUS_MPRV | MSTATUS_SUM |
 MSTATUS_MPP | MSTATUS_MXR | MSTATUS_TVM | MSTATUS_TSR |
-- 
2.25.1