Abdelrahman Elbehery <[email protected]> writes:

> Profiling showed g_hash_table_lookup as a hot spot when running TCG
> plugins that log all CPU registers (e.g. execlog with reg=*).
> Replace it with a direct array index to avoid lookup overhead.
>
> Signed-off-by: Abdelrahman Elbehery <[email protected]>
> ---
> While profiling (via callgrind) a TCG plugin that logs all the CPU registers
> (very identical to execlog with reg=*) i noticed that arm_gdb_get_sysreg is a 
> hot spot,
> mostly while calling g_hash_table_lookup. Probably this is due to some 
> hashtable collision
> that is handleded by glib internally.
>
> Currently each DynamicGDBFeatureInfo item holds keys for cp_regs, and the 
> keys are always
> used to retrieve the ARMCPRegInfo pointer.
>
> By replacing the keys array with a GPtrArray, we now just access reg info 
> directly from
> the given index.
>
> The benchmarking was done on c2d-highcpu-16 (16 vCPUs, 32 GB memory) GCP
>
> Setup was to run qemu-system-aarch64 against a buildroot kernel and rootfs
> with and without the patch mostly using:
>     --plugin contrib/plugins/libstoptrigger.so,icount=2000000
>     --plugin contrib/plugins/libexeclog.so,reg=*
>     -M virt -cpu cortex-a710 -smp 1
>
> Benchmark showed ~15% performance gain when hashtable is not used.
> For benchmarking, i used hyperfine with 3 warmup rounds and 10 iterations 
> each.
> ---
>  target/arm/cpu.h     |  6 +++---
>  target/arm/gdbstub.c | 28 ++++++++++++++++------------
>  2 files changed, 19 insertions(+), 15 deletions(-)
>
> diff --git a/target/arm/cpu.h b/target/arm/cpu.h
> index 
> 15a13b929276dad161032b61ba51ebbce7eeebc6..8816e94e3f8271ad403e77f6182fdb8cee31c527
>  100644
> --- a/target/arm/cpu.h
> +++ b/target/arm/cpu.h
> @@ -121,15 +121,15 @@
>   * DynamicGDBFeatureInfo:
>   * @desc: Contains the feature descriptions.
>   * @data: A union with data specific to the set of registers
> - *    @cpregs_keys: Array that contains the corresponding Key of
> - *                  a given cpreg with the same order of the cpreg
> + *    @cpregs_regs: Array that contains the corresponding Register info 
> pointer
> + *                  of a given cpreg with the same order of the cpreg
>   *                  in the XML description.
>   */
>  typedef struct DynamicGDBFeatureInfo {
>      GDBFeature desc;
>      union {
>          struct {
> -            uint32_t *keys;
> +            GPtrArray *regs;
>          } cpregs;
>      } data;
>  } DynamicGDBFeatureInfo;
> diff --git a/target/arm/gdbstub.c b/target/arm/gdbstub.c
> index 
> d6e29c4cf46745dc7851bc5f8339c8d7c6857b8c..e66af8207b436fe8f1dd27441a587b1eb52ff928
>  100644
> --- a/target/arm/gdbstub.c
> +++ b/target/arm/gdbstub.c
> @@ -238,19 +238,23 @@ static int arm_gdb_get_sysreg(CPUState *cs, GByteArray 
> *buf, int reg)
>      ARMCPU *cpu = ARM_CPU(cs);
>      CPUARMState *env = &cpu->env;
>      const ARMCPRegInfo *ri;
> -    uint32_t key;
>  
> -    key = cpu->dyn_sysreg_feature.data.cpregs.keys[reg];
> -    ri = get_arm_cp_reginfo(cpu->cp_regs, key);
> +    ri = g_ptr_array_index(cpu->dyn_sysreg_feature.data.cpregs.regs,
> reg);

I was only re-using key because we still use get_arm_cp_reginfo but I
have no objection to making the access faster for plugins given the
system registers don't change after CPU reset.


Tested-by: Alex Bennée <[email protected]>
Reviewed-by: Alex Bennée <[email protected]>


>      if (ri) {
>          switch (cpreg_field_type(ri)) {
>          case MO_64:
>              if (ri->vhe_redir_to_el2 &&
>                  (arm_hcr_el2_eff(env) & HCR_E2H) &&
>                  arm_current_el(env) == 2) {
> -                ri = get_arm_cp_reginfo(cpu->cp_regs, ri->vhe_redir_to_el2);
> +                ri = g_ptr_array_index(
> +                    cpu->dyn_sysreg_feature.data.cpregs.regs,
> +                    ri->vhe_redir_to_el2
> +                );
>              } else if (ri->vhe_redir_to_el01) {
> -                ri = get_arm_cp_reginfo(cpu->cp_regs, ri->vhe_redir_to_el01);
> +                ri = g_ptr_array_index(
> +                    cpu->dyn_sysreg_feature.data.cpregs.regs,
> +                    ri->vhe_redir_to_el01
> +                );
>              }
>              return gdb_get_reg64(buf, (uint64_t)read_raw_cp_reg(env, ri));
>          case MO_32:
> @@ -269,19 +273,18 @@ static int arm_gdb_set_sysreg(CPUState *cs, uint8_t 
> *buf, int reg)
>  
>  static void arm_gen_one_feature_sysreg(GDBFeatureBuilder *builder,
>                                         DynamicGDBFeatureInfo *dyn_feature,
> -                                       ARMCPRegInfo *ri, uint32_t ri_key,
> +                                       ARMCPRegInfo *ri,
>                                         int bitsize, int n)
>  {
>      gdb_feature_builder_append_reg(builder, ri->name, bitsize, n,
>                                     "int", "cp_regs");
>  
> -    dyn_feature->data.cpregs.keys[n] = ri_key;
> +    g_ptr_array_index(dyn_feature->data.cpregs.regs, n) = ri;
>  }
>  
>  static void arm_register_sysreg_for_feature(gpointer key, gpointer value,
>                                              gpointer p)
>  {
> -    uint32_t ri_key = (uintptr_t)key;
>      ARMCPRegInfo *ri = value;
>      RegisterSysregFeatureParam *param = p;
>      ARMCPU *cpu = ARM_CPU(param->cs);
> @@ -292,7 +295,7 @@ static void arm_register_sysreg_for_feature(gpointer key, 
> gpointer value,
>          if (arm_feature(env, ARM_FEATURE_AARCH64)) {
>              if (ri->state == ARM_CP_STATE_AA64) {
>                  arm_gen_one_feature_sysreg(&param->builder, dyn_feature,
> -                                           ri, ri_key, 64, param->n++);
> +                                           ri, 64, param->n++);
>              }
>          } else {
>              if (ri->state == ARM_CP_STATE_AA32) {
> @@ -302,10 +305,10 @@ static void arm_register_sysreg_for_feature(gpointer 
> key, gpointer value,
>                  }
>                  if (ri->type & ARM_CP_64BIT) {
>                      arm_gen_one_feature_sysreg(&param->builder, dyn_feature,
> -                                               ri, ri_key, 64, param->n++);
> +                                               ri, 64, param->n++);
>                  } else {
>                      arm_gen_one_feature_sysreg(&param->builder, dyn_feature,
> -                                               ri, ri_key, 32, param->n++);
> +                                               ri, 32, param->n++);
>                  }
>              }
>          }
> @@ -323,7 +326,8 @@ static GDBFeature 
> *arm_gen_dynamic_sysreg_feature(CPUState *cs, int base_reg)
>                               "org.qemu.gdb.arm.sys.regs",
>                               "system-registers.xml",
>                               base_reg);
> -    cpu->dyn_sysreg_feature.data.cpregs.keys = g_new(uint32_t, num_regs);
> +    cpu->dyn_sysreg_feature.data.cpregs.regs = g_ptr_array_new();
> +    g_ptr_array_set_size(cpu->dyn_sysreg_feature.data.cpregs.regs, num_regs);
>      g_hash_table_foreach(cpu->cp_regs, arm_register_sysreg_for_feature, 
> &param);
>      gdb_feature_builder_end(&param.builder);
>      return &cpu->dyn_sysreg_feature.desc;
>
> ---
> base-commit: ac6721b88df944ade0048822b2b74210f543d656
> change-id: 20260518-enhance_arm_gdb_get_sysreg_performance-4288474401d6
>
> Best regards,

-- 
Alex Bennée
Virtualisation Tech Lead @ Linaro

Reply via email to