Re: [PATCH v2 24/27] contrib/plugins: extend execlog to track register changes

2024-02-25 Thread Pierrick Bouvier

On 2/23/24 8:21 PM, Alex Bennée wrote:

With the new plugin register API we can now track changes to register
values. Currently the implementation is fairly dumb, which will slow
down execution if a large number of register values are being tracked.
This could be improved by only instrumenting instructions which mention
registers we are interested in tracking.

Example usage:

   ./qemu-aarch64 -D plugin.log -d plugin \
  -cpu max,sve256=on \
  -plugin contrib/plugins/libexeclog.so,reg=sp,reg=z\* \
  ./tests/tcg/aarch64-linux-user/sha512-sve

will display in the execlog any changes to the stack pointer (sp) and
the SVE Z registers.

As testing registers every instruction will be quite a heavy operation
there is an additional flag which attempts to optimise the register
tracking by only instrumenting instructions which are likely to change
their values. This relies on the QEMU disassembler showing the register
names in disassembly so is an explicit opt-in.

Message-Id: <20240103173349.398526-41-alex.ben...@linaro.org>
Signed-off-by: Alex Bennée 
Cc: Akihiko Odaki 
Based-On: <20231025093128.33116-19-akihiko.od...@daynix.com>

---
v3
   - just use a GArray for the CPU array
   - drop duplicate of cpu_index
v4
   - rebase and api fixups
   - I accidentally squashed the optimisation last round so update
   commit message with the details.
---
  docs/devel/tcg-plugins.rst |  17 +-
  contrib/plugins/execlog.c  | 316 +++--
  2 files changed, 281 insertions(+), 52 deletions(-)

diff --git a/docs/devel/tcg-plugins.rst b/docs/devel/tcg-plugins.rst
index 81dcd43a612..fa7421279f5 100644
--- a/docs/devel/tcg-plugins.rst
+++ b/docs/devel/tcg-plugins.rst
@@ -497,6 +497,22 @@ arguments if required::
$ qemu-system-arm $(QEMU_ARGS) \
  -plugin ./contrib/plugins/libexeclog.so,ifilter=st1w,afilter=0x40001808 -d plugin
  
+This plugin can also dump registers when they change value. Specify the name of the
+registers with multiple ``reg`` options. You can also use glob style matching if you wish::
+
+  $ qemu-system-arm $(QEMU_ARGS) \
+-plugin ./contrib/plugins/libexeclog.so,reg=\*_el2,reg=sp -d plugin
+
+Be aware that each additional register to check will slow down
+execution quite considerably. You can optimise the number of register
+checks done by using the rdisas option. This will only instrument
+instructions that mention the registers in question in disassembly.
+This is not foolproof as some instructions implicitly change
+registers. You can use the ifilter to catch these cases::
+
+  $ qemu-system-arm $(QEMU_ARGS) \
+-plugin ./contrib/plugins/libexeclog.so,ifilter=msr,ifilter=blr,reg=x30,reg=\*_el1,rdisas=on
+
  - contrib/plugins/cache.c
  
  Cache modelling plugin that measures the performance of a given L1 cache

@@ -583,4 +599,3 @@ The following API is generated from the inline documentation in
  include the full kernel-doc annotations.
  
  .. kernel-doc:: include/qemu/qemu-plugin.h

-
diff --git a/contrib/plugins/execlog.c b/contrib/plugins/execlog.c
index f262eeb..dd7168cb548 100644
--- a/contrib/plugins/execlog.c
+++ b/contrib/plugins/execlog.c
@@ -1,7 +1,7 @@
  /*
   * Copyright (C) 2021, Alexandre Iooss 
   *
- * Log instruction execution with memory access.
+ * Log instruction execution with memory access and register changes
   *
   * License: GNU GPL, version 2 or later.
   *   See the COPYING file in the top-level directory.
@@ -15,29 +15,40 @@
  
  #include <qemu-plugin.h>
  
+typedef struct {

+struct qemu_plugin_register *handle;
+GByteArray *last;
+GByteArray *new;
+const char *name;
+} Register;
+
+typedef struct CPU {
+/* Store last executed instruction on each vCPU as a GString */
+GString *last_exec;
+/* Ptr array of Register */
+GPtrArray *registers;
+} CPU;
+
  QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
  
-/* Store last executed instruction on each vCPU as a GString */

-static GPtrArray *last_exec;
+static GArray *cpus;
  static GRWLock expand_array_lock;
  
  static GPtrArray *imatches;

  static GArray *amatches;
+static GPtrArray *rmatches;
+static bool disas_assist;
+static GMutex add_reg_name_lock;
+static GPtrArray *all_reg_names;
  
-/*

- * Expand last_exec array.
- *
- * As we could have multiple threads trying to do this we need to
- * serialise the expansion under a lock.
- */
-static void expand_last_exec(int cpu_index)
+static CPU *get_cpu(int vcpu_index)
  {
-g_rw_lock_writer_lock(&expand_array_lock);
-while (cpu_index >= last_exec->len) {
-GString *s = g_string_new(NULL);
-g_ptr_array_add(last_exec, s);
-}
-g_rw_lock_writer_unlock(&expand_array_lock);
+CPU *c;
+g_rw_lock_reader_lock(&expand_array_lock);
+c = &g_array_index(cpus, CPU, vcpu_index);
+g_rw_lock_reader_unlock(&expand_array_lock);
+
+return c;
  }
  
  /**

@@ -46,13 +57,10 @@ static void expand_last_exec(int cpu_index)
  static void vcpu_mem(unsigned int cpu_index, qemu_plugin_meminfo_t info,

[PATCH v2 24/27] contrib/plugins: extend execlog to track register changes

2024-02-23 Thread Alex Bennée
With the new plugin register API we can now track changes to register
values. Currently the implementation is fairly dumb, which will slow
down execution if a large number of register values are being tracked.
This could be improved by only instrumenting instructions which mention
registers we are interested in tracking.

Example usage:

  ./qemu-aarch64 -D plugin.log -d plugin \
 -cpu max,sve256=on \
 -plugin contrib/plugins/libexeclog.so,reg=sp,reg=z\* \
 ./tests/tcg/aarch64-linux-user/sha512-sve

will display in the execlog any changes to the stack pointer (sp) and
the SVE Z registers.

As testing registers every instruction will be quite a heavy operation
there is an additional flag which attempts to optimise the register
tracking by only instrumenting instructions which are likely to change
their values. This relies on the QEMU disassembler showing the register
names in disassembly so is an explicit opt-in.

Message-Id: <20240103173349.398526-41-alex.ben...@linaro.org>
Signed-off-by: Alex Bennée 
Cc: Akihiko Odaki 
Based-On: <20231025093128.33116-19-akihiko.od...@daynix.com>

---
v3
  - just use a GArray for the CPU array
  - drop duplicate of cpu_index
v4
  - rebase and api fixups
  - I accidentally squashed the optimisation last round so update
  commit message with the details.
---
 docs/devel/tcg-plugins.rst |  17 +-
 contrib/plugins/execlog.c  | 316 +++--
 2 files changed, 281 insertions(+), 52 deletions(-)

diff --git a/docs/devel/tcg-plugins.rst b/docs/devel/tcg-plugins.rst
index 81dcd43a612..fa7421279f5 100644
--- a/docs/devel/tcg-plugins.rst
+++ b/docs/devel/tcg-plugins.rst
@@ -497,6 +497,22 @@ arguments if required::
   $ qemu-system-arm $(QEMU_ARGS) \
 -plugin ./contrib/plugins/libexeclog.so,ifilter=st1w,afilter=0x40001808 -d plugin
 
+This plugin can also dump registers when they change value. Specify the name of the
+registers with multiple ``reg`` options. You can also use glob style matching if you wish::
+
+  $ qemu-system-arm $(QEMU_ARGS) \
+-plugin ./contrib/plugins/libexeclog.so,reg=\*_el2,reg=sp -d plugin
+
+Be aware that each additional register to check will slow down
+execution quite considerably. You can optimise the number of register
+checks done by using the rdisas option. This will only instrument
+instructions that mention the registers in question in disassembly.
+This is not foolproof as some instructions implicitly change
+registers. You can use the ifilter to catch these cases::
+
+  $ qemu-system-arm $(QEMU_ARGS) \
+-plugin ./contrib/plugins/libexeclog.so,ifilter=msr,ifilter=blr,reg=x30,reg=\*_el1,rdisas=on
+
 - contrib/plugins/cache.c
 
 Cache modelling plugin that measures the performance of a given L1 cache
@@ -583,4 +599,3 @@ The following API is generated from the inline documentation in
 include the full kernel-doc annotations.
 
 .. kernel-doc:: include/qemu/qemu-plugin.h
-
diff --git a/contrib/plugins/execlog.c b/contrib/plugins/execlog.c
index f262eeb..dd7168cb548 100644
--- a/contrib/plugins/execlog.c
+++ b/contrib/plugins/execlog.c
@@ -1,7 +1,7 @@
 /*
  * Copyright (C) 2021, Alexandre Iooss 
  *
- * Log instruction execution with memory access.
+ * Log instruction execution with memory access and register changes
  *
  * License: GNU GPL, version 2 or later.
  *   See the COPYING file in the top-level directory.
@@ -15,29 +15,40 @@
 
 #include <qemu-plugin.h>
 
+typedef struct {
+struct qemu_plugin_register *handle;
+GByteArray *last;
+GByteArray *new;
+const char *name;
+} Register;
+
+typedef struct CPU {
+/* Store last executed instruction on each vCPU as a GString */
+GString *last_exec;
+/* Ptr array of Register */
+GPtrArray *registers;
+} CPU;
+
 QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
 
-/* Store last executed instruction on each vCPU as a GString */
-static GPtrArray *last_exec;
+static GArray *cpus;
 static GRWLock expand_array_lock;
 
 static GPtrArray *imatches;
 static GArray *amatches;
+static GPtrArray *rmatches;
+static bool disas_assist;
+static GMutex add_reg_name_lock;
+static GPtrArray *all_reg_names;
 
-/*
- * Expand last_exec array.
- *
- * As we could have multiple threads trying to do this we need to
- * serialise the expansion under a lock.
- */
-static void expand_last_exec(int cpu_index)
+static CPU *get_cpu(int vcpu_index)
 {
-g_rw_lock_writer_lock(&expand_array_lock);
-while (cpu_index >= last_exec->len) {
-GString *s = g_string_new(NULL);
-g_ptr_array_add(last_exec, s);
-}
-g_rw_lock_writer_unlock(&expand_array_lock);
+CPU *c;
+g_rw_lock_reader_lock(&expand_array_lock);
+c = &g_array_index(cpus, CPU, vcpu_index);
+g_rw_lock_reader_unlock(&expand_array_lock);
+
+return c;
 }
 
 /**
@@ -46,13 +57,10 @@ static void expand_last_exec(int cpu_index)
 static void vcpu_mem(unsigned int cpu_index, qemu_plugin_meminfo_t info,
  uint64_t vaddr, void *udata)
 {
-GString *s;
+CPU *c =