Re: [PATCH v2 01/14] plugins: implement inline operation relative to cpu_index

2024-01-29 Thread Pierrick Bouvier

On 1/26/24 16:07, Alex Bennée wrote:

Pierrick Bouvier  writes:


Instead of working on a fixed memory location, allow addressing it based
on cpu_index, an element size and a given offset.
Result address: ptr + offset + cpu_index * element_size.

With this, we can target a member in a struct array from a base pointer.

The current semantics are not modified, so an inline operation still
always targets the same memory location.

Signed-off-by: Pierrick Bouvier 
---
  accel/tcg/plugin-gen.c | 67 +++---
  include/qemu/plugin.h  |  3 ++
  plugins/api.c  |  7 +++--
  plugins/core.c | 18 +---
  plugins/plugin.h   |  6 ++--
  5 files changed, 81 insertions(+), 20 deletions(-)

diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c
index b37ce7683e6..1a2375d7779 100644
--- a/accel/tcg/plugin-gen.c
+++ b/accel/tcg/plugin-gen.c
@@ -132,16 +132,28 @@ static void gen_empty_udata_cb_no_rwg(void)
   */
  static void gen_empty_inline_cb(void)
  {
+TCGv_i32 cpu_index = tcg_temp_ebb_new_i32();
+TCGv_ptr cpu_index_as_ptr = tcg_temp_ebb_new_ptr();
  TCGv_i64 val = tcg_temp_ebb_new_i64();
  TCGv_ptr ptr = tcg_temp_ebb_new_ptr();
  
+tcg_gen_ld_i32(cpu_index, tcg_env,
+   -offsetof(ArchCPU, env) + offsetof(CPUState, cpu_index));
+/* pass an immediate != 0 so that it doesn't get optimized away */
+tcg_gen_muli_i32(cpu_index, cpu_index, 0xdeadbeef);
+tcg_gen_ext_i32_ptr(cpu_index_as_ptr, cpu_index);
+
  tcg_gen_movi_ptr(ptr, 0);
+tcg_gen_add_ptr(ptr, ptr, cpu_index_as_ptr);
  tcg_gen_ld_i64(val, ptr, 0);
  /* pass an immediate != 0 so that it doesn't get optimized away */
  tcg_gen_addi_i64(val, val, 0xdeadface);
+
  tcg_gen_st_i64(val, ptr, 0);
  tcg_temp_free_ptr(ptr);
  tcg_temp_free_i64(val);
+tcg_temp_free_ptr(cpu_index_as_ptr);
+tcg_temp_free_i32(cpu_index);
  }
  



  if (UINTPTR_MAX == UINT32_MAX) {
@@ -395,18 +439,19 @@ static TCGOp *append_inline_cb(const struct 
qemu_plugin_dyn_cb *cb,
 TCGOp *begin_op, TCGOp *op,
 int *unused)
  {
-/* const_ptr */
-op = copy_const_ptr(&begin_op, op, cb->userp);
-
-/* ld_i64 */
+char *ptr = cb->userp;
+if (!cb->inline_direct_ptr) {
+/* dereference userp once to get access to memory location */
+ptr = *(char **)cb->userp;
+}


I'm confused here, probably because inline_direct_ptr gets removed later
on?



Yes, we temporarily need two code paths for this patch series. Otherwise,
plugins would have to be updated at the same time as we make all the
changes, so as not to break anything.



+op = copy_ld_i32(&begin_op, op);
+op = copy_mul_i32(&begin_op, op, cb->inline_element_size);
+op = copy_ext_i32_ptr(&begin_op, op);
+op = copy_const_ptr(&begin_op, op, ptr + cb->inline_offset);
+op = copy_add_ptr(&begin_op, op);
  op = copy_ld_i64(&begin_op, op);
-
-/* add_i64 */
  op = copy_add_i64(&begin_op, op, cb->inline_insn.imm);
-
-/* st_i64 */
  op = copy_st_i64(&begin_op, op);
-
  return op;
  }
  
diff --git a/include/qemu/plugin.h b/include/qemu/plugin.h

index b0c5ac68293..9346249145d 100644
--- a/include/qemu/plugin.h
+++ b/include/qemu/plugin.h
@@ -86,6 +86,9 @@ enum plugin_dyn_cb_subtype {
  struct qemu_plugin_dyn_cb {
  union qemu_plugin_cb_sig f;
  void *userp;
+size_t inline_offset;
+size_t inline_element_size;
+bool inline_direct_ptr;


Ahh here it is.

(I seem to recall there is a setting for git to order headers first that
helps with this).



Indeed, found it (thanks):
https://gitlab.com/qemu-project/qemu/-/blob/master/scripts/git.orderfile


We could do with some documentation here. I can guess the offset and
element size values but what inline_direct_ptr means is not clear.



It indicates whether userp is a pointer to the data, or a pointer to a
pointer to the data. Any suggestion for a name that captures this detail?


I have another patch that replaces all this with a qemu_plugin_u64_t
(from the scoreboard), which I'll integrate in v3 and which is much
clearer. But one commit along these lines will still be needed.



  enum plugin_dyn_cb_subtype type;
  /* @rw applies to mem callbacks only (both regular and inline) */
  enum qemu_plugin_mem_rw rw;
diff --git a/plugins/api.c b/plugins/api.c
index 8d5cca53295..e777eb4e9fc 100644
--- a/plugins/api.c
+++ b/plugins/api.c
@@ -106,7 +106,8 @@ void qemu_plugin_register_vcpu_tb_exec_inline(struct 
qemu_plugin_tb *tb,
void *ptr, uint64_t imm)
  {
  if (!tb->mem_only) {
-plugin_register_inline_op(&tb->cbs[PLUGIN_CB_INLINE], 0, op, ptr, imm);
+plugin_register_inline_op(&tb->cbs[PLUGIN_CB_INLINE],
+  0, op, ptr, 0, sizeof(uint64_t), true, imm);
  }
  }
  
@@ -131,7 +132,7 @@ void qemu_plugin_register_vcpu_insn_exec_inline(struct qemu_plugin_insn *insn,

  {
  if (!insn->mem_only) {
  

Re: [PATCH v2 01/14] plugins: implement inline operation relative to cpu_index

2024-01-26 Thread Alex Bennée
Pierrick Bouvier  writes:

> Instead of working on a fixed memory location, allow to address it based
> on cpu_index, an element size and a given offset.
> Result address: ptr + offset + cpu_index * element_size.
>
> With this, we can target a member in a struct array from a base pointer.
>
> Current semantic is not modified, thus inline operation still targets
> always the same memory location.
>
> Signed-off-by: Pierrick Bouvier 
> ---
>  accel/tcg/plugin-gen.c | 67 +++---
>  include/qemu/plugin.h  |  3 ++
>  plugins/api.c  |  7 +++--
>  plugins/core.c | 18 +---
>  plugins/plugin.h   |  6 ++--
>  5 files changed, 81 insertions(+), 20 deletions(-)
>
> diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c
> index b37ce7683e6..1a2375d7779 100644
> --- a/accel/tcg/plugin-gen.c
> +++ b/accel/tcg/plugin-gen.c
> @@ -132,16 +132,28 @@ static void gen_empty_udata_cb_no_rwg(void)
>   */
>  static void gen_empty_inline_cb(void)
>  {
> +TCGv_i32 cpu_index = tcg_temp_ebb_new_i32();
> +TCGv_ptr cpu_index_as_ptr = tcg_temp_ebb_new_ptr();
>  TCGv_i64 val = tcg_temp_ebb_new_i64();
>  TCGv_ptr ptr = tcg_temp_ebb_new_ptr();
>  
> +tcg_gen_ld_i32(cpu_index, tcg_env,
> +   -offsetof(ArchCPU, env) + offsetof(CPUState, cpu_index));
> +/* pass an immediate != 0 so that it doesn't get optimized away */
> +tcg_gen_muli_i32(cpu_index, cpu_index, 0xdeadbeef);
> +tcg_gen_ext_i32_ptr(cpu_index_as_ptr, cpu_index);
> +
>  tcg_gen_movi_ptr(ptr, 0);
> +tcg_gen_add_ptr(ptr, ptr, cpu_index_as_ptr);
>  tcg_gen_ld_i64(val, ptr, 0);
>  /* pass an immediate != 0 so that it doesn't get optimized away */
>  tcg_gen_addi_i64(val, val, 0xdeadface);
> +
>  tcg_gen_st_i64(val, ptr, 0);
>  tcg_temp_free_ptr(ptr);
>  tcg_temp_free_i64(val);
> +tcg_temp_free_ptr(cpu_index_as_ptr);
> +tcg_temp_free_i32(cpu_index);
>  }
>  

>  if (UINTPTR_MAX == UINT32_MAX) {
> @@ -395,18 +439,19 @@ static TCGOp *append_inline_cb(const struct 
> qemu_plugin_dyn_cb *cb,
> TCGOp *begin_op, TCGOp *op,
> int *unused)
>  {
> -/* const_ptr */
> -op = copy_const_ptr(&begin_op, op, cb->userp);
> -
> -/* ld_i64 */
> +char *ptr = cb->userp;
> +if (!cb->inline_direct_ptr) {
> +/* dereference userp once to get access to memory location */
> +ptr = *(char **)cb->userp;
> +}

I'm confused here, probably because inline_direct_ptr gets removed later
on?

> +op = copy_ld_i32(&begin_op, op);
> +op = copy_mul_i32(&begin_op, op, cb->inline_element_size);
> +op = copy_ext_i32_ptr(&begin_op, op);
> +op = copy_const_ptr(&begin_op, op, ptr + cb->inline_offset);
> +op = copy_add_ptr(&begin_op, op);
>  op = copy_ld_i64(&begin_op, op);
> -
> -/* add_i64 */
>  op = copy_add_i64(&begin_op, op, cb->inline_insn.imm);
> -
> -/* st_i64 */
>  op = copy_st_i64(&begin_op, op);
> -
>  return op;
>  }
>  
> diff --git a/include/qemu/plugin.h b/include/qemu/plugin.h
> index b0c5ac68293..9346249145d 100644
> --- a/include/qemu/plugin.h
> +++ b/include/qemu/plugin.h
> @@ -86,6 +86,9 @@ enum plugin_dyn_cb_subtype {
>  struct qemu_plugin_dyn_cb {
>  union qemu_plugin_cb_sig f;
>  void *userp;
> +size_t inline_offset;
> +size_t inline_element_size;
> +bool inline_direct_ptr;

Ahh here it is.

(I seem to recall there is a setting for git to order headers first that
helps with this).

We could do with some documentation here. I can guess the offset and
element size values but what inline_direct_ptr means is not clear.

>  enum plugin_dyn_cb_subtype type;
>  /* @rw applies to mem callbacks only (both regular and inline) */
>  enum qemu_plugin_mem_rw rw;
> diff --git a/plugins/api.c b/plugins/api.c
> index 8d5cca53295..e777eb4e9fc 100644
> --- a/plugins/api.c
> +++ b/plugins/api.c
> @@ -106,7 +106,8 @@ void qemu_plugin_register_vcpu_tb_exec_inline(struct 
> qemu_plugin_tb *tb,
>void *ptr, uint64_t imm)
>  {
>  if (!tb->mem_only) {
> -plugin_register_inline_op(&tb->cbs[PLUGIN_CB_INLINE], 0, op, ptr, 
> imm);
> +plugin_register_inline_op(&tb->cbs[PLUGIN_CB_INLINE],
> +  0, op, ptr, 0, sizeof(uint64_t), true, 
> imm);
>  }
>  }
>  
> @@ -131,7 +132,7 @@ void qemu_plugin_register_vcpu_insn_exec_inline(struct 
> qemu_plugin_insn *insn,
>  {
>  if (!insn->mem_only) {
>  
> plugin_register_inline_op(&insn->cbs[PLUGIN_CB_INSN][PLUGIN_CB_INLINE],
> -  0, op, ptr, imm);
> +  0, op, ptr, 0, sizeof(uint64_t), true, 
> imm);
>  }
>  }
>  
> @@ -156,7 +157,7 @@ void qemu_plugin_register_vcpu_mem_inline(struct 
> qemu_plugin_insn *insn,
>uint64_t imm)
>  {
>  plugin_register_inline_op(&insn->cbs[PLUGIN_CB_MEM][PLUGIN_CB_INLINE],
> -

[PATCH v2 01/14] plugins: implement inline operation relative to cpu_index

2024-01-17 Thread Pierrick Bouvier
Instead of working on a fixed memory location, allow addressing it based
on cpu_index, an element size and a given offset.
Result address: ptr + offset + cpu_index * element_size.

With this, we can target a member in a struct array from a base pointer.

The current semantics are not modified, so an inline operation still
always targets the same memory location.

Signed-off-by: Pierrick Bouvier 
---
 accel/tcg/plugin-gen.c | 67 +++---
 include/qemu/plugin.h  |  3 ++
 plugins/api.c  |  7 +++--
 plugins/core.c | 18 +---
 plugins/plugin.h   |  6 ++--
 5 files changed, 81 insertions(+), 20 deletions(-)

diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c
index b37ce7683e6..1a2375d7779 100644
--- a/accel/tcg/plugin-gen.c
+++ b/accel/tcg/plugin-gen.c
@@ -132,16 +132,28 @@ static void gen_empty_udata_cb_no_rwg(void)
  */
 static void gen_empty_inline_cb(void)
 {
+TCGv_i32 cpu_index = tcg_temp_ebb_new_i32();
+TCGv_ptr cpu_index_as_ptr = tcg_temp_ebb_new_ptr();
 TCGv_i64 val = tcg_temp_ebb_new_i64();
 TCGv_ptr ptr = tcg_temp_ebb_new_ptr();
 
+tcg_gen_ld_i32(cpu_index, tcg_env,
+   -offsetof(ArchCPU, env) + offsetof(CPUState, cpu_index));
+/* pass an immediate != 0 so that it doesn't get optimized away */
+tcg_gen_muli_i32(cpu_index, cpu_index, 0xdeadbeef);
+tcg_gen_ext_i32_ptr(cpu_index_as_ptr, cpu_index);
+
 tcg_gen_movi_ptr(ptr, 0);
+tcg_gen_add_ptr(ptr, ptr, cpu_index_as_ptr);
 tcg_gen_ld_i64(val, ptr, 0);
 /* pass an immediate != 0 so that it doesn't get optimized away */
 tcg_gen_addi_i64(val, val, 0xdeadface);
+
 tcg_gen_st_i64(val, ptr, 0);
 tcg_temp_free_ptr(ptr);
 tcg_temp_free_i64(val);
+tcg_temp_free_ptr(cpu_index_as_ptr);
+tcg_temp_free_i32(cpu_index);
 }
 
 static void gen_empty_mem_cb(TCGv_i64 addr, uint32_t info)
@@ -289,12 +301,37 @@ static TCGOp *copy_const_ptr(TCGOp **begin_op, TCGOp *op, 
void *ptr)
 return op;
 }
 
+static TCGOp *copy_ld_i32(TCGOp **begin_op, TCGOp *op)
+{
+return copy_op(begin_op, op, INDEX_op_ld_i32);
+}
+
+static TCGOp *copy_ext_i32_ptr(TCGOp **begin_op, TCGOp *op)
+{
+if (UINTPTR_MAX == UINT32_MAX) {
+op = copy_op(begin_op, op, INDEX_op_mov_i32);
+} else {
+op = copy_op(begin_op, op, INDEX_op_ext_i32_i64);
+}
+return op;
+}
+
+static TCGOp *copy_add_ptr(TCGOp **begin_op, TCGOp *op)
+{
+if (UINTPTR_MAX == UINT32_MAX) {
+op = copy_op(begin_op, op, INDEX_op_add_i32);
+} else {
+op = copy_op(begin_op, op, INDEX_op_add_i64);
+}
+return op;
+}
+
 static TCGOp *copy_ld_i64(TCGOp **begin_op, TCGOp *op)
 {
 if (TCG_TARGET_REG_BITS == 32) {
 /* 2x ld_i32 */
-op = copy_op(begin_op, op, INDEX_op_ld_i32);
-op = copy_op(begin_op, op, INDEX_op_ld_i32);
+op = copy_ld_i32(begin_op, op);
+op = copy_ld_i32(begin_op, op);
 } else {
 /* ld_i64 */
 op = copy_op(begin_op, op, INDEX_op_ld_i64);
@@ -330,6 +367,13 @@ static TCGOp *copy_add_i64(TCGOp **begin_op, TCGOp *op, 
uint64_t v)
 return op;
 }
 
+static TCGOp *copy_mul_i32(TCGOp **begin_op, TCGOp *op, uint32_t v)
+{
+op = copy_op(begin_op, op, INDEX_op_mul_i32);
+op->args[2] = tcgv_i32_arg(tcg_constant_i32(v));
+return op;
+}
+
 static TCGOp *copy_st_ptr(TCGOp **begin_op, TCGOp *op)
 {
 if (UINTPTR_MAX == UINT32_MAX) {
@@ -395,18 +439,19 @@ static TCGOp *append_inline_cb(const struct 
qemu_plugin_dyn_cb *cb,
TCGOp *begin_op, TCGOp *op,
int *unused)
 {
-/* const_ptr */
> -op = copy_const_ptr(&begin_op, op, cb->userp);
-
-/* ld_i64 */
+char *ptr = cb->userp;
+if (!cb->inline_direct_ptr) {
+/* dereference userp once to get access to memory location */
+ptr = *(char **)cb->userp;
+}
> +op = copy_ld_i32(&begin_op, op);
> +op = copy_mul_i32(&begin_op, op, cb->inline_element_size);
> +op = copy_ext_i32_ptr(&begin_op, op);
> +op = copy_const_ptr(&begin_op, op, ptr + cb->inline_offset);
> +op = copy_add_ptr(&begin_op, op);
>  op = copy_ld_i64(&begin_op, op);
-
-/* add_i64 */
>  op = copy_add_i64(&begin_op, op, cb->inline_insn.imm);
-
-/* st_i64 */
>  op = copy_st_i64(&begin_op, op);
-
 return op;
 }
 
diff --git a/include/qemu/plugin.h b/include/qemu/plugin.h
index b0c5ac68293..9346249145d 100644
--- a/include/qemu/plugin.h
+++ b/include/qemu/plugin.h
@@ -86,6 +86,9 @@ enum plugin_dyn_cb_subtype {
 struct qemu_plugin_dyn_cb {
 union qemu_plugin_cb_sig f;
 void *userp;
+size_t inline_offset;
+size_t inline_element_size;
+bool inline_direct_ptr;
 enum plugin_dyn_cb_subtype type;
 /* @rw applies to mem callbacks only (both regular and inline) */
 enum qemu_plugin_mem_rw rw;
diff --git a/plugins/api.c b/plugins/api.c
index 8d5cca53295..e777eb4e9fc 100644
--- a/plugins/api.c
+++ b/plugins/api.c
@@ -106,7 +106,8 @@ void