Re: [PATCH v2 05/14] tests/plugin: add test plugin for inline operations

2024-01-30 Thread Pierrick Bouvier

On 1/30/24 18:52, Alex Bennée wrote:

Pierrick Bouvier  writes:


On 1/26/24 20:05, Alex Bennée wrote:

Pierrick Bouvier  writes:


For now, it simply performs instruction, bb and mem count, and ensure
that inline vs callback versions have the same result. Later, we'll
extend it when new inline operations are added.

Use existing plugins to test everything works is a bit cumbersome, as
different events are treated in different plugins. Thus, this new one.

Signed-off-by: Pierrick Bouvier 
---
   tests/plugin/inline.c| 182 +++
   tests/plugin/meson.build |   2 +-
   2 files changed, 183 insertions(+), 1 deletion(-)
   create mode 100644 tests/plugin/inline.c

diff --git a/tests/plugin/inline.c b/tests/plugin/inline.c
new file mode 100644
index 000..28d1c3b1e48
--- /dev/null
+++ b/tests/plugin/inline.c
@@ -0,0 +1,182 @@
+/*
+ * Copyright (C) 2023, Pierrick Bouvier 
+ *
+ * Demonstrates and tests usage of inline ops.
+ *
+ * License: GNU GPL, version 2 or later.
+ *   See the COPYING file in the top-level directory.
+ */
+
+#include 
+#include 
+#include 
+
+#include 
+
+typedef struct {
+uint64_t count_tb;
+uint64_t count_tb_inline;
+uint64_t count_insn;
+uint64_t count_insn_inline;
+uint64_t count_mem;
+uint64_t count_mem_inline;
+} CPUCount;

I wonder if there is any way to enforce the structures being an
array of
64 bit counts? I do worry the compiler might one day decide to do
something silly but legal leading to confusion.
I guess qemu_plugin_scoreboard_new could:
g_assert((element_size % sizeof(uint64_t)) == 0)



Given the explanation on patch [02/14], do you see better now that
CPUCount could hold any type, and that qemu_plugin_u64 allows targeting
a specific member in it?

In general, qemu plugin runtime simply is given an offset and total
size of the struct, so a compiled plugin can have any
optimization/padding on this struct without this affecting the result.


?


+static qemu_plugin_u64_t count_tb;
+static qemu_plugin_u64_t count_tb_inline;
+static qemu_plugin_u64_t count_insn;
+static qemu_plugin_u64_t count_insn_inline;
+static qemu_plugin_u64_t count_mem;
+static qemu_plugin_u64_t count_mem_inline;

Can't this just be a non scoreboard instance of CPUCount?



We could always use:
CPUCount* count = qemu_plugin_scoreboard_get(score, i);
count->count_tb++;


I thought these were globals protected by a lock? I wasn't suggesting
hiding the abstraction behind the scoreboard API.



The global_count_* variables are protected by a lock (and serve as
reference values to test inline operations).

The scoreboard is used for inline and callback (per cpu) operations.



However, the part where we sum all values would now need some glue
code, which is longer and more error prone than having those
definitions here (and the declarations in the install function).


+
+static uint64_t global_count_tb;
+static uint64_t global_count_insn;
+static uint64_t global_count_mem;
+static unsigned int max_cpu_index;
+static GMutex tb_lock;
+static GMutex insn_lock;
+static GMutex mem_lock;
+
+QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
+
+static void stats_insn(void)
+{
+const uint64_t expected = global_count_insn;
+const uint64_t per_vcpu = qemu_plugin_u64_sum(count_insn);
+const uint64_t inl_per_vcpu =
+qemu_plugin_u64_sum(count_insn_inline);
+printf("insn: %" PRIu64 "\n", expected);
+printf("insn: %" PRIu64 " (per vcpu)\n", per_vcpu);
+printf("insn: %" PRIu64 " (per vcpu inline)\n", inl_per_vcpu);
+g_assert(expected > 0);
+g_assert(per_vcpu == expected);
+g_assert(inl_per_vcpu == expected);
+}
+
+static void stats_tb(void)
+{
+const uint64_t expected = global_count_tb;
+const uint64_t per_vcpu = qemu_plugin_u64_sum(count_tb);
+const uint64_t inl_per_vcpu =
+qemu_plugin_u64_sum(count_tb_inline);
+printf("tb: %" PRIu64 "\n", expected);
+printf("tb: %" PRIu64 " (per vcpu)\n", per_vcpu);
+printf("tb: %" PRIu64 " (per vcpu inline)\n", inl_per_vcpu);
+g_assert(expected > 0);
+g_assert(per_vcpu == expected);
+g_assert(inl_per_vcpu == expected);
+}
+
+static void stats_mem(void)
+{
+const uint64_t expected = global_count_mem;
+const uint64_t per_vcpu = qemu_plugin_u64_sum(count_mem);
+const uint64_t inl_per_vcpu =
+qemu_plugin_u64_sum(count_mem_inline);
+printf("mem: %" PRIu64 "\n", expected);
+printf("mem: %" PRIu64 " (per vcpu)\n", per_vcpu);
+printf("mem: %" PRIu64 " (per vcpu inline)\n", inl_per_vcpu);
+g_assert(expected > 0);
+g_assert(per_vcpu == expected);
+g_assert(inl_per_vcpu == expected);
+}
+
+static void plugin_exit(qemu_plugin_id_t id, void *udata)
+{
+const unsigned int num_cpus = qemu_plugin_scoreboard_size(counts);
+g_assert(num_cpus == max_cpu_index + 1);
+
+for (int i = 0; i < num_cpus ; ++i) {
+const uint64_t tb = *qemu_plugin_u64_get(count_tb, i);
+const uint64_t tb_inline = 

Re: [PATCH v2 05/14] tests/plugin: add test plugin for inline operations

2024-01-30 Thread Alex Bennée
Pierrick Bouvier  writes:

> On 1/26/24 20:05, Alex Bennée wrote:
>> Pierrick Bouvier  writes:
>> 
>>> For now, it simply performs instruction, bb and mem count, and ensure
>>> that inline vs callback versions have the same result. Later, we'll
>>> extend it when new inline operations are added.
>>>
>>> Use existing plugins to test everything works is a bit cumbersome, as
>>> different events are treated in different plugins. Thus, this new one.
>>>
>>> Signed-off-by: Pierrick Bouvier 
>>> ---
>>>   tests/plugin/inline.c| 182 +++
>>>   tests/plugin/meson.build |   2 +-
>>>   2 files changed, 183 insertions(+), 1 deletion(-)
>>>   create mode 100644 tests/plugin/inline.c
>>>
>>> diff --git a/tests/plugin/inline.c b/tests/plugin/inline.c
>>> new file mode 100644
>>> index 000..28d1c3b1e48
>>> --- /dev/null
>>> +++ b/tests/plugin/inline.c
>>> @@ -0,0 +1,182 @@
>>> +/*
>>> + * Copyright (C) 2023, Pierrick Bouvier 
>>> + *
>>> + * Demonstrates and tests usage of inline ops.
>>> + *
>>> + * License: GNU GPL, version 2 or later.
>>> + *   See the COPYING file in the top-level directory.
>>> + */
>>> +
>>> +#include 
>>> +#include 
>>> +#include 
>>> +
>>> +#include 
>>> +
>>> +typedef struct {
>>> +uint64_t count_tb;
>>> +uint64_t count_tb_inline;
>>> +uint64_t count_insn;
>>> +uint64_t count_insn_inline;
>>> +uint64_t count_mem;
>>> +uint64_t count_mem_inline;
>>> +} CPUCount;
>> I wonder if there is any way to enforce the structures being an
>> array of
>> 64 bit counts? I do worry the compiler might one day decide to do
>> something silly but legal leading to confusion.
>> I guess qemu_plugin_scoreboard_new could:
>>g_assert((element_size % sizeof(uint64_t)) == 0)
>> 
>
> Given the explanation on patch [02/14], do you see better now that
> CPUCount could hold any type, and that qemu_plugin_u64 allows targeting
> a specific member in it?
>
> In general, qemu plugin runtime simply is given an offset and total
> size of the struct, so a compiled plugin can have any
> optimization/padding on this struct without this affecting the result.
>
>> ?
>> 
>>> +static qemu_plugin_u64_t count_tb;
>>> +static qemu_plugin_u64_t count_tb_inline;
>>> +static qemu_plugin_u64_t count_insn;
>>> +static qemu_plugin_u64_t count_insn_inline;
>>> +static qemu_plugin_u64_t count_mem;
>>> +static qemu_plugin_u64_t count_mem_inline;
>> Can't this just be a non scoreboard instance of CPUCount?
>> 
>
> We could always use:
> CPUCount* count = qemu_plugin_scoreboard_get(score, i);
> count->count_tb++;

I thought these were globals protected by a lock? I wasn't suggesting
hiding the abstraction behind the scoreboard API.

>
> However, the part where we sum all values would now need some glue
> code, which is longer and more error prone than having those
> definitions here (and the declarations in the install function).
>
>>> +
>>> +static uint64_t global_count_tb;
>>> +static uint64_t global_count_insn;
>>> +static uint64_t global_count_mem;
>>> +static unsigned int max_cpu_index;
>>> +static GMutex tb_lock;
>>> +static GMutex insn_lock;
>>> +static GMutex mem_lock;
>>> +
>>> +QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
>>> +
>>> +static void stats_insn(void)
>>> +{
>>> +const uint64_t expected = global_count_insn;
>>> +const uint64_t per_vcpu = qemu_plugin_u64_sum(count_insn);
>>> +const uint64_t inl_per_vcpu =
>>> +qemu_plugin_u64_sum(count_insn_inline);
>>> +printf("insn: %" PRIu64 "\n", expected);
>>> +printf("insn: %" PRIu64 " (per vcpu)\n", per_vcpu);
>>> +printf("insn: %" PRIu64 " (per vcpu inline)\n", inl_per_vcpu);
>>> +g_assert(expected > 0);
>>> +g_assert(per_vcpu == expected);
>>> +g_assert(inl_per_vcpu == expected);
>>> +}
>>> +
>>> +static void stats_tb(void)
>>> +{
>>> +const uint64_t expected = global_count_tb;
>>> +const uint64_t per_vcpu = qemu_plugin_u64_sum(count_tb);
>>> +const uint64_t inl_per_vcpu =
>>> +qemu_plugin_u64_sum(count_tb_inline);
>>> +printf("tb: %" PRIu64 "\n", expected);
>>> +printf("tb: %" PRIu64 " (per vcpu)\n", per_vcpu);
>>> +printf("tb: %" PRIu64 " (per vcpu inline)\n", inl_per_vcpu);
>>> +g_assert(expected > 0);
>>> +g_assert(per_vcpu == expected);
>>> +g_assert(inl_per_vcpu == expected);
>>> +}
>>> +
>>> +static void stats_mem(void)
>>> +{
>>> +const uint64_t expected = global_count_mem;
>>> +const uint64_t per_vcpu = qemu_plugin_u64_sum(count_mem);
>>> +const uint64_t inl_per_vcpu =
>>> +qemu_plugin_u64_sum(count_mem_inline);
>>> +printf("mem: %" PRIu64 "\n", expected);
>>> +printf("mem: %" PRIu64 " (per vcpu)\n", per_vcpu);
>>> +printf("mem: %" PRIu64 " (per vcpu inline)\n", inl_per_vcpu);
>>> +g_assert(expected > 0);
>>> +g_assert(per_vcpu == expected);
>>> +g_assert(inl_per_vcpu == expected);
>>> +}
>>> +
>>> +static void plugin_exit(qemu_plugin_id_t id, void *udata)
>>> +{
>>> +const 

Re: [PATCH v2 05/14] tests/plugin: add test plugin for inline operations

2024-01-29 Thread Pierrick Bouvier

On 1/26/24 20:05, Alex Bennée wrote:

Pierrick Bouvier  writes:


For now, it simply performs instruction, bb and mem count, and ensure
that inline vs callback versions have the same result. Later, we'll
extend it when new inline operations are added.

Use existing plugins to test everything works is a bit cumbersome, as
different events are treated in different plugins. Thus, this new one.

Signed-off-by: Pierrick Bouvier 
---
  tests/plugin/inline.c| 182 +++
  tests/plugin/meson.build |   2 +-
  2 files changed, 183 insertions(+), 1 deletion(-)
  create mode 100644 tests/plugin/inline.c

diff --git a/tests/plugin/inline.c b/tests/plugin/inline.c
new file mode 100644
index 000..28d1c3b1e48
--- /dev/null
+++ b/tests/plugin/inline.c
@@ -0,0 +1,182 @@
+/*
+ * Copyright (C) 2023, Pierrick Bouvier 
+ *
+ * Demonstrates and tests usage of inline ops.
+ *
+ * License: GNU GPL, version 2 or later.
+ *   See the COPYING file in the top-level directory.
+ */
+
+#include 
+#include 
+#include 
+
+#include 
+
+typedef struct {
+uint64_t count_tb;
+uint64_t count_tb_inline;
+uint64_t count_insn;
+uint64_t count_insn_inline;
+uint64_t count_mem;
+uint64_t count_mem_inline;
+} CPUCount;


I wonder if there is any way to enforce the structures being an array of
64 bit counts? I do worry the compiler might one day decide to do
something silly but legal leading to confusion.

I guess qemu_plugin_scoreboard_new could:

   g_assert((element_size % sizeof(uint64_t)) == 0)



Given the explanation on patch [02/14], do you see better now that CPUCount
could hold any type, and that qemu_plugin_u64 allows targeting a specific
member in it?


In general, qemu plugin runtime simply is given an offset and total size 
of the struct, so a compiled plugin can have any optimization/padding on 
this struct without this affecting the result.



?


+static qemu_plugin_u64_t count_tb;
+static qemu_plugin_u64_t count_tb_inline;
+static qemu_plugin_u64_t count_insn;
+static qemu_plugin_u64_t count_insn_inline;
+static qemu_plugin_u64_t count_mem;
+static qemu_plugin_u64_t count_mem_inline;


Can't this just be a non scoreboard instance of CPUCount?



We could always use:
CPUCount* count = qemu_plugin_scoreboard_get(score, i);
count->count_tb++;

However, the part where we sum all values would now need some glue code,
which is longer and more error prone than having those definitions here
(and the declarations in the install function).



+
+static uint64_t global_count_tb;
+static uint64_t global_count_insn;
+static uint64_t global_count_mem;
+static unsigned int max_cpu_index;
+static GMutex tb_lock;
+static GMutex insn_lock;
+static GMutex mem_lock;
+
+QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
+
+static void stats_insn(void)
+{
+const uint64_t expected = global_count_insn;
+const uint64_t per_vcpu = qemu_plugin_u64_sum(count_insn);
+const uint64_t inl_per_vcpu =
+qemu_plugin_u64_sum(count_insn_inline);
+printf("insn: %" PRIu64 "\n", expected);
+printf("insn: %" PRIu64 " (per vcpu)\n", per_vcpu);
+printf("insn: %" PRIu64 " (per vcpu inline)\n", inl_per_vcpu);
+g_assert(expected > 0);
+g_assert(per_vcpu == expected);
+g_assert(inl_per_vcpu == expected);
+}
+
+static void stats_tb(void)
+{
+const uint64_t expected = global_count_tb;
+const uint64_t per_vcpu = qemu_plugin_u64_sum(count_tb);
+const uint64_t inl_per_vcpu =
+qemu_plugin_u64_sum(count_tb_inline);
+printf("tb: %" PRIu64 "\n", expected);
+printf("tb: %" PRIu64 " (per vcpu)\n", per_vcpu);
+printf("tb: %" PRIu64 " (per vcpu inline)\n", inl_per_vcpu);
+g_assert(expected > 0);
+g_assert(per_vcpu == expected);
+g_assert(inl_per_vcpu == expected);
+}
+
+static void stats_mem(void)
+{
+const uint64_t expected = global_count_mem;
+const uint64_t per_vcpu = qemu_plugin_u64_sum(count_mem);
+const uint64_t inl_per_vcpu =
+qemu_plugin_u64_sum(count_mem_inline);
+printf("mem: %" PRIu64 "\n", expected);
+printf("mem: %" PRIu64 " (per vcpu)\n", per_vcpu);
+printf("mem: %" PRIu64 " (per vcpu inline)\n", inl_per_vcpu);
+g_assert(expected > 0);
+g_assert(per_vcpu == expected);
+g_assert(inl_per_vcpu == expected);
+}
+
+static void plugin_exit(qemu_plugin_id_t id, void *udata)
+{
+const unsigned int num_cpus = qemu_plugin_scoreboard_size(counts);
+g_assert(num_cpus == max_cpu_index + 1);
+
+for (int i = 0; i < num_cpus ; ++i) {
+const uint64_t tb = *qemu_plugin_u64_get(count_tb, i);
+const uint64_t tb_inline = *qemu_plugin_u64_get(count_tb_inline, i);
+const uint64_t insn = *qemu_plugin_u64_get(count_insn, i);
+const uint64_t insn_inline = *qemu_plugin_u64_get(count_insn_inline, 
i);
+const uint64_t mem = *qemu_plugin_u64_get(count_mem, i);
+const uint64_t mem_inline = *qemu_plugin_u64_get(count_mem_inline, i);
+printf("cpu %d: 

Re: [PATCH v2 05/14] tests/plugin: add test plugin for inline operations

2024-01-26 Thread Alex Bennée
Pierrick Bouvier  writes:

> For now, it simply performs instruction, bb and mem count, and ensure
> that inline vs callback versions have the same result. Later, we'll
> extend it when new inline operations are added.
>
> Use existing plugins to test everything works is a bit cumbersome, as
> different events are treated in different plugins. Thus, this new one.
>
> Signed-off-by: Pierrick Bouvier 
> ---
>  tests/plugin/inline.c| 182 +++
>  tests/plugin/meson.build |   2 +-
>  2 files changed, 183 insertions(+), 1 deletion(-)
>  create mode 100644 tests/plugin/inline.c
>
> diff --git a/tests/plugin/inline.c b/tests/plugin/inline.c
> new file mode 100644
> index 000..28d1c3b1e48
> --- /dev/null
> +++ b/tests/plugin/inline.c
> @@ -0,0 +1,182 @@
> +/*
> + * Copyright (C) 2023, Pierrick Bouvier 
> + *
> + * Demonstrates and tests usage of inline ops.
> + *
> + * License: GNU GPL, version 2 or later.
> + *   See the COPYING file in the top-level directory.
> + */
> +
> +#include 
> +#include 
> +#include 
> +
> +#include 
> +
> +typedef struct {
> +uint64_t count_tb;
> +uint64_t count_tb_inline;
> +uint64_t count_insn;
> +uint64_t count_insn_inline;
> +uint64_t count_mem;
> +uint64_t count_mem_inline;
> +} CPUCount;

I wonder if there is any way to enforce the structures being an array of
64 bit counts? I do worry the compiler might one day decide to do
something silly but legal leading to confusion.

I guess qemu_plugin_scoreboard_new could:

  g_assert((element_size % sizeof(uint64_t)) == 0)

?

> +static qemu_plugin_u64_t count_tb;
> +static qemu_plugin_u64_t count_tb_inline;
> +static qemu_plugin_u64_t count_insn;
> +static qemu_plugin_u64_t count_insn_inline;
> +static qemu_plugin_u64_t count_mem;
> +static qemu_plugin_u64_t count_mem_inline;

Can't this just be a non scoreboard instance of CPUCount?

> +
> +static uint64_t global_count_tb;
> +static uint64_t global_count_insn;
> +static uint64_t global_count_mem;
> +static unsigned int max_cpu_index;
> +static GMutex tb_lock;
> +static GMutex insn_lock;
> +static GMutex mem_lock;
> +
> +QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
> +
> +static void stats_insn(void)
> +{
> +const uint64_t expected = global_count_insn;
> +const uint64_t per_vcpu = qemu_plugin_u64_sum(count_insn);
> +const uint64_t inl_per_vcpu =
> +qemu_plugin_u64_sum(count_insn_inline);
> +printf("insn: %" PRIu64 "\n", expected);
> +printf("insn: %" PRIu64 " (per vcpu)\n", per_vcpu);
> +printf("insn: %" PRIu64 " (per vcpu inline)\n", inl_per_vcpu);
> +g_assert(expected > 0);
> +g_assert(per_vcpu == expected);
> +g_assert(inl_per_vcpu == expected);
> +}
> +
> +static void stats_tb(void)
> +{
> +const uint64_t expected = global_count_tb;
> +const uint64_t per_vcpu = qemu_plugin_u64_sum(count_tb);
> +const uint64_t inl_per_vcpu =
> +qemu_plugin_u64_sum(count_tb_inline);
> +printf("tb: %" PRIu64 "\n", expected);
> +printf("tb: %" PRIu64 " (per vcpu)\n", per_vcpu);
> +printf("tb: %" PRIu64 " (per vcpu inline)\n", inl_per_vcpu);
> +g_assert(expected > 0);
> +g_assert(per_vcpu == expected);
> +g_assert(inl_per_vcpu == expected);
> +}
> +
> +static void stats_mem(void)
> +{
> +const uint64_t expected = global_count_mem;
> +const uint64_t per_vcpu = qemu_plugin_u64_sum(count_mem);
> +const uint64_t inl_per_vcpu =
> +qemu_plugin_u64_sum(count_mem_inline);
> +printf("mem: %" PRIu64 "\n", expected);
> +printf("mem: %" PRIu64 " (per vcpu)\n", per_vcpu);
> +printf("mem: %" PRIu64 " (per vcpu inline)\n", inl_per_vcpu);
> +g_assert(expected > 0);
> +g_assert(per_vcpu == expected);
> +g_assert(inl_per_vcpu == expected);
> +}
> +
> +static void plugin_exit(qemu_plugin_id_t id, void *udata)
> +{
> +const unsigned int num_cpus = qemu_plugin_scoreboard_size(counts);
> +g_assert(num_cpus == max_cpu_index + 1);
> +
> +for (int i = 0; i < num_cpus ; ++i) {
> +const uint64_t tb = *qemu_plugin_u64_get(count_tb, i);
> +const uint64_t tb_inline = *qemu_plugin_u64_get(count_tb_inline, i);
> +const uint64_t insn = *qemu_plugin_u64_get(count_insn, i);
> +const uint64_t insn_inline = *qemu_plugin_u64_get(count_insn_inline, 
> i);
> +const uint64_t mem = *qemu_plugin_u64_get(count_mem, i);
> +const uint64_t mem_inline = *qemu_plugin_u64_get(count_mem_inline, 
> i);
> +printf("cpu %d: tb (%" PRIu64 ", %" PRIu64 ") | "
> +   "insn (%" PRIu64 ", %" PRIu64 ") | "
> +   "mem (%" PRIu64 ", %" PRIu64 ")"
> +   "\n",
> +   i, tb, tb_inline, insn, insn_inline, mem, mem_inline);
> +g_assert(tb == tb_inline);
> +g_assert(insn == insn_inline);
> +g_assert(mem == mem_inline);
> +}
> +
> +stats_tb();
> +stats_insn();
> +stats_mem();
> +
> +

[PATCH v2 05/14] tests/plugin: add test plugin for inline operations

2024-01-17 Thread Pierrick Bouvier
For now, it simply performs instruction, bb and mem counts, and ensures
that inline vs callback versions have the same result. Later, we'll
extend it when new inline operations are added.

Using existing plugins to test that everything works is a bit cumbersome,
as different events are handled in different plugins. Thus, this new one.

Signed-off-by: Pierrick Bouvier 
---
 tests/plugin/inline.c| 182 +++
 tests/plugin/meson.build |   2 +-
 2 files changed, 183 insertions(+), 1 deletion(-)
 create mode 100644 tests/plugin/inline.c

diff --git a/tests/plugin/inline.c b/tests/plugin/inline.c
new file mode 100644
index 000..28d1c3b1e48
--- /dev/null
+++ b/tests/plugin/inline.c
@@ -0,0 +1,182 @@
+/*
+ * Copyright (C) 2023, Pierrick Bouvier 
+ *
+ * Demonstrates and tests usage of inline ops.
+ *
+ * License: GNU GPL, version 2 or later.
+ *   See the COPYING file in the top-level directory.
+ */
+
+#include <glib.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#include <qemu-plugin.h>
+
+typedef struct {
+uint64_t count_tb;
+uint64_t count_tb_inline;
+uint64_t count_insn;
+uint64_t count_insn_inline;
+uint64_t count_mem;
+uint64_t count_mem_inline;
+} CPUCount;
+
+static struct qemu_plugin_scoreboard *counts;
+static qemu_plugin_u64_t count_tb;
+static qemu_plugin_u64_t count_tb_inline;
+static qemu_plugin_u64_t count_insn;
+static qemu_plugin_u64_t count_insn_inline;
+static qemu_plugin_u64_t count_mem;
+static qemu_plugin_u64_t count_mem_inline;
+
+static uint64_t global_count_tb;
+static uint64_t global_count_insn;
+static uint64_t global_count_mem;
+static unsigned int max_cpu_index;
+static GMutex tb_lock;
+static GMutex insn_lock;
+static GMutex mem_lock;
+
+QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
+
+static void stats_insn(void)
+{
+const uint64_t expected = global_count_insn;
+const uint64_t per_vcpu = qemu_plugin_u64_sum(count_insn);
+const uint64_t inl_per_vcpu =
+qemu_plugin_u64_sum(count_insn_inline);
+printf("insn: %" PRIu64 "\n", expected);
+printf("insn: %" PRIu64 " (per vcpu)\n", per_vcpu);
+printf("insn: %" PRIu64 " (per vcpu inline)\n", inl_per_vcpu);
+g_assert(expected > 0);
+g_assert(per_vcpu == expected);
+g_assert(inl_per_vcpu == expected);
+}
+
+static void stats_tb(void)
+{
+const uint64_t expected = global_count_tb;
+const uint64_t per_vcpu = qemu_plugin_u64_sum(count_tb);
+const uint64_t inl_per_vcpu =
+qemu_plugin_u64_sum(count_tb_inline);
+printf("tb: %" PRIu64 "\n", expected);
+printf("tb: %" PRIu64 " (per vcpu)\n", per_vcpu);
+printf("tb: %" PRIu64 " (per vcpu inline)\n", inl_per_vcpu);
+g_assert(expected > 0);
+g_assert(per_vcpu == expected);
+g_assert(inl_per_vcpu == expected);
+}
+
+static void stats_mem(void)
+{
+const uint64_t expected = global_count_mem;
+const uint64_t per_vcpu = qemu_plugin_u64_sum(count_mem);
+const uint64_t inl_per_vcpu =
+qemu_plugin_u64_sum(count_mem_inline);
+printf("mem: %" PRIu64 "\n", expected);
+printf("mem: %" PRIu64 " (per vcpu)\n", per_vcpu);
+printf("mem: %" PRIu64 " (per vcpu inline)\n", inl_per_vcpu);
+g_assert(expected > 0);
+g_assert(per_vcpu == expected);
+g_assert(inl_per_vcpu == expected);
+}
+
+static void plugin_exit(qemu_plugin_id_t id, void *udata)
+{
+const unsigned int num_cpus = qemu_plugin_scoreboard_size(counts);
+g_assert(num_cpus == max_cpu_index + 1);
+
+for (int i = 0; i < num_cpus ; ++i) {
+const uint64_t tb = *qemu_plugin_u64_get(count_tb, i);
+const uint64_t tb_inline = *qemu_plugin_u64_get(count_tb_inline, i);
+const uint64_t insn = *qemu_plugin_u64_get(count_insn, i);
+const uint64_t insn_inline = *qemu_plugin_u64_get(count_insn_inline, 
i);
+const uint64_t mem = *qemu_plugin_u64_get(count_mem, i);
+const uint64_t mem_inline = *qemu_plugin_u64_get(count_mem_inline, i);
+printf("cpu %d: tb (%" PRIu64 ", %" PRIu64 ") | "
+   "insn (%" PRIu64 ", %" PRIu64 ") | "
+   "mem (%" PRIu64 ", %" PRIu64 ")"
+   "\n",
+   i, tb, tb_inline, insn, insn_inline, mem, mem_inline);
+g_assert(tb == tb_inline);
+g_assert(insn == insn_inline);
+g_assert(mem == mem_inline);
+}
+
+stats_tb();
+stats_insn();
+stats_mem();
+
+qemu_plugin_scoreboard_free(counts);
+}
+
+static void vcpu_tb_exec(unsigned int cpu_index, void *udata)
+{
+(*qemu_plugin_u64_get(count_tb, cpu_index))++;
+g_mutex_lock(&tb_lock);
+max_cpu_index = MAX(max_cpu_index, cpu_index);
+global_count_tb++;
+g_mutex_unlock(&tb_lock);
+}
+
+static void vcpu_insn_exec(unsigned int cpu_index, void *udata)
+{
+(*qemu_plugin_u64_get(count_insn, cpu_index))++;
+g_mutex_lock(&insn_lock);
+global_count_insn++;
+g_mutex_unlock(&insn_lock);
+}
+
+static void vcpu_mem_access(unsigned int cpu_index,
+