Re: [PATCH v17 11/13] target/loongarch: Adjust functions and structure to support user-mode

2022-06-17 Thread Richard Henderson

On 6/16/22 05:16, Song Gao wrote:

Some functions and members of the structure differ from softmmu-mode,
so we need to adjust them to support user-mode.

Signed-off-by: Song Gao
Signed-off-by: Xiaojuan Yang
---
  target/loongarch/cpu.c| 21 ++-
  target/loongarch/cpu.h|  6 
  target/loongarch/helper.h |  2 ++
  .../insn_trans/trans_privileged.c.inc | 36 +++
  target/loongarch/internals.h  |  2 ++
  target/loongarch/op_helper.c  |  6 
  6 files changed, 72 insertions(+), 1 deletion(-)


Reviewed-by: Richard Henderson 

r~



Re: [PATCH v17 10/13] target/loongarch: remove unused include hw/loader.h

2022-06-17 Thread Richard Henderson

On 6/16/22 05:16, Song Gao wrote:

Signed-off-by: Song Gao
Signed-off-by: Xiaojuan Yang
---
  target/loongarch/cpu.c | 1 -
  1 file changed, 1 deletion(-)


Reviewed-by: Richard Henderson 

r~



Re: [PATCH v17 09/13] target/loongarch: Fix helper_asrtle_d/asrtgt_d raise wrong exception

2022-06-17 Thread Richard Henderson

On 6/16/22 05:16, Song Gao wrote:

Raising EXCCODE_BCE instead of EXCCODE_ADEM for helper_asrtle_d/asrtgt_d.

Signed-off-by: Song Gao 
Signed-off-by: Xiaojuan Yang 
---
  target/loongarch/cpu.c   | 2 ++
  target/loongarch/op_helper.c | 4 ++--
  2 files changed, 4 insertions(+), 2 deletions(-)


"Raise" not "Raising".

Reviewed-by: Richard Henderson 


r~



diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c
index e32d4cc269..0013582a3a 100644
--- a/target/loongarch/cpu.c
+++ b/target/loongarch/cpu.c
@@ -51,6 +51,7 @@ static const char * const excp_names[] = {
  [EXCCODE_IPE] = "Instruction privilege error",
  [EXCCODE_FPE] = "Floating Point Exception",
  [EXCCODE_DBP] = "Debug breakpoint",
+[EXCCODE_BCE] = "Bound Check Exception",
  };
  
  const char *loongarch_exception_name(int32_t exception)

@@ -176,6 +177,7 @@ static void loongarch_cpu_do_interrupt(CPUState *cs)
  case EXCCODE_INE:
  case EXCCODE_IPE:
  case EXCCODE_FPE:
+case EXCCODE_BCE:
  env->CSR_BADV = env->pc;
  QEMU_FALLTHROUGH;
  case EXCCODE_ADEM:
diff --git a/target/loongarch/op_helper.c b/target/loongarch/op_helper.c
index d87049851f..df049cec59 100644
--- a/target/loongarch/op_helper.c
+++ b/target/loongarch/op_helper.c
@@ -49,14 +49,14 @@ target_ulong helper_bitswap(target_ulong v)
  void helper_asrtle_d(CPULoongArchState *env, target_ulong rj, target_ulong rk)
  {
  if (rj > rk) {
-do_raise_exception(env, EXCCODE_ADEM, GETPC());
+do_raise_exception(env, EXCCODE_BCE, 0);
  }
  }
  
  void helper_asrtgt_d(CPULoongArchState *env, target_ulong rj, target_ulong rk)

  {
  if (rj <= rk) {
-do_raise_exception(env, EXCCODE_ADEM, GETPC());
+do_raise_exception(env, EXCCODE_BCE, 0);
  }
  }
  





Re: [PATCH v17 08/13] target/loongarch: Fix missing update CSR_BADV

2022-06-17 Thread Richard Henderson

On 6/16/22 05:16, Song Gao wrote:

loongarch_cpu_do_interrupt() should update CSR_BADV for some EXCCODE.

Signed-off-by: Song Gao
Signed-off-by: Xiaojuan Yang
---
  target/loongarch/cpu.c | 10 ++
  1 file changed, 6 insertions(+), 4 deletions(-)


Reviewed-by: Richard Henderson 

r~



Re: [PATCH v17 07/13] target/loongarch: remove badaddr from CPULoongArch

2022-06-17 Thread Richard Henderson

On 6/16/22 05:16, Song Gao wrote:

We can use CSR_BADV to replace badaddr.

Signed-off-by: Song Gao 
Signed-off-by: Xiaojuan Yang 
---


Reviewed-by: Richard Henderson 


r~


  target/loongarch/cpu.h | 2 --
  target/loongarch/gdbstub.c | 2 +-
  2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/target/loongarch/cpu.h b/target/loongarch/cpu.h
index 71a5036c3c..4b4fbcdc71 100644
--- a/target/loongarch/cpu.h
+++ b/target/loongarch/cpu.h
@@ -246,8 +246,6 @@ typedef struct CPUArchState {
  uint64_t lladdr; /* LL virtual address compared against SC */
  uint64_t llval;
  
-uint64_t badaddr;

-
  /* LoongArch CSRs */
  uint64_t CSR_CRMD;
  uint64_t CSR_PRMD;
diff --git a/target/loongarch/gdbstub.c b/target/loongarch/gdbstub.c
index 0c48834201..24e126fb2d 100644
--- a/target/loongarch/gdbstub.c
+++ b/target/loongarch/gdbstub.c
@@ -21,7 +21,7 @@ int loongarch_cpu_gdb_read_register(CPUState *cs, GByteArray 
*mem_buf, int n)
  } else if (n == 32) {
  return gdb_get_regl(mem_buf, env->pc);
  } else if (n == 33) {
-return gdb_get_regl(mem_buf, env->badaddr);
+return gdb_get_regl(mem_buf, env->CSR_BADV);
  }
  return 0;
  }





Re: [PATCH v17 02/13] linux-user: Add LoongArch signal support

2022-06-17 Thread Richard Henderson

On 6/16/22 05:16, Song Gao wrote:

+static void copy_fpu_to_sigcontext(CPULoongArchState *env,
+   struct extctx_layout *extctx)
+{
+int i;
+struct target_sctx_info *info = (struct target_sctx_info 
*)extctx->fpu.addr;
+struct target_fpu_context *fpu_ctx = get_ctx(info);
+
+for (i = 1; i < 32; ++i) {
+__put_user(env->fpr[i], &fpu_ctx->regs[i]);
+}
+
+fpu_ctx->fcc = read_all_fcc(env);
+__put_user(env->fcsr0, &fpu_ctx->fcsr);
+__put_user(extctx->fpu.size, &info->size);
+}


You've failed to set magic.


+
+static abi_ulong extframe_alloc(struct extctx_layout *extctx,
+struct ctx_layout *layout,
+size_t size, abi_ulong base)
+{
+abi_ulong new_base = base - size;
+
+new_base -= sizeof(struct target_sctx_info);


Missing ROUND_DOWN.


+layout->addr = new_base;
+layout->size = (unsigned int)(base - new_base);


Cast not required.


+static int parse_extcontext(struct target_sigcontext *sc,
+ struct extctx_layout *extctx)
+{
+unsigned int size;
+struct target_sctx_info *info = (struct target_sctx_info *)
+&sc->sc_extcontext;
+
+__get_user(size, &info->size);
+
+if (size < (sizeof(struct target_sctx_info) +
+sizeof(struct target_fpu_context))) {
+return -TARGET_EINVAL;
+}
+
+extctx->fpu.addr = (abi_ulong)info;
+info = (struct target_sctx_info *)((char *)info + size);
+
+return 0;
+}


This does not even closely resemble the kernel's parse_extcontext.
You *really* have to be more careful about this.


r~



[PATCH 1/1] hw: m25p80: fixing individual test failure when tests are running in isolation

2022-06-17 Thread Iris Chen
Signed-off-by: Iris Chen 
---
 tests/qtest/aspeed_smc-test.c | 74 +--
 1 file changed, 63 insertions(+), 11 deletions(-)

diff --git a/tests/qtest/aspeed_smc-test.c b/tests/qtest/aspeed_smc-test.c
index ec233315e6..b1e682db65 100644
--- a/tests/qtest/aspeed_smc-test.c
+++ b/tests/qtest/aspeed_smc-test.c
@@ -135,6 +135,9 @@ static void flash_reset(void)
 spi_ctrl_start_user();
 writeb(ASPEED_FLASH_BASE, RESET_ENABLE);
 writeb(ASPEED_FLASH_BASE, RESET_MEMORY);
+writeb(ASPEED_FLASH_BASE, WREN);
+writeb(ASPEED_FLASH_BASE, BULK_ERASE);
+writeb(ASPEED_FLASH_BASE, WRDI);
 spi_ctrl_stop_user();
 
 spi_conf_remove(CONF_ENABLE_W0);
@@ -195,21 +198,41 @@ static void test_erase_sector(void)
 
 spi_conf(CONF_ENABLE_W0);
 
+/*
+ * Previous page should be full of 0xffs after backend is
+ * initialized
+ */
+read_page(some_page_addr - FLASH_PAGE_SIZE, page);
+for (i = 0; i < FLASH_PAGE_SIZE / 4; i++) {
+g_assert_cmphex(page[i], ==, 0xffffffff);
+}
+
 spi_ctrl_start_user();
-writeb(ASPEED_FLASH_BASE, WREN);
 writeb(ASPEED_FLASH_BASE, EN_4BYTE_ADDR);
-writeb(ASPEED_FLASH_BASE, ERASE_SECTOR);
+writeb(ASPEED_FLASH_BASE, WREN);
+writeb(ASPEED_FLASH_BASE, PP);
 writel(ASPEED_FLASH_BASE, make_be32(some_page_addr));
+
+/* Fill the page with its own addresses */
+for (i = 0; i < FLASH_PAGE_SIZE / 4; i++) {
+writel(ASPEED_FLASH_BASE, make_be32(some_page_addr + i * 4));
+}
 spi_ctrl_stop_user();
 
-/* Previous page should be full of zeroes as backend is not
- * initialized */
-read_page(some_page_addr - FLASH_PAGE_SIZE, page);
+/* Check the page is correctly written */
+read_page(some_page_addr, page);
 for (i = 0; i < FLASH_PAGE_SIZE / 4; i++) {
-g_assert_cmphex(page[i], ==, 0x0);
+g_assert_cmphex(page[i], ==, some_page_addr + i * 4);
 }
 
-/* But this one was erased */
+spi_ctrl_start_user();
+writeb(ASPEED_FLASH_BASE, WREN);
+writeb(ASPEED_FLASH_BASE, EN_4BYTE_ADDR);
+writeb(ASPEED_FLASH_BASE, ERASE_SECTOR);
+writel(ASPEED_FLASH_BASE, make_be32(some_page_addr));
+spi_ctrl_stop_user();
+
+/* Check the page is erased */
 read_page(some_page_addr, page);
 for (i = 0; i < FLASH_PAGE_SIZE / 4; i++) {
 g_assert_cmphex(page[i], ==, 0xffffffff);
@@ -226,11 +249,31 @@ static void test_erase_all(void)
 
 spi_conf(CONF_ENABLE_W0);
 
-/* Check some random page. Should be full of zeroes as backend is
- * not initialized */
+/*
+ * Previous page should be full of 0xffs after backend is
+ * initialized
+ */
+read_page(some_page_addr - FLASH_PAGE_SIZE, page);
+for (i = 0; i < FLASH_PAGE_SIZE / 4; i++) {
+g_assert_cmphex(page[i], ==, 0xffffffff);
+}
+
+spi_ctrl_start_user();
+writeb(ASPEED_FLASH_BASE, EN_4BYTE_ADDR);
+writeb(ASPEED_FLASH_BASE, WREN);
+writeb(ASPEED_FLASH_BASE, PP);
+writel(ASPEED_FLASH_BASE, make_be32(some_page_addr));
+
+/* Fill the page with its own addresses */
+for (i = 0; i < FLASH_PAGE_SIZE / 4; i++) {
+writel(ASPEED_FLASH_BASE, make_be32(some_page_addr + i * 4));
+}
+spi_ctrl_stop_user();
+
+/* Check the page is correctly written */
 read_page(some_page_addr, page);
 for (i = 0; i < FLASH_PAGE_SIZE / 4; i++) {
-g_assert_cmphex(page[i], ==, 0x0);
+g_assert_cmphex(page[i], ==, some_page_addr + i * 4);
 }
 
 spi_ctrl_start_user();
@@ -238,7 +281,7 @@ static void test_erase_all(void)
 writeb(ASPEED_FLASH_BASE, BULK_ERASE);
 spi_ctrl_stop_user();
 
-/* Recheck that some random page */
+/* Check the page is erased */
 read_page(some_page_addr, page);
 for (i = 0; i < FLASH_PAGE_SIZE / 4; i++) {
 g_assert_cmphex(page[i], ==, 0xffffffff);
@@ -299,6 +342,14 @@ static void test_read_page_mem(void)
 spi_conf(CONF_ENABLE_W0);
 spi_ctrl_start_user();
 writeb(ASPEED_FLASH_BASE, EN_4BYTE_ADDR);
+writeb(ASPEED_FLASH_BASE, WREN);
+writeb(ASPEED_FLASH_BASE, PP);
+writel(ASPEED_FLASH_BASE, make_be32(my_page_addr));
+
+/* Fill the page with its own addresses */
+for (i = 0; i < FLASH_PAGE_SIZE / 4; i++) {
+writel(ASPEED_FLASH_BASE, make_be32(my_page_addr + i * 4));
+}
 spi_ctrl_stop_user();
 spi_conf_remove(CONF_ENABLE_W0);
 
@@ -417,6 +468,7 @@ int main(int argc, char **argv)
 qtest_add_func("/ast2400/smc/write_page_mem", test_write_page_mem);
 qtest_add_func("/ast2400/smc/read_status_reg", test_read_status_reg);
 
+flash_reset();
 ret = g_test_run();
 
 qtest_quit(global_qtest);
-- 
2.30.2




[PATCH 0/1] hw: m25p80: fix aspeed_smc tests failure when run in isolation

2022-06-17 Thread Iris Chen
Hey everyone,

I discovered that some of the tests in tests/qtest/aspeed_smc-test.c were
failing when run in isolation due to dependencies between the tests. For
example, one test would test the reading of a block of memory written
in the test before it.

I think it would make sense to add flash_reset() between running the
tests and make sure the tests do not rely on each other. Thus, I have
made changes to the tests so that they now pass individually with
no dependencies on each other.

Thanks,
Iris

Iris Chen (1):
  hw: m25p80: fixing individual test failure when tests are running in
isolation

 tests/qtest/aspeed_smc-test.c | 74 +--
 1 file changed, 63 insertions(+), 11 deletions(-)

--
2.30.2



Re: [PATCH v3 1/2] hw: m25p80: add WP# pin and SRWD bit for write protection

2022-06-17 Thread Dan Zhang
On Fri, Jun 17, 2022 at 03:02:45PM -0700, Iris Chen wrote:
> Signed-off-by: Iris Chen 
> ---
> Thanks everyone for your comments. This is a v3 patch that addresses all 
> suggestions (moving write_enable to decode_new_cmd). 
> I am waiting on some feedback from Dan's (dz4l...@gmail.com) patch
> regarding adding a STATE_STANDBY state. 
The STATE_STANDBY can be used to handle a potential issue in the current code:
the two data bytes of the WRSR instruction will get interpreted as a new
command when the write is aborted because HPM is on. If the data bytes happen
to coincide with a legitimate command, the model will exhibit tricky behavior
when HPM is on.

BRs
Dan
> 
> Currently, all tests are passing. 
>  
>  hw/block/m25p80.c | 77 ++-
>  1 file changed, 62 insertions(+), 15 deletions(-)
> 
> diff --git a/hw/block/m25p80.c b/hw/block/m25p80.c
> index 81ba3da4df..12a59ca57c 100644
> --- a/hw/block/m25p80.c
> +++ b/hw/block/m25p80.c
> @@ -27,12 +27,14 @@
>  #include "hw/qdev-properties.h"
>  #include "hw/qdev-properties-system.h"
>  #include "hw/ssi/ssi.h"
> +#include "hw/irq.h"
>  #include "migration/vmstate.h"
>  #include "qemu/bitops.h"
>  #include "qemu/log.h"
>  #include "qemu/module.h"
>  #include "qemu/error-report.h"
>  #include "qapi/error.h"
> +#include "qapi/visitor.h"
>  #include "trace.h"
>  #include "qom/object.h"
>  
> @@ -472,11 +474,13 @@ struct Flash {
>  uint8_t spansion_cr2v;
>  uint8_t spansion_cr3v;
>  uint8_t spansion_cr4v;
> +bool wp_level;
>  bool write_enable;
>  bool four_bytes_address_mode;
>  bool reset_enable;
>  bool quad_enable;
>  bool aai_enable;
> +bool status_register_write_disabled;
>  uint8_t ear;
>  
>  int64_t dirty_page;
> @@ -723,6 +727,8 @@ static void complete_collecting_data(Flash *s)
>  flash_erase(s, s->cur_addr, s->cmd_in_progress);
>  break;
>  case WRSR:
> +s->status_register_write_disabled = extract32(s->data[0], 7, 1);
> +
>  switch (get_man(s)) {
>  case MAN_SPANSION:
>  s->quad_enable = !!(s->data[1] & 0x02);
> @@ -1165,22 +1171,34 @@ static void decode_new_cmd(Flash *s, uint32_t value)
>  break;
>  
>  case WRSR:
> -if (s->write_enable) {
> -switch (get_man(s)) {
> -case MAN_SPANSION:
> -s->needed_bytes = 2;
> -s->state = STATE_COLLECTING_DATA;
> -break;
> -case MAN_MACRONIX:
> -s->needed_bytes = 2;
> -s->state = STATE_COLLECTING_VAR_LEN_DATA;
> -break;
> -default:
> -s->needed_bytes = 1;
> -s->state = STATE_COLLECTING_DATA;
> -}
> -s->pos = 0;
> +/*
> + * If WP# is low and status_register_write_disabled is high,
> + * status register writes are disabled.
> + * This is also called "hardware protected mode" (HPM). All other
> + * combinations of the two states are called "software protected 
> mode"
> + * (SPM), and status register writes are permitted.
> + */
> +if ((s->wp_level == 0 && s->status_register_write_disabled)
> +|| !s->write_enable) {
> +qemu_log_mask(LOG_GUEST_ERROR,
> +  "M25P80: Status register write is disabled!\n");
> +break;
>  }
> +
> +switch (get_man(s)) {
> +case MAN_SPANSION:
> +s->needed_bytes = 2;
> +s->state = STATE_COLLECTING_DATA;
> +break;
> +case MAN_MACRONIX:
> +s->needed_bytes = 2;
> +s->state = STATE_COLLECTING_VAR_LEN_DATA;
> +break;
> +default:
> +s->needed_bytes = 1;
> +s->state = STATE_COLLECTING_DATA;
> +}
> +s->pos = 0;
>  break;
>  
>  case WRDI:
> @@ -1195,6 +1213,8 @@ static void decode_new_cmd(Flash *s, uint32_t value)
>  
>  case RDSR:
>  s->data[0] = (!!s->write_enable) << 1;
> +s->data[0] |= (!!s->status_register_write_disabled) << 7;
> +
>  if (get_man(s) == MAN_MACRONIX || get_man(s) == MAN_ISSI) {
>  s->data[0] |= (!!s->quad_enable) << 6;
>  }
> @@ -1484,6 +1504,14 @@ static uint32_t m25p80_transfer8(SSIPeripheral *ss, 
> uint32_t tx)
>  return r;
>  }
>  
> +static void m25p80_write_protect_pin_irq_handler(void *opaque, int n, int 
> level)
> +{
> +Flash *s = M25P80(opaque);
> +/* WP# is just a single pin. */
> +assert(n == 0);
> +s->wp_level = !!level;
> +}
> +
>  static void m25p80_realize(SSIPeripheral *ss, Error **errp)
>  {
>  Flash *s = M25P80(ss);
> @@ -1515,12 +1543,18 @@ static void m25p80_realize(SSIPeripheral *ss, Error 
> **errp)
>  s->storage = blk_blockalign(NULL, s->size);
>  memset(s->storage, 0xFF, s->size);
>  }
> +
> +qdev_init_gpio_in_named(DEVICE(s),
> +

[PATCH v3 2/2] hw: m25p80: add tests for write protect (WP# and SRWD bit)

2022-06-17 Thread Iris Chen
---
Fixing suggestions to move testing related code to a different commit.

 tests/qtest/aspeed_smc-test.c | 62 +++
 1 file changed, 62 insertions(+)

diff --git a/tests/qtest/aspeed_smc-test.c b/tests/qtest/aspeed_smc-test.c
index ec233315e6..7786addfb8 100644
--- a/tests/qtest/aspeed_smc-test.c
+++ b/tests/qtest/aspeed_smc-test.c
@@ -56,7 +56,9 @@ enum {
 BULK_ERASE = 0xc7,
 READ = 0x03,
 PP = 0x02,
+WRSR = 0x1,
 WREN = 0x6,
+SRWD = 0x80,
 RESET_ENABLE = 0x66,
 RESET_MEMORY = 0x99,
 EN_4BYTE_ADDR = 0xB7,
@@ -390,6 +392,64 @@ static void test_read_status_reg(void)
 flash_reset();
 }
 
+static void test_status_reg_write_protection(void)
+{
+uint8_t r;
+
+spi_conf(CONF_ENABLE_W0);
+
+/* default case: WP# is high and SRWD is low -> status register writable */
+spi_ctrl_start_user();
+writeb(ASPEED_FLASH_BASE, WREN);
+/* test ability to write SRWD */
+writeb(ASPEED_FLASH_BASE, WRSR);
+writeb(ASPEED_FLASH_BASE, SRWD);
+writeb(ASPEED_FLASH_BASE, RDSR);
+r = readb(ASPEED_FLASH_BASE);
+spi_ctrl_stop_user();
+g_assert_cmphex(r & SRWD, ==, SRWD);
+
+/* WP# high and SRWD high -> status register writable */
+spi_ctrl_start_user();
+writeb(ASPEED_FLASH_BASE, WREN);
+/* test ability to write SRWD */
+writeb(ASPEED_FLASH_BASE, WRSR);
+writeb(ASPEED_FLASH_BASE, 0);
+writeb(ASPEED_FLASH_BASE, RDSR);
+r = readb(ASPEED_FLASH_BASE);
+spi_ctrl_stop_user();
+g_assert_cmphex(r & SRWD, ==, 0);
+
+/* WP# low and SRWD low -> status register writable */
+qtest_set_irq_in(global_qtest,
+ "/machine/soc/fmc/ssi.0/child[0]", "WP#", 0, 0);
+spi_ctrl_start_user();
+writeb(ASPEED_FLASH_BASE, WREN);
+/* test ability to write SRWD */
+writeb(ASPEED_FLASH_BASE, WRSR);
+writeb(ASPEED_FLASH_BASE, SRWD);
+writeb(ASPEED_FLASH_BASE, RDSR);
+r = readb(ASPEED_FLASH_BASE);
+spi_ctrl_stop_user();
+g_assert_cmphex(r & SRWD, ==, SRWD);
+
+/* WP# low and SRWD high -> status register NOT writable */
+spi_ctrl_start_user();
+writeb(ASPEED_FLASH_BASE, WREN);
+/* test ability to write SRWD */
+writeb(ASPEED_FLASH_BASE, WRSR);
+writeb(ASPEED_FLASH_BASE, 0);
+writeb(ASPEED_FLASH_BASE, RDSR);
+r = readb(ASPEED_FLASH_BASE);
+spi_ctrl_stop_user();
+/* write is not successful */
+g_assert_cmphex(r & SRWD, ==, SRWD);
+
+qtest_set_irq_in(global_qtest,
+ "/machine/soc/fmc/ssi.0/child[0]", "WP#", 0, 1);
+flash_reset();
+}
+
 static char tmp_path[] = "/tmp/qtest.m25p80.XXXXXX";
 
 int main(int argc, char **argv)
@@ -416,6 +476,8 @@ int main(int argc, char **argv)
 qtest_add_func("/ast2400/smc/read_page_mem", test_read_page_mem);
 qtest_add_func("/ast2400/smc/write_page_mem", test_write_page_mem);
 qtest_add_func("/ast2400/smc/read_status_reg", test_read_status_reg);
+qtest_add_func("/ast2400/smc/status_reg_write_protection",
+   test_status_reg_write_protection);
 
 ret = g_test_run();
 
-- 
2.30.2




[PATCH v3 1/2] hw: m25p80: add WP# pin and SRWD bit for write protection

2022-06-17 Thread Iris Chen
Signed-off-by: Iris Chen 
---
Thanks everyone for your comments. This is a v3 patch that addresses all 
suggestions (moving write_enable to decode_new_cmd). 
I am waiting on some feedback from Dan's (dz4l...@gmail.com) patch
regarding adding a STATE_STANDBY state. 

Currently, all tests are passing. 
 
 hw/block/m25p80.c | 77 ++-
 1 file changed, 62 insertions(+), 15 deletions(-)

diff --git a/hw/block/m25p80.c b/hw/block/m25p80.c
index 81ba3da4df..12a59ca57c 100644
--- a/hw/block/m25p80.c
+++ b/hw/block/m25p80.c
@@ -27,12 +27,14 @@
 #include "hw/qdev-properties.h"
 #include "hw/qdev-properties-system.h"
 #include "hw/ssi/ssi.h"
+#include "hw/irq.h"
 #include "migration/vmstate.h"
 #include "qemu/bitops.h"
 #include "qemu/log.h"
 #include "qemu/module.h"
 #include "qemu/error-report.h"
 #include "qapi/error.h"
+#include "qapi/visitor.h"
 #include "trace.h"
 #include "qom/object.h"
 
@@ -472,11 +474,13 @@ struct Flash {
 uint8_t spansion_cr2v;
 uint8_t spansion_cr3v;
 uint8_t spansion_cr4v;
+bool wp_level;
 bool write_enable;
 bool four_bytes_address_mode;
 bool reset_enable;
 bool quad_enable;
 bool aai_enable;
+bool status_register_write_disabled;
 uint8_t ear;
 
 int64_t dirty_page;
@@ -723,6 +727,8 @@ static void complete_collecting_data(Flash *s)
 flash_erase(s, s->cur_addr, s->cmd_in_progress);
 break;
 case WRSR:
+s->status_register_write_disabled = extract32(s->data[0], 7, 1);
+
 switch (get_man(s)) {
 case MAN_SPANSION:
 s->quad_enable = !!(s->data[1] & 0x02);
@@ -1165,22 +1171,34 @@ static void decode_new_cmd(Flash *s, uint32_t value)
 break;
 
 case WRSR:
-if (s->write_enable) {
-switch (get_man(s)) {
-case MAN_SPANSION:
-s->needed_bytes = 2;
-s->state = STATE_COLLECTING_DATA;
-break;
-case MAN_MACRONIX:
-s->needed_bytes = 2;
-s->state = STATE_COLLECTING_VAR_LEN_DATA;
-break;
-default:
-s->needed_bytes = 1;
-s->state = STATE_COLLECTING_DATA;
-}
-s->pos = 0;
+/*
+ * If WP# is low and status_register_write_disabled is high,
+ * status register writes are disabled.
+ * This is also called "hardware protected mode" (HPM). All other
+ * combinations of the two states are called "software protected mode"
+ * (SPM), and status register writes are permitted.
+ */
+if ((s->wp_level == 0 && s->status_register_write_disabled)
+|| !s->write_enable) {
+qemu_log_mask(LOG_GUEST_ERROR,
+  "M25P80: Status register write is disabled!\n");
+break;
 }
+
+switch (get_man(s)) {
+case MAN_SPANSION:
+s->needed_bytes = 2;
+s->state = STATE_COLLECTING_DATA;
+break;
+case MAN_MACRONIX:
+s->needed_bytes = 2;
+s->state = STATE_COLLECTING_VAR_LEN_DATA;
+break;
+default:
+s->needed_bytes = 1;
+s->state = STATE_COLLECTING_DATA;
+}
+s->pos = 0;
 break;
 
 case WRDI:
@@ -1195,6 +1213,8 @@ static void decode_new_cmd(Flash *s, uint32_t value)
 
 case RDSR:
 s->data[0] = (!!s->write_enable) << 1;
+s->data[0] |= (!!s->status_register_write_disabled) << 7;
+
 if (get_man(s) == MAN_MACRONIX || get_man(s) == MAN_ISSI) {
 s->data[0] |= (!!s->quad_enable) << 6;
 }
@@ -1484,6 +1504,14 @@ static uint32_t m25p80_transfer8(SSIPeripheral *ss, 
uint32_t tx)
 return r;
 }
 
+static void m25p80_write_protect_pin_irq_handler(void *opaque, int n, int 
level)
+{
+Flash *s = M25P80(opaque);
+/* WP# is just a single pin. */
+assert(n == 0);
+s->wp_level = !!level;
+}
+
 static void m25p80_realize(SSIPeripheral *ss, Error **errp)
 {
 Flash *s = M25P80(ss);
@@ -1515,12 +1543,18 @@ static void m25p80_realize(SSIPeripheral *ss, Error 
**errp)
 s->storage = blk_blockalign(NULL, s->size);
 memset(s->storage, 0xFF, s->size);
 }
+
+qdev_init_gpio_in_named(DEVICE(s),
+m25p80_write_protect_pin_irq_handler, "WP#", 1);
 }
 
 static void m25p80_reset(DeviceState *d)
 {
 Flash *s = M25P80(d);
 
+s->wp_level = true;
+s->status_register_write_disabled = false;
+
 reset_memory(s);
 }
 
@@ -1587,6 +1621,18 @@ static const VMStateDescription 
vmstate_m25p80_aai_enable = {
 }
 };
 
+static const VMStateDescription vmstate_m25p80_write_protect = {
+.name = "m25p80/write_protect",
+.version_id = 1,
+.minimum_version_id = 1,
+.needed = false,
+.fields = (VMStateField[]) {
+VMSTATE_BOOL(wp_level, Flash),
+VMSTATE_BOOL(status_register_write_disabled, 

Re: [PATCH v2 00/11] vfio/migration: Implement VFIO migration protocol v2

2022-06-17 Thread Alex Williamson
On Mon, 13 Jun 2022 14:21:26 +0300
Avihai Horon  wrote:

> On 6/8/2022 12:32 AM, Alex Williamson wrote:
> > External email: Use caution opening links or attachments
> >
> >
> > On Tue, 7 Jun 2022 20:44:23 +0300
> > Avihai Horon  wrote:
> >  
> >> On 5/30/2022 8:07 PM, Avihai Horon wrote:  
> >>> Hello,
> >>>
> >>> Following VFIO migration protocol v2 acceptance in kernel, this series
> >>> implements VFIO migration according to the new v2 protocol and replaces
> >>> the now deprecated v1 implementation.
> >>>
> >>> The main differences between v1 and v2 migration protocols are:
> >>> 1. VFIO device state is represented as a finite state machine instead of
> >>>  a bitmap.
> >>>
> >>> 2. The migration interface with kernel is done using VFIO_DEVICE_FEATURE
> >>>  ioctl and normal read() and write() instead of the migration region
> >>>  used in v1.
> >>>
> >>> 3. Migration protocol v2 currently doesn't support the pre-copy phase of
> >>>  migration.
> >>>
> >>> Full description of the v2 protocol and the differences from v1 can be
> >>> found here [1].
> >>>
> >>> Patches 1-3 are prep patches fixing bugs and adding QEMUFile function
> >>> that will be used later.
> >>>
> >>> Patches 4-6 refactor v1 protocol code to make it easier to add v2
> >>> protocol.
> >>>
> >>> Patches 7-11 implement v2 protocol and remove v1 protocol.
> >>>
> >>> Thanks.
> >>>
> >>> [1]
> >>> https://lore.kernel.org/all/20220224142024.147653-10-yish...@nvidia.com/
> >>>
> >>> Changes from v1: 
> >>> https://lore.kernel.org/all/20220512154320.19697-1-avih...@nvidia.com/
> >>> - Split the big patch that replaced v1 with v2 into several patches as
> >>> suggested by Joao, to make review easier.
> >>> - Change warn_report to warn_report_once when container doesn't support
> >>> dirty tracking.
> >>> - Add Reviewed-by tag.
> >>>
> >>> Avihai Horon (11):
> >>> vfio/migration: Fix NULL pointer dereference bug
> >>> vfio/migration: Skip pre-copy if dirty page tracking is not supported
> >>> migration/qemu-file: Add qemu_file_get_to_fd()
> >>> vfio/common: Change vfio_devices_all_running_and_saving() logic to
> >>>   equivalent one
> >>> vfio/migration: Move migration v1 logic to vfio_migration_init()
> >>> vfio/migration: Rename functions/structs related to v1 protocol
> >>> vfio/migration: Implement VFIO migration protocol v2
> >>> vfio/migration: Remove VFIO migration protocol v1
> >>> vfio/migration: Reset device if setting recover state fails
> >>> vfio: Alphabetize migration section of VFIO trace-events file
> >>> docs/devel: Align vfio-migration docs to VFIO migration v2
> >>>
> >>>docs/devel/vfio-migration.rst |  77 ++--
> >>>hw/vfio/common.c  |  21 +-
> >>>hw/vfio/migration.c   | 640 --
> >>>hw/vfio/trace-events  |  25 +-
> >>>include/hw/vfio/vfio-common.h |   8 +-
> >>>migration/migration.c |   5 +
> >>>migration/migration.h |   3 +
> >>>migration/qemu-file.c |  34 ++
> >>>migration/qemu-file.h |   1 +
> >>>9 files changed, 252 insertions(+), 562 deletions(-)
> >>>  
> >> Ping.  
> > Based on the changelog, this seems like a mostly cosmetic spin and I
> > don't see that all of the discussion threads from v1 were resolved to
> > everyone's satisfaction.  I'm certainly still uncomfortable with the
> > pre-copy behavior and I thought there were still some action items to
> > figure out whether an SLA is present and vet the solution with
> > management tools.  Thanks,  
> 
> Yes.
> OK, so let's clear things up and reach an agreement before I prepare the 
> v3 series.
> 
> There are three topics that came up in previous discussion:
> 
>  1. [PATCH v2 01/11] vfio/migration: Fix NULL pointer dereference bug.
> Juan gave his Reviewed-by but he wasn't sure about qemu_file_* usage
> outside migration thread.
> This code existed before and I fixed a NULL pointer dereference that
> I encountered.
> I suggested that later we can refactor VMChangeStateHandler to
> return error.
> I prefer not to do this refactor right now because I am not sure
> it's as straightforward change as it might seem - if some notifier
> fails and we abort do_vm_stop/vm_prepare_start in the middle, can
> this leave the VM in some unstable state?
> We plan to leave it as is and not do the refactor as part of this
> series.
> Are you ok with this?

I'll defer to Juan here, it's not 100% clear to me from the last reply
if he's looking for that sooner than later.  Juan?
 
>  2. [PATCH v2 02/11] vfio/migration: Skip pre-copy if dirty page
> tracking is not supported.
> As previously discussed, this patch doesn't consider the configured
> downtime limit.
> One way to fix it is to allow such migration only when "no SLA" (no
> downtime limit) is set. AFAIK today there is no way that one can set
> "no 

Re: [PATCH v6 6/8] KVM: Handle page fault for private memory

2022-06-17 Thread Sean Christopherson
On Thu, May 19, 2022, Chao Peng wrote:
> @@ -4028,8 +4081,11 @@ static bool is_page_fault_stale(struct kvm_vcpu *vcpu,
>   if (!sp && kvm_test_request(KVM_REQ_MMU_FREE_OBSOLETE_ROOTS, vcpu))
>   return true;
>  
> - return fault->slot &&
> -mmu_notifier_retry_hva(vcpu->kvm, mmu_seq, fault->hva);
> + if (fault->is_private)
> + return mmu_notifier_retry(vcpu->kvm, mmu_seq);

Hmm, this is somewhat undesirable, because faulting in private pfns will be
blocked by unrelated mmu_notifier updates.  The issue is mitigated to some
degree by bumping the sequence count if and only if overlap with a memslot is
detected, e.g. mapping changes that affect only userspace won't block the
guest.

It probably won't be an issue, but at the same time it's easy to solve, and I
don't like piggybacking mmu_notifier_seq as private mappings shouldn't be
subject to the mmu_notifier.

That would also fix a theoretical bug in this patch where mmu_notifier_retry()
wouldn't be defined if CONFIG_MEMFILE_NOTIFIER=y && CONFIG_MMU_NOTIFIER=n.

---
 arch/x86/kvm/mmu/mmu.c   | 11 ++-
 include/linux/kvm_host.h | 16 +++-
 virt/kvm/kvm_main.c  |  2 +-
 3 files changed, 18 insertions(+), 11 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 0b455c16ec64..a4cbd29433e7 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4100,10 +4100,10 @@ static bool is_page_fault_stale(struct kvm_vcpu *vcpu,
return true;

if (fault->is_private)
-   return mmu_notifier_retry(vcpu->kvm, mmu_seq);
-   else
-   return fault->slot &&
-   mmu_notifier_retry_hva(vcpu->kvm, mmu_seq, fault->hva);
+   return memfile_notifier_retry(vcpu->kvm, mmu_seq);
+
+   return fault->slot &&
+  mmu_notifier_retry_hva(vcpu->kvm, mmu_seq, fault->hva);
 }

 static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault 
*fault)
@@ -4127,7 +4127,8 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, 
struct kvm_page_fault *fault
if (r)
return r;

-   mmu_seq = vcpu->kvm->mmu_notifier_seq;
+   mmu_seq = fault->is_private ? vcpu->kvm->memfile_notifier_seq :
+ vcpu->kvm->mmu_notifier_seq;
smp_rmb();

r = kvm_faultin_pfn(vcpu, fault);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 92afa5bddbc5..31f704c83099 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -773,16 +773,15 @@ struct kvm {
struct hlist_head irq_ack_notifier_list;
 #endif

-#if (defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)) ||\
-   defined(CONFIG_MEMFILE_NOTIFIER)
+#if (defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER))
unsigned long mmu_notifier_seq;
-#endif
-
-#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
struct mmu_notifier mmu_notifier;
long mmu_notifier_count;
unsigned long mmu_notifier_range_start;
unsigned long mmu_notifier_range_end;
+#endif
+#ifdef CONFIG_MEMFILE_NOTIFIER
+   unsigned long memfile_notifier_seq;
 #endif
struct list_head devices;
u64 manual_dirty_log_protect;
@@ -1964,6 +1963,13 @@ static inline int mmu_notifier_retry_hva(struct kvm *kvm,
 }
 #endif

+#ifdef CONFIG_MEMFILE_NOTIFIER
+static inline bool memfile_notifier_retry(struct kvm *kvm, unsigned long 
mmu_seq)
+{
+   return kvm->memfile_notifier_seq != mmu_seq;
+}
+#endif
+
 #ifdef CONFIG_HAVE_KVM_IRQ_ROUTING

 #define KVM_MAX_IRQ_ROUTES 4096 /* might need extension/rework in the future */
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 2b416d3bd60e..e6d34c964d51 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -898,7 +898,7 @@ static void kvm_private_mem_notifier_handler(struct 
memfile_notifier *notifier,
KVM_MMU_LOCK(kvm);
if (kvm_unmap_gfn_range(kvm, &gfn_range))
kvm_flush_remote_tlbs(kvm);
-   kvm->mmu_notifier_seq++;
+   kvm->memfile_notifier_seq++;
KVM_MMU_UNLOCK(kvm);
srcu_read_unlock(&kvm->srcu, idx);
 }

base-commit: 333ef501c7f6c6d4ef2b7678905cad0f8ef3e271
--

> + else
> + return fault->slot &&
> + mmu_notifier_retry_hva(vcpu->kvm, mmu_seq, fault->hva);
>  }
>  
>  static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault 
> *fault)
> @@ -4088,7 +4144,12 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, 
> struct kvm_page_fault *fault
>   read_unlock(&vcpu->kvm->mmu_lock);
>   else
>   write_unlock(&vcpu->kvm->mmu_lock);
> - kvm_release_pfn_clean(fault->pfn);
> +
> + if (fault->is_private)
> + kvm_private_mem_put_pfn(fault->slot, fault->pfn);

Why does the shmem path lock the page, and then unlock it here?

Same question for why this path marks it dirty?  The guest has the 

Re: [PATCH] xen/pass-through: don't create needless register group

2022-06-17 Thread Chuck Zmudzinski

On 6/17/22 9:07 AM, Anthony PERARD wrote:

On Fri, Jun 10, 2022 at 12:23:35PM -0400, Chuck Zmudzinski wrote:

Currently we are creating a register group for the Intel IGD OpRegion
for every device we pass through, but the XEN_PCI_INTEL_OPREGION
register group is only valid for an Intel IGD. Add a check to make
sure the device is an Intel IGD and a check that the administrator has
enabled gfx_passthru in the xl domain configuration. Require both checks
to be true before creating the register group. Use the existing
is_igd_vga_passthrough() function to check for a graphics device from
any vendor and that the administrator enabled gfx_passthru in the xl
domain configuration, but further require that the vendor be Intel,
because only Intel IGD devices have an Intel OpRegion. These are the
same checks hvmloader and libxl do to determine if the Intel OpRegion
needs to be mapped into the guest's memory.

Signed-off-by: Chuck Zmudzinski 
---
  hw/xen/xen_pt_config_init.c | 4 
  1 file changed, 4 insertions(+)

diff --git a/hw/xen/xen_pt_config_init.c b/hw/xen/xen_pt_config_init.c
index c5c4e943a8..ffd915654c 100644
--- a/hw/xen/xen_pt_config_init.c
+++ b/hw/xen/xen_pt_config_init.c
@@ -2037,6 +2037,10 @@ void xen_pt_config_init(XenPCIPassthroughState *s, Error 
**errp)
   * therefore the size should be 0xff.
   */

Could you move that comment? I think it would make more sense to comment
the "reg_grp_offset=XEN_PCI_INTEL_OPREGION" line now that the `if` block
also skips setting up the group on non-Intel devices.


OK. I just e-mailed interested parties v2 that moves the comment
and mentions that the comment is moved in the commit message.

Best Regards,

Chuck




  if (xen_pt_emu_reg_grps[i].grp_id == XEN_PCI_INTEL_OPREGION) {
+if (!is_igd_vga_passthrough(&s->real_device) ||
+s->real_device.vendor_id != PCI_VENDOR_ID_INTEL) {
+continue;
+}
  reg_grp_offset = XEN_PCI_INTEL_OPREGION;
  }

Thanks,






Re: [PATCH v6 4/8] KVM: Extend the memslot to support fd-based private memory

2022-06-17 Thread Sean Christopherson
On Fri, Jun 17, 2022, Sean Christopherson wrote:
> > @@ -110,6 +133,7 @@ struct kvm_userspace_memory_region {
> >   */
> >  #define KVM_MEM_LOG_DIRTY_PAGES(1UL << 0)
> >  #define KVM_MEM_READONLY   (1UL << 1)
> > +#define KVM_MEM_PRIVATE(1UL << 2)
> 
> Hmm, KVM_MEM_PRIVATE is technically wrong now that a "private" memslot maps 
> private
> and/or shared memory.  Strictly speaking, we don't actually need a new flag.  
> Valid
> file descriptors must be >=0, so the logic for specifying a memslot that can 
> be
> converted between private and shared could be that "(int)private_fd < 0" means
> "not convertible", i.e. derive the flag from private_fd.
> 
> And looking at the two KVM consumers of the flag, via kvm_slot_is_private(), 
> they're
> both wrong.  Both kvm_faultin_pfn() and kvm_mmu_max_mapping_level() should 
> operate
> on the _fault_, not the slot.  So it would actually be a positive to not have 
> an easy
> way to query if a slot supports conversion.

I take that back, the usage in kvm_faultin_pfn() is correct, but the name ends
up being confusing because it suggests that it always faults in a private pfn.

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index b6d75016e48c..e1008f00609d 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4045,7 +4045,7 @@ static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct 
kvm_page_fault *fault)
return RET_PF_EMULATE;
}

-   if (fault->is_private) {
+   if (kvm_slot_can_be_private(slot)) {
r = kvm_faultin_pfn_private(vcpu, fault);
if (r != RET_PF_CONTINUE)
return r == RET_PF_FIXED ? RET_PF_CONTINUE : r;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 31f704c83099..c5126190fb71 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -583,9 +583,9 @@ struct kvm_memory_slot {
struct kvm *kvm;
 };

-static inline bool kvm_slot_is_private(const struct kvm_memory_slot *slot)
+static inline bool kvm_slot_can_be_private(const struct kvm_memory_slot *slot)
 {
-   return slot && (slot->flags & KVM_MEM_PRIVATE);
+   return slot && !!slot->private_file;
 }

 static inline bool kvm_slot_dirty_track_enabled(const struct kvm_memory_slot 
*slot)




Re: [PATCH 03/11] ppc/pnv: use dev->parent_bus->parent to get the PHB

2022-06-17 Thread Daniel Henrique Barboza




On 6/14/22 09:10, Cédric Le Goater wrote:

On 6/13/22 17:44, Daniel Henrique Barboza wrote:

It is not advisable to execute an object_dynamic_cast() to poke into
bus->qbus.parent and follow it up with a C cast into the PnvPHB type we
think we got.

A better way is to access the PnvPHB object via a QOM macro accessing
the existing parent links of the DeviceState. For a given
pnv-phb3/4-root-port 'dev', dev->parent_bus will give us the PHB bus,
and dev->parent_bus->parent is the PHB. Use the adequate QOM macro to
assert the type, and keep the NULL check in case we didn't get the
object we were expecting.

Signed-off-by: Daniel Henrique Barboza 
---
  hw/pci-host/pnv_phb3.c | 10 +++---
  hw/pci-host/pnv_phb4.c | 10 +++---
  2 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/hw/pci-host/pnv_phb3.c b/hw/pci-host/pnv_phb3.c
index 4ba660f8b9..7901d8172c 100644
--- a/hw/pci-host/pnv_phb3.c
+++ b/hw/pci-host/pnv_phb3.c
@@ -1139,12 +1139,16 @@ static void pnv_phb3_root_port_realize(DeviceState 
*dev, Error **errp)
  {
  PCIERootPortClass *rpc = PCIE_ROOT_PORT_GET_CLASS(dev);
  PCIDevice *pci = PCI_DEVICE(dev);
-    PCIBus *bus = pci_get_bus(pci);
  PnvPHB3 *phb = NULL;
  Error *local_err = NULL;
-    phb = (PnvPHB3 *) object_dynamic_cast(OBJECT(bus->qbus.parent),
-  TYPE_PNV_PHB3);
+    /*
+ * dev->parent_bus gives access to the pnv-phb-root bus.
+ * The PnvPHB3 is the owner (parent) of the bus.
+ */
+    if (dev && dev->parent_bus) {
+    phb = PNV_PHB3(dev->parent_bus->parent);
+    }



Couldn't we simply use :

   phb = PNV_PHB3(bus);

?


No. This will give us a reference to a pnv-phb3-root object.


Getting a reference to the PHB by using bus->parent happens in other parts of
code, such as:


hw/pci-host/gpex-acpi.c:crs = 
build_crs(PCI_HOST_BRIDGE(BUS(bus)->parent), &crs_range_set,
hw/pci-bridge/pci_expander_bridge.c:main_host = 
PCI_HOST_BRIDGE(pxb_dev_base->parent_bus->parent);


So I believe we're not out of line here.


Thanks,


Daniel




Thanks,

C.


  if (!phb) {
  error_setg(errp,
diff --git a/hw/pci-host/pnv_phb4.c b/hw/pci-host/pnv_phb4.c
index ffd9d8a947..bae9398d86 100644
--- a/hw/pci-host/pnv_phb4.c
+++ b/hw/pci-host/pnv_phb4.c
@@ -1782,12 +1782,16 @@ static void pnv_phb4_root_port_realize(DeviceState 
*dev, Error **errp)
  {
  PCIERootPortClass *rpc = PCIE_ROOT_PORT_GET_CLASS(dev);
  PCIDevice *pci = PCI_DEVICE(dev);
-    PCIBus *bus = pci_get_bus(pci);
  PnvPHB4 *phb = NULL;
  Error *local_err = NULL;
-    phb = (PnvPHB4 *) object_dynamic_cast(OBJECT(bus->qbus.parent),
-  TYPE_PNV_PHB4);
+    /*
+ * dev->parent_bus gives access to the pnv-phb-root bus.
+ * The PnvPHB4 is the owner (parent) of the bus.
+ */
+    if (dev && dev->parent_bus) {
+    phb = PNV_PHB4(dev->parent_bus->parent);
+    }
  if (!phb) {
  error_setg(errp, "%s must be connected to pnv-phb4 buses", dev->id);







Re: [PATCH v6 4/8] KVM: Extend the memslot to support fd-based private memory

2022-06-17 Thread Sean Christopherson
On Thu, May 19, 2022, Chao Peng wrote:
> @@ -653,12 +662,12 @@ struct kvm_irq_routing_table {
>  };
>  #endif
>  
> -#ifndef KVM_PRIVATE_MEM_SLOTS
> -#define KVM_PRIVATE_MEM_SLOTS 0
> +#ifndef KVM_INTERNAL_MEM_SLOTS
> +#define KVM_INTERNAL_MEM_SLOTS 0
>  #endif

This rename belongs in a separate patch.

>  #define KVM_MEM_SLOTS_NUM SHRT_MAX
> -#define KVM_USER_MEM_SLOTS (KVM_MEM_SLOTS_NUM - KVM_PRIVATE_MEM_SLOTS)
> +#define KVM_USER_MEM_SLOTS (KVM_MEM_SLOTS_NUM - KVM_INTERNAL_MEM_SLOTS)
>  
>  #ifndef __KVM_VCPU_MULTIPLE_ADDRESS_SPACE
>  static inline int kvm_arch_vcpu_memslots_id(struct kvm_vcpu *vcpu)
> @@ -1087,9 +1096,9 @@ enum kvm_mr_change {
>  };
>  
>  int kvm_set_memory_region(struct kvm *kvm,
> -   const struct kvm_userspace_memory_region *mem);
> +   const struct kvm_user_mem_region *mem);
>  int __kvm_set_memory_region(struct kvm *kvm,
> - const struct kvm_userspace_memory_region *mem);
> + const struct kvm_user_mem_region *mem);
>  void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot);
>  void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen);
>  int kvm_arch_prepare_memory_region(struct kvm *kvm,
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index e10d131edd80..28cacd3656d4 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -103,6 +103,29 @@ struct kvm_userspace_memory_region {
>   __u64 userspace_addr; /* start of the userspace allocated memory */
>  };
>  
> +struct kvm_userspace_memory_region_ext {
> + struct kvm_userspace_memory_region region;
> + __u64 private_offset;
> + __u32 private_fd;
> + __u32 pad1;
> + __u64 pad2[14];
> +};
> +
> +#ifdef __KERNEL__
> +/* Internal helper, the layout must match above user visible structures */

It's worth explicitly calling out which structures this aliases.  And rather 
than
add a comment about the layout needing to match that, enforce it in code. I
personally wouldn't bother with an explicit comment about the layout, IMO 
that's
a fairly obvious implication of aliasing.

/*
 * kvm_user_mem_region is a kernel-only alias of kvm_userspace_memory_region_ext
 * that "unpacks" kvm_userspace_memory_region so that KVM can directly access
 * all fields from the top-level "extended" region.
 */


And I think it's in this patch that you missed a conversion to the alias, in the
prototype for check_memory_region_flags() (looks like it gets fixed up later in
the series).

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 0f81bf0407be..8765b334477d 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1466,7 +1466,7 @@ static void kvm_replace_memslot(struct kvm *kvm,
}
 }

-static int check_memory_region_flags(const struct kvm_userspace_memory_region 
*mem)
+static int check_memory_region_flags(const struct kvm_user_mem_region *mem)
 {
u32 valid_flags = KVM_MEM_LOG_DIRTY_PAGES;

@@ -4514,6 +4514,33 @@ static int kvm_vm_ioctl_get_stats_fd(struct kvm *kvm)
return fd;
 }

+#define SANITY_CHECK_MEM_REGION_FIELD(field)   
\
+do {   
\
+   BUILD_BUG_ON(offsetof(struct kvm_user_mem_region, field) != 
\
+offsetof(struct kvm_userspace_memory_region, field));  
\
+   BUILD_BUG_ON(sizeof_field(struct kvm_user_mem_region, field) != 
\
+sizeof_field(struct kvm_userspace_memory_region, field));  
\
+} while (0)
+
+#define SANITY_CHECK_MEM_REGION_EXT_FIELD(field)   
\
+do {   
\
+   BUILD_BUG_ON(offsetof(struct kvm_user_mem_region, field) != 
\
+offsetof(struct kvm_userspace_memory_region_ext, field));  
\
+   BUILD_BUG_ON(sizeof_field(struct kvm_user_mem_region, field) != 
\
+sizeof_field(struct kvm_userspace_memory_region_ext, 
field));  \
+} while (0)
+
+static void kvm_sanity_check_user_mem_region_alias(void)
+{
+   SANITY_CHECK_MEM_REGION_FIELD(slot);
+   SANITY_CHECK_MEM_REGION_FIELD(flags);
+   SANITY_CHECK_MEM_REGION_FIELD(guest_phys_addr);
+   SANITY_CHECK_MEM_REGION_FIELD(memory_size);
+   SANITY_CHECK_MEM_REGION_FIELD(userspace_addr);
+   SANITY_CHECK_MEM_REGION_EXT_FIELD(private_offset);
+   SANITY_CHECK_MEM_REGION_EXT_FIELD(private_fd);
+}
+
 static long kvm_vm_ioctl(struct file *filp,
   unsigned int ioctl, unsigned long arg)
 {
@@ -4541,6 +4568,8 @@ static long kvm_vm_ioctl(struct file *filp,
unsigned long size;
u32 flags;

+   kvm_sanity_check_user_mem_region_alias();
+
memset(&mem, 0, sizeof(mem));

r = -EFAULT;

> +struct 

Re: [PATCH v3 0/2] hw/nvme: Add shadow doorbell buffer support

2022-06-17 Thread Keith Busch
On Thu, Jun 16, 2022 at 08:34:06PM +0800, Jinhao Fan wrote:
> This patch adds shadow doorbell buffer support in NVMe 1.3 to QEMU
> NVMe. The Doorbell Buffer Config admin command is implemented for the
> guest to enable shadow doorbell buffer. When this feature is enabled, each
> SQ/CQ is associated with two buffers, i.e., Shadow Doorbell buffer and
> EventIdx buffer. According to the Spec, each queue's doorbell register
> is only updated when the Shadow Doorbell buffer value changes from being
> less than or equal to the value of the corresponding EventIdx buffer
> entry to being greater than that value. Therefore, the number of MMIO's
> on the doorbell registers is greatly reduced.

Looks good to me, and passes my sanity tests.

Reviewed-by: Keith Busch 



Re: [PATCH 03/11] ppc/pnv: use dev->parent_bus->parent to get the PHB

2022-06-17 Thread Daniel Henrique Barboza




On 6/14/22 06:10, Frederic Barrat wrote:



On 13/06/2022 17:44, Daniel Henrique Barboza wrote:

It is not advisable to execute an object_dynamic_cast() to poke into
bus->qbus.parent and follow it up with a C cast into the PnvPHB type we
think we got.

A better way is to access the PnvPHB object via a QOM macro accessing
the existing parent links of the DeviceState. For a given
pnv-phb3/4-root-port 'dev', dev->parent_bus will give us the PHB bus,
and dev->parent_bus->parent is the PHB. Use the adequate QOM macro to
assert the type, and keep the NULL check in case we didn't get the
object we were expecting.

Signed-off-by: Daniel Henrique Barboza 
---
  hw/pci-host/pnv_phb3.c | 10 +++---
  hw/pci-host/pnv_phb4.c | 10 +++---
  2 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/hw/pci-host/pnv_phb3.c b/hw/pci-host/pnv_phb3.c
index 4ba660f8b9..7901d8172c 100644
--- a/hw/pci-host/pnv_phb3.c
+++ b/hw/pci-host/pnv_phb3.c
@@ -1139,12 +1139,16 @@ static void pnv_phb3_root_port_realize(DeviceState 
*dev, Error **errp)
  {
  PCIERootPortClass *rpc = PCIE_ROOT_PORT_GET_CLASS(dev);
  PCIDevice *pci = PCI_DEVICE(dev);
-    PCIBus *bus = pci_get_bus(pci);
  PnvPHB3 *phb = NULL;
  Error *local_err = NULL;
-    phb = (PnvPHB3 *) object_dynamic_cast(OBJECT(bus->qbus.parent),
-  TYPE_PNV_PHB3);
+    /*
+ * dev->parent_bus gives access to the pnv-phb-root bus.
+ * The PnvPHB3 is the owner (parent) of the bus.
+ */
+    if (dev && dev->parent_bus) {
+    phb = PNV_PHB3(dev->parent_bus->parent);
+    }
  if (!phb) {
  error_setg(errp,
diff --git a/hw/pci-host/pnv_phb4.c b/hw/pci-host/pnv_phb4.c
index ffd9d8a947..bae9398d86 100644
--- a/hw/pci-host/pnv_phb4.c
+++ b/hw/pci-host/pnv_phb4.c
@@ -1782,12 +1782,16 @@ static void pnv_phb4_root_port_realize(DeviceState 
*dev, Error **errp)
  {
  PCIERootPortClass *rpc = PCIE_ROOT_PORT_GET_CLASS(dev);
  PCIDevice *pci = PCI_DEVICE(dev);
-    PCIBus *bus = pci_get_bus(pci);
  PnvPHB4 *phb = NULL;
  Error *local_err = NULL;
-    phb = (PnvPHB4 *) object_dynamic_cast(OBJECT(bus->qbus.parent),
-  TYPE_PNV_PHB4);
+    /*
+ * dev->parent_bus gives access to the pnv-phb-root bus.
+ * The PnvPHB4 is the owner (parent) of the bus.
+ */
+    if (dev && dev->parent_bus) {



Does it make sense to test 'dev' first when it's the device being realized?


Hmmm not really. I got overzealous here it seems.

I'll keep just the check for dev->parent_bus in v2.


Thanks,


Daniel



   Fred





+    phb = PNV_PHB4(dev->parent_bus->parent);
+    }
  if (!phb) {
  error_setg(errp, "%s must be connected to pnv-phb4 buses", dev->id);






Re: New "IndustryStandard" fw_cfg?

2022-06-17 Thread Dionna Amalie Glaze
I think the option should be boolean since it doesn't look like we're
going to need to tune the number very much.
It all boils down to "does the OS affirmatively support unaccepted
memory?" as in, we have no way to negotiate it, but force unaccepted
memory on.
OVMF can interpret the existence of an opt/ovmf/unaccepted_memory file
to mean that it's allowed to create unaccepted memory entries in the
memory map.
It's then up to the firmware whether it will minimize its use of unaccepted
memory or not. It's not QEMU's place to say.


> >* accept all memory below 4G
> >* accept all memory
> >
> > Possibly we need:
> >
> >* accept all memory below 4G
> >* accept all memory below 4G, plus x GB of high memory.
> >* accept all memory
> >
> > In any case the config option should be designed in a way that we can
> > add a 'automatic' choice later, i.e. we can have ...
> >
> >* automatic (default)
> >* accept all memory below 4G
> >* accept all memory
> >

I think "false" can mean either accept all memory or "do what you need
to" and negotiate if the memory map boot service can create unaccepted
memory entries. Whichever appears supported.
Then "true" can be "do whatever, including creating unaccepted memory
entries in the memory map".

That seems the simplest way to allow a configuration of this feature.

-- 
-Dionna Glaze, PhD (she/her)



Re: [PATCH v2] target/ppc: cpu_init: Clean up stop state on cpu reset

2022-06-17 Thread Daniel Henrique Barboza

Queued in gitlab.com/danielhb/qemu/tree/ppc-next after adding
Cedric's R-b from v1.


Thanks,

Daniel


On 6/17/22 06:52, Frederic Barrat wrote:

The 'resume_as_sreset' attribute of a cpu is set when a thread is
entering a stop state on ppc books. It causes the thread to be
re-routed to vector 0x100 when woken up by an exception. So it must be
cleared on reset or a thread might be re-routed unexpectedly after a
reset, when it was not in a stop state and/or when the appropriate
exception handler isn't set up yet.

Using skiboot, it can be tested by resetting the system when it is
quiet and most threads are idle and in stop state.

After the reset occurs, skiboot elects a primary thread and all the
others wait in secondary_wait. The primary thread does all the system
initialization from main_cpu_entry() and at some point, the
decrementer interrupt starts ticking. The exception vector for the
decrementer interrupt is in place, so that shouldn't be a
problem. However, if that primary thread was in stop state prior to
the reset, and because the resume_as_sreset parameter is still set,
it is re-routed to exception vector 0x100. Which, at that time, is
still defined as the entry point for BML. So that primary thread
restarts as new and ends up being treated like any other secondary
thread. All threads are now waiting in secondary_wait.

It results in a full system hang with no message on the console, as
the uart hasn't been init'ed yet. It's actually not obvious to realise
what's happening if not tracing reset (-d cpu_reset). The fix is
simply to clear the 'resume_as_sreset' attribute on reset.

Signed-off-by: Frederic Barrat 
---
Changelog:
v2: rework commit message


  target/ppc/cpu_init.c | 3 +++
  1 file changed, 3 insertions(+)

diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
index 0f891afa04..c16cb8dbe7 100644
--- a/target/ppc/cpu_init.c
+++ b/target/ppc/cpu_init.c
@@ -7186,6 +7186,9 @@ static void ppc_cpu_reset(DeviceState *dev)
  }
  pmu_update_summaries(env);
  }
+
+/* clean any pending stop state */
+env->resume_as_sreset = 0;
  #endif
  hreg_compute_hflags(env);
  env->reserve_addr = (target_ulong)-1ULL;




[PATCH v2] xen/pass-through: don't create needless register group

2022-06-17 Thread Chuck Zmudzinski
Currently we are creating a register group for the Intel IGD OpRegion
for every device we pass through, but the XEN_PCI_INTEL_OPREGION
register group is only valid for an Intel IGD. Add a check to make
sure the device is an Intel IGD and a check that the administrator has
enabled gfx_passthru in the xl domain configuration. Require both checks
to be true before creating the register group. Use the existing
is_igd_vga_passthrough() function to check for a graphics device from
any vendor and that the administrator enabled gfx_passthru in the xl
domain configuration, but further require that the vendor be Intel,
because only Intel IGD devices have an Intel OpRegion. These are the
same checks hvmloader and libxl do to determine if the Intel OpRegion
needs to be mapped into the guest's memory. Also, move the comment
about trapping 0xfc for the Intel OpRegion where it belongs after
applying this patch.

Signed-off-by: Chuck Zmudzinski 
---
v2: * Move the comment to an appropriate place after applying this patch
* Mention that the comment is moved in the commit message

v2 addresses the comment by Anthony Perard on the original
version of this patch.

 hw/xen/xen_pt_config_init.c | 14 +-
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/hw/xen/xen_pt_config_init.c b/hw/xen/xen_pt_config_init.c
index c5c4e943a8..cad4aeba84 100644
--- a/hw/xen/xen_pt_config_init.c
+++ b/hw/xen/xen_pt_config_init.c
@@ -2031,12 +2031,16 @@ void xen_pt_config_init(XenPCIPassthroughState *s, 
Error **errp)
 }
 }
 
-/*
- * By default we will trap up to 0x40 in the cfg space.
- * If an intel device is pass through we need to trap 0xfc,
- * therefore the size should be 0xff.
- */
 if (xen_pt_emu_reg_grps[i].grp_id == XEN_PCI_INTEL_OPREGION) {
+if (!is_igd_vga_passthrough(&s->real_device) ||
+s->real_device.vendor_id != PCI_VENDOR_ID_INTEL) {
+continue;
+}
+/*
+ * By default we will trap up to 0x40 in the cfg space.
+ * If an intel device is pass through we need to trap 0xfc,
+ * therefore the size should be 0xff.
+ */
 reg_grp_offset = XEN_PCI_INTEL_OPREGION;
 }
 
-- 
2.36.1




Re: A new qemu acpi test with bios bits

2022-06-17 Thread Ani Sinha
oops, Phil's redhat email bounced!

On Fri, Jun 17, 2022 at 10:41 PM Ani Sinha  wrote:
>
> Hi :
> I am trying to write a new ACPI/smbios test framework that uses bios
> bits (https://biosbits.org/ ).
> This test will effectively:
> (a) use a pre-built bits software zip file kept somewhere (where to
> keep is TBD).
> (b) unzip it.
> (c) add a python based acpi and smbios test script to the unzipped
> bits directory.
> (d) make an iso out of the directory.
> (e) spawn a qemu instance with the iso. The iso is built such that it
> runs the acpi/smbios tests in batch mode. After running the test, the
> spawned instance is killed automatically (vm shutdown).
> (f) Collect the logs the vm has sent out.
> (g) look for test failures.
>
> My question is, what framework would be best to achieve the above? I
> looked around and it seems closest to the avocado based integration
> tests. I do not think it is qtest and qtest backend is not required
> here.
>
> Suggestions welcome.
> Thanks
> ani



A new qemu acpi test with bios bits

2022-06-17 Thread Ani Sinha
Hi :
I am trying to write a new ACPI/smbios test framework that uses bios
bits (https://biosbits.org/ ).
This test will effectively:
(a) use a pre-built bits software zip file kept somewhere (where to
keep is TBD).
(b) unzip it.
(c) add a python based acpi and smbios test script to the unzipped
bits directory.
(d) make an iso out of the directory.
(e) spawn a qemu instance with the iso. The iso is built such that it
runs the acpi/smbios tests in batch mode. After running the test, the
spawned instance is killed automatically (vm shutdown).
(f) Collect the logs the vm has sent out.
(g) look for test failures.

My question is, what framework would be best to achieve the above? I
looked around and it seems closest to the avocado based integration
tests. I do not think it is qtest and qtest backend is not required
here.

Suggestions welcome.
Thanks
ani



Re: [PATCH qemu v2 0/2] ppc/spapr: Implement H_WATCHDOG

2022-06-17 Thread Daniel Henrique Barboza




On 6/17/22 03:07, Alexey Kardashevskiy wrote:

This implements H_WATCHDOG. More detailed comments are in the patches.

This is based on sha1
96c343cc774b Joel Stanley "linux-user: Add PowerPC ISA 3.1 and MMA to hwcap".

Please comment. Thanks.


This version worked with the kernel side patches you mentioned in patch 2/2,
thanks. Also tested migrating the guest with the WDT active and the guest
rebooted in the destination.


Tested-by: Daniel Henrique Barboza 






Alexey Kardashevskiy (2):
   ppc: Define SETFIELD for the ppc target
   ppc/spapr: Implement H_WATCHDOG

  include/hw/pci-host/pnv_phb3_regs.h |  16 --
  include/hw/ppc/spapr.h  |  29 +++-
  target/ppc/cpu.h|   5 +
  hw/intc/pnv_xive.c  |  20 ---
  hw/intc/pnv_xive2.c |  20 ---
  hw/pci-host/pnv_phb4.c  |  16 --
  hw/ppc/spapr.c  |   4 +
  hw/watchdog/spapr_watchdog.c| 248 
  hw/watchdog/meson.build |   1 +
  hw/watchdog/trace-events|   7 +
  10 files changed, 293 insertions(+), 73 deletions(-)
  create mode 100644 hw/watchdog/spapr_watchdog.c





Re: [PATCH qemu v2 1/2] ppc: Define SETFIELD for the ppc target

2022-06-17 Thread Daniel Henrique Barboza




On 6/17/22 03:07, Alexey Kardashevskiy wrote:

It keeps repeating, move it to the header. This uses __builtin_ctzl() to
allow using the macros in #define.

Signed-off-by: Alexey Kardashevskiy 
---


Reviewed-by: Daniel Henrique Barboza 



  include/hw/pci-host/pnv_phb3_regs.h | 16 
  target/ppc/cpu.h|  5 +
  hw/intc/pnv_xive.c  | 20 
  hw/intc/pnv_xive2.c | 20 
  hw/pci-host/pnv_phb4.c  | 16 
  5 files changed, 5 insertions(+), 72 deletions(-)

diff --git a/include/hw/pci-host/pnv_phb3_regs.h 
b/include/hw/pci-host/pnv_phb3_regs.h
index a174ef1f7045..38f8ce9d7406 100644
--- a/include/hw/pci-host/pnv_phb3_regs.h
+++ b/include/hw/pci-host/pnv_phb3_regs.h
@@ -12,22 +12,6 @@
  
  #include "qemu/host-utils.h"
  
-/*

- * QEMU version of the GETFIELD/SETFIELD macros
- *
- * These are common with the PnvXive model.
- */
-static inline uint64_t GETFIELD(uint64_t mask, uint64_t word)
-{
-return (word & mask) >> ctz64(mask);
-}
-
-static inline uint64_t SETFIELD(uint64_t mask, uint64_t word,
-uint64_t value)
-{
-return (word & ~mask) | ((value << ctz64(mask)) & mask);
-}
-
  /*
   * PBCQ XSCOM registers
   */
diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index 6d78078f379d..9a1f1ea3 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -47,6 +47,11 @@
   PPC_BIT32(bs))
  #define PPC_BITMASK8(bs, be)((PPC_BIT8(bs) - PPC_BIT8(be)) | PPC_BIT8(bs))
  
+#define GETFIELD(mask, word)   \

+(((word) & (mask)) >> __builtin_ctzl(mask))
+#define SETFIELD(mask, word, val)   \
+(((word) & ~(mask)) | (((uint64_t)(val) << __builtin_ctzl(mask)) & (mask)))
+
  
/*/
  /* Exception vectors definitions 
*/
  enum {
diff --git a/hw/intc/pnv_xive.c b/hw/intc/pnv_xive.c
index 1ce1d7b07d63..c7b75ed12ee0 100644
--- a/hw/intc/pnv_xive.c
+++ b/hw/intc/pnv_xive.c
@@ -66,26 +66,6 @@ static const XiveVstInfo vst_infos[] = {
  qemu_log_mask(LOG_GUEST_ERROR, "XIVE[%x] - " fmt "\n",  \
(xive)->chip->chip_id, ## __VA_ARGS__);
  
-/*

- * QEMU version of the GETFIELD/SETFIELD macros
- *
- * TODO: It might be better to use the existing extract64() and
- * deposit64() but this means that all the register definitions will
- * change and become incompatible with the ones found in skiboot.
- *
- * Keep it as it is for now until we find a common ground.
- */
-static inline uint64_t GETFIELD(uint64_t mask, uint64_t word)
-{
-return (word & mask) >> ctz64(mask);
-}
-
-static inline uint64_t SETFIELD(uint64_t mask, uint64_t word,
-uint64_t value)
-{
-return (word & ~mask) | ((value << ctz64(mask)) & mask);
-}
-
  /*
   * When PC_TCTXT_CHIPID_OVERRIDE is configured, the PC_TCTXT_CHIPID
   * field overrides the hardwired chip ID in the Powerbus operations
diff --git a/hw/intc/pnv_xive2.c b/hw/intc/pnv_xive2.c
index a39e070e82d2..3fe349749384 100644
--- a/hw/intc/pnv_xive2.c
+++ b/hw/intc/pnv_xive2.c
@@ -75,26 +75,6 @@ static const XiveVstInfo vst_infos[] = {
  qemu_log_mask(LOG_GUEST_ERROR, "XIVE[%x] - " fmt "\n",  \
(xive)->chip->chip_id, ## __VA_ARGS__);
  
-/*

- * QEMU version of the GETFIELD/SETFIELD macros
- *
- * TODO: It might be better to use the existing extract64() and
- * deposit64() but this means that all the register definitions will
- * change and become incompatible with the ones found in skiboot.
- *
- * Keep it as it is for now until we find a common ground.
- */
-static inline uint64_t GETFIELD(uint64_t mask, uint64_t word)
-{
-return (word & mask) >> ctz64(mask);
-}
-
-static inline uint64_t SETFIELD(uint64_t mask, uint64_t word,
-uint64_t value)
-{
-return (word & ~mask) | ((value << ctz64(mask)) & mask);
-}
-
  /*
   * TODO: Document block id override
   */
diff --git a/hw/pci-host/pnv_phb4.c b/hw/pci-host/pnv_phb4.c
index 13ba9e45d8b6..0913e7c8f015 100644
--- a/hw/pci-host/pnv_phb4.c
+++ b/hw/pci-host/pnv_phb4.c
@@ -31,22 +31,6 @@
  qemu_log_mask(LOG_GUEST_ERROR, "phb4_pec[%d:%d]: " fmt "\n",\
(pec)->chip_id, (pec)->index, ## __VA_ARGS__)
  
-/*

- * QEMU version of the GETFIELD/SETFIELD macros
- *
- * These are common with the PnvXive model.
- */
-static inline uint64_t GETFIELD(uint64_t mask, uint64_t word)
-{
-return (word & mask) >> ctz64(mask);
-}
-
-static inline uint64_t SETFIELD(uint64_t mask, uint64_t word,
-uint64_t value)
-{
-return (word & ~mask) | ((value << ctz64(mask)) & mask);
-}
-
  static PCIDevice *pnv_phb4_find_cfg_dev(PnvPHB4 *phb)
  {
  PCIHostState *pci = PCI_HOST_BRIDGE(phb);




Re: [PATCH qemu v2 2/2] ppc/spapr: Implement H_WATCHDOG

2022-06-17 Thread Daniel Henrique Barboza




On 6/17/22 03:07, Alexey Kardashevskiy wrote:

The new PAPR 2.12 defines a watchdog facility managed via the new
H_WATCHDOG hypercall.

This adds H_WATCHDOG support which a proposed driver for pseries uses:
https://patchwork.ozlabs.org/project/linuxppc-dev/list/?series=303120

This was tested by running QEMU with a debug kernel and command line:
-append \
  "pseries-wdt.timeout=60 pseries-wdt.nowayout=1 pseries-wdt.action=2"

and running "echo V > /dev/watchdog0" inside the VM.

Signed-off-by: Alexey Kardashevskiy 
---


LGTM. The watchdogs can be found under /machine/wdtN:

(qemu) info qom-tree /machine/wdt1
/wdt1 (spapr-wdt)
(qemu) info qom-tree /machine/wdt2
/wdt2 (spapr-wdt)
(qemu) info qom-tree /machine/wdt3
/wdt3 (spapr-wdt)
(qemu) info qom-tree /machine/wdt4
/wdt4 (spapr-wdt)


Reviewed-by: Daniel Henrique Barboza 


Changes:
v2:
* QOM'ed timers, "action" and "expire" are available via QMP
* removed @timeout from SpaprWatchdog
* moved the driver to hw/watchdog
* fixed error handling in the hcall handler
* used new SETFIELD/GETFIELD
---
  include/hw/ppc/spapr.h   |  29 +++-
  hw/ppc/spapr.c   |   4 +
  hw/watchdog/spapr_watchdog.c | 248 +++
  hw/watchdog/meson.build  |   1 +
  hw/watchdog/trace-events |   7 +
  5 files changed, 288 insertions(+), 1 deletion(-)
  create mode 100644 hw/watchdog/spapr_watchdog.c

diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 072dda2c7265..ef1e38abd5c7 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -164,6 +164,25 @@ struct SpaprMachineClass {
  SpaprIrq *irq;
  };
  
+#define WDT_MAX_WATCHDOGS   4  /* Maximum number of watchdog devices */

+
+#define WDT_HARD_POWER_OFF  0
+#define WDT_HARD_RESTART1
+#define WDT_DUMP_RESTART2
+
+#define TYPE_SPAPR_WDT "spapr-wdt"
+OBJECT_DECLARE_SIMPLE_TYPE(SpaprWatchdog, SPAPR_WDT)
+
+typedef struct SpaprWatchdog {
+/*< private >*/
+DeviceState parent_obj;
+/*< public >*/
+
+unsigned num;
+QEMUTimer timer;
+uint8_t action;
+} SpaprWatchdog;
+
  /**
   * SpaprMachineState:
   */
@@ -264,6 +283,8 @@ struct SpaprMachineState {
  uint32_t FORM2_assoc_array[NUMA_NODES_MAX_NUM][FORM2_NUMA_ASSOC_SIZE];
  
  Error *fwnmi_migration_blocker;

+
+SpaprWatchdog wds[WDT_MAX_WATCHDOGS];
  };
  
  #define H_SUCCESS 0

@@ -344,6 +365,7 @@ struct SpaprMachineState {
  #define H_P7  -60
  #define H_P8  -61
  #define H_P9  -62
+#define H_NOOP-63
  #define H_UNSUPPORTED -67
  #define H_OVERLAP -68
  #define H_UNSUPPORTED_FLAG -256
@@ -564,8 +586,9 @@ struct SpaprMachineState {
  #define H_SCM_HEALTH0x400
  #define H_RPT_INVALIDATE0x448
  #define H_SCM_FLUSH 0x44C
+#define H_WATCHDOG  0x45C
  
-#define MAX_HCALL_OPCODEH_SCM_FLUSH

+#define MAX_HCALL_OPCODEH_WATCHDOG
  
  /* The hcalls above are standardized in PAPR and implemented by pHyp

   * as well.
@@ -1027,6 +1050,7 @@ extern const VMStateDescription 
vmstate_spapr_cap_large_decr;
  extern const VMStateDescription vmstate_spapr_cap_ccf_assist;
  extern const VMStateDescription vmstate_spapr_cap_fwnmi;
  extern const VMStateDescription vmstate_spapr_cap_rpt_invalidate;
+extern const VMStateDescription vmstate_spapr_wdt;
  
  static inline uint8_t spapr_get_cap(SpaprMachineState *spapr, int cap)

  {
@@ -1063,4 +1087,7 @@ target_ulong 
spapr_vof_client_architecture_support(MachineState *ms,
 target_ulong ovec_addr);
  void spapr_vof_client_dt_finalize(SpaprMachineState *spapr, void *fdt);
  
+/* H_WATCHDOG */

+void spapr_watchdog_init(SpaprMachineState *spapr);
+
  #endif /* HW_SPAPR_H */
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index fd4942e8813c..9a5382d5270f 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -898,6 +898,8 @@ static void spapr_dt_rtas(SpaprMachineState *spapr, void 
*fdt)
  add_str(hypertas, "hcall-hpt-resize");
  }
  
+add_str(hypertas, "hcall-watchdog");

+
  _FDT(fdt_setprop(fdt, rtas, "ibm,hypertas-functions",
   hypertas->str, hypertas->len));
  g_string_free(hypertas, TRUE);
@@ -3051,6 +3053,8 @@ static void spapr_machine_init(MachineState *machine)
  spapr->vof->fw_size = fw_size; /* for claim() on itself */
  spapr_register_hypercall(KVMPPC_H_VOF_CLIENT, spapr_h_vof_client);
  }
+
+spapr_watchdog_init(spapr);
  }
  
  #define DEFAULT_KVM_TYPE "auto"

diff --git a/hw/watchdog/spapr_watchdog.c b/hw/watchdog/spapr_watchdog.c
new file mode 100644
index ..aeaf7c52cbad
--- /dev/null
+++ b/hw/watchdog/spapr_watchdog.c
@@ -0,0 +1,248 @@
+/*
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the 

Re: [PATCH v5 4/5] i386/pc: relocate 4g start to 1T where applicable

2022-06-17 Thread Joao Martins
On 6/17/22 13:18, Joao Martins wrote:
> On 6/16/22 15:23, Igor Mammedov wrote:
>> On Fri, 20 May 2022 11:45:31 +0100
>> Joao Martins  wrote:
>>> +}
>>> +
>>> +if (pcmc->has_reserved_memory &&
>>> +   (machine->ram_size < machine->maxram_size)) {
>>> +device_mem_size = machine->maxram_size - machine->ram_size;
>>> +}
>>> +
>>> +base = ROUND_UP(above_4g_mem_start + x86ms->above_4g_mem_size +
>>> +pcms->sgx_epc.size, 1 * GiB);
>>> +
>>> +return base + device_mem_size + pci_hole64_size;
>>
>> it's not guaranteed that the pci64 hole starts right after device_mem,
>> but you are not the 1st to make this assumption in code, maybe instead of
>> all above use existing 
>>pc_pci_hole64_start() + pci_hole64_size
>> to guesstimate max address 
>>
> I've switched the block above to that instead.
> 

I had done this, but on a second look (and confirmed with testing) this
will crash, given that @device_memory isn't yet initialized at that point.
And even without hotplug, CXL might have had issues.

The problem is largely that pc_pci_hole64_start(), which the above check
relies on, uses info we only populate later on in pc_memory_init(), and I
don't think I can move this down to a later point as I definitely don't want
to re-initialize MRs or anything.

So we might be left with manually calculating as I was doing in this patch
but maybe try to arrange some form of new helper that has somewhat shared
logic with pc_pci_hole64_start().

  1114  uint64_t pc_pci_hole64_start(void)
  1115  {
  1116  PCMachineState *pcms = PC_MACHINE(qdev_get_machine());
  1117  PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
  1118  MachineState *ms = MACHINE(pcms);
  1119  X86MachineState *x86ms = X86_MACHINE(pcms);
  1120  uint64_t hole64_start = 0;
  1121
  1122  if (pcms->cxl_devices_state.host_mr.addr) {
  1123  hole64_start = pcms->cxl_devices_state.host_mr.addr +
  1124  memory_region_size(&pcms->cxl_devices_state.host_mr);
  1125  if (pcms->cxl_devices_state.fixed_windows) {
  1126  GList *it;
  1127  for (it = pcms->cxl_devices_state.fixed_windows; it; it = 
it->next) {
  1128  CXLFixedWindow *fw = it->data;
  1129  hole64_start = fw->mr.addr + 
memory_region_size(&fw->mr);
  1130  }
  1131  }
* 1132  } else if (pcmc->has_reserved_memory && ms->device_memory->base) {
  1133  hole64_start = ms->device_memory->base;
  1134  if (!pcmc->broken_reserved_end) {
  1135  hole64_start += memory_region_size(&ms->device_memory->mr);
  1136  }
  1137  } else if (pcms->sgx_epc.size != 0) {
  1138  hole64_start = sgx_epc_above_4g_end(&pcms->sgx_epc);
  1139  } else {
  1140  hole64_start = x86ms->above_4g_mem_start + 
x86ms->above_4g_mem_size;
  1141  }




Re: [PATCH v2 00/10] Improve reliability of VM tests

2022-06-17 Thread John Snow
On Thu, Jun 16, 2022 at 10:27 AM John Snow  wrote:
>
> This patch series attempts to improve the reliability of several of the
> VM test targets. In particular, both CentOS 8 tests are non-functional
> because CentOS 8 was EOL at the beginning of this calendar year, with
> repositories and mirrors going offline.
>
> Notably, I also remove the ubuntu.i386 test because we no longer support
> Ubuntu 18.04 nor do we have explicit need of an i386 build test.
>
> After this series, I am able to successfully run every VM target on an
> x86_64 host, except:
>
> - ubuntu.aarch64: Hangs often during testing, see below.
> - centos.aarch64: Hangs often during testing, see below.

I've since gotten a clean run on centos.aarch64 for the first time in
a long time by upping the memory allocation to be 1G per job; so my 12
thread, 6 core intel laptop used 6GB to complete this test
successfully. With that not having a hard blocker on it anymore, I'll
work on upgrading the ubuntu container.

> - haiku.x86_64: Build failures not addressed by this series, see
>   https://lists.gnu.org/archive/html/qemu-devel/2022-06/msg02103.html
>
> The unit tests that I see fail most often are:
>
> - qom-test: ENOMEM with default config and many cores
> - test-hmp: ENOMEM with default config and many cores
> - virtio-net-failover: Seems to like to hang on openbsd
> - migration-test: Tends to hang under aarch64 tcg
>
> Increasing the default memory (patch 10) has seemingly helped both
> reliability *and* reduced race conditions quite a bit, both on my
> 12-thread intel laptop and on my 32-thread AMD 5950x desktop.
>
> Due to how long it takes to run these tests, though, I can't claim
> perfect reliability. The flakiness of virtio-net-failover and
> migration-test is something that probably still needs to be addressed,
> but it's outside of my expertise and time budget at present to pursue
> it.
>
> Future work (next version? next series?);
>
> - Try to get centos.aarch64 working reliably under TCG
> - Upgrade ubuntu.aarch64 to 20.04 after fixing centos.aarch64
> - Fix the Haiku build test, if possible.
> - Ensure I can reliably run and pass "make vm-build-all".
>   (Remove VMs from this recipe if necessary.)
>
> John Snow (10):
>   tests/qemu-iotests: hotfix for 307, 223 output
>   tests/qemu-iotests: skip 108 when FUSE is not loaded
>   qga: treat get-guest-fsinfo as "best effort"
>   tests/vm: use 'cp' instead of 'ln' for temporary vm images
>   tests/vm: switch CentOS 8 to CentOS 8 Stream
>   tests/vm: switch centos.aarch64 to CentOS 8 Stream
>   tests/vm: update sha256sum for ubuntu.aarch64
>   tests/vm: remove ubuntu.i386 VM test
>   tests/vm: remove duplicate 'centos' VM test
>   tests/vm: add 512MB extra memory per core
>
>  qga/commands-posix.c   |   8 +-
>  tests/qemu-iotests/108 |   5 ++
>  tests/qemu-iotests/223.out |   4 +-
>  tests/qemu-iotests/307.out |   4 +-
>  tests/vm/Makefile.include  |   5 +-
>  tests/vm/basevm.py |   6 ++
>  tests/vm/centos|   8 +-
>  tests/vm/centos.aarch64| 174 +
>  tests/vm/ubuntu.aarch64|   8 +-
>  tests/vm/ubuntu.i386   |  40 -
>  10 files changed, 58 insertions(+), 204 deletions(-)
>  delete mode 100755 tests/vm/ubuntu.i386
>
> --
> 2.34.3
>
>




Re: [PATCH] hmat acpi: Don't require initiator value in -numa when hmat=on

2022-06-17 Thread Brice Goglin

Hello

Could somebody please apply (or reject) this commit?

Thanks

Brice


Le 06/04/2022 à 14:29, Brice Goglin a écrit :

From: Brice Goglin

The "Memory Proximity Domain Attributes" structure of the ACPI HMAT
has a "Processor Proximity Domain Valid" flag that is currently
always set because Qemu -numa requires initiator=X when hmat=on.

Unsetting this flag allows to create more complex memory topologies
by having multiple best initiators for a single memory target.

This patch allows -numa without initiator=X when hmat=on by keeping
the default value MAX_NODES in numa_state->nodes[i].initiator.
All places reading numa_state->nodes[i].initiator already check
whether it's different from MAX_NODES before using it. And
hmat_build_table_structs() already unsets the Valid flag when needed.

Tested with
qemu-system-x86_64 -accel kvm \
  -machine pc,hmat=on \
  -drive if=pflash,format=raw,file=./OVMF.fd \
  -drive media=disk,format=qcow2,file=efi.qcow2 \
  -smp 4 \
  -m 3G \
  -object memory-backend-ram,size=1G,id=ram0 \
  -object memory-backend-ram,size=1G,id=ram1 \
  -object memory-backend-ram,size=1G,id=ram2 \
  -numa node,nodeid=0,memdev=ram0,cpus=0-1 \
  -numa node,nodeid=1,memdev=ram1,cpus=2-3 \
  -numa node,nodeid=2,memdev=ram2 \
  -numa 
hmat-lb,initiator=0,target=0,hierarchy=memory,data-type=access-latency,latency=10
 \
  -numa 
hmat-lb,initiator=0,target=0,hierarchy=memory,data-type=access-bandwidth,bandwidth=10485760
 \
  -numa 
hmat-lb,initiator=0,target=1,hierarchy=memory,data-type=access-latency,latency=20
 \
  -numa 
hmat-lb,initiator=0,target=1,hierarchy=memory,data-type=access-bandwidth,bandwidth=5242880
 \
  -numa 
hmat-lb,initiator=0,target=2,hierarchy=memory,data-type=access-latency,latency=30
 \
  -numa 
hmat-lb,initiator=0,target=2,hierarchy=memory,data-type=access-bandwidth,bandwidth=1048576
 \
  -numa 
hmat-lb,initiator=1,target=0,hierarchy=memory,data-type=access-latency,latency=20
 \
  -numa 
hmat-lb,initiator=1,target=0,hierarchy=memory,data-type=access-bandwidth,bandwidth=5242880
 \
  -numa 
hmat-lb,initiator=1,target=1,hierarchy=memory,data-type=access-latency,latency=10
 \
  -numa 
hmat-lb,initiator=1,target=1,hierarchy=memory,data-type=access-bandwidth,bandwidth=10485760
 \
  -numa 
hmat-lb,initiator=1,target=2,hierarchy=memory,data-type=access-latency,latency=30
 \
  -numa 
hmat-lb,initiator=1,target=2,hierarchy=memory,data-type=access-bandwidth,bandwidth=1048576
 \

This exposes NUMA node2 at same distance from both node0 and node1 as seen in 
lstopo:

Machine (2966MB total) + Package P#0
   NUMANode P#2 (979MB)
   Group0
 NUMANode P#0 (980MB)
 Core P#0 + PU P#0
 Core P#1 + PU P#1
   Group0
 NUMANode P#1 (1007MB)
 Core P#2 + PU P#2
 Core P#3 + PU P#3

Signed-off-by: Brice Goglin
---
  hw/core/machine.c | 4 +---
  1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/hw/core/machine.c b/hw/core/machine.c
index d856485cb4..9884ef7ac6 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -1012,9 +1012,7 @@ static void numa_validate_initiator(NumaState *numa_state)
  
  for (i = 0; i < numa_state->num_nodes; i++) {

  if (numa_info[i].initiator == MAX_NODES) {
-error_report("The initiator of NUMA node %d is missing, use "
- "'-numa node,initiator' option to declare it", i);
-exit(1);
+continue;
  }
  
  if (!numa_info[numa_info[i].initiator].present) {

--
2.30.2






OpenPGP_signature
Description: OpenPGP digital signature


Re: [RFC PATCH v2 3/8] qapi: golang: Generate qapi's struct types in Go

2022-06-17 Thread Victor Toso
Hi,

On Fri, Jun 17, 2022 at 03:41:10PM +0100, Daniel P. Berrangé wrote:
> On Fri, Jun 17, 2022 at 02:19:27PM +0200, Victor Toso wrote:
> > This patch handles QAPI struct types and generates the equivalent
> > types in Go.
> > 
> > At the time of this writing, it generates 388 structures.
> > 
> > The highlights of this implementation are:
> > 
> > 1. Generating a Go struct that requires a @base type, the @base type
> >fields are copied over to the Go struct. The advantage of this
> >approach is to not have embed structs in any of the QAPI types.
> >The downside are some generated Types that are likely useless now,
> >like InetSocketAddressBase from InetSocketAddress.
> > 
> > 2. About the Go struct's fields:
> > 
> >   i) They can be either by Value or Reference.
> > 
> >   ii) Every field that is marked as optional in the QAPI specification
> >   are translated to Reference fields in its Go structure. This design
> >   decision is the most straightforward way to check if a given field
> >   was set or not.
> > 
> >   iii) Mandatory fields are always by Value with the exception of QAPI
> >   arrays, which are handled by Reference (to a block of memory) by Go.
> > 
> >   iv) All the fields are named with Uppercase due Golang's export
> >   convention.
> > 
> >   v) In order to avoid any kind of issues when encoding or decoding, to
> >   or from JSON, we mark all fields with their @name and, when a member
> >   is optional, with @omitempty
> > 
> > Example:
> > 
> > qapi:
> >   | { 'struct': 'BlockdevCreateOptionsFile',
> >   |   'data': { 'filename': 'str',
> >   | 'size': 'size',
> >   | '*preallocation':   'PreallocMode',
> >   | '*nocow':   'bool',
> >   | '*extent-size-hint':'size'} }
> > 
> > go:
> >   | type BlockdevCreateOptionsFile struct {
> >   | Filename   string`json:"filename"`
> >   | Size   uint64`json:"size"`
> >   | Preallocation  *PreallocMode `json:"preallocation,omitempty"`
> >   | Nocow  *bool `json:"nocow,omitempty"`
> >   | ExtentSizeHint *uint64   `json:"extent-size-hint,omitempty"`
> >   | }
> 
> One thing to bear in mind here
> 
> At the QAPI level, changing a field from mandatory to optional has
> been considered a backwards compatible change by QEMU maintainers,
> because any existing caller can happily continue passing the
> optional field with no downside.
> 
> With this Go design, changing a field from mandatory to optional
> will be an API breakage, because the developer will need to change
> from passing a literal value, to a pointer to the value, when
> initializing the struct.
> 
> IOW, this Go impl provides weaker compat guarantees than even
> QAPI does, and QAPI compat guarantees were already weaker than
> I would like as an app developer.

I think the current draft should be considered an interface that
can work with the QEMU version this was generated from. That is
the first thing we should get right.

> If we want to make ourselves future proof, we would have to
> make all struct fields optional from the start, even if they
> are mandatory at QAPI level. This would make the code less
> self-documenting though, so that's not very appealing either.
 
> If we want to avoid this, we would need the same approach I
> suggested wrt support multiple versions of the API
> concurrently. Namely have versioned structs, so every time
> there's a field change of any kind, we introduce a new struct
> version.

That's more or less what I had in mind. I mentioned it in
item 8 of the cover letter. I just did not want to address it
before deciding what the structs should look like first, for the
version we are generating from.

Just to clarify, so far I plan to follow the suggestion:
https://lists.gnu.org/archive/html/qemu-devel/2022-05/msg02147.html

Of course, if there are other ideas, we can discuss them too.

Cheers,
Victor


signature.asc
Description: PGP signature


Re: [PATCH v2] target/ppc: cpu_init: Clean up stop state on cpu reset

2022-06-17 Thread Fabiano Rosas
Frederic Barrat  writes:

> The 'resume_as_sreset' attribute of a cpu is set when a thread is
> entering a stop state on ppc books. It causes the thread to be
> re-routed to vector 0x100 when woken up by an exception. So it must be
> cleared on reset or a thread might be re-routed unexpectedly after a
> reset, when it was not in a stop state and/or when the appropriate
> exception handler isn't set up yet.
>
> Using skiboot, it can be tested by resetting the system when it is
> quiet and most threads are idle and in stop state.
>
> After the reset occurs, skiboot elects a primary thread and all the
> others wait in secondary_wait. The primary thread does all the system
> initialization from main_cpu_entry() and at some point, the
> decrementer interrupt starts ticking. The exception vector for the
> decrementer interrupt is in place, so that shouldn't be a
> problem. However, if that primary thread was in stop state prior to
> the reset, and because the resume_as_sreset parameter is still set,
> it is re-routed to exception vector 0x100. Which, at that time, is
> still defined as the entry point for BML. So that primary thread
> restarts as new and ends up being treated like any other secondary
> thread. All threads are now waiting in secondary_wait.
>
> It results in a full system hang with no message on the console, as
> the uart hasn't been init'ed yet. It's actually not obvious to realise
> what's happening if not tracing reset (-d cpu_reset). The fix is
> simply to clear the 'resume_as_sreset' attribute on reset.
>
> Signed-off-by: Frederic Barrat 

Reviewed-by: Fabiano Rosas 



[PATCH 2/5] cpus-common: Add run_on_cpu2()

2022-06-17 Thread Peter Xu
This version of run_on_cpu() allows to take an Error** to detect errors.

Signed-off-by: Peter Xu 
---
 cpus-common.c | 27 +++
 include/hw/core/cpu.h | 26 ++
 softmmu/cpus.c|  6 ++
 3 files changed, 59 insertions(+)

diff --git a/cpus-common.c b/cpus-common.c
index 1db788..1d67c0c655 100644
--- a/cpus-common.c
+++ b/cpus-common.c
@@ -167,6 +167,33 @@ void do_run_on_cpu(CPUState *cpu, run_on_cpu_func func, 
run_on_cpu_data data,
 }
 }
 
+void do_run_on_cpu2(CPUState *cpu, run_on_cpu_func2 func2, run_on_cpu_data 
data,
+QemuMutex *mutex, Error **errp)
+{
+struct qemu_work_item wi;
+
+if (qemu_cpu_is_self(cpu)) {
+func2(cpu, data, errp);
+return;
+}
+
+wi.func2 = func2;
+wi.data = data;
+wi.done = false;
+wi.free = false;
+wi.exclusive = false;
+wi.has_errp = true;
+wi.errp = errp;
+
+queue_work_on_cpu(cpu, &wi);
+while (!qatomic_mb_read(&wi.done)) {
+CPUState *self_cpu = current_cpu;
+
+qemu_cond_wait(&qemu_work_cond, mutex);
+current_cpu = self_cpu;
+}
+}
+
 void async_run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data 
data)
 {
 struct qemu_work_item *wi;
diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
index 7a303576d0..4bb40a03cf 100644
--- a/include/hw/core/cpu.h
+++ b/include/hw/core/cpu.h
@@ -709,6 +709,19 @@ bool cpu_is_stopped(CPUState *cpu);
 void do_run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data,
QemuMutex *mutex);
 
+/**
+ * do_run_on_cpu2:
+ * @cpu: The vCPU to run on.
+ * @func2: The function to be executed.
+ * @data: Data to pass to the function.
+ * @mutex: Mutex to release while waiting for @func2 to run.
+ * @errp: The Error** pointer to be passed into @func2.
+ *
+ * Used internally in the implementation of run_on_cpu2.
+ */
+void do_run_on_cpu2(CPUState *cpu, run_on_cpu_func2 func2, run_on_cpu_data 
data,
+QemuMutex *mutex, Error **errp);
+
 /**
  * run_on_cpu:
  * @cpu: The vCPU to run on.
@@ -719,6 +732,19 @@ void do_run_on_cpu(CPUState *cpu, run_on_cpu_func func, 
run_on_cpu_data data,
  */
 void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data);
 
+/**
+ * run_on_cpu2:
+ * @cpu: The vCPU to run on.
+ * @func2: The function to be executed.
+ * @data: Data to pass to the function.
+ * @errp: The Error** pointer to be passed into @func2.
+ *
+ * Schedules the function @func2 for execution on the vCPU @cpu, capture
+ * any error and put it into *@errp when provided.
+ */
+void run_on_cpu2(CPUState *cpu, run_on_cpu_func2 func2, run_on_cpu_data data,
+ Error **errp);
+
 /**
  * async_run_on_cpu:
  * @cpu: The vCPU to run on.
diff --git a/softmmu/cpus.c b/softmmu/cpus.c
index 23b30484b2..898363a1d0 100644
--- a/softmmu/cpus.c
+++ b/softmmu/cpus.c
@@ -391,6 +391,12 @@ void run_on_cpu(CPUState *cpu, run_on_cpu_func func, 
run_on_cpu_data data)
 do_run_on_cpu(cpu, func, data, &qemu_global_mutex);
 }
 
+void run_on_cpu2(CPUState *cpu, run_on_cpu_func2 func2, run_on_cpu_data data,
+ Error **errp)
+{
+do_run_on_cpu2(cpu, func2, data, &qemu_global_mutex, errp);
+}
+
 static void qemu_cpu_stop(CPUState *cpu, bool exit)
 {
 g_assert(qemu_cpu_is_self(cpu));
-- 
2.32.0




[PATCH 0/5] CPU: Detect put cpu register errors for migrations

2022-06-17 Thread Peter Xu
rfc->v1:
- Rebase to master, drop RFC tag.

This series teaches QEMU to detect errors when e.g. putting registers from
QEMU to KVM, and fail migrations properly.

For the rationale of this series and why it was posted, please refer to the
bug report here:

https://lore.kernel.org/all/YppVupW+IWsm7Osr@xz-m1.local/

But I'd rather not go into that if the reviewer doesn't have that context,
because we don't really need that complexity.  It can be as simple as: we
should fail the migration early when we already see issues happening, so:

  1) We fail explicitly, rather than afterward with some weird guest
 errors.  In my bug report, it was a guest double fault.  There's
 another bug report that Sean mentioned in the thread from Mike Tancsa
     that can have other symptoms than a double fault, but anyway
 they'll be hard to diagnose since the processor state can be corrupted
 (please refer to kvm_arch_put_registers() where we stop putting more
 registers to KVM when we see any error).

  2) For precopy, with this early failure the VM won't crash itself since
 we still have a chance to keep running it on src host, while if
 without this patch we will fail later, and it can crash the VM.

In this specific case, when KVM_SET_XSAVE ioctl failed on dest host before
start running the VM there, we should fail the migration already.

After the patchset applied, the above "double fault" issue will become
migration failures, and...

For precopy, we can see some error dumped for precopy on dest, then the VM
will be kept running on src host:

2022-06-07T22:48:48.804234Z qemu-system-x86_64: kvm_arch_put_registers() failed 
with retval=-22
2022-06-07T22:48:48.804588Z qemu-system-x86_64: load of migration failed: 
Invalid argument

For postcopy, currently we'll pause the VM immediately for admin to decide
what to do:

2022-06-07T22:47:49.448192Z qemu-system-x86_64: kvm_arch_put_registers() failed 
with retval=-22
13072@1654642069.518993:runstate_set current_run_state 1 (inmigrate) new_state 
4 (paused)

If something like this series is welcomed, we could do better in the future
by telling the src host about this issue and keep running, because
put-register happens right at the switch-over, so we actually have this
chance (no dirty page on dest host yet).

Comments welcomed.  Thanks,

Peter Xu (5):
  cpus-common: Introduce run_on_cpu_func2 which allows error returns
  cpus-common: Add run_on_cpu2()
  accel: Allow synchronize_post_init() to take an Error**
  cpu: Allow cpu_synchronize_all_post_init() to take an errp
  KVM: Hook kvm_arch_put_registers() errors to the caller

 accel/hvf/hvf-accel-ops.c |  2 +-
 accel/kvm/kvm-all.c   | 15 +++---
 accel/kvm/kvm-cpus.h  |  2 +-
 cpus-common.c | 55 +--
 hw/core/machine.c |  2 +-
 include/hw/core/cpu.h | 28 ++
 include/sysemu/accel-ops.h|  2 +-
 include/sysemu/cpus.h |  2 +-
 include/sysemu/hw_accel.h |  1 +
 migration/savevm.c| 20 +++--
 softmmu/cpus.c| 23 ---
 stubs/cpu-synchronize-state.c |  3 ++
 target/i386/hax/hax-all.c |  2 +-
 target/i386/nvmm/nvmm-all.c   |  2 +-
 target/i386/whpx/whpx-all.c   |  2 +-
 15 files changed, 139 insertions(+), 22 deletions(-)

-- 
2.32.0




[PATCH 4/5] cpu: Allow cpu_synchronize_all_post_init() to take an errp

2022-06-17 Thread Peter Xu
Allow cpu_synchronize_all_post_init() to fail with an errp when it's set.
Modify both precopy and postcopy to try to detect such error.

Signed-off-by: Peter Xu 
---
 hw/core/machine.c |  2 +-
 include/sysemu/cpus.h |  2 +-
 migration/savevm.c| 20 +---
 softmmu/cpus.c|  2 +-
 4 files changed, 20 insertions(+), 6 deletions(-)

diff --git a/hw/core/machine.c b/hw/core/machine.c
index a673302cce..e1a072080a 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -1419,7 +1419,7 @@ void qemu_remove_machine_init_done_notifier(Notifier 
*notify)
 
 void qdev_machine_creation_done(void)
 {
-cpu_synchronize_all_post_init();
+cpu_synchronize_all_post_init(NULL);
 
 if (current_machine->boot_config.has_once) {
 qemu_boot_set(current_machine->boot_config.once, _fatal);
diff --git a/include/sysemu/cpus.h b/include/sysemu/cpus.h
index b5c87d48b3..a51ee46441 100644
--- a/include/sysemu/cpus.h
+++ b/include/sysemu/cpus.h
@@ -45,7 +45,7 @@ bool cpus_are_resettable(void);
 
 void cpu_synchronize_all_states(void);
 void cpu_synchronize_all_post_reset(void);
-void cpu_synchronize_all_post_init(void);
+void cpu_synchronize_all_post_init(Error **errp);
 void cpu_synchronize_all_pre_loadvm(void);
 
 #ifndef CONFIG_USER_ONLY
diff --git a/migration/savevm.c b/migration/savevm.c
index d9076897b8..1175ddefd4 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -2005,7 +2005,17 @@ static void loadvm_postcopy_handle_run_bh(void *opaque)
 /* TODO we should move all of this lot into postcopy_ram.c or a shared code
  * in migration.c
  */
-cpu_synchronize_all_post_init();
+cpu_synchronize_all_post_init(&local_err);
+if (local_err) {
+/*
+ * TODO: a better way to do this is to tell the src that we cannot
+ * run the VM here so hopefully we can keep the VM running on src
+ * and immediately halt the switch-over.  But that needs work.
+ */
+error_report_err(local_err);
+local_err = NULL;
+autostart = false;
+}
 
 trace_loadvm_postcopy_handle_run_bh("after cpu sync");
 
@@ -2772,7 +2782,11 @@ int qemu_loadvm_state(QEMUFile *f)
 }
 
 qemu_loadvm_state_cleanup();
-cpu_synchronize_all_post_init();
+cpu_synchronize_all_post_init(&local_err);
+if (local_err) {
+error_report_err(local_err);
+return -EINVAL;
+}
 
 return ret;
 }
@@ -2789,7 +2803,7 @@ int qemu_load_device_state(QEMUFile *f)
 return ret;
 }
 
-cpu_synchronize_all_post_init();
+cpu_synchronize_all_post_init(NULL);
 return 0;
 }
 
diff --git a/softmmu/cpus.c b/softmmu/cpus.c
index 464c06201c..59c70fd496 100644
--- a/softmmu/cpus.c
+++ b/softmmu/cpus.c
@@ -146,7 +146,7 @@ void cpu_synchronize_all_post_reset(void)
 }
 }
 
-void cpu_synchronize_all_post_init(void)
+void cpu_synchronize_all_post_init(Error **errp)
 {
 CPUState *cpu;
 
-- 
2.32.0




[PATCH 3/5] accel: Allow synchronize_post_init() to take an Error**

2022-06-17 Thread Peter Xu
It allows accel->synchronize_post_init() hook to return an error upwards.
Add a new cpu_synchronize_post_init_full() for it, then let the existing
cpu_synchronize_post_init() call it with errp==NULL.

Signed-off-by: Peter Xu 
---
 accel/hvf/hvf-accel-ops.c |  2 +-
 accel/kvm/kvm-all.c   |  2 +-
 include/sysemu/accel-ops.h|  2 +-
 include/sysemu/hw_accel.h |  1 +
 softmmu/cpus.c| 10 --
 stubs/cpu-synchronize-state.c |  3 +++
 target/i386/hax/hax-all.c |  2 +-
 target/i386/nvmm/nvmm-all.c   |  2 +-
 target/i386/whpx/whpx-all.c   |  2 +-
 9 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/accel/hvf/hvf-accel-ops.c b/accel/hvf/hvf-accel-ops.c
index 24913ca9c4..dec4446264 100644
--- a/accel/hvf/hvf-accel-ops.c
+++ b/accel/hvf/hvf-accel-ops.c
@@ -228,7 +228,7 @@ static void hvf_cpu_synchronize_post_reset(CPUState *cpu)
 run_on_cpu(cpu, do_hvf_cpu_synchronize_set_dirty, RUN_ON_CPU_NULL);
 }
 
-static void hvf_cpu_synchronize_post_init(CPUState *cpu)
+static void hvf_cpu_synchronize_post_init(CPUState *cpu, Error **errp)
 {
 run_on_cpu(cpu, do_hvf_cpu_synchronize_set_dirty, RUN_ON_CPU_NULL);
 }
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index ba3210b1c1..df4f7c98f3 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -2777,7 +2777,7 @@ static void do_kvm_cpu_synchronize_post_init(CPUState 
*cpu, run_on_cpu_data arg)
 cpu->vcpu_dirty = false;
 }
 
-void kvm_cpu_synchronize_post_init(CPUState *cpu)
+void kvm_cpu_synchronize_post_init(CPUState *cpu, Error **errp)
 {
 run_on_cpu(cpu, do_kvm_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
 }
diff --git a/include/sysemu/accel-ops.h b/include/sysemu/accel-ops.h
index a0572ea87a..7e526d3c65 100644
--- a/include/sysemu/accel-ops.h
+++ b/include/sysemu/accel-ops.h
@@ -35,7 +35,7 @@ struct AccelOpsClass {
 bool (*cpu_thread_is_idle)(CPUState *cpu);
 
 void (*synchronize_post_reset)(CPUState *cpu);
-void (*synchronize_post_init)(CPUState *cpu);
+void (*synchronize_post_init)(CPUState *cpu, Error **errp);
 void (*synchronize_state)(CPUState *cpu);
 void (*synchronize_pre_loadvm)(CPUState *cpu);
 void (*synchronize_pre_resume)(bool step_pending);
diff --git a/include/sysemu/hw_accel.h b/include/sysemu/hw_accel.h
index 22903a55f7..3ee3508411 100644
--- a/include/sysemu/hw_accel.h
+++ b/include/sysemu/hw_accel.h
@@ -21,6 +21,7 @@
 void cpu_synchronize_state(CPUState *cpu);
 void cpu_synchronize_post_reset(CPUState *cpu);
 void cpu_synchronize_post_init(CPUState *cpu);
+void cpu_synchronize_post_init_full(CPUState *cpu, Error **errp);
 void cpu_synchronize_pre_loadvm(CPUState *cpu);
 
 #endif /* QEMU_HW_ACCEL_H */
diff --git a/softmmu/cpus.c b/softmmu/cpus.c
index 898363a1d0..464c06201c 100644
--- a/softmmu/cpus.c
+++ b/softmmu/cpus.c
@@ -178,13 +178,19 @@ void cpu_synchronize_post_reset(CPUState *cpu)
 }
 }
 
-void cpu_synchronize_post_init(CPUState *cpu)
+void cpu_synchronize_post_init_full(CPUState *cpu, Error **errp)
 {
 if (cpus_accel->synchronize_post_init) {
-cpus_accel->synchronize_post_init(cpu);
+cpus_accel->synchronize_post_init(cpu, errp);
 }
 }
 
+void cpu_synchronize_post_init(CPUState *cpu)
+{
+/* errp=NULL means we won't capture any error */
+cpu_synchronize_post_init_full(cpu, NULL);
+}
+
 void cpu_synchronize_pre_loadvm(CPUState *cpu)
 {
 if (cpus_accel->synchronize_pre_loadvm) {
diff --git a/stubs/cpu-synchronize-state.c b/stubs/cpu-synchronize-state.c
index d9211da66c..6d2c9f509a 100644
--- a/stubs/cpu-synchronize-state.c
+++ b/stubs/cpu-synchronize-state.c
@@ -7,3 +7,6 @@ void cpu_synchronize_state(CPUState *cpu)
 void cpu_synchronize_post_init(CPUState *cpu)
 {
 }
+void cpu_synchronize_post_init_full(CPUState *cpu, Error **errp)
+{
+}
diff --git a/target/i386/hax/hax-all.c b/target/i386/hax/hax-all.c
index b185ee8de4..782d83b531 100644
--- a/target/i386/hax/hax-all.c
+++ b/target/i386/hax/hax-all.c
@@ -651,7 +651,7 @@ static void do_hax_cpu_synchronize_post_init(CPUState *cpu, 
run_on_cpu_data arg)
 cpu->vcpu_dirty = false;
 }
 
-void hax_cpu_synchronize_post_init(CPUState *cpu)
+void hax_cpu_synchronize_post_init(CPUState *cpu, Error **errp)
 {
 run_on_cpu(cpu, do_hax_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
 }
diff --git a/target/i386/nvmm/nvmm-all.c b/target/i386/nvmm/nvmm-all.c
index b75738ee9c..f429e940af 100644
--- a/target/i386/nvmm/nvmm-all.c
+++ b/target/i386/nvmm/nvmm-all.c
@@ -869,7 +869,7 @@ void nvmm_cpu_synchronize_post_reset(CPUState *cpu)
 run_on_cpu(cpu, do_nvmm_cpu_synchronize_post_reset, RUN_ON_CPU_NULL);
 }
 
-void nvmm_cpu_synchronize_post_init(CPUState *cpu)
+void nvmm_cpu_synchronize_post_init(CPUState *cpu, Error **errp)
 {
 run_on_cpu(cpu, do_nvmm_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
 }
diff --git a/target/i386/whpx/whpx-all.c b/target/i386/whpx/whpx-all.c
index b22a3314b4..09bf5681ce 100644
--- a/target/i386/whpx/whpx-all.c
+++ 

[PATCH 1/5] cpus-common: Introduce run_on_cpu_func2 which allows error returns

2022-06-17 Thread Peter Xu
The run_on_cpu API does not yet support any way to pass an error message
back to the caller.  Add a new run_on_cpu_func2 hook to make that possible.

Note that this only changes the cpus-common core, no API is yet introduced
for v2 of the run_on_cpu_func function.

Signed-off-by: Peter Xu 
---
 cpus-common.c | 28 +---
 include/hw/core/cpu.h |  2 ++
 2 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/cpus-common.c b/cpus-common.c
index db459b41ce..1db788 100644
--- a/cpus-common.c
+++ b/cpus-common.c
@@ -116,9 +116,20 @@ __thread CPUState *current_cpu;
 
 struct qemu_work_item {
 QSIMPLEQ_ENTRY(qemu_work_item) node;
-run_on_cpu_func func;
+union {
+run_on_cpu_func func; /* When has_errp==false */
+run_on_cpu_func2 func2;   /* When has_errp==true  */
+};
 run_on_cpu_data data;
 bool free, exclusive, done;
+
+/*
+ * Below are only used by v2 of work item, where we allow to return
+ * errors for cpu work items.  When has_errp==true, then: (1) we call
+ * func2 rather than func, and (2) we pass in errp into func2() call.
+ */
+bool has_errp;
+Error **errp;
 };
 
 static void queue_work_on_cpu(CPUState *cpu, struct qemu_work_item *wi)
@@ -314,6 +325,17 @@ void async_safe_run_on_cpu(CPUState *cpu, run_on_cpu_func 
func,
 queue_work_on_cpu(cpu, wi);
 }
 
+static void process_one_work_item(struct qemu_work_item *wi, CPUState *cpu)
+{
+if (wi->has_errp) {
+/* V2 of work item, allows errors */
+wi->func2(cpu, wi->data, wi->errp);
+} else {
+/* Old version of work item, no error returned */
+wi->func(cpu, wi->data);
+}
+}
+
 void process_queued_cpu_work(CPUState *cpu)
 {
 struct qemu_work_item *wi;
@@ -336,11 +358,11 @@ void process_queued_cpu_work(CPUState *cpu)
  */
 qemu_mutex_unlock_iothread();
 start_exclusive();
-wi->func(cpu, wi->data);
+process_one_work_item(wi, cpu);
 end_exclusive();
 qemu_mutex_lock_iothread();
 } else {
-wi->func(cpu, wi->data);
+process_one_work_item(wi, cpu);
 }
 qemu_mutex_lock(>work_mutex);
 if (wi->free) {
diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
index 996f94059f..7a303576d0 100644
--- a/include/hw/core/cpu.h
+++ b/include/hw/core/cpu.h
@@ -252,6 +252,8 @@ typedef union {
 #define RUN_ON_CPU_NULL   RUN_ON_CPU_HOST_PTR(NULL)
 
 typedef void (*run_on_cpu_func)(CPUState *cpu, run_on_cpu_data data);
+/* Same as run_on_cpu_func but allows to return an error */
+typedef void (*run_on_cpu_func2)(CPUState *cpu, run_on_cpu_data data, Error 
**errp);
 
 struct qemu_work_item;
 
-- 
2.32.0




[PATCH 5/5] KVM: Hook kvm_arch_put_registers() errors to the caller

2022-06-17 Thread Peter Xu
Leverage the new mechanism to pass over errors to upper stack for
kvm_arch_put_registers() when called for the post_init() accel hook.

Signed-off-by: Peter Xu 
---
 accel/kvm/kvm-all.c  | 13 ++---
 accel/kvm/kvm-cpus.h |  2 +-
 softmmu/cpus.c   |  5 -
 3 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index df4f7c98f3..03e29ab1ed 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -2771,15 +2771,22 @@ void kvm_cpu_synchronize_post_reset(CPUState *cpu)
 run_on_cpu(cpu, do_kvm_cpu_synchronize_post_reset, RUN_ON_CPU_NULL);
 }
 
-static void do_kvm_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data 
arg)
+static void do_kvm_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data 
arg,
+ Error **errp)
 {
-kvm_arch_put_registers(cpu, KVM_PUT_FULL_STATE);
+int ret = kvm_arch_put_registers(cpu, KVM_PUT_FULL_STATE);
+
+if (ret) {
+error_setg(errp, "kvm_arch_put_registers() failed with retval=%d", 
ret);
+return;
+}
+
 cpu->vcpu_dirty = false;
 }
 
 void kvm_cpu_synchronize_post_init(CPUState *cpu, Error **errp)
 {
-run_on_cpu(cpu, do_kvm_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
+run_on_cpu2(cpu, do_kvm_cpu_synchronize_post_init, RUN_ON_CPU_NULL, errp);
 }
 
 static void do_kvm_cpu_synchronize_pre_loadvm(CPUState *cpu, run_on_cpu_data 
arg)
diff --git a/accel/kvm/kvm-cpus.h b/accel/kvm/kvm-cpus.h
index bf0bd1bee4..c9b8262704 100644
--- a/accel/kvm/kvm-cpus.h
+++ b/accel/kvm/kvm-cpus.h
@@ -16,7 +16,7 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp);
 int kvm_cpu_exec(CPUState *cpu);
 void kvm_destroy_vcpu(CPUState *cpu);
 void kvm_cpu_synchronize_post_reset(CPUState *cpu);
-void kvm_cpu_synchronize_post_init(CPUState *cpu);
+void kvm_cpu_synchronize_post_init(CPUState *cpu, Error **errp);
 void kvm_cpu_synchronize_pre_loadvm(CPUState *cpu);
 
 #endif /* KVM_CPUS_H */
diff --git a/softmmu/cpus.c b/softmmu/cpus.c
index 59c70fd496..6c0b5b87f0 100644
--- a/softmmu/cpus.c
+++ b/softmmu/cpus.c
@@ -151,7 +151,10 @@ void cpu_synchronize_all_post_init(Error **errp)
 CPUState *cpu;
 
 CPU_FOREACH(cpu) {
-cpu_synchronize_post_init(cpu);
+cpu_synchronize_post_init_full(cpu, errp);
+if (errp && *errp) {
+break;
+}
 }
 }
 
-- 
2.32.0




Re: [RFC PATCH v2 3/8] qapi: golang: Generate qapi's struct types in Go

2022-06-17 Thread Daniel P . Berrangé
On Fri, Jun 17, 2022 at 02:19:27PM +0200, Victor Toso wrote:
> This patch handles QAPI struct types and generates the equivalent
> types in Go.
> 
> At the time of this writing, it generates 388 structures.
> 
> The highlights of this implementation are:
> 
> 1. Generating a Go struct that requires a @base type, the @base type
>fields are copied over to the Go struct. The advantage of this
>approach is to not have embed structs in any of the QAPI types.
>The downside are some generated Types that are likely useless now,
>like InetSocketAddressBase from InetSocketAddress.
> 
> 2. About the Go struct's fields:
> 
>   i) They can be either by Value or Reference.
> 
>   ii) Every field that is marked as optional in the QAPI specification
>   are translated to Reference fields in its Go structure. This design
>   decision is the most straightforward way to check if a given field
>   was set or not.
> 
>   iii) Mandatory fields are always by Value with the exception of QAPI
>   arrays, which are handled by Reference (to a block of memory) by Go.
> 
>   iv) All the fields are named with Uppercase due to Golang's export
>   convention.
> 
>   v) In order to avoid any kind of issues when encoding or decoding, to
>   or from JSON, we mark all fields with its @name and, when it is
>   optional, member, with @omitempty
> 
> Example:
> 
> qapi:
>   | { 'struct': 'BlockdevCreateOptionsFile',
>   |   'data': { 'filename': 'str',
>   | 'size': 'size',
>   | '*preallocation':   'PreallocMode',
>   | '*nocow':   'bool',
>   | '*extent-size-hint':'size'} }
> 
> go:
>   | type BlockdevCreateOptionsFile struct {
>   | Filename   string`json:"filename"`
>   | Size   uint64`json:"size"`
>   | Preallocation  *PreallocMode `json:"preallocation,omitempty"`
>   | Nocow  *bool `json:"nocow,omitempty"`
>   | ExtentSizeHint *uint64   `json:"extent-size-hint,omitempty"`
>   | }

One thing to bear in mind here

At the QAPI level, changing a field from mandatory to optional has
been considered a backwards compatible change by QEMU maintainers,
because any existing caller can happily continue passing the
optional field with no downside.

With this Go design, changing a field from mandatory to optional
will be an API breakage, because the developer will need to change
from passing a literal value, to a pointer to the value, when
initializing the struct.

IOW, this Go impl provides weaker compat guarantees than even
QAPI does, and QAPI compat guarantees were already weaker than
I would like as an app developer.

If we want to make ourselves future proof, we would have to make
all struct fields optional from the start, even if they are
mandatory at QAPI level. This would make the code less self-documenting
though, so that's not very appealing either.


If we want to avoid this, we would need the same approach I suggested
wrt supporting multiple versions of the API concurrently. Namely have
versioned structs, so every time there's a field change of any kind,
we introduce a new struct version.


With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




Re: misaligned-pointer-use libslirp/src/tcp_input.c

2022-06-17 Thread Alexander Bulekov
On 220617 1217, Thomas Huth wrote:
> On 16/06/2022 21.03, Alexander Bulekov wrote:
> > On 220616 0930, Patrick Venture wrote:
> > > On Thu, Jun 16, 2022 at 6:31 AM Alexander Bulekov  wrote:
> > > 
> > > > Is this an --enable-sanitizers build? The virtual-device fuzzer catches
> > > > 
> > > 
> > > Yeah - it should be reproducible with a sanitizers build from HEAD -- I 
> > > can
> > > try to get a manual instance going again without automation to try and
> > > reproduce it.  We're testing on v7.0.0 which is when we started seeing
> > > this, I don't think we saw it in 6.2.0.
> > 
> > Here are a few reproducers (run with --enable-sanitizers):
> > 
> > This one complains about misalignments in ip_header, ipasfrag, qlink,
> > ip...
> > 
> > cat << EOF | ./qemu-system-i386 -display none -machine accel=qtest, -m \
> > 512M,slots=4,maxmem=0x -machine q35 -nodefaults -device \
> > vmxnet3,netdev=net0 -netdev user,id=net0 -object \
> > memory-backend-ram,id=mem1,size=10M -device \
> > pc-dimm,id=nv1,memdev=mem1,addr=0xba19ff -object \
> > memory-backend-ram,id=mem2,size=10M -device \
> > pc-dimm,id=nv2,memdev=mem2,addr=0xbe53e14abaa0 -object \
> > memory-backend-ram,id=mem3,size=10M -device \
> > pc-dimm,id=nv3,memdev=mem3,addr=0xfee9cae0 -object \
> > memory-backend-ram,id=mem4,size=10M -device \
> > pc-dimm,id=nv4,memdev=mem4,addr=0xf0f0f0f -qtest stdio
> > outl 0xcf8 0x8810
> > outl 0xcfc 0xe000
> > outl 0xcf8 0x8814
> > outl 0xcfc 0xe0001000
> > outl 0xcf8 0x8804
> > outw 0xcfc 0x06
> > write 0x3e 0x1 0x02
> > write 0x39 0x1 0x20
> > write 0x29 0x1 0x10
> > write 0x2c 0x1 0x0f
> > write 0x2d 0x1 0x0f
> > write 0x2e 0x1 0x0f
> > write 0x2f 0x1 0x0f
> > write 0xf0f0f0f1012 0x1 0xfe
> > write 0xf0f0f0f1013 0x1 0xca
> > write 0xf0f0f0f1014 0x1 0xe9
> > write 0xf0f0f0f1017 0x1 0xfe
> > write 0xf0f0f0f103a 0x1 0x01
> > write 0xfee9cafe0009 0x1 0x40
> > write 0xfee9cafe0019 0x1 0x40
> > write 0x0 0x1 0xe1
> > write 0x1 0x1 0xfe
> > write 0x2 0x1 0xbe
> > write 0x3 0x1 0xba
> > writel 0xe0001020 0xcafe
> > write 0xfee9cafe0029 0x1 0x40
> > write 0xfee9cafe0039 0x1 0x40
> > write 0xfee9cafe0049 0x1 0x40
> > write 0xfee9cafe0059 0x1 0x40
> > write 0x1f65190b 0x1 0x08
> > write 0x1f65190d 0x1 0x46
> > write 0x1f65190e 0x1 0x03
> > write 0x1f651915 0x1 0x01
> > write 0xfee9cafe0069 0x1 0x40
> > write 0xfee9cafe0079 0x1 0x40
> > write 0xfee9cafe0089 0x1 0x40
> > write 0xfee9cafe0099 0x1 0x40
> > write 0xfee9cafe009d 0x1 0x10
> > write 0xfee9cafe00a0 0x1 0xff
> > write 0xfee9cafe00a1 0x1 0x18
> > write 0xfee9cafe00a2 0x1 0x65
> > write 0xfee9cafe00a3 0x1 0x1f
> > write 0xfee9cafe00a9 0x1 0x40
> > write 0xfee9cafe00ad 0x1 0x1c
> > write 0xe602 0x1 0x00
> > EOF
> > 
> > This one complains about misalignments in ip6_header, ip6_hdrctl...
> > 
> > cat << EOF | ./qemu-system-i386 -display none -machine accel=qtest, -m \
> > 512M,slots=1,maxmem=0x -machine q35 -nodefaults -device \
> > vmxnet3,netdev=net0 -netdev user,id=net0 -object \
> > memory-backend-ram,id=mem1,size=4M -device \
> > pc-dimm,id=nv1,memdev=mem1,addr=0x1dd8600 -qtest stdio
> > outl 0xcf8 0x8810
> > outl 0xcfc 0xe000
> > outl 0xcf8 0x8814
> > outl 0xcfc 0xe0001000
> > outl 0xcf8 0x8804
> > outw 0xcfc 0x06
> > write 0x0 0x1 0xe1
> > write 0x1 0x1 0xfe
> > write 0x2 0x1 0xbe
> > write 0x3 0x1 0xba
> > write 0x3e 0x1 0x01
> > write 0x39 0x1 0x01
> > write 0x28 0x1 0x01
> > write 0x29 0x1 0x01
> > write 0x2d 0x1 0x86
> > write 0x2e 0x1 0xdd
> > write 0x2f 0x1 0x01
> > write 0x1dd86000112 0x1 0x10
> > write 0x1dd8600013c 0x1 0x02
> > writel 0xe0001020 0xcafe
> > write 0x1009 0x1 0x40
> > write 0x100c 0x1 0x86
> > write 0x100d 0x1 0xdd
> > write 0x1011 0x1 0x10
> > write 0x1019 0x1 0x7e
> > write 0x101d 0x1 0x10
> > write 0x4d56 0x1 0x02
> > write 0xe603 0x1 0x00
> > EOF
> 
> Could you please open bugs on
> https://gitlab.freedesktop.org/slirp/libslirp/-/issues so that this
> information does not get lost?

Done:
https://gitlab.freedesktop.org/slirp/libslirp/-/issues/62
https://gitlab.freedesktop.org/slirp/libslirp/-/issues/63

-Alex

> 
>  Thomas
> 
> > > 
> > > > these periodically while fuzzing network-devices. However I don't think
> > > > OSS-Fuzz creates reports for them for some reason. I can create qtest
> > > > reproducers, if that is useful.
> > > > -Alex
> > > > 
> > > > On 220615 0942, Patrick Venture wrote:
> > > > > Hey - I wanted to ask if someone else has seen this or has 
> > > > > suggestions on
> > > > > how to fix it in libslirp / qemu.
> > > > > 
> > > > > libslirp version: 3ad1710a96678fe79066b1469cead4058713a1d9
> > > > > 
> > > > > The blow is line:
> > > > > 
> > > > https://gitlab.freedesktop.org/slirp/libslirp/-/blob/master/src/tcp_input.c#L310
> > > > > 
> > > > > I0614 13:44:44.3040872040 bytestream.cc:22] QEMU:
> > > > > 

Re: [PATCH v2 03/10] qga: treat get-guest-fsinfo as "best effort"

2022-06-17 Thread Daniel P . Berrangé
On Fri, Jun 17, 2022 at 10:04:14AM -0400, John Snow wrote:
> On Fri, Jun 17, 2022, 5:49 AM Daniel P. Berrangé 
> wrote:
> 
> > On Thu, Jun 16, 2022 at 06:35:44PM +0400, Marc-André Lureau wrote:
> > > Hi
> > >
> > > On Thu, Jun 16, 2022 at 6:27 PM John Snow  wrote:
> > >
> > > > In some container environments, there may be references to block
> > devices
> > > > witnessable from a container through /proc/self/mountinfo that
> > reference
> > > > devices we simply don't have access to in the container, and could not
> > > > provide information about.
> > > >
> > > > Instead of failing the entire fsinfo command, return stub information
> > > > for these failed lookups.
> > > >
> > > > This allows test-qga to pass under docker tests, which are in turn used
> > > > by the CentOS VM tests.
> > > >
> > > > Signed-off-by: John Snow 
> > > > ---
> > > >  qga/commands-posix.c | 8 +++-
> > > >  1 file changed, 7 insertions(+), 1 deletion(-)
> > > >
> > > > diff --git a/qga/commands-posix.c b/qga/commands-posix.c
> > > > index 0469dc409d4..5989d4dca9d 100644
> > > > --- a/qga/commands-posix.c
> > > > +++ b/qga/commands-posix.c
> > > > @@ -1207,7 +1207,13 @@ static void build_guest_fsinfo_for_device(char
> > > > const *devpath,
> > > >
> > > >  syspath = realpath(devpath, NULL);
> > > >  if (!syspath) {
> > > > -error_setg_errno(errp, errno, "realpath(\"%s\")", devpath);
> > > > +if (errno == ENOENT) {
> > > > +/* This devpath may not exist because of container config,
> > > > etc. */
> > > > +fprintf(stderr, "realpath(%s) returned NULL/ENOENT\n",
> > > > devpath);
> > > >
> > >
> > > qga uses g_critical() (except for some win32 code paths atm)
> > >
> > >
> > > > +fs->name = y
> > > >
> > >
> > > Hmm, maybe we should make the field optional instead.
> >
> > In my own testing, this method is called in various scenarios.
> > Some example:
> >
> >   devpath==/sys/dev/block/253:0
> >   syspath==/sys/devices/virtual/block/dm-0
> >
> > => fs->name == dm-0
> >
> >   devpath==/sys/devices/virtual/block/dm-0/slaves/nvme0n1p4
> >
> > syspath==/sys/devices/pci:00/:00:1d.0/:02:00.0/nvme/nvme0/nvme0n1/nvme0n1p4
> >
> > => fs->name == nvme0n1p4
> >
> >   devpath==/sys/dev/block/259:2
> >
> > syspath==/sys/devices/pci:00/:00:1d.0/:02:00.0/nvme/nvme0/nvme0n1/nvme0n1p2
> >
> > => fs->name == nvme0n1p2
> >
> > We set fs->name from  basename(syspath)
> >
> > If the realpath call fails, we could use  basename(devpath). That
> > would sometimes give the correct answer, and in other types it
> > would at least give the major:minor number, which an admin can
> > manually correlate if desired via /proc/partitions.
> >
> > If we want to be really advanced, we could just open /proc/partitions
> > and resolve the proper name ourselves, but that's probably overkill
> >
> >   basename(sysfspath)
> >
> > is better than g_strdup("??\?-ENOENT")  IMHO
> >
> 
> Sure! I had something like that initially, but chickened out specifically
> because I thought major:minor was a nonsense kind of reply, so I opted for
> more egregiously obvious nonsense. I figured I'd find strong opinions that
> way ;)

It is a different format but it is semantically giving similar info.

If we want to just leave it empty though that's fine too.

> 
> I'm just not sure how this data is used in practice so I had no insight as
> to what would be best. I can use the basename, sure.
> 
> (Should I also add an optional flag field that indicates the path was not
> resolvable, do you think? I guess we can always add it later if needed, but
> not sure if i need to head that one off at the pass.)
> 
> As for Thomas' comment: I wasn't entirely clear on precisely when we'd run
> into this scenario and I didn't know if it was a good idea to skip the
> entries entirely. Maybe getting platform mount information even if we can't
> access it is still important when working with containers? I don't know one
> way or the other TBQH. I'm not very well traveled with devices,
> filesystems, and permissions where containers are concerned.

I view the primary purpose of this command to be offering a way to
enumerate filesystems. Whether we report what block device the FS
is hosted on is a secondary purpose.  So as long as we can fulfill the
primary purpose, it's sufficient IMHO.

With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




Re: [PATCH V8 38/39] python/machine: add QEMUMachine accessors

2022-06-17 Thread John Snow
On Wed, Jun 15, 2022, 11:27 AM Steve Sistare 
wrote:

> Provide full_args() to return all command-line arguments used to start a
> vm, some of which are not otherwise visible to QEMUMachine clients.  This
> is needed by the cpr test, which must start a vm, then pass all qemu
> command-line arguments to the cpr-exec monitor call.
>
> Provide reopen_qmp_connection() to reopen a closed monitor connection.
> This is needed by cpr, because qemu-exec closes the monitor socket.
>
> Signed-off-by: Steve Sistare 
> ---
>  python/qemu/machine/machine.py | 14 ++
>  1 file changed, 14 insertions(+)
>
> diff --git a/python/qemu/machine/machine.py
> b/python/qemu/machine/machine.py
> index 37191f4..60b934d 100644
> --- a/python/qemu/machine/machine.py
> +++ b/python/qemu/machine/machine.py
> @@ -332,6 +332,11 @@ def args(self) -> List[str]:
>  """Returns the list of arguments given to the QEMU binary."""
>  return self._args
>
> +@property
> +def full_args(self) -> List[str]:
> +"""Returns the full list of arguments used to launch QEMU."""
> +return list(self._qemu_full_args)
> +
>

OK

 def _pre_launch(self) -> None:
>  if self._console_set:
>  self._remove_files.append(self._console_address)
> @@ -486,6 +491,15 @@ def _close_qmp_connection(self) -> None:
>  finally:
>  self._qmp_connection = None
>
> +def reopen_qmp_connection(self):
> +self._close_qmp_connection()
> +self._qmp_connection = QEMUMonitorProtocol(
> +self._monitor_address,
> +server=True,
> +nickname=self._name
> +)
> +self._qmp.accept(self._qmp_timer)
> +
>

Unrelated change, please split into a new commit. (Sorry.)

Seems harmless enough, though. Happy to give RB and AB to both if you split
the commits.

--js


Re: [PATCH v2 03/10] qga: treat get-guest-fsinfo as "best effort"

2022-06-17 Thread John Snow
On Fri, Jun 17, 2022, 5:49 AM Daniel P. Berrangé 
wrote:

> On Thu, Jun 16, 2022 at 06:35:44PM +0400, Marc-André Lureau wrote:
> > Hi
> >
> > On Thu, Jun 16, 2022 at 6:27 PM John Snow  wrote:
> >
> > > In some container environments, there may be references to block
> devices
> > > witnessable from a container through /proc/self/mountinfo that
> reference
> > > devices we simply don't have access to in the container, and could not
> > > provide information about.
> > >
> > > Instead of failing the entire fsinfo command, return stub information
> > > for these failed lookups.
> > >
> > > This allows test-qga to pass under docker tests, which are in turn used
> > > by the CentOS VM tests.
> > >
> > > Signed-off-by: John Snow 
> > > ---
> > >  qga/commands-posix.c | 8 +++-
> > >  1 file changed, 7 insertions(+), 1 deletion(-)
> > >
> > > diff --git a/qga/commands-posix.c b/qga/commands-posix.c
> > > index 0469dc409d4..5989d4dca9d 100644
> > > --- a/qga/commands-posix.c
> > > +++ b/qga/commands-posix.c
> > > @@ -1207,7 +1207,13 @@ static void build_guest_fsinfo_for_device(char
> > > const *devpath,
> > >
> > >  syspath = realpath(devpath, NULL);
> > >  if (!syspath) {
> > > -error_setg_errno(errp, errno, "realpath(\"%s\")", devpath);
> > > +if (errno == ENOENT) {
> > > +/* This devpath may not exist because of container config,
> > > etc. */
> > > +fprintf(stderr, "realpath(%s) returned NULL/ENOENT\n",
> > > devpath);
> > >
> >
> > qga uses g_critical() (except for some win32 code paths atm)
> >
> >
> > > +fs->name = y
> > >
> >
> > Hmm, maybe we should make the field optional instead.
>
> In my own testing, this method is called in various scenarios.
> Some example:
>
>   devpath==/sys/dev/block/253:0
>   syspath==/sys/devices/virtual/block/dm-0
>
> => fs->name == dm-0
>
>   devpath==/sys/devices/virtual/block/dm-0/slaves/nvme0n1p4
>
> syspath==/sys/devices/pci:00/:00:1d.0/:02:00.0/nvme/nvme0/nvme0n1/nvme0n1p4
>
> => fs->name == nvme0n1p4
>
>   devpath==/sys/dev/block/259:2
>
> syspath==/sys/devices/pci:00/:00:1d.0/:02:00.0/nvme/nvme0/nvme0n1/nvme0n1p2
>
> => fs->name == nvme0n1p2
>
> We set fs->name from  basename(syspath)
>
> If the realpath call fails, we could use  basename(devpath). That
> would sometimes give the correct answer, and in other types it
> would at least give the major:minor number, which an admin can
> manually correlate if desired via /proc/partitions.
>
> If we want to be really advanced, we could just open /proc/partitions
> and resolve the proper name ourselves, but that's probably overkill
>
>   basename(sysfspath)
>
> is better than g_strdup("??\?-ENOENT")  IMHO
>

Sure! I had something like that initially, but chickened out specifically
because I thought major:minor was a nonsense kind of reply, so I opted for
more egregiously obvious nonsense. I figured I'd find strong opinions that
way ;)

I'm just not sure how this data is used in practice so I had no insight as
to what would be best. I can use the basename, sure.

(Should I also add an optional flag field that indicates the path was not
resolvable, do you think? I guess we can always add it later if needed, but
not sure if i need to head that one off at the pass.)

As for Thomas' comment: I wasn't entirely clear on precisely when we'd run
into this scenario and I didn't know if it was a good idea to skip the
entries entirely. Maybe getting platform mount information even if we can't
access it is still important when working with containers? I don't know one
way or the other TBQH. I'm not very well traveled with devices,
filesystems, and permissions where containers are concerned.

/shrug


> With regards,
> Daniel
> --
> |: https://berrange.com  -o-
> https://www.flickr.com/photos/dberrange :|
> |: https://libvirt.org -o-
> https://fstop138.berrange.com :|
> |: https://entangle-photo.org-o-
> https://www.instagram.com/dberrange :|
>
>


[PATCH] sphinx: change default language to 'en'

2022-06-17 Thread Martin Liška
Fixes the following Sphinx warning (treated as error) starting
with 5.0 release:

Warning, treated as error:
Invalid configuration value found: 'language = None'. Update your configuration 
to a valid langauge code. Falling back to 'en' (English).

Signed-off-by: Martin Liska 
---
 docs/conf.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/conf.py b/docs/conf.py
index 49dab44cca..e33cf3d381 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -126,7 +126,7 @@
 #
 # This is also used if you do content translation via gettext catalogs.
 # Usually you set "language" from the command line for these cases.
-language = None
+language = 'en'
 
 # List of patterns, relative to source directory, that match files and
 # directories to ignore when looking for source files.
-- 
2.36.1




Re: [PATCH 6/7] crypto: Implement ECDSA algorithm by gcrypt

2022-06-17 Thread Daniel P . Berrangé
On Mon, Jun 13, 2022 at 04:45:30PM +0800, Lei He wrote:
> Implement ECDSA algorithm by gcrypt
> 
> Signed-off-by: lei he 
> ---
>  crypto/akcipher-gcrypt.c.inc | 400 
> +++
>  1 file changed, 400 insertions(+)

Reviewed-by: Daniel P. Berrangé 


With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




Re: [PATCH 7/7] crypto: Add test suite for ECDSA algorithm

2022-06-17 Thread Daniel P . Berrangé
On Mon, Jun 13, 2022 at 04:45:31PM +0800, Lei He wrote:
> 1. add test suite for ecdsa algorithm.
> 2. use qcrypto_akcihper_max_xxx_len to help create buffers in
> test code.
> 
> Signed-off-by: lei he 
> ---
>  tests/unit/test-crypto-akcipher.c | 227 
> +++---
>  1 file changed, 212 insertions(+), 15 deletions(-)

Reviewed-by: Daniel P. Berrangé 


With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




Re: [PATCH v5 5/5] i386/pc: restrict AMD only enforcing of valid IOVAs to new machine type

2022-06-17 Thread Joao Martins
On 6/16/22 15:27, Igor Mammedov wrote:
> On Fri, 20 May 2022 11:45:32 +0100
> Joao Martins  wrote:
> 
>> The added enforcing is only relevant in the case of AMD where the
>> range right before the 1TB is restricted and cannot be DMA mapped
>> by the kernel consequently leading to IOMMU INVALID_DEVICE_REQUEST
>> or possibly other kinds of IOMMU events in the AMD IOMMU.
>>
>> Although, there's a case where it may make sense to disable the
>> IOVA relocation/validation when migrating from a
>> non-valid-IOVA-aware qemu to one that supports it.
>>
>> Relocating RAM regions to after the 1Tb hole has consequences for
>> guest ABI because we are changing the memory mapping, so make
>> sure that only new machine types enforce it, but not older ones.
> 
> is old machine with so much ram going to work and not explode
> even without iommu?
> 
Depends on your definition of work.

And that's the purpose of this patch, to still allow graceful
failures on hosts with different hypervisor kernel versions that
would use versioned machine (like pc-q35-7.0 or older)

e.g. if you boot a guest with pc-q35-7.0 on a 4.19 kernel it will boot
whereas on a v5.14 kernel with same pc-q35-7.0, the memory map would
stay the same, but it would fail as a >= 5.4 kernel will validate
whether the IOVA is valid.

It will 'work' as before for old machine, meaning you are dependent on the
kernel to validate IOVAs and prevent dma maps or not. Without IOMMU enabled
you don't need this, but you also can't do VFIO (or the like, e.g. vDPA)

>> Signed-off-by: Joao Martins 
>> ---
>>  hw/i386/pc.c | 7 +--
>>  hw/i386/pc_piix.c| 2 ++
>>  hw/i386/pc_q35.c | 2 ++
>>  include/hw/i386/pc.h | 1 +
>>  4 files changed, 10 insertions(+), 2 deletions(-)
>>
>> diff --git a/hw/i386/pc.c b/hw/i386/pc.c
>> index 652ae8ff9ccf..62f9af91f19f 100644
>> --- a/hw/i386/pc.c
>> +++ b/hw/i386/pc.c
>> @@ -862,6 +862,7 @@ static hwaddr x86_max_phys_addr(PCMachineState *pcms,
>>  static void x86_update_above_4g_mem_start(PCMachineState *pcms,
>>uint64_t pci_hole64_size)
>>  {
>> +PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
>>  X86MachineState *x86ms = X86_MACHINE(pcms);
>>  CPUX86State *env = _CPU(first_cpu)->env;
>>  hwaddr start = x86ms->above_4g_mem_start;
>> @@ -870,9 +871,10 @@ static void 
>> x86_update_above_4g_mem_start(PCMachineState *pcms,
>>  /*
>>   * The HyperTransport range close to the 1T boundary is unique to AMD
>>   * hosts with IOMMUs enabled. Restrict the ram-above-4g relocation
>> - * to above 1T to AMD vCPUs only.
>> + * to above 1T to AMD vCPUs only. @enforce_valid_iova is only false in
>> + * older machine types (<= 7.0) for compatibility purposes.
>>   */
>> -if (!IS_AMD_CPU(env)) {
>> +if (!IS_AMD_CPU(env) || !pcmc->enforce_valid_iova) {
>>  return;
>>  }
>>  
>> @@ -1881,6 +1883,7 @@ static void pc_machine_class_init(ObjectClass *oc, 
>> void *data)
>>  pcmc->has_reserved_memory = true;
>>  pcmc->kvmclock_enabled = true;
>>  pcmc->enforce_aligned_dimm = true;
>> +pcmc->enforce_valid_iova = true;
>>  /* BIOS ACPI tables: 128K. Other BIOS datastructures: less than 4K 
>> reported
>>   * to be used at the moment, 32K should be enough for a while.  */
>>  pcmc->acpi_data_size = 0x2 + 0x8000;
>> diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
>> index 57bb5b8f2aea..74176a210d56 100644
>> --- a/hw/i386/pc_piix.c
>> +++ b/hw/i386/pc_piix.c
>> @@ -437,9 +437,11 @@ DEFINE_I440FX_MACHINE(v7_1, "pc-i440fx-7.1", NULL,
>>  
>>  static void pc_i440fx_7_0_machine_options(MachineClass *m)
>>  {
>> +PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
>>  pc_i440fx_7_1_machine_options(m);
>>  m->alias = NULL;
>>  m->is_default = false;
>> +pcmc->enforce_valid_iova = false;
>>  compat_props_add(m->compat_props, hw_compat_7_0, hw_compat_7_0_len);
>>  compat_props_add(m->compat_props, pc_compat_7_0, pc_compat_7_0_len);
>>  }
>> diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
>> index 4d5c2fbd976b..bc38a6ba4c67 100644
>> --- a/hw/i386/pc_q35.c
>> +++ b/hw/i386/pc_q35.c
>> @@ -381,8 +381,10 @@ DEFINE_Q35_MACHINE(v7_1, "pc-q35-7.1", NULL,
>>  
>>  static void pc_q35_7_0_machine_options(MachineClass *m)
>>  {
>> +PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
>>  pc_q35_7_1_machine_options(m);
>>  m->alias = NULL;
>> +pcmc->enforce_valid_iova = false;
>>  compat_props_add(m->compat_props, hw_compat_7_0, hw_compat_7_0_len);
>>  compat_props_add(m->compat_props, pc_compat_7_0, pc_compat_7_0_len);
>>  }
>> diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
>> index 9c847faea2f8..22119131eca7 100644
>> --- a/include/hw/i386/pc.h
>> +++ b/include/hw/i386/pc.h
>> @@ -117,6 +117,7 @@ struct PCMachineClass {
>>  bool has_reserved_memory;
>>  bool enforce_aligned_dimm;
>>  bool broken_reserved_end;
>> +bool enforce_valid_iova;
>>  
>>  /* generate legacy CPU hotplug AML */
>>

Re: [PATCH v5 4/5] i386/pc: relocate 4g start to 1T where applicable

2022-06-17 Thread Joao Martins
On 6/17/22 13:32, Igor Mammedov wrote:
> On Fri, 17 Jun 2022 13:18:38 +0100
> Joao Martins  wrote:
>> On 6/16/22 15:23, Igor Mammedov wrote:
>>> On Fri, 20 May 2022 11:45:31 +0100
>>> Joao Martins  wrote:
 +hwaddr above_4g_mem_start,
 +uint64_t pci_hole64_size)
 +{
 +PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
 +X86MachineState *x86ms = X86_MACHINE(pcms);
 +MachineState *machine = MACHINE(pcms);
 +ram_addr_t device_mem_size = 0;
 +hwaddr base;
 +
 +if (!x86ms->above_4g_mem_size) {
 +   /*
 +* 32-bit pci hole goes from
 +* end-of-low-ram (@below_4g_mem_size) to IOAPIC.
 +*/
 +return IO_APIC_DEFAULT_ADDRESS - 1;  
>>>
>>> lack of above_4g_mem, doesn't mean absence of device_mem_size or anything 
>>> else
>>> that's located above it.
>>>   
>>
>> True. But the intent is to fix 32-bit boundaries as one of the qtests was 
>> failing
>> otherwise. We won't hit the 1T hole, hence a nop.
> 
> I don't get the reasoning, can you clarify it pls?
> 

I was trying to say that what led me here was a couple of qtest failures 
(from v3->v4).

I was doing this before based on pci_hole64. phys-bits=32 was for example one
of the test failures, and pci-hole64 sits above what 32-bit can reference.

>>  Unless we plan on using
>> pc_max_used_gpa() for something else other than this.
> 
> Even if '!above_4g_mem_size', we can still have hotpluggable memory region
> present and that can  hit 1Tb. The same goes for pci64_hole if it's configured
> large enough on CLI.
> 
So hotpluggable memory seems to assume it sits above 4g mem.

pci_hole64 likewise as it uses similar computations as hotplug.

Unless I am misunderstanding something here.

> Looks like guesstimate we could use is taking pci64_hole_end as max used GPA
> 
I think this was what I had before (v3[0]) and did not work.

Let me revisit this edge case again.

[0] https://lore.kernel.org/all/20220223184455.9057-5-joao.m.mart...@oracle.com/



Re: [PATCH] xen/pass-through: don't create needless register group

2022-06-17 Thread Anthony PERARD via
On Fri, Jun 10, 2022 at 12:23:35PM -0400, Chuck Zmudzinski wrote:
> Currently we are creating a register group for the Intel IGD OpRegion
> for every device we pass through, but the XEN_PCI_INTEL_OPREGION
> register group is only valid for an Intel IGD. Add a check to make
> sure the device is an Intel IGD and a check that the administrator has
> enabled gfx_passthru in the xl domain configuration. Require both checks
> to be true before creating the register group. Use the existing
> is_igd_vga_passthrough() function to check for a graphics device from
> any vendor and that the administrator enabled gfx_passthru in the xl
> domain configuration, but further require that the vendor be Intel,
> because only Intel IGD devices have an Intel OpRegion. These are the
> same checks hvmloader and libxl do to determine if the Intel OpRegion
> needs to be mapped into the guest's memory.
> 
> Signed-off-by: Chuck Zmudzinski 
> ---
>  hw/xen/xen_pt_config_init.c | 4 
>  1 file changed, 4 insertions(+)
> 
> diff --git a/hw/xen/xen_pt_config_init.c b/hw/xen/xen_pt_config_init.c
> index c5c4e943a8..ffd915654c 100644
> --- a/hw/xen/xen_pt_config_init.c
> +++ b/hw/xen/xen_pt_config_init.c
> @@ -2037,6 +2037,10 @@ void xen_pt_config_init(XenPCIPassthroughState *s, 
> Error **errp)
>   * therefore the size should be 0xff.
>   */

Could you move that comment? I think it would make more sense to comment
the "reg_grp_offset=XEN_PCI_INTEL_OPREGION" line now that the `if` block
also skips setting up the group on non-Intel devices.

>  if (xen_pt_emu_reg_grps[i].grp_id == XEN_PCI_INTEL_OPREGION) {
> +if (!is_igd_vga_passthrough(&s->real_device) ||
> +s->real_device.vendor_id != PCI_VENDOR_ID_INTEL) {
> +continue;
> +}
>  reg_grp_offset = XEN_PCI_INTEL_OPREGION;
>  }

Thanks,

-- 
Anthony PERARD



Re: [PATCH v3 0/2] hw/nvme: Add shadow doorbell buffer support

2022-06-17 Thread Jinhao Fan
> On Jun 17, 2022, at 8:56 PM, Klaus Jensen  wrote:
> 
> On Jun 17 20:47, Jinhao Fan wrote:
>> 
>> 
>>> On Jun 17, 2022, at 7:54 PM, Klaus Jensen  wrote:
>>> 
>>> LGTM,
>>> 
>>> Reviewed-by: Klaus Jensen 
>>> 
>> 
>> Great!
>> 
>> I have two questions:
>> 
>> How many “Reviewed-by”’s do I need to get my patch applied?
>> 
> 
> That depends ;) The maintainers decide that.
> 
>> Do I need to post a v4 patch to add the “Reviewed-by”’s in my commit 
>> message?
>> 
> 
> Nope, the maintainer will pick that up when applying.

Gotcha! Thanks!



Re: [PATCH v3 0/2] hw/nvme: Add shadow doorbell buffer support

2022-06-17 Thread Klaus Jensen
On Jun 17 20:47, Jinhao Fan wrote:
> 
> 
> > On Jun 17, 2022, at 7:54 PM, Klaus Jensen  wrote:
> > 
> > LGTM,
> > 
> > Reviewed-by: Klaus Jensen 
> > 
> 
> Great!
> 
> I have two questions:
> 
> How many “Reviewed-by”’s do I need to get my patch applied?
> 

That depends ;) The maintainers decide that.

> Do I need to post a v4 patch to add the “Reviewed-by”’s in my commit 
> message?
> 

Nope, the maintainer will pick that up when applying.


signature.asc
Description: PGP signature


Re: [PATCH v3 0/2] hw/nvme: Add shadow doorbell buffer support

2022-06-17 Thread Jinhao Fan



> On Jun 17, 2022, at 7:54 PM, Klaus Jensen  wrote:
> 
> On Jun 16 20:34, Jinhao Fan wrote:
>> This patch adds shadow doorbell buffer support in NVMe 1.3 to QEMU
>> NVMe. The Doorbell Buffer Config admin command is implemented for the
>> guest to enable shadow doorbell buffer. When this feature is enabled, each
>> SQ/CQ is associated with two buffers, i.e., Shadow Doorbell buffer and
>> EventIdx buffer. According to the Spec, each queue's doorbell register
>> is only updated when the Shadow Doorbell buffer value changes from being
>> less than or equal to the value of the corresponding EventIdx buffer
>> entry to being greater than that value. Therefore, the number of MMIO's
>> on the doorbell registers is greatly reduced.
>> 
>> This patch is adapted from Huaicheng Li's patch[1] in 2018.
>> 
>> [1] 
>> https://patchwork.kernel.org/project/qemu-devel/patch/20180305194906.ga3...@gmail.com/
>> 
>> IOPS comparison with FIO:
>> 
>> iodepth1  2  4  8
>>  QEMU   25.1k  25.9k  24.5k  24.0k
>> +dbbuf  29.1k  60.1k  99.8k  82.5k
>> 
>> MMIO's per IO measured by perf-kvm:
>> 
>> iodepth1  2  4  8
>>  QEMU   2.01   1.99   1.99   1.99
>> +dbbuf  1.00   0.52   0.27   0.46
>> 
>> The tests are done on Ubuntu 22.04 with 5.15.0-33 kernel with Intel(R) 
>> Xeon(R) Gold 6248R CPU @ 3.00GHz.
>> 
>> QEMU set up:
>> 
>> bin/x86_64-softmmu/qemu-system-x86_64 \
>>-name "nvme-test" \
>>-machine accel=kvm \
>>-cpu host \
>>-smp 4 \
>>-m 8G \
>>-daemonize \
>>-device virtio-scsi-pci,id=scsi0 \
>>-device scsi-hd,drive=hd0 \
>>-drive 
>> file=$OSIMGF,if=none,aio=native,cache=none,format=qcow2,id=hd0,snapshot=on \
>>-drive "id=nvm,if=none,file=null-co://,file.read-zeroes=on,format=raw" \
>>-device nvme,serial=deadbeef,drive=nvm \
>>-net user,hostfwd=tcp::8080-:22 \
>>-net nic,model=virtio
>> 
>> FIO configuration:
>> 
>> [global]
>> ioengine=libaio
>> filename=/dev/nvme0n1
>> thread=1
>> group_reporting=1
>> direct=1
>> verify=0
>> time_based=1
>> ramp_time=0
>> runtime=30
>> ;size=1G
>> ;iodepth=1
>> rw=randread
>> bs=4k
>> 
>> [test]
>> numjobs=1
>> 
>> Changes since v2:
>>  - Do not ignore admin queue updates in nvme_process_db and nvme_post_cqes
>>  - Calculate db_addr and ei_addr in hard-coded way
>> 
>> Changes since v1:
>>  - Add compatibility with hosts that do not use admin queue shadow doorbell
>> 
>> Jinhao Fan (2):
>>  hw/nvme: Implement shadow doorbell buffer support
>>  hw/nvme: Add trace events for shadow doorbell buffer
>> 
>> hw/nvme/ctrl.c   | 118 ++-
>> hw/nvme/nvme.h   |   8 +++
>> hw/nvme/trace-events |   5 ++
>> include/block/nvme.h |   2 +
>> 4 files changed, 132 insertions(+), 1 deletion(-)
>> 
>> -- 
>> 2.25.1
>> 
> 
> LGTM,
> 
> Reviewed-by: Klaus Jensen 
> 

Great!

I have two questions:

How many “Reviewed-by”’s do I need to get my patch applied?

Do I need to post a v4 patch to add the “Reviewed-by”’s in my commit 
message?

Thanks,
Jinhao Fan




Re: [PATCH v1] MAINTAINERS: Collect memory device files in "Memory devices"

2022-06-17 Thread Jonathan Cameron via
On Fri, 17 Jun 2022 14:31:51 +0200
David Hildenbrand  wrote:

> Xiao Guangrong doesn't have enough time to actively review or contribute
> to our NVDIMM implementation. Let's dissolve the "NVDIMM" section, moving
> relevant ACPI parts to "ACPI/SMBIOS" and moving memory device stuff into a
> new "Memory devices" section. Make that new section cover other memory
> device stuff as well.
> 
> We can now drop the "hw/mem/*" rule from "ACPI/SMBIOS". Note that
> hw/acpi/nvdimm.c is already covered by "ACPI/SMBIOS".
> 
> The following files in hw/mem don't fall into the TYPE_MEMORY_DEVICE
> category:
> * hw/mem/cxl_type3.c is CXL specific and belongs to "Compute Express Link"
> * hw/mem/sparse-mem.c is already covered by "Device Fuzzing"
> * hw/mem/npcm7xx_mc.c is already covered by "Nuvoton NPCM7xx"
> 
> Thanks Xiao for your work on NVDIMM!
> 
> Cc: Ben Widawsky 
> Cc: Jonathan Cameron 
> Cc: Michael S. Tsirkin 
> Cc: Igor Mammedov 
> Cc: Ani Sinha 
> Cc: Xiao Guangrong 
> Cc: "Philippe Mathieu-Daudé" 
> Cc: Richard Henderson 
> Cc: Peter Maydell 
> Cc: Julia Suvorova 
> Signed-off-by: David Hildenbrand 

Acked-by: Jonathan Cameron 

> ---
>  MAINTAINERS | 25 +++--
>  1 file changed, 15 insertions(+), 10 deletions(-)
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index aaa649a50d..909e8dbb1b 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -1840,7 +1840,6 @@ R: Ani Sinha 
>  S: Supported
>  F: include/hw/acpi/*
>  F: include/hw/firmware/smbios.h
> -F: hw/mem/*
>  F: hw/acpi/*
>  F: hw/smbios/*
>  F: hw/i386/acpi-build.[hc]
> @@ -1851,6 +1850,7 @@ F: tests/qtest/acpi-utils.[hc]
>  F: tests/data/acpi/
>  F: docs/specs/acpi_cpu_hotplug.rst
>  F: docs/specs/acpi_mem_hotplug.rst
> +F: docs/specs/acpi_nvdimm.rst
>  F: docs/specs/acpi_pci_hotplug.rst
>  F: docs/specs/acpi_hw_reduced_hotplug.rst
>  
> @@ -2158,15 +2158,6 @@ F: qapi/rocker.json
>  F: tests/rocker/
>  F: docs/specs/rocker.txt
>  
> -NVDIMM
> -M: Xiao Guangrong 
> -S: Maintained
> -F: hw/acpi/nvdimm.c
> -F: hw/mem/nvdimm.c
> -F: include/hw/mem/nvdimm.h
> -F: docs/nvdimm.txt
> -F: docs/specs/acpi_nvdimm.rst
> -
>  e1000x
>  M: Dmitry Fleytman 
>  S: Maintained
> @@ -2588,6 +2579,7 @@ M: Ben Widawsky 
>  M: Jonathan Cameron 
>  S: Supported
>  F: hw/cxl/
> +F: hw/mem/cxl_type3.c
>  F: include/hw/cxl/
>  
>  Dirty Bitmaps
> @@ -2704,6 +2696,19 @@ F: softmmu/physmem.c
>  F: include/exec/memory-internal.h
>  F: scripts/coccinelle/memory-region-housekeeping.cocci
>  
> +Memory devices
> +M: David Hildenbrand 
> +M: Igor Mammedov 
> +R: Xiao Guangrong 
> +S: Supported
> +F: hw/mem/memory-device.c
> +F: hw/mem/nvdimm.c
> +F: hw/mem/pc-dimm.c
> +F: include/hw/mem/memory-device.h
> +F: include/hw/mem/nvdimm.h
> +F: include/hw/mem/pc-dimm.h
> +F: docs/nvdimm.txt
> +
>  SPICE
>  M: Gerd Hoffmann 
>  S: Odd Fixes




Re: [PATCH v2] xen/pass-through: merge emulated bits correctly

2022-06-17 Thread Anthony PERARD via
On Sat, Jun 11, 2022 at 12:43:29PM -0400, Chuck Zmudzinski wrote:
> In xen_pt_config_reg_init(), there is an error in the merging of the
> emulated data with the host value. With the current Qemu, instead of
> merging the emulated bits with the host bits as defined by emu_mask,
> the emulated bits are merged with the host bits as defined by the
> inverse of emu_mask. In some cases, depending on the data in the
> registers on the host, the way the registers are setup, and the
> initial values of the emulated bits, the end result will be that
> the register is initialized with the wrong value.
> 
> To correct this error, use the XEN_PT_MERGE_VALUE macro to help ensure
> the merge is done correctly.
> 
> This correction is needed to resolve Qemu project issue #1061, which
> describes the failure of Xen HVM Linux guests to boot in certain
> configurations with passed through PCI devices, that is, when this error
> disables instead of enables the PCI_STATUS_CAP_LIST bit of the
> PCI_STATUS register of a passed through PCI device, which in turn
> disables the MSI-X capability of the device in Linux guests with the end
> result being that the Linux guest never completes the boot process.
> 
> Fixes: 2e87512eccf3
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1061
> Buglink: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=988333
> 
> Signed-off-by: Chuck Zmudzinski 

Reviewed-by: Anthony PERARD 

Thank you, looks like it's been a long quest to figure this one out.

-- 
Anthony PERARD



Re: [PATCH v1] MAINTAINERS: Collect memory device files in "Memory devices"

2022-06-17 Thread Igor Mammedov
On Fri, 17 Jun 2022 14:31:51 +0200
David Hildenbrand  wrote:

> Xiao Guangrong doesn't have enough time to actively review or contribute
> to our NVDIMM implementation. Let's dissolve the "NVDIMM" section, moving
> relevant ACPI parts to "ACPI/SMBIOS" and moving memory device stuff into a
> new "Memory devices" section. Make that new section cover other memory
> device stuff as well.
> 
> We can now drop the "hw/mem/*" rule from "ACPI/SMBIOS". Note that
> hw/acpi/nvdimm.c is already covered by "ACPI/SMBIOS".
> 
> The following files in hw/mem don't fall into the TYPE_MEMORY_DEVICE
> category:
> * hw/mem/cxl_type3.c is CXL specific and belongs to "Compute Express Link"
> * hw/mem/sparse-mem.c is already covered by "Device Fuzzing"
> * hw/mem/npcm7xx_mc.c is already covered by "Nuvoton NPCM7xx"
> 
> Thanks Xiao for your work on NVDIMM!
> 
> Cc: Ben Widawsky 
> Cc: Jonathan Cameron 
> Cc: Michael S. Tsirkin 
> Cc: Igor Mammedov 
> Cc: Ani Sinha 
> Cc: Xiao Guangrong 
> Cc: "Philippe Mathieu-Daudé" 
> Cc: Richard Henderson 
> Cc: Peter Maydell 
> Cc: Julia Suvorova 
> Signed-off-by: David Hildenbrand 


Acked-by: Igor Mammedov 

> ---
>  MAINTAINERS | 25 +++--
>  1 file changed, 15 insertions(+), 10 deletions(-)
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index aaa649a50d..909e8dbb1b 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -1840,7 +1840,6 @@ R: Ani Sinha 
>  S: Supported
>  F: include/hw/acpi/*
>  F: include/hw/firmware/smbios.h
> -F: hw/mem/*
>  F: hw/acpi/*
>  F: hw/smbios/*
>  F: hw/i386/acpi-build.[hc]
> @@ -1851,6 +1850,7 @@ F: tests/qtest/acpi-utils.[hc]
>  F: tests/data/acpi/
>  F: docs/specs/acpi_cpu_hotplug.rst
>  F: docs/specs/acpi_mem_hotplug.rst
> +F: docs/specs/acpi_nvdimm.rst
>  F: docs/specs/acpi_pci_hotplug.rst
>  F: docs/specs/acpi_hw_reduced_hotplug.rst
>  
> @@ -2158,15 +2158,6 @@ F: qapi/rocker.json
>  F: tests/rocker/
>  F: docs/specs/rocker.txt
>  
> -NVDIMM
> -M: Xiao Guangrong 
> -S: Maintained
> -F: hw/acpi/nvdimm.c
> -F: hw/mem/nvdimm.c
> -F: include/hw/mem/nvdimm.h
> -F: docs/nvdimm.txt
> -F: docs/specs/acpi_nvdimm.rst
> -
>  e1000x
>  M: Dmitry Fleytman 
>  S: Maintained
> @@ -2588,6 +2579,7 @@ M: Ben Widawsky 
>  M: Jonathan Cameron 
>  S: Supported
>  F: hw/cxl/
> +F: hw/mem/cxl_type3.c
>  F: include/hw/cxl/
>  
>  Dirty Bitmaps
> @@ -2704,6 +2696,19 @@ F: softmmu/physmem.c
>  F: include/exec/memory-internal.h
>  F: scripts/coccinelle/memory-region-housekeeping.cocci
>  
> +Memory devices
> +M: David Hildenbrand 
> +M: Igor Mammedov 
> +R: Xiao Guangrong 
> +S: Supported
> +F: hw/mem/memory-device.c
> +F: hw/mem/nvdimm.c
> +F: hw/mem/pc-dimm.c
> +F: include/hw/mem/memory-device.h
> +F: include/hw/mem/nvdimm.h
> +F: include/hw/mem/pc-dimm.h
> +F: docs/nvdimm.txt
> +
>  SPICE
>  M: Gerd Hoffmann 
>  S: Odd Fixes




Re: Corrupted display changing screen colour depth in qemu-system-ppc/MacOS

2022-06-17 Thread Marc-André Lureau
Hi

On Fri, Jun 17, 2022 at 1:56 PM Gerd Hoffmann  wrote:
>
>   Hi,
>
> > > Can you try ditch the QEMU_ALLOCATED_FLAG check added by the commit?
> >
> > Commit cb8962c146 drops the QEMU_ALLOCATED_FLAG check: if I add it back in
> > with the following diff on top then everything works again:
>
> Ah, the other way around.
>
> > diff --git a/ui/console.c b/ui/console.c
> > index 365a2c14b8..decae4287f 100644
> > --- a/ui/console.c
> > +++ b/ui/console.c
> > @@ -2400,11 +2400,12 @@ static void vc_chr_open(Chardev *chr,
> >
> >  void qemu_console_resize(QemuConsole *s, int width, int height)
> >  {
> > -DisplaySurface *surface;
> > +DisplaySurface *surface = qemu_console_surface(s);
> >
> >  assert(s->console_type == GRAPHIC_CONSOLE);
> >
> > -if (qemu_console_get_width(s, -1) == width &&
> > +if (surface && (surface->flags & QEMU_ALLOCATED_FLAG) &&
> > +qemu_console_get_width(s, -1) == width &&
> >  qemu_console_get_height(s, -1) == height) {
> >  return;
> >  }
> >
> > > Which depth changes triggers this?  Going from direct color to a
> > > paletted mode?
> >
> > A quick test suggests anything that isn't 32-bit colour is affected.
>
> Hmm, I think the commit should simply be reverted.
>
> Short-cutting the qemu_console_resize() call is only valid in case the
> current surface was created by qemu_console_resize() too.  When it is
> something else -- typically a surface backed by vga vram -- it's not.
> Looking at the QEMU_ALLOCATED_FLAG checks exactly that ...

Oh ok, it might be worth adding a comment to clarify that. By
reverting, we are going back to the situation where
qemu_console_resize() will create a needless surface when rendering
with GL. As I tried to explain in the commit message, it will need
more changes to prevent that. I can take a look later.




[RFC PATCH v2 7/8] qapi: golang: Add CommandResult type to Go

2022-06-17 Thread Victor Toso
This patch adds a struct type in Go that will handle return values for
QAPI's command types.

The return value of a Command is encouraged to be one of QAPI's complex
types or an Array of those.

Every Command has an underlying CommandResult. The EmptyCommandReturn
is for those that don't expect any data e.g: `{ "return": {} }`.

All CommandReturn types implement the CommandResult interface.

Example:
qapi:
  | { 'command': 'query-sev', 'returns': 'SevInfo',
  |   'if': 'TARGET_I386' }

go:
  | type QuerySevCommandReturn struct {
  | CommandId string `json:"id,omitempty"`
  | Result*SevInfo   `json:"return"`
  | Error *QapiError `json:"error,omitempty"`
  | }

usage:
  | // One can use QuerySevCommandReturn directly or
  | // command's interface GetReturnType() instead.
  |
  | input := `{ "return": { "enabled": true, "api-major" : 0,` +
  |`"api-minor" : 0, "build-id" : 0,` +
  |`"policy" : 0, "state" : "running",` +
  |`"handle" : 1 } } `
  | ret := QuerySevCommandReturn{}
  | err := json.Unmarshal([]byte(input), &ret)
  | if ret.Error != nil {
  | // Handle command failure {"error": { ...}}
  | } else if ret.Result != nil {
  | // ret.Result.Enable == true
  | }

Signed-off-by: Victor Toso 
---
 scripts/qapi/golang.py | 73 --
 1 file changed, 70 insertions(+), 3 deletions(-)

diff --git a/scripts/qapi/golang.py b/scripts/qapi/golang.py
index 123179cced..ab91cf124f 100644
--- a/scripts/qapi/golang.py
+++ b/scripts/qapi/golang.py
@@ -89,7 +89,8 @@
 }}
 '''
 
-# Only variable is @unm_cases to handle all command's names and associated 
types.
+# Only variable is @unm_cases to handle
+# all command's names and associated types.
 TEMPLATE_COMMAND = '''
 type Command interface {{
 GetId() string
@@ -145,10 +146,49 @@
 }}
 '''
 
+TEMPLATE_COMMAND_RETURN = '''
+type CommandReturn interface {
+GetId()  string
+GetCommandName() string
+GetError()   error
+}
+
+type EmptyCommandReturn struct {
+CommandId string  `json:"id,omitempty"`
+Error *QapiError  `json:"error,omitempty"`
+Name  string  `json:"-"`
+}
+
+func (r EmptyCommandReturn) MarshalJSON() ([]byte, error) {
+return []byte(`{"return":{}}`), nil
+}
+
+func (r *EmptyCommandReturn) GetId() string {
+return r.CommandId
+}
+
+func (r *EmptyCommandReturn) GetCommandName() string {
+return r.Name
+}
+
+func (r *EmptyCommandReturn) GetError() error {
+return r.Error
+}
+'''
+
 TEMPLATE_HELPER = '''
 // Alias for go version lower than 1.18
 type Any = interface{}
 
+type QapiError struct {
+Class   string `json:"class"`
+Description string `json:"desc"`
+}
+
+func (err *QapiError) Error() string {
+return fmt.Sprintf("%s: %s", err.Class, err.Description)
+}
+
 // Creates a decoder that errors on unknown Fields
 // Returns true if successfully decoded @from string @into type
 // Returns false without error is failed with "unknown field"
@@ -176,6 +216,7 @@ def __init__(self, prefix: str):
 self.schema = None
 self.events = {}
 self.commands = {}
+self.command_results = {}
 self.golang_package_name = "qapi"
 
 def visit_begin(self, schema):
@@ -224,6 +265,7 @@ def visit_end(self):
 '''
 self.target["command"] += TEMPLATE_COMMAND.format(unm_cases=unm_cases)
 
+self.target["command"] += TEMPLATE_COMMAND_RETURN
 
 def visit_object_type(self: QAPISchemaGenGolangVisitor,
   name: str,
@@ -390,6 +432,31 @@ def visit_command(self,
 self.commands[name] = type_name
 command_ret = ""
 init_ret_type_name = f'''EmptyCommandReturn {{ Name: "{name}" }}'''
+if ret_type:
+cmd_ret_name = qapi_to_go_type_name(name, "command return")
+ret_type_name = qapi_schema_type_to_go_type(ret_type.name)
+init_ret_type_name = f'''{cmd_ret_name}{{}}'''
+isptr = "*" if ret_type_name[0] not in "*[" else ""
+self.command_results[name] = ret_type_name
+command_ret = f'''
+type {cmd_ret_name} struct {{
+CommandId  string`json:"id,omitempty"`
+Result{isptr}{ret_type_name} `json:"return"`
+Error *QapiError `json:"error,omitempty"`
+}}
+
+func (r *{cmd_ret_name}) GetCommandName() string {{
+return "{name}"
+}}
+
+func (r *{cmd_ret_name}) GetId() string {{
+return r.CommandId
+}}
+
+func (r *{cmd_ret_name}) GetError() error {{
+return r.Error
+}}
+'''
 
 self_contained = True
 if arg_type and arg_type.name.startswith("q_obj"):
@@ -423,7 +490,7 @@ def visit_command(self,
 return &{init_ret_type_name}
 }}
 '''
-self.target["command"] += content + methods
+self.target["command"] += content + methods + command_ret
 
 def visit_event(self, name, info, ifcond, features, 

[RFC PATCH v2 6/8] qapi: golang: Generate qapi's command types in Go

2022-06-17 Thread Victor Toso
This patch handles QAPI command types and generates data structures in
Go that decodes from QMP JSON Object to Go data structure and vice
versa.

Similar to Event, this patch adds a Command interface and two helper
functions MarshalCommand and UnmarshalCommand.

At the time of this writing, it generates 209 structures.

Example:

qapi:
  | { 'command': 'set_password',
  |   'boxed': true,
  |   'data': 'SetPasswordOptions' }

go:
  | type SetPasswordCommand struct {
  | SetPasswordOptions
  | CommandId string `json:"-"`
  | }

usage:
  | input := `{"execute":"set_password",` +
  | `"arguments":{"protocol":"vnc","password":"secret"}}`
  | c, err := UnmarshalCommand([]byte(input))
  | if err != nil {
  | panic(err)
  | }
  | if c.GetName() == `set_password` {
  | m := c.(*SetPasswordCommand)
  | // m.Password == "secret"
  | }

Signed-off-by: Victor Toso 
---
 scripts/qapi/golang.py | 123 -
 1 file changed, 120 insertions(+), 3 deletions(-)

diff --git a/scripts/qapi/golang.py b/scripts/qapi/golang.py
index b2e08cebdf..123179cced 100644
--- a/scripts/qapi/golang.py
+++ b/scripts/qapi/golang.py
@@ -88,6 +88,63 @@
 return nil, errors.New("Failed to recognize event")
 }}
 '''
+
+# Only variable is @unm_cases to handle all command's names and associated 
types.
+TEMPLATE_COMMAND = '''
+type Command interface {{
+GetId() string
+GetName()   string
+GetReturnType() CommandReturn
+}}
+
+func MarshalCommand(c Command) ([]byte, error) {{
+baseStruct := struct {{
+CommandId   string `json:"id,omitempty"`
+Namestring `json:"execute"`
+}}{{
+CommandId: c.GetId(),
+Name:  c.GetName(),
+}}
+base, err := json.Marshal(baseStruct)
+if err != nil {{
+return []byte{{}}, err
+}}
+
+argsStruct := struct {{
+Args Command `json:"arguments,omitempty"`
+}}{{
+Args: c,
+}}
+args, err := json.Marshal(argsStruct)
+if err != nil {{
+return []byte{{}}, err
+}}
+
+if len(args) == len(`{{"arguments":{{`) {{
+return base, nil
+}}
+
+// Combines Event's base and data in a single JSON object
+result := fmt.Sprintf("%s,%s", base[:len(base)-1], args[1:])
+return []byte(result), nil
+}}
+
+func UnmarshalCommand(data []byte) (Command, error) {{
+base := struct {{
+CommandId string `json:"id,omitempty"`
+Name  string `json:"execute"`
+}}{{}}
+if err := json.Unmarshal(data, &base); err != nil {{
+return nil, errors.New(fmt.Sprintf("Failed to decode command: %s", 
string(data)))
+}}
+
+switch base.Name {{
+{unm_cases}
+}}
+return nil, errors.New("Failed to recognize command")
+}}
+'''
+
 TEMPLATE_HELPER = '''
 // Alias for go version lower than 1.18
 type Any = interface{}
@@ -112,12 +169,13 @@ class QAPISchemaGenGolangVisitor(QAPISchemaVisitor):
 
 def __init__(self, prefix: str):
 super().__init__()
-self.target = {name: "" for name in ["alternate", "enum",
+self.target = {name: "" for name in ["alternate", "command", "enum",
  "event", "helper", "struct",
  "union"]}
 self.objects_seen = {}
 self.schema = None
 self.events = {}
+self.commands = {}
 self.golang_package_name = "qapi"
 
 def visit_begin(self, schema):
@@ -149,6 +207,23 @@ def visit_end(self):
 '''
 self.target["event"] += TEMPLATE_EVENT.format(unm_cases=unm_cases)
 
+unm_cases = ""
+for name in sorted(self.commands):
+case_type = self.commands[name]
+unm_cases += f'''
+case "{name}":
+command := struct {{
+Args {case_type} `json:"arguments"`
+}}{{}}
+
+if err := json.Unmarshal(data, &command); err != nil {{
+return nil, errors.New(fmt.Sprintf("Failed to unmarshal: %s", 
string(data)))
+}}
+command.Args.CommandId = base.CommandId
+return &command.Args, nil
+'''
+self.target["command"] += TEMPLATE_COMMAND.format(unm_cases=unm_cases)
+
 
 def visit_object_type(self: QAPISchemaGenGolangVisitor,
   name: str,
@@ -308,7 +383,47 @@ def visit_command(self,
   allow_oob: bool,
   allow_preconfig: bool,
   coroutine: bool) -> None:
-pass
+# Safety check
+assert name == info.defn_name
+
+type_name = qapi_to_go_type_name(name, info.defn_meta)
+self.commands[name] = type_name
+command_ret = ""
+init_ret_type_name = f'''EmptyCommandReturn {{ Name: "{name}" }}'''
+
+self_contained = True
+if arg_type and arg_type.name.startswith("q_obj"):
+self_contained = False
+
+content = ""
+if boxed or self_contained:
+args = "" if 

[PATCH v1] MAINTAINERS: Collect memory device files in "Memory devices"

2022-06-17 Thread David Hildenbrand
Xiao Guangrong doesn't have enough time to actively review or contribute
to our NVDIMM implementation. Let's dissolve the "NVDIMM" section, moving
relevant ACPI parts to "ACPI/SMBIOS" and moving memory device stuff into a
new "Memory devices" section. Make that new section cover other memory
device stuff as well.

We can now drop the "hw/mem/*" rule from "ACPI/SMBIOS". Note that
hw/acpi/nvdimm.c is already covered by "ACPI/SMBIOS".

The following files in hw/mem don't fall into the TYPE_MEMORY_DEVICE
category:
* hw/mem/cxl_type3.c is CXL specific and belongs to "Compute Express Link"
* hw/mem/sparse-mem.c is already covered by "Device Fuzzing"
* hw/mem/npcm7xx_mc.c is already covered by "Nuvoton NPCM7xx"

Thanks Xiao for your work on NVDIMM!

Cc: Ben Widawsky 
Cc: Jonathan Cameron 
Cc: Michael S. Tsirkin 
Cc: Igor Mammedov 
Cc: Ani Sinha 
Cc: Xiao Guangrong 
Cc: "Philippe Mathieu-Daudé" 
Cc: Richard Henderson 
Cc: Peter Maydell 
Cc: Julia Suvorova 
Signed-off-by: David Hildenbrand 
---
 MAINTAINERS | 25 +++--
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index aaa649a50d..909e8dbb1b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1840,7 +1840,6 @@ R: Ani Sinha 
 S: Supported
 F: include/hw/acpi/*
 F: include/hw/firmware/smbios.h
-F: hw/mem/*
 F: hw/acpi/*
 F: hw/smbios/*
 F: hw/i386/acpi-build.[hc]
@@ -1851,6 +1850,7 @@ F: tests/qtest/acpi-utils.[hc]
 F: tests/data/acpi/
 F: docs/specs/acpi_cpu_hotplug.rst
 F: docs/specs/acpi_mem_hotplug.rst
+F: docs/specs/acpi_nvdimm.rst
 F: docs/specs/acpi_pci_hotplug.rst
 F: docs/specs/acpi_hw_reduced_hotplug.rst
 
@@ -2158,15 +2158,6 @@ F: qapi/rocker.json
 F: tests/rocker/
 F: docs/specs/rocker.txt
 
-NVDIMM
-M: Xiao Guangrong 
-S: Maintained
-F: hw/acpi/nvdimm.c
-F: hw/mem/nvdimm.c
-F: include/hw/mem/nvdimm.h
-F: docs/nvdimm.txt
-F: docs/specs/acpi_nvdimm.rst
-
 e1000x
 M: Dmitry Fleytman 
 S: Maintained
@@ -2588,6 +2579,7 @@ M: Ben Widawsky 
 M: Jonathan Cameron 
 S: Supported
 F: hw/cxl/
+F: hw/mem/cxl_type3.c
 F: include/hw/cxl/
 
 Dirty Bitmaps
@@ -2704,6 +2696,19 @@ F: softmmu/physmem.c
 F: include/exec/memory-internal.h
 F: scripts/coccinelle/memory-region-housekeeping.cocci
 
+Memory devices
+M: David Hildenbrand 
+M: Igor Mammedov 
+R: Xiao Guangrong 
+S: Supported
+F: hw/mem/memory-device.c
+F: hw/mem/nvdimm.c
+F: hw/mem/pc-dimm.c
+F: include/hw/mem/memory-device.h
+F: include/hw/mem/nvdimm.h
+F: include/hw/mem/pc-dimm.h
+F: docs/nvdimm.txt
+
 SPICE
 M: Gerd Hoffmann 
 S: Odd Fixes
-- 
2.35.3




Re: [PATCH v5 4/5] i386/pc: relocate 4g start to 1T where applicable

2022-06-17 Thread Igor Mammedov
On Fri, 17 Jun 2022 13:18:38 +0100
Joao Martins  wrote:

> On 6/16/22 15:23, Igor Mammedov wrote:
> > On Fri, 20 May 2022 11:45:31 +0100
> > Joao Martins  wrote:
> >   
> >> It is assumed that the whole GPA space is available to be DMA
> >> addressable, within a given address space limit, expect for a  
> >^^^ typo?
> >   
> Yes, it should have been 'except'.
> 
> >> tiny region before the 4G. Since Linux v5.4, VFIO validates
> >> whether the selected GPA is indeed valid i.e. not reserved by
> >> IOMMU on behalf of some specific devices or platform-defined
> >> restrictions, and thus failing the ioctl(VFIO_DMA_MAP) with
> >>  -EINVAL.
> >>
> >> AMD systems with an IOMMU are examples of such platforms and
> >> particularly may only have these ranges as allowed:
> >>
> >> - fedf (0  .. 3.982G)
> >>fef0 - 00fc (3.983G .. 1011.9G)
> >>0100 -  (1Tb.. 16Pb[*])
> >>
> >> We already account for the 4G hole, albeit if the guest is big
> >> enough we will fail to allocate a guest with  >1010G due to the
> >> ~12G hole at the 1Tb boundary, reserved for HyperTransport (HT).
> >>
> >> [*] there is another reserved region unrelated to HT that exists
> >> in the 256T boundaru in Fam 17h according to Errata #1286,  
> >   ^ ditto
> >   
> Fixed.
> 
> >> documeted also in "Open-Source Register Reference for AMD Family
> >> 17h Processors (PUB)"
> >>
> >> When creating the region above 4G, take into account that on AMD
> >> platforms the HyperTransport range is reserved and hence it
> >> cannot be used either as GPAs. On those cases rather than
> >> establishing the start of ram-above-4g to be 4G, relocate instead
> >> to 1Tb. See AMD IOMMU spec, section 2.1.2 "IOMMU Logical
> >> Topology", for more information on the underlying restriction of
> >> IOVAs.
> >>
> >> After accounting for the 1Tb hole on AMD hosts, mtree should
> >> look like:
> >>
> >> -7fff (prio 0, i/o):
> >> alias ram-below-4g @pc.ram -7fff
> >> 0100-01ff7fff (prio 0, i/o):
> >>alias ram-above-4g @pc.ram 8000-00ff
> >>
> >> If the relocation is done, we also add the reserved HT
> >> e820 range as reserved.
> >>
> >> Default phys-bits on Qemu is TCG_PHYS_ADDR_BITS (40) which is enough
> >> to address 1Tb (0xff  ). On AMD platforms, if a
> >> ram-above-4g relocation may be desired and the CPU wasn't configured
> >> with a big enough phys-bits, print an error message to the user
> >> and do not make the relocation of the above-4g-region if phys-bits
> >> is too low.
> >>
> >> Suggested-by: Igor Mammedov 
> >> Signed-off-by: Joao Martins 
> >> ---
> >>  hw/i386/pc.c | 111 +++
> >>  1 file changed, 111 insertions(+)
> >>
> >> diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> >> index af52d4ff89ef..652ae8ff9ccf 100644
> >> --- a/hw/i386/pc.c
> >> +++ b/hw/i386/pc.c
> >> @@ -796,6 +796,110 @@ void xen_load_linux(PCMachineState *pcms)
> >>  #define PC_ROM_ALIGN   0x800
> >>  #define PC_ROM_SIZE(PC_ROM_MAX - PC_ROM_MIN_VGA)
> >>  
> >> +/*
> >> + * AMD systems with an IOMMU have an additional hole close to the
> >> + * 1Tb, which are special GPAs that cannot be DMA mapped. Depending
> >> + * on kernel version, VFIO may or may not let you DMA map those ranges.
> >> + * Starting Linux v5.4 we validate it, and can't create guests on AMD 
> >> machines
> >> + * with certain memory sizes. It's also wrong to use those IOVA ranges
> >> + * in detriment of leading to IOMMU INVALID_DEVICE_REQUEST or worse.
> >> + * The ranges reserved for Hyper-Transport are:
> >> + *
> >> + * FD__h - FF__h
> >> + *
> >> + * The ranges represent the following:
> >> + *
> >> + * Base Address   Top Address  Use
> >> + *
> >> + * FD__h FD_F7FF_h Reserved interrupt address space
> >> + * FD_F800_h FD_F8FF_h Interrupt/EOI IntCtl
> >> + * FD_F900_h FD_F90F_h Legacy PIC IACK
> >> + * FD_F910_h FD_F91F_h System Management
> >> + * FD_F920_h FD_FAFF_h Reserved Page Tables
> >> + * FD_FB00_h FD_FBFF_h Address Translation
> >> + * FD_FC00_h FD_FDFF_h I/O Space
> >> + * FD_FE00_h FD__h Configuration
> >> + * FE__h FE_1FFF_h Extended Configuration/Device Messages
> >> + * FE_2000_h FF__h Reserved
> >> + *
> >> + * See AMD IOMMU spec, section 2.1.2 "IOMMU Logical Topology",
> >> + * Table 3: Special Address Controls (GPA) for more information.
> >> + */
> >> +#define AMD_HT_START 0xfdUL
> >> +#define AMD_HT_END   0xffUL
> >> +#define AMD_ABOVE_1TB_START  (AMD_HT_END + 1)
> >> +#define AMD_HT_SIZE  (AMD_ABOVE_1TB_START - AMD_HT_START)
> >> +
> >> +static hwaddr x86_max_phys_addr(PCMachineState *pcms,  
> > 
> > 

[RFC PATCH v2 3/8] qapi: golang: Generate qapi's struct types in Go

2022-06-17 Thread Victor Toso
This patch handles QAPI struct types and generates the equivalent
types in Go.

At the time of this writing, it generates 388 structures.

The highlights of this implementation are:

1. When generating a Go struct that requires a @base type, the @base
   type's fields are copied over to the Go struct. The advantage of this
   approach is not having embedded structs in any of the QAPI types.
   The downside is that some generated types are likely useless now,
   like InetSocketAddressBase from InetSocketAddress.

2. About the Go struct's fields:

  i) They can be either by Value or Reference.

  ii) Every field that is marked as optional in the QAPI specification
  is translated to a Reference field in its Go structure. This design
  decision is the most straightforward way to check if a given field
  was set or not.

  iii) Mandatory fields are always by Value with the exception of QAPI
  arrays, which are handled by Reference (to a block of memory) by Go.

  iv) All the fields are named with Uppercase due to Golang's export
  convention.

  v) In order to avoid any kind of issues when encoding or decoding, to
  or from JSON, we mark all fields with their @name and, when the
  member is optional, with @omitempty.

Example:

qapi:
  | { 'struct': 'BlockdevCreateOptionsFile',
  |   'data': { 'filename': 'str',
  | 'size': 'size',
  | '*preallocation':   'PreallocMode',
  | '*nocow':   'bool',
  | '*extent-size-hint':'size'} }

go:
  | type BlockdevCreateOptionsFile struct {
  | Filename   string`json:"filename"`
  | Size   uint64`json:"size"`
  | Preallocation  *PreallocMode `json:"preallocation,omitempty"`
  | Nocow  *bool `json:"nocow,omitempty"`
  | ExtentSizeHint *uint64   `json:"extent-size-hint,omitempty"`
  | }

Signed-off-by: Victor Toso 
---
 scripts/qapi/golang.py | 117 -
 1 file changed, 115 insertions(+), 2 deletions(-)

diff --git a/scripts/qapi/golang.py b/scripts/qapi/golang.py
index 37d7c062c9..1ab0c0bb46 100644
--- a/scripts/qapi/golang.py
+++ b/scripts/qapi/golang.py
@@ -53,7 +53,7 @@ class QAPISchemaGenGolangVisitor(QAPISchemaVisitor):
 
 def __init__(self, prefix: str):
 super().__init__()
-self.target = {name: "" for name in ["alternate", "enum", "helper"]}
+self.target = {name: "" for name in ["alternate", "enum", "helper", 
"struct"]}
 self.objects_seen = {}
 self.schema = None
 self.golang_package_name = "qapi"
@@ -79,7 +79,37 @@ def visit_object_type(self: QAPISchemaGenGolangVisitor,
   members: List[QAPISchemaObjectTypeMember],
   variants: Optional[QAPISchemaVariants]
   ) -> None:
-pass
+# Do not handle anything besides structs
+if (name == self.schema.the_empty_object_type.name or
+not isinstance(name, str) or
+info.defn_meta not in ["struct"]):
+return
+
+# Safety checks.
+assert name not in self.objects_seen
+self.objects_seen[name] = True
+
+# visit all inner objects as well, they are not going to be
+# called by python's generator.
+if variants:
+for var in variants.variants:
+assert isinstance(var.type, QAPISchemaObjectType)
+self.visit_object_type(self,
+   var.type.name,
+   var.type.info,
+   var.type.ifcond,
+   var.type.base,
+   var.type.local_members,
+   var.type.variants)
+
+# Save generated Go code to be written later
+self.target[info.defn_meta] += qapi_to_golang_struct(name,
+ info,
+ ifcond,
+ features,
+ base,
+ members,
+ variants)
 
 def visit_alternate_type(self: QAPISchemaGenGolangVisitor,
  name: str,
@@ -223,6 +253,72 @@ def generate_struct_type(type_name, args="") -> str:
 '''
 
 
+# Helper function that is used for most of QAPI types
+def qapi_to_golang_struct(name: str,
+  info: Optional[QAPISourceInfo],
+  ifcond: QAPISchemaIfCond,
+  features: List[QAPISchemaFeature],
+  base: Optional[QAPISchemaObjectType],
+  members: 

[RFC PATCH v2 8/8] qapi: golang: document skip function visit_array_types

2022-06-17 Thread Victor Toso
Signed-off-by: Victor Toso 
---
 scripts/qapi/golang.py | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/scripts/qapi/golang.py b/scripts/qapi/golang.py
index ab91cf124f..f37014f52b 100644
--- a/scripts/qapi/golang.py
+++ b/scripts/qapi/golang.py
@@ -410,7 +410,12 @@ def visit_enum_type(self: QAPISchemaGenGolangVisitor,
 '''
 
 def visit_array_type(self, name, info, ifcond, element_type):
-pass
+# TLDR: We don't need to any extra boilerplate in Go to handle Arrays.
+#
+# This function is implemented just to be sure that:
+# 1. Every array type ends with List
+# 2. Every array type's element is the array type without 'List'
+assert name.endswith("List") and name[:-4] == element_type.name
 
 def visit_command(self,
   name: str,
-- 
2.36.1




[RFC PATCH v2 1/8] qapi: golang: Generate qapi's enum types in Go

2022-06-17 Thread Victor Toso
This patch handles QAPI enum types and generates its equivalent in Go.

Basically, Enums are being handled as strings in Golang.

1. For each QAPI enum, we will define a string type in Go to be the
   assigned type of this specific enum.

2. Naming: CamelCase will be used in any identifier that we want to
   export [0], which is everything.

[0] https://go.dev/ref/spec#Exported_identifiers

Example:

qapi:
  | { 'enum': 'DisplayProtocol',
  |   'data': [ 'vnc', 'spice' ] }

go:
  | type DisplayProtocol string
  |
  | const (
  | DisplayProtocolVnc   DisplayProtocol = "vnc"
  | DisplayProtocolSpice DisplayProtocol = "spice"
  | )

Signed-off-by: Victor Toso 
---
 scripts/qapi/golang.py | 134 +
 scripts/qapi/main.py   |   2 +
 2 files changed, 136 insertions(+)
 create mode 100644 scripts/qapi/golang.py

diff --git a/scripts/qapi/golang.py b/scripts/qapi/golang.py
new file mode 100644
index 00..f2776520a1
--- /dev/null
+++ b/scripts/qapi/golang.py
@@ -0,0 +1,134 @@
+"""
+Golang QAPI generator
+"""
+# Copyright (c) 2022 Red Hat Inc.
+#
+# Authors:
+#  Victor Toso 
+#
+# This work is licensed under the terms of the GNU GPL, version 2.
+# See the COPYING file in the top-level directory.
+
+# Just for type hint on self
+from __future__ import annotations
+
+import os
+from typing import List, Optional
+
+from .schema import (
+QAPISchema,
+QAPISchemaType,
+QAPISchemaVisitor,
+QAPISchemaEnumMember,
+QAPISchemaFeature,
+QAPISchemaIfCond,
+QAPISchemaObjectType,
+QAPISchemaObjectTypeMember,
+QAPISchemaVariants,
+)
+from .source import QAPISourceInfo
+
+
+class QAPISchemaGenGolangVisitor(QAPISchemaVisitor):
+
+def __init__(self, prefix: str):
+super().__init__()
+self.target = {name: "" for name in ["enum"]}
+self.schema = None
+self.golang_package_name = "qapi"
+
+def visit_begin(self, schema):
+self.schema = schema
+
+# Every Go file needs to reference its package name
+for target in self.target:
+self.target[target] = f"package {self.golang_package_name}\n"
+
+def visit_end(self):
+self.schema = None
+
+def visit_object_type(self: QAPISchemaGenGolangVisitor,
+  name: str,
+  info: Optional[QAPISourceInfo],
+  ifcond: QAPISchemaIfCond,
+  features: List[QAPISchemaFeature],
+  base: Optional[QAPISchemaObjectType],
+  members: List[QAPISchemaObjectTypeMember],
+  variants: Optional[QAPISchemaVariants]
+  ) -> None:
+pass
+
+def visit_alternate_type(self: QAPISchemaGenGolangVisitor,
+ name: str,
+ info: Optional[QAPISourceInfo],
+ ifcond: QAPISchemaIfCond,
+ features: List[QAPISchemaFeature],
+ variants: QAPISchemaVariants
+ ) -> None:
+pass
+
+def visit_enum_type(self: QAPISchemaGenGolangVisitor,
+name: str,
+info: Optional[QAPISourceInfo],
+ifcond: QAPISchemaIfCond,
+features: List[QAPISchemaFeature],
+members: List[QAPISchemaEnumMember],
+prefix: Optional[str]
+) -> None:
+
+value = qapi_to_field_name_enum(members[0].name)
+fields = ""
+for member in members:
+value = qapi_to_field_name_enum(member.name)
+fields += f'''\t{name}{value} {name} = "{member.name}"\n'''
+
+self.target["enum"] += f'''
+type {name} string
+const (
+{fields[:-1]}
+)
+'''
+
+def visit_array_type(self, name, info, ifcond, element_type):
+pass
+
+def visit_command(self,
+  name: str,
+  info: Optional[QAPISourceInfo],
+  ifcond: QAPISchemaIfCond,
+  features: List[QAPISchemaFeature],
+  arg_type: Optional[QAPISchemaObjectType],
+  ret_type: Optional[QAPISchemaType],
+  gen: bool,
+  success_response: bool,
+  boxed: bool,
+  allow_oob: bool,
+  allow_preconfig: bool,
+  coroutine: bool) -> None:
+pass
+
+def visit_event(self, name, info, ifcond, features, arg_type, boxed):
+pass
+
+def write(self, output_dir: str) -> None:
+for module_name, content in self.target.items():
+go_module = module_name + "s.go"
+go_dir = "go"
+pathname = os.path.join(output_dir, go_dir, go_module)
+odir = os.path.dirname(pathname)
+

[RFC PATCH v2 4/8] qapi: golang: Generate qapi's union types in Go

2022-06-17 Thread Victor Toso
This patch handles QAPI union types and generates the equivalent data
structures and methods in Go to handle it.

At the moment of this writing, it generates 38 structures.

The QAPI union type has two types of fields: The @base and the
@variants members. The @base fields can be considered common members
for the union while only one field maximum is set for the @variants.

The QAPI specification defines a @discriminator field, which is an
Enum type. The purpose of the @discriminator is to identify which
@variant type is being used. The @discriminator is not used in the
generated union Go structs as it suffices to check which one of the
@variants fields was set.

The union types implement the Marshaler and Unmarshaler interfaces to
seamlessly decode from JSON objects to Golang structs and vice versa.

qapi:
  | { 'union': 'SetPasswordOptions',
  |   'base': { 'protocol': 'DisplayProtocol',
  | 'password': 'str',
  | '*connected': 'SetPasswordAction' },
  |   'discriminator': 'protocol',
  |   'data': { 'vnc': 'SetPasswordOptionsVnc' } }

go:
  | type SetPasswordOptions struct {
  | // Base fields
  | Password  string `json:"password"`
  | Connected *SetPasswordAction `json:"connected,omitempty"`
  |
  | // Variants fields
  | Vnc *SetPasswordOptionsVnc `json:"-"`
  | }

Signed-off-by: Victor Toso 
---
 scripts/qapi/golang.py | 112 ++---
 1 file changed, 105 insertions(+), 7 deletions(-)

diff --git a/scripts/qapi/golang.py b/scripts/qapi/golang.py
index 1ab0c0bb46..6c6a5cea97 100644
--- a/scripts/qapi/golang.py
+++ b/scripts/qapi/golang.py
@@ -53,7 +53,8 @@ class QAPISchemaGenGolangVisitor(QAPISchemaVisitor):
 
 def __init__(self, prefix: str):
 super().__init__()
-self.target = {name: "" for name in ["alternate", "enum", "helper", 
"struct"]}
+self.target = {name: "" for name in ["alternate", "enum", "helper", 
"struct",
+ "union"]}
 self.objects_seen = {}
 self.schema = None
 self.golang_package_name = "qapi"
@@ -79,10 +80,14 @@ def visit_object_type(self: QAPISchemaGenGolangVisitor,
   members: List[QAPISchemaObjectTypeMember],
   variants: Optional[QAPISchemaVariants]
   ) -> None:
-# Do not handle anything besides structs
+# Do not handle anything besides struct and unions.
 if (name == self.schema.the_empty_object_type.name or
 not isinstance(name, str) or
-info.defn_meta not in ["struct"]):
+info.defn_meta not in ["struct", "union"]):
+return
+
+# Base structs are embed
+if qapi_name_is_base(name):
 return
 
 # Safety checks.
@@ -110,6 +115,10 @@ def visit_object_type(self: QAPISchemaGenGolangVisitor,
  base,
  members,
  variants)
+if info.defn_meta == "union":
+self.target[info.defn_meta] += qapi_to_golang_methods_union(name,
+info,
+
variants)
 
 def visit_alternate_type(self: QAPISchemaGenGolangVisitor,
  name: str,
@@ -311,14 +320,99 @@ def qapi_to_golang_struct(name: str,
 # Variant's are handled in the Marshal/Unmarshal methods
 fieldtag = '`json:"-"`'
 fields += f"\t{field} *{member_type}{fieldtag}\n"
-member_type = qapi_schema_type_to_go_type(var.type.name)
-# Variant's are handled in the Marshal/Unmarshal methods
-fieldtag = '`json:"-"`'
-fields += f"\t{field} *{member_type}{fieldtag}\n"
 
 return generate_struct_type(type_name, fields)
 
 
+def qapi_to_golang_methods_union(name: str,
+ info: Optional[QAPISourceInfo],
+ variants: Optional[QAPISchemaVariants]
+ ) -> str:
+
+type_name = qapi_to_go_type_name(name, info.defn_meta)
+
+driverCases = ""
+checkFields = ""
+if variants:
+for var in variants.variants:
+if var.type.is_implicit():
+continue
+
+field = qapi_to_field_name(var.name)
+member_type = qapi_schema_type_to_go_type(var.type.name)
+
+if len(checkFields) > 0:
+checkFields += "\t} else "
+checkFields += f'''if s.{field} != nil {{
+driver = "{var.name}"
+payload, err = json.Marshal(s.{field})
+'''
+# for Unmarshal method
+driverCases += f'''
+case "{var.name}":
+s.{field} = 

[RFC PATCH v2 2/8] qapi: golang: Generate qapi's alternate types in Go

2022-06-17 Thread Victor Toso
This patch handles QAPI alternate types and generates data structures
in Go that handles it.

At this moment, there are 5 alternates in qemu/qapi, they are:
 * BlockDirtyBitmapMergeSource
 * Qcow2OverlapChecks
 * BlockdevRef
 * BlockdevRefOrNull
 * StrOrNull

Alternate types are similar to Union but without a discriminator that
can be used to identify the underlying value on the wire. It is needed
to infer it. In Go, all the types are mapped as optional fields and
Marshal and Unmarshal methods will be handling the data checks.

Example:

qapi:
  | { 'alternate': 'BlockdevRef',
  |   'data': { 'definition': 'BlockdevOptions',
  | 'reference': 'str' } }

go:
  | type BlockdevRef struct {
  | Definition *BlockdevOptions
  | Reference  *string
  | }

usage:
  | input := `{"driver":"qcow2","data-file":"/some/place/my-image"}`
  | k := BlockdevRef{}
  | err := json.Unmarshal([]byte(input), &k)
  | if err != nil {
  | panic(err)
  | }
  | // *k.Definition.Qcow2.DataFile.Reference == "/some/place/my-image"

Signed-off-by: Victor Toso 
---
 scripts/qapi/golang.py | 119 -
 1 file changed, 117 insertions(+), 2 deletions(-)

diff --git a/scripts/qapi/golang.py b/scripts/qapi/golang.py
index f2776520a1..37d7c062c9 100644
--- a/scripts/qapi/golang.py
+++ b/scripts/qapi/golang.py
@@ -29,11 +29,32 @@
 from .source import QAPISourceInfo
 
 
+TEMPLATE_HELPER = '''
+// Alias for go version lower than 1.18
+type Any = interface{}
+
+// Creates a decoder that errors on unknown Fields
+// Returns true if successfully decoded @from string @into type
+// Returns false without error is failed with "unknown field"
+// Returns false with error is a different error was found
+func StrictDecode(into interface{}, from []byte) error {
+dec := json.NewDecoder(strings.NewReader(string(from)))
+dec.DisallowUnknownFields()
+
+if err := dec.Decode(into); err != nil {
+return err
+}
+return nil
+}
+'''
+
+
 class QAPISchemaGenGolangVisitor(QAPISchemaVisitor):
 
 def __init__(self, prefix: str):
 super().__init__()
-self.target = {name: "" for name in ["enum"]}
+self.target = {name: "" for name in ["alternate", "enum", "helper"]}
+self.objects_seen = {}
 self.schema = None
 self.golang_package_name = "qapi"
 
@@ -44,6 +65,8 @@ def visit_begin(self, schema):
 for target in self.target:
 self.target[target] = f"package {self.golang_package_name}\n"
 
+self.target["helper"] += TEMPLATE_HELPER
+
 def visit_end(self):
 self.schema = None
 
@@ -65,7 +88,69 @@ def visit_alternate_type(self: QAPISchemaGenGolangVisitor,
  features: List[QAPISchemaFeature],
  variants: QAPISchemaVariants
  ) -> None:
-pass
+assert name not in self.objects_seen
+self.objects_seen[name] = True
+
+marshal_return_default = f'nil, errors.New("{name} has empty fields")'
+marshal_check_fields = ""
+unmarshal_check_fields = ""
+variant_fields = ""
+
+# We need to check if the Alternate type supports NULL as that
+# means that JSON to Go would allow all fields to be empty.
+# Alternate that don't support NULL, would fail to convert
+# to JSON if all fields were empty.
+return_on_null = f"errors.New(`null not supported for {name}`)"
+
+# Assembly the fields and all the checks for Marshal and
+# Unmarshal methods
+for var in variants.variants:
+# Nothing to generate on null types. We update some
+# variables to handle json-null on marshalling methods.
+if var.type.name == "null":
+marshal_return_default = '[]byte("null"), nil'
+return_on_null = "nil"
+continue
+
+var_name = qapi_to_field_name(var.name)
+var_type = qapi_schema_type_to_go_type(var.type.name)
+variant_fields += f"\t{var_name} *{var_type}\n"
+
+if len(marshal_check_fields) > 0:
+marshal_check_fields += "} else "
+
+marshal_check_fields += f'''if s.{var_name} != nil {{
+return json.Marshal(s.{var_name})
+'''
+
+unmarshal_check_fields += f'''// Check for {var_type}
+{{
+s.{var_name} = new({var_type})
+if err := StrictDecode(s.{var_name}, data); err == nil {{
+return nil
+}}
+s.{var_name} = nil
+}}
+'''
+
+marshal_check_fields += "}"
+
+self.target["alternate"] += generate_struct_type(name, variant_fields)
+self.target["alternate"] += f'''
+func (s {name}) MarshalJSON() ([]byte, error) {{
+{marshal_check_fields}
+return {marshal_return_default}
+}}
+
+func (s *{name}) UnmarshalJSON(data []byte) error {{
+// Check for json-null first
+

[RFC PATCH v2 0/8] qapi: add generator for Golang interface

2022-06-17 Thread Victor Toso
Hi,

This is the second iteration of RFC v1:
  https://lists.gnu.org/archive/html/qemu-devel/2022-04/msg00226.html


# What this is about?

To generate a simple Golang interface that could communicate with QEMU
over QMP. The Go code that is generated is meant to be used as the bare
bones to exchange QMP messages.

The goal is to have this as a Go module in QEMU gitlab namespace,
similar to what has been done for python-qemu-qmp
  https://gitlab.com/qemu-project/python-qemu-qmp


# Issues raised in RFC v1

  A leading '*' marks the issues I addressed in this iteration

* 1) Documentation was removed to avoid License issues, by Daniel
 Thread: 
https://lists.nongnu.org/archive/html/qemu-devel/2022-05/msg01889.html

 It is important for the generated Go module to be compatible with
 Licenses used by projects that would be using this. Copying the
 documentation of the QAPI spec might conflict with GPLv2+.

 I have not proposed another license in this iteration, but I'm
 planning to go with MIT No Attribution, aka MIT-0 [0]. Does it make
 sense to bind the generated code's license to MIT-0 already at
 generator level?

 [0] https://github.com/aws/mit-0/blob/master/MIT-0

  2) Inconsistent generated Names, by Andrea + Markus
 Thread: https://lists.gnu.org/archive/html/qemu-devel/2022-04/msg05026.html
 Example 1:
 |qapi|Go |  Expected |
 | @logappend | Logappend | LogAppend |
 
 Example 2:
 (acronyms) VncInfo and DisplayReloadOptionsVNC
 
 This was not addressed in RFC v2 mainly because it seems to need
 more metadata support from the QAPI spec to handle specific
 scenarios. The solution seems either an extra metadata proposal by
 Andrea [1] or reviving Kevin's work [2]
 
 [1] https://lists.gnu.org/archive/html/qemu-devel/2022-05/msg00127.html
 [2] https://lists.gnu.org/archive/html/qemu-devel/2021-09/msg04703.html

* 3) Better type safety, by Andrea + Daniel
 Thread: https://lists.gnu.org/archive/html/qemu-devel/2022-05/msg01906.html
 
 Most of the 'Any' type (interface {}) has been removed. The only
 place it still exists is for fields that uses QAPI's any type, like
 with command qom-set or the struct type ObjectPropertyInfo.

* 4) QAPI enums mapped to String instead of Int type, by Daniel.
 Thread: https://lists.gnu.org/archive/html/qemu-devel/2022-05/msg01904.html

 I'm still on the fence about using string here, mostly because of
 the same issue reported here:
 
https://gitlab.com/libvirt/libvirt-go-module/-/merge_requests/30#note_975517740

* 5) Events and Commands as interface, by Daniel
 Thread: https://lists.gnu.org/archive/html/qemu-devel/2022-05/msg01914.html

 So, instead of having a Command/Event struct with a Any type for
 the Arguments (which could be set with SetPasswordCommand struct
 type for example); now we have a Command interface which all
 previous structs that behaved as Arguments implement.

 I've included Marshal{Command Event} and Unmarshal{Command Event}
 helper functions that operate on top of each interface.

* 6) Removing Any from Unions, by Daniel
 Thread: https://lists.gnu.org/archive/html/qemu-devel/2022-05/msg01912.html

 I basically applied the above suggestion to all other types that
 used Any. Specific to unions was the removal of the
 'discriminator' field, as also proposed in the above link.

* 7) Flat structs by removing embed types. Discussion with Andrea
 Thread: 
https://lists.gnu.org/archive/html/qemu-devel/2022-05/msg01590.html 

 No one required it but I decided to give it a try. The major issue
 that I see with this approach is that it generates a few 'Base'
 structs that are now useless. Overall, fewer nested structs seems
 better to me. Opinions?

 Example:
  | /* This is now useless, should be removed? */
  | type InetSocketAddressBase struct {
  | Host string `json:"host"`
  | Port string `json:"port"`
  | }
  |
  | type InetSocketAddress struct {
  | // Base fields
  | Host string `json:"host"`
  | Port string `json:"port"`
  |
  |
  | Numeric   *bool   `json:"numeric,omitempty"`
  | To*uint16 `json:"to,omitempty"`
  | Ipv4  *bool   `json:"ipv4,omitempty"`
  | Ipv6  *bool   `json:"ipv6,omitempty"`
  | KeepAlive *bool   `json:"keep-alive,omitempty"`
  | Mptcp *bool   `json:"mptcp,omitempty"`
  | }

  8) Supporting multiple versions
 Thread: https://lists.gnu.org/archive/html/qemu-devel/2022-05/msg02147.html

 I'm keen on working on the proposed solution above as it seems a
 good compromise to make code that can be compatible with multiple
 versions of qmp/qemu.

 But the basis needs to be defined first, so this is for the future.

* 9) Handling { "error": { ... } }
 This was missing in the RFC v1. 

[RFC PATCH v2 5/8] qapi: golang: Generate qapi's event types in Go

2022-06-17 Thread Victor Toso
This patch handles QAPI event types and generates data structures in
Go that handles it.

We also define an Event interface and two helper functions, MarshalEvent
and UnmarshalEvent.

At the moment of this writing, this patch generates 51 structures (50
events)

Example:

qapi:
  | { 'event': 'MEMORY_DEVICE_SIZE_CHANGE',
  |   'data': { '*id': 'str', 'size': 'size', 'qom-path' : 'str'} }

go:
  | type MemoryDeviceSizeChangeEvent struct {
  | EventTimestamp Timestamp `json:"-"`
  | Id *string   `json:"id,omitempty"`
  | Size   uint64`json:"size"`
  | QomPathstring`json:"qom-path"`
  | }

usage:
  | input := `{"event":"MEMORY_DEVICE_SIZE_CHANGE",` +
  | `"timestamp":{"seconds":1588168529,"microseconds":201316},` +
  | 
`"data":{"id":"vm0","size":1073741824,"qom-path":"/machine/unattached/device[2]"}}`
  | e, err := UnmarshalEvent([]byte(input))
  | if err != nil {
  | panic(err)
  | }
  | if e.GetName() == `MEMORY_DEVICE_SIZE_CHANGE` {
  | m := e.(*MemoryDeviceSizeChangeEvent)
  | // m.QomPath == "/machine/unattached/device[2]"
  | }

Signed-off-by: Victor Toso 
---
 scripts/qapi/golang.py | 120 -
 1 file changed, 118 insertions(+), 2 deletions(-)

diff --git a/scripts/qapi/golang.py b/scripts/qapi/golang.py
index 6c6a5cea97..b2e08cebdf 100644
--- a/scripts/qapi/golang.py
+++ b/scripts/qapi/golang.py
@@ -28,7 +28,66 @@
 )
 from .source import QAPISourceInfo
 
+# Only variable is @unm_cases to handle
+# all events's names and associated types.
+TEMPLATE_EVENT = '''
+type Timestamp struct {{
+Seconds  int64 `json:"seconds"`
+Microseconds int64 `json:"microseconds"`
+}}
+
+type Event interface {{
+GetName()  string
+GetTimestamp() Timestamp
+}}
 
+func MarshalEvent(e Event) ([]byte, error) {{
+baseStruct := struct {{
+Name   string`json:"event"`
+EventTimestamp Timestamp `json:"timestamp"`
+}}{{
+Name:   e.GetName(),
+EventTimestamp: e.GetTimestamp(),
+}}
+base, err := json.Marshal(baseStruct)
+if err != nil {{
+return []byte{{}}, err
+}}
+
+dataStruct := struct {{
+Payload Event `json:"data"`
+}}{{
+Payload: e,
+}}
+data, err := json.Marshal(dataStruct)
+if err != nil {{
+return []byte{{}}, err
+}}
+
+if len(data) == len(`{{"data":{{`) {{
+return base, nil
+}}
+
+// Combines Event's base and data in a single JSON object
+result := fmt.Sprintf("%s,%s", base[:len(base)-1], data[1:])
+return []byte(result), nil
+}}
+
+func UnmarshalEvent(data []byte) (Event, error) {{
+base := struct {{
+Name   string`json:"event"`
+EventTimestamp Timestamp `json:"timestamp"`
+}}{{}}
+if err := json.Unmarshal(data, ); err != nil {{
+return nil, errors.New(fmt.Sprintf("Failed to decode event: %s", 
string(data)))
+}}
+
+switch base.Name {{
+{unm_cases}
+}}
+return nil, errors.New("Failed to recognize event")
+}}
+'''
 TEMPLATE_HELPER = '''
 // Alias for go version lower than 1.18
 type Any = interface{}
@@ -53,10 +112,12 @@ class QAPISchemaGenGolangVisitor(QAPISchemaVisitor):
 
 def __init__(self, prefix: str):
 super().__init__()
-self.target = {name: "" for name in ["alternate", "enum", "helper", 
"struct",
+self.target = {name: "" for name in ["alternate", "enum",
+ "event", "helper", "struct",
  "union"]}
 self.objects_seen = {}
 self.schema = None
+self.events = {}
 self.golang_package_name = "qapi"
 
 def visit_begin(self, schema):
@@ -71,6 +132,24 @@ def visit_begin(self, schema):
 def visit_end(self):
 self.schema = None
 
+unm_cases = ""
+for name in sorted(self.events):
+case_type = self.events[name]
+unm_cases += f'''
+case "{name}":
+event := struct {{
+Data {case_type} `json:"data"`
+}}{{}}
+
+if err := json.Unmarshal(data, ); err != nil {{
+return nil, errors.New(fmt.Sprintf("Failed to unmarshal: %s", 
string(data)))
+}}
+event.Data.EventTimestamp = base.EventTimestamp
+return , nil
+'''
+self.target["event"] += TEMPLATE_EVENT.format(unm_cases=unm_cases)
+
+
 def visit_object_type(self: QAPISchemaGenGolangVisitor,
   name: str,
   info: Optional[QAPISourceInfo],
@@ -232,7 +311,37 @@ def visit_command(self,
 pass
 
 def visit_event(self, name, info, ifcond, features, arg_type, boxed):
-pass
+assert name == info.defn_name
+type_name = qapi_to_go_type_name(name, info.defn_meta)
+self.events[name] = type_name
+
+self_contained = True
+if arg_type and 

Re: [PATCH v5 4/5] i386/pc: relocate 4g start to 1T where applicable

2022-06-17 Thread Joao Martins



On 6/16/22 15:23, Igor Mammedov wrote:
> On Fri, 20 May 2022 11:45:31 +0100
> Joao Martins  wrote:
> 
>> It is assumed that the whole GPA space is available to be DMA
>> addressable, within a given address space limit, expect for a
>^^^ typo?
> 
Yes, it should have been 'except'.

>> tiny region before the 4G. Since Linux v5.4, VFIO validates
>> whether the selected GPA is indeed valid i.e. not reserved by
>> IOMMU on behalf of some specific devices or platform-defined
>> restrictions, and thus failing the ioctl(VFIO_DMA_MAP) with
>>  -EINVAL.
>>
>> AMD systems with an IOMMU are examples of such platforms and
>> particularly may only have these ranges as allowed:
>>
>>   - fedf (0  .. 3.982G)
>>  fef0 - 00fc (3.983G .. 1011.9G)
>>  0100 -  (1Tb.. 16Pb[*])
>>
>> We already account for the 4G hole, albeit if the guest is big
>> enough we will fail to allocate a guest with  >1010G due to the
>> ~12G hole at the 1Tb boundary, reserved for HyperTransport (HT).
>>
>> [*] there is another reserved region unrelated to HT that exists
>> in the 256T boundaru in Fam 17h according to Errata #1286,
>   ^ ditto
> 
Fixed.

>> documeted also in "Open-Source Register Reference for AMD Family
>> 17h Processors (PUB)"
>>
>> When creating the region above 4G, take into account that on AMD
>> platforms the HyperTransport range is reserved and hence it
>> cannot be used either as GPAs. On those cases rather than
>> establishing the start of ram-above-4g to be 4G, relocate instead
>> to 1Tb. See AMD IOMMU spec, section 2.1.2 "IOMMU Logical
>> Topology", for more information on the underlying restriction of
>> IOVAs.
>>
>> After accounting for the 1Tb hole on AMD hosts, mtree should
>> look like:
>>
>> -7fff (prio 0, i/o):
>>   alias ram-below-4g @pc.ram -7fff
>> 0100-01ff7fff (prio 0, i/o):
>>  alias ram-above-4g @pc.ram 8000-00ff
>>
>> If the relocation is done, we also add the the reserved HT
>> e820 range as reserved.
>>
>> Default phys-bits on Qemu is TCG_PHYS_ADDR_BITS (40) which is enough
>> to address 1Tb (0xff  ). On AMD platforms, if a
>> ram-above-4g relocation may be desired and the CPU wasn't configured
>> with a big enough phys-bits, print an error message to the user
>> and do not make the relocation of the above-4g-region if phys-bits
>> is too low.
>>
>> Suggested-by: Igor Mammedov 
>> Signed-off-by: Joao Martins 
>> ---
>>  hw/i386/pc.c | 111 +++
>>  1 file changed, 111 insertions(+)
>>
>> diff --git a/hw/i386/pc.c b/hw/i386/pc.c
>> index af52d4ff89ef..652ae8ff9ccf 100644
>> --- a/hw/i386/pc.c
>> +++ b/hw/i386/pc.c
>> @@ -796,6 +796,110 @@ void xen_load_linux(PCMachineState *pcms)
>>  #define PC_ROM_ALIGN   0x800
>>  #define PC_ROM_SIZE(PC_ROM_MAX - PC_ROM_MIN_VGA)
>>  
>> +/*
>> + * AMD systems with an IOMMU have an additional hole close to the
>> + * 1Tb, which are special GPAs that cannot be DMA mapped. Depending
>> + * on kernel version, VFIO may or may not let you DMA map those ranges.
>> + * Starting Linux v5.4 we validate it, and can't create guests on AMD 
>> machines
>> + * with certain memory sizes. It's also wrong to use those IOVA ranges
>> + * in detriment of leading to IOMMU INVALID_DEVICE_REQUEST or worse.
>> + * The ranges reserved for Hyper-Transport are:
>> + *
>> + * FD__h - FF__h
>> + *
>> + * The ranges represent the following:
>> + *
>> + * Base Address   Top Address  Use
>> + *
>> + * FD__h FD_F7FF_h Reserved interrupt address space
>> + * FD_F800_h FD_F8FF_h Interrupt/EOI IntCtl
>> + * FD_F900_h FD_F90F_h Legacy PIC IACK
>> + * FD_F910_h FD_F91F_h System Management
>> + * FD_F920_h FD_FAFF_h Reserved Page Tables
>> + * FD_FB00_h FD_FBFF_h Address Translation
>> + * FD_FC00_h FD_FDFF_h I/O Space
>> + * FD_FE00_h FD__h Configuration
>> + * FE__h FE_1FFF_h Extended Configuration/Device Messages
>> + * FE_2000_h FF__h Reserved
>> + *
>> + * See AMD IOMMU spec, section 2.1.2 "IOMMU Logical Topology",
>> + * Table 3: Special Address Controls (GPA) for more information.
>> + */
>> +#define AMD_HT_START 0xfdUL
>> +#define AMD_HT_END   0xffUL
>> +#define AMD_ABOVE_1TB_START  (AMD_HT_END + 1)
>> +#define AMD_HT_SIZE  (AMD_ABOVE_1TB_START - AMD_HT_START)
>> +
>> +static hwaddr x86_max_phys_addr(PCMachineState *pcms,
> 
> s/x86_max_phys_addr/pc_max_used_gpa/
> 
Fixed.

>> +hwaddr above_4g_mem_start,
>> +uint64_t pci_hole64_size)
>> +{
>> +PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
>> +X86MachineState *x86ms = X86_MACHINE(pcms);
>> +

Re: [PATCH v5 3/5] i386/pc: pass pci_hole64_size to pc_memory_init()

2022-06-17 Thread Igor Mammedov
On Fri, 17 Jun 2022 12:13:45 +0100
Joao Martins  wrote:

> On 6/16/22 14:30, Igor Mammedov wrote:
> > On Fri, 20 May 2022 11:45:30 +0100
> > Joao Martins  wrote:
> >   
> >> Use the pre-initialized pci-host qdev and fetch the
> >> pci-hole64-size into pc_memory_init() newly added argument.
> >> piix needs a bit of care given all the !pci_enabled()
> >> and that the pci_hole64_size is private to i440fx.
> >>
> >> This is in preparation to determine that host-phys-bits are
> >> enough and for pci-hole64-size to be considered to relocate
> >> ram-above-4g to be at 1T (on AMD platforms).  
> > 
> > modulo nit blow
> > 
> > Reviewed-by: Igor Mammedov 
> >   
> 
> I haven't tackled the initialization nit below but I would assume
> you agree with the rest of the patch. Let me know if I should still
> add the Rb tag.

My ack still stands
 
> >>
> >> Signed-off-by: Joao Martins 
> >> ---
> >>  hw/i386/pc.c | 3 ++-
> >>  hw/i386/pc_piix.c| 5 -
> >>  hw/i386/pc_q35.c | 8 +++-
> >>  hw/pci-host/i440fx.c | 7 +++
> >>  include/hw/i386/pc.h | 3 ++-
> >>  include/hw/pci-host/i440fx.h | 1 +
> >>  6 files changed, 23 insertions(+), 4 deletions(-)
> >>
> >> diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> >> index f7da1d5dd40d..af52d4ff89ef 100644
> >> --- a/hw/i386/pc.c
> >> +++ b/hw/i386/pc.c
> >> @@ -799,7 +799,8 @@ void xen_load_linux(PCMachineState *pcms)
> >>  void pc_memory_init(PCMachineState *pcms,
> >>  MemoryRegion *system_memory,
> >>  MemoryRegion *rom_memory,
> >> -MemoryRegion **ram_memory)
> >> +MemoryRegion **ram_memory,
> >> +uint64_t pci_hole64_size)
> >>  {
> >>  int linux_boot, i;
> >>  MemoryRegion *option_rom_mr;
> >> diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
> >> index 12d4a279c793..57bb5b8f2aea 100644
> >> --- a/hw/i386/pc_piix.c
> >> +++ b/hw/i386/pc_piix.c
> >> @@ -91,6 +91,7 @@ static void pc_init1(MachineState *machine,
> >>  MemoryRegion *pci_memory;
> >>  MemoryRegion *rom_memory;
> >>  ram_addr_t lowmem;
> >> +uint64_t hole64_size;  
> > 
> > init it to 0 right here to avoid chance of run amok uninitialized variable?
> >   
> I haven't done this given that mst disagreed, plus the fact that the code 
> style of
> the function seems to place the NULL initialization mostly left to else 
> conditional
> clause. Part of the reason I haven't inited @i440fx_dev to NULL here as well 
> (now
> i440fx_host. The location we use hole64_size is also the same location we are 
> using
> @i440fx_host.
> 
> >>  DeviceState *i440fx_dev;
> >>  
> >>  /*
> >> @@ -166,10 +167,12 @@ static void pc_init1(MachineState *machine,
> >>  memory_region_init(pci_memory, NULL, "pci", UINT64_MAX);
> >>  rom_memory = pci_memory;
> >>  i440fx_dev = qdev_new(host_type);
> >> +hole64_size = i440fx_pci_hole64_size(i440fx_dev);
> >>  } else {
> >>  pci_memory = NULL;
> >>  rom_memory = system_memory;
> >>  i440fx_dev = NULL;
> >> +hole64_size = 0;
> >>  }
> >>  
> >>  pc_guest_info_init(pcms);
> >> @@ -186,7 +189,7 @@ static void pc_init1(MachineState *machine,
> >>  /* allocate ram and load rom/bios */
> >>  if (!xen_enabled()) {
> >>  pc_memory_init(pcms, system_memory,
> >> -   rom_memory, _memory);
> >> +   rom_memory, _memory, hole64_size);
> >>  } else {
> >>  pc_system_flash_cleanup_unused(pcms);
> >>  if (machine->kernel_filename != NULL) {
> >> diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
> >> index 8d867bdb274a..4d5c2fbd976b 100644
> >> --- a/hw/i386/pc_q35.c
> >> +++ b/hw/i386/pc_q35.c
> >> @@ -138,6 +138,7 @@ static void pc_q35_init(MachineState *machine)
> >>  MachineClass *mc = MACHINE_GET_CLASS(machine);
> >>  bool acpi_pcihp;
> >>  bool keep_pci_slot_hpc;
> >> +uint64_t pci_hole64_size = 0;
> >>  
> >>  /* Check whether RAM fits below 4G (leaving 1/2 GByte for IO memory
> >>   * and 256 Mbytes for PCI Express Enhanced Configuration Access 
> >> Mapping
> >> @@ -206,8 +207,13 @@ static void pc_q35_init(MachineState *machine)
> >>  /* create pci host bus */
> >>  q35_host = Q35_HOST_DEVICE(qdev_new(TYPE_Q35_HOST_DEVICE));
> >>  
> >> +if (pcmc->pci_enabled) {
> >> +pci_hole64_size = q35_host->mch.pci_hole64_size;
> >> +}
> >> +
> >>  /* allocate ram and load rom/bios */
> >> -pc_memory_init(pcms, get_system_memory(), rom_memory, _memory);
> >> +pc_memory_init(pcms, get_system_memory(), rom_memory, _memory,
> >> +   pci_hole64_size);
> >>  
> >>  object_property_add_child(qdev_get_machine(), "q35", 
> >> OBJECT(q35_host));
> >>  object_property_set_link(OBJECT(q35_host), MCH_HOST_PROP_RAM_MEM,
> >> diff --git a/hw/pci-host/i440fx.c b/hw/pci-host/i440fx.c
> >> index 

Re: [PATCH] hw/mem/nvdimm: fix error message for 'unarmed' flag

2022-06-17 Thread Xiao Guangrong
On Wed, Jun 15, 2022 at 7:49 PM David Hildenbrand  wrote:
>
> On 15.06.22 13:17, Xiao Guangrong wrote:
> > On Wed, Jun 15, 2022 at 4:24 PM David Hildenbrand wrote:
> >
>  Is that a temporary or a permanent thing? Do we know?
> >>>
> >>> No idea. But his last signed-off was three years ago.
> >>
> >> I sent a patch to Xiao, asking if he's still active in QEMU. If I don't
>
> s/patch/mail/ :)
>
> >> get a reply this week, I'll move forward with proposing an update to
> >> MAINTAINERS as described.
> >>
> >
> > Okay, please do it.
> >
> > Sorry, I am just roughly reading the mailing list of qemu & kvm usually,
> > and do not get enough time to actively review or contribute on these
> > fields. :-(
>
> Not an issue, thanks for that information and thanks for your work in
> the past on that!
>
> Should I keep you entered as a reviewer for the new section?

Okay, that is good for me! :)


Re: [PULL v2 25/86] hw/cxl/component: Implement host bridge MMIO (8.2.5, table 142)

2022-06-17 Thread Igor Mammedov
On Fri, 17 Jun 2022 11:51:44 +0100
Jonathan Cameron  wrote:

> On Thu, 16 Jun 2022 16:45:00 +0200
> Igor Mammedov  wrote:
> 
> > On Mon, 16 May 2022 16:51:34 -0400
> > "Michael S. Tsirkin"  wrote:
> >   
> > > From: Ben Widawsky 
> > > 
> > > CXL host bridges themselves may have MMIO. Since host bridges don't have
> > > a BAR they are treated as special for MMIO.  This patch includes
> > > i386/pc support.
> > > > Also hook up the device reset now that we have the MMIO
> > > space in which the results are visible.
> > > 
> > > Note that we duplicate the PCI express case for the aml_build but
> > > the implementations will diverge when the CXL specific _OSC is
> > > introduced.
> > > 
> > > Signed-off-by: Ben Widawsky 
> > > Co-developed-by: Jonathan Cameron 
> > > Signed-off-by: Jonathan Cameron 
> > > Reviewed-by: Alex Bennée 
> > > Message-Id: <20220429144110.25167-24-jonathan.came...@huawei.com>
> > > Reviewed-by: Michael S. Tsirkin 
> > > Signed-off-by: Michael S. Tsirkin 
> > > ---
> > >  include/hw/cxl/cxl.h| 14 ++
> > >  hw/i386/acpi-build.c| 25 ++-
> > >  hw/i386/pc.c| 27 +++-
> > >  hw/pci-bridge/pci_expander_bridge.c | 66 ++---
> > >  4 files changed, 122 insertions(+), 10 deletions(-)
> > > 
> > > diff --git a/include/hw/cxl/cxl.h b/include/hw/cxl/cxl.h
> > > index 31af92fd5e..8d1a7245d0 100644
> > > --- a/include/hw/cxl/cxl.h
> > > +++ b/include/hw/cxl/cxl.h
> > > @@ -10,6 +10,7 @@
> > >  #ifndef CXL_H
> > >  #define CXL_H
> > >  
> > > +#include "hw/pci/pci_host.h"
> > >  #include "cxl_pci.h"
> > >  #include "cxl_component.h"
> > >  #include "cxl_device.h"
> > > @@ -17,8 +18,21 @@
> > >  #define CXL_COMPONENT_REG_BAR_IDX 0
> > >  #define CXL_DEVICE_REG_BAR_IDX 2
> > >  
> > > +#define CXL_WINDOW_MAX 10
> > > +
> > >  typedef struct CXLState {
> > >  bool is_enabled;
> > > +MemoryRegion host_mr;
> > > +unsigned int next_mr_idx;
> > >  } CXLState;
> > >  
> > > +struct CXLHost {
> > > +PCIHostState parent_obj;
> > > +
> > > +CXLComponentState cxl_cstate;
> > > +};
> > > +
> > > +#define TYPE_PXB_CXL_HOST "pxb-cxl-host"
> > > +OBJECT_DECLARE_SIMPLE_TYPE(CXLHost, PXB_CXL_HOST)
> > > +
> > >  #endif
> > > diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
> > > index dcf6ece3d0..2d81b0f40c 100644
> > > --- a/hw/i386/acpi-build.c
> > > +++ b/hw/i386/acpi-build.c
> > > @@ -28,6 +28,7 @@
> > >  #include "qemu/bitmap.h"
> > >  #include "qemu/error-report.h"
> > >  #include "hw/pci/pci.h"
> > > +#include "hw/cxl/cxl.h"
> > >  #include "hw/core/cpu.h"
> > >  #include "target/i386/cpu.h"
> > >  #include "hw/misc/pvpanic.h"
> > > @@ -1572,10 +1573,21 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
> > >  }
> > >  
> > >  scope = aml_scope("\\_SB");
> > > -dev = aml_device("PC%.02X", bus_num);
> > > +
> > > +if (pci_bus_is_cxl(bus)) {
> > > +dev = aml_device("CL%.02X", bus_num);
> > > +} else {
> > > +dev = aml_device("PC%.02X", bus_num);
> > > +}
> > >  aml_append(dev, aml_name_decl("_UID", aml_int(bus_num)));
> > >  aml_append(dev, aml_name_decl("_BBN", aml_int(bus_num)));
> > > -if (pci_bus_is_express(bus)) {
> > > +if (pci_bus_is_cxl(bus)) {
> > > +aml_append(dev, aml_name_decl("_HID", 
> > > aml_eisaid("PNP0A08")));
> > > +aml_append(dev, aml_name_decl("_CID", 
> > > aml_eisaid("PNP0A03")));
> > > +
> > > +/* Expander bridges do not have ACPI PCI Hot-plug 
> > > enabled */
> > > +aml_append(dev, build_q35_osc_method(true));
> > > +} else if (pci_bus_is_express(bus)) {
> > >  aml_append(dev, aml_name_decl("_HID", 
> > > aml_eisaid("PNP0A08")));
> > >  aml_append(dev, aml_name_decl("_CID", 
> > > aml_eisaid("PNP0A03")));
> > >  
> > > @@ -1595,6 +1607,15 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
> > >  aml_append(dev, aml_name_decl("_CRS", crs));
> > >  aml_append(scope, dev);
> > >  aml_append(dsdt, scope);
> > > +
> > > +/* Handle the ranges for the PXB expanders */
> > > +if (pci_bus_is_cxl(bus)) {
> > > +MemoryRegion *mr = >cxl_devices_state->host_mr;
> > > +uint64_t base = mr->addr;
> > > +
> > > +crs_range_insert(crs_range_set.mem_ranges, base,
> > > + base + memory_region_size(mr) - 1);
> > > +}
> > >  }
> > >  }
> > >  
> > > diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> > > index 45e2d6092f..03d14f6564 100644
> > > --- a/hw/i386/pc.c
> > > +++ b/hw/i386/pc.c
> > > @@ -75,6 +75,7 @@
> > >  #include "acpi-build.h"
> > >  #include "hw/mem/pc-dimm.h"
> > >  #include "hw/mem/nvdimm.h"
> > > +#include "hw/cxl/cxl.h"
> > >  #include 

Re: [PATCH v3 0/2] hw/nvme: Add shadow doorbell buffer support

2022-06-17 Thread Klaus Jensen
On Jun 16 20:34, Jinhao Fan wrote:
> This patch adds shadow doorbell buffer support in NVMe 1.3 to QEMU
> NVMe. The Doorbell Buffer Config admin command is implemented for the
> guest to enable shadow doorbell buffer. When this feature is enabled, each
> SQ/CQ is associated with two buffers, i.e., Shadow Doorbell buffer and
> EventIdx buffer. According to the Spec, each queue's doorbell register
> is only updated when the Shadow Doorbell buffer value changes from being
> less than or equal to the value of the corresponding EventIdx buffer
> entry to being greater than that value. Therefore, the number of MMIO's
> on the doorbell registers is greatly reduced.
> 
> This patch is adapted from Huaicheng Li's patch[1] in 2018.
> 
> [1] 
> https://patchwork.kernel.org/project/qemu-devel/patch/20180305194906.ga3...@gmail.com/
> 
> IOPS comparison with FIO:
> 
> iodepth1  2  4  8
>   QEMU   25.1k  25.9k  24.5k  24.0k
>  +dbbuf  29.1k  60.1k  99.8k  82.5k
> 
> MMIO's per IO measured by perf-kvm:
> 
> iodepth1  2  4  8
>   QEMU   2.01   1.99   1.99   1.99
>  +dbbuf  1.00   0.52   0.27   0.46
> 
> The tests are done on Ubuntu 22.04 with 5.15.0-33 kernel with Intel(R) 
> Xeon(R) Gold 6248R CPU @ 3.00GHz.
> 
> QEMU set up:
> 
> bin/x86_64-softmmu/qemu-system-x86_64 \
> -name "nvme-test" \
> -machine accel=kvm \
> -cpu host \
> -smp 4 \
> -m 8G \
> -daemonize \
> -device virtio-scsi-pci,id=scsi0 \
> -device scsi-hd,drive=hd0 \
> -drive 
> file=$OSIMGF,if=none,aio=native,cache=none,format=qcow2,id=hd0,snapshot=on \
> -drive "id=nvm,if=none,file=null-co://,file.read-zeroes=on,format=raw" \
> -device nvme,serial=deadbeef,drive=nvm \
> -net user,hostfwd=tcp::8080-:22 \
> -net nic,model=virtio
> 
> FIO configuration:
> 
> [global]
> ioengine=libaio
> filename=/dev/nvme0n1
> thread=1
> group_reporting=1
> direct=1
> verify=0
> time_based=1
> ramp_time=0
> runtime=30
> ;size=1G
> ;iodepth=1
> rw=randread
> bs=4k
> 
> [test]
> numjobs=1
> 
> Changes since v2:
>   - Do not ignore admin queue updates in nvme_process_db and nvme_post_cqes
>   - Calculate db_addr and ei_addr in hard-coded way
> 
> Changes since v1:
>   - Add compatibility with hosts that do not use admin queue shadow doorbell
> 
> Jinhao Fan (2):
>   hw/nvme: Implement shadow doorbell buffer support
>   hw/nvme: Add trace events for shadow doorbell buffer
> 
>  hw/nvme/ctrl.c   | 118 ++-
>  hw/nvme/nvme.h   |   8 +++
>  hw/nvme/trace-events |   5 ++
>  include/block/nvme.h |   2 +
>  4 files changed, 132 insertions(+), 1 deletion(-)
> 
> -- 
> 2.25.1
> 

LGTM,

Reviewed-by: Klaus Jensen 


signature.asc
Description: PGP signature


Re: [PATCH 5/7] crypto: Implement ECDSA algorithm by hogweed

2022-06-17 Thread Daniel P . Berrangé
On Mon, Jun 13, 2022 at 04:45:29PM +0800, Lei He wrote:
> Implement ECDSA algorithm by hogweed and nettle.
> 
> Signed-off-by: lei he 
> ---
>  crypto/akcipher-nettle.c.inc | 268 
> +++
>  1 file changed, 268 insertions(+)

Reviewed-by: Daniel P. Berrangé 


With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




Re: [PATCH 4/7] crypto: Add ECDSA key parser

2022-06-17 Thread Daniel P . Berrangé
On Mon, Jun 13, 2022 at 04:45:28PM +0800, Lei He wrote:
> Add ECDSA key parser and ECDSA signautre parser.

 typo:  'signature'

> 
> Signed-off-by: lei he 
> ---
>  crypto/ecdsakey-builtin.c.inc | 248 
> ++
>  crypto/ecdsakey.c | 118 
>  crypto/ecdsakey.h |  66 +++
>  crypto/meson.build|   1 +
>  4 files changed, 433 insertions(+)
>  create mode 100644 crypto/ecdsakey-builtin.c.inc
>  create mode 100644 crypto/ecdsakey.c
>  create mode 100644 crypto/ecdsakey.h
> 
> diff --git a/crypto/ecdsakey-builtin.c.inc b/crypto/ecdsakey-builtin.c.inc
> new file mode 100644
> index 00..5da317ec44
> --- /dev/null
> +++ b/crypto/ecdsakey-builtin.c.inc
> @@ -0,0 +1,248 @@
> +/*
> + * QEMU Crypto akcipher algorithms
> + *
> + * Copyright (c) 2022 Bytedance
> + * Author: lei he 
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * This library is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library; if not, see <http://www.gnu.org/licenses/>.
> + *
> + */
> +
> +#include "der.h"
> +#include "ecdsakey.h"
> +
> +#define QCRYPTO_ECDSA_PUBKEY_FMT_UNCOMPRESSED 0x04
> +
> +static int extract_mpi(void *ctx, const uint8_t *value,
> +   size_t vlen, Error **errp)
> +{
> +QCryptoAkCipherMPI *mpi = (QCryptoAkCipherMPI *)ctx;
> +if (vlen == 0) {
> +error_setg(errp, "Empty mpi field");
> +return -1;
> +}
> +mpi->data = g_memdup2(value, vlen);
> +mpi->len = vlen;
> +return 0;
> +}
> +
> +static int extract_version(void *ctx, const uint8_t *value,
> +   size_t vlen, Error **errp)
> +{
> +uint8_t *version = (uint8_t *)ctx;
> +if (vlen != 1 || *value > 1) {
> +error_setg(errp, "Invalid rsakey version");
> +return -1;
> +}
> +*version = *value;
> +return 0;
> +}
> +
> +static int extract_cons_content(void *ctx, const uint8_t *value,
> +size_t vlen, Error **errp)
> +{
> +const uint8_t **content = (const uint8_t **)ctx;
> +if (vlen == 0) {
> +error_setg(errp, "Empty sequence");
> +return -1;
> +}
> +*content = value;
> +return 0;
> +}
> +
> +static int __qcrypto_akcipher_builtin_ecdsa_pubkey_parse(
> +QCryptoAkCipherECDSAKey *ecdsa,
> +const uint8_t *key, size_t keylen, Error **errp);

It is not good practice to use '_' at the start of method
names in apps, as names with a leading '_' are reserved.

> +
> +static int extract_pubkey(void *ctx, const uint8_t *value,
> +  size_t vlen, Error **errp)
> +{
> +QCryptoAkCipherECDSAKey *ecdsa = (QCryptoAkCipherECDSAKey *)ctx;
> +if (vlen < 4) {
> +error_setg(errp, "Public key part too short");
> +return -1;
> +}
> +/* Skip meta bit of BIT STRING */
> +value++;
> +vlen--;
> +return __qcrypto_akcipher_builtin_ecdsa_pubkey_parse(
> +ecdsa, value, vlen, errp);
> +}
> +
> +/**
> + *
> + *ECDSASignature ::= SEQUENCE {
> + * r   INTEGER
> + * s   INTEGER
> + * }
> + */
> +QCryptoAkCipherECDSASig *qcrypto_akcipher_ecdsasig_parse(
> +const uint8_t *signature, size_t len, Error **errp)
> +{
> +QCryptoAkCipherECDSASig *sig = g_new0(QCryptoAkCipherECDSASig, 1);

Use  g_autoptr(QCryptoAkCipherECDSASig) sig  here

> +const uint8_t *seq;
> +size_t seq_length;
> +int decode_ret;
> +
> +decode_ret = qcrypto_der_decode_seq(, ,
> +extract_cons_content, , errp);
> +
> +if (decode_ret < 0 || len != 0) {
> +goto error;
> +}

If 'decode_ret < 0' then errp should be set by qcrypto_der_decode_seq
which is fine.  For len != 0, we need to report an error ourselves.
I see you pushed it to the error label so later codepaths can share it.
I think it is better to do it here though, because it makes it clear
to the reader which codepaths are triggering this generic error
message. So

 if (decode_ret < 0) {
     goto error;
 }
 if (len != 0) {
     error_setg(errp, "Invalid RSA public key");
     goto error;
 }


> +seq_length = decode_ret;
> +
> +if (qcrypto_der_decode_int(, _length, extract_mpi,
> +   >r, errp) < 0 ||
> +qcrypto_der_decode_int(, _length, extract_mpi,
> +   >s, errp) < 

Re: [PATCH v5 2/5] i386/pc: create pci-host qdev prior to pc_memory_init()

2022-06-17 Thread Joao Martins
On 6/16/22 14:21, Igor Mammedov wrote:
> On Fri, 20 May 2022 11:45:29 +0100
> Joao Martins  wrote:
> 
>> At the start of pc_memory_init() we usually pass a range of
>> 0..UINT64_MAX as pci_memory, when really its 2G (i440fx) or
>> 32G (q35). To get the real user value, we need to get pci-host
>> passed property for default pci_hole64_size. Thus to get that,
>> create the qdev prior to memory init to better make estimations
>> on max used/phys addr.
>>
>> This is in preparation to determine that host-phys-bits are
>> enough and also for pci-hole64-size to be considered to relocate
>> ram-above-4g to be at 1T (on AMD platforms).
> 
> with comments below fixed
> Reviewed-by: Igor Mammedov 
>  
Having fixed your comments, I added this -- thanks!

>> Signed-off-by: Joao Martins 
>> ---
>>  hw/i386/pc_piix.c| 5 -
>>  hw/i386/pc_q35.c | 6 +++---
>>  hw/pci-host/i440fx.c | 3 +--
>>  include/hw/pci-host/i440fx.h | 2 +-
>>  4 files changed, 9 insertions(+), 7 deletions(-)
>>
>> diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
>> index 578e537b3525..12d4a279c793 100644
>> --- a/hw/i386/pc_piix.c
>> +++ b/hw/i386/pc_piix.c
>> @@ -91,6 +91,7 @@ static void pc_init1(MachineState *machine,
>>  MemoryRegion *pci_memory;
>>  MemoryRegion *rom_memory;
>>  ram_addr_t lowmem;
>> +DeviceState *i440fx_dev;
>>  
>>  /*
>>   * Calculate ram split, for memory below and above 4G.  It's a bit
>> @@ -164,9 +165,11 @@ static void pc_init1(MachineState *machine,
>>  pci_memory = g_new(MemoryRegion, 1);
>>  memory_region_init(pci_memory, NULL, "pci", UINT64_MAX);
>>  rom_memory = pci_memory;
>> +i440fx_dev = qdev_new(host_type);
>>  } else {
>>  pci_memory = NULL;
>>  rom_memory = system_memory;
>> +i440fx_dev = NULL;
>>  }
>>  
>>  pc_guest_info_init(pcms);
>> @@ -199,7 +202,7 @@ static void pc_init1(MachineState *machine,
>>  
>>  pci_bus = i440fx_init(host_type,
>>pci_type,
>> -  _state,
>> +  i440fx_dev, _state,
> confusing names, suggest to rename i440fx_state -> pci_i440fx and i440fx_dev 
> -> i440fx_host
> or something like this
> 
I've changed i440fx_dev as that's what I add in this patch.

>>system_memory, system_io, machine->ram_size,
>>x86ms->below_4g_mem_size,
>>x86ms->above_4g_mem_size,
>> diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
>> index 42eb8b97079a..8d867bdb274a 100644
>> --- a/hw/i386/pc_q35.c
>> +++ b/hw/i386/pc_q35.c
>> @@ -203,12 +203,12 @@ static void pc_q35_init(MachineState *machine)
>>  pcms->smbios_entry_point_type);
>>  }
>>  
>> -/* allocate ram and load rom/bios */
>> -pc_memory_init(pcms, get_system_memory(), rom_memory, _memory);
>> -
>>  /* create pci host bus */
>>  q35_host = Q35_HOST_DEVICE(qdev_new(TYPE_Q35_HOST_DEVICE));
>>  
>> +/* allocate ram and load rom/bios */
>> +pc_memory_init(pcms, get_system_memory(), rom_memory, _memory);
>> +
>>  object_property_add_child(qdev_get_machine(), "q35", OBJECT(q35_host));
>>  object_property_set_link(OBJECT(q35_host), MCH_HOST_PROP_RAM_MEM,
>>   OBJECT(ram_memory), NULL);
>> diff --git a/hw/pci-host/i440fx.c b/hw/pci-host/i440fx.c
>> index e08716142b6e..5c1bab5c58ed 100644
>> --- a/hw/pci-host/i440fx.c
>> +++ b/hw/pci-host/i440fx.c
>> @@ -238,6 +238,7 @@ static void i440fx_realize(PCIDevice *dev, Error **errp)
>>  }
>>  
>>  PCIBus *i440fx_init(const char *host_type, const char *pci_type,
> 
> does it still need 'host_type'?
> 
I've removed it.

>> +DeviceState *dev,
>>  PCII440FXState **pi440fx_state,
>>  MemoryRegion *address_space_mem,
>>  MemoryRegion *address_space_io,
>> @@ -247,7 +248,6 @@ PCIBus *i440fx_init(const char *host_type, const char 
>> *pci_type,
>>  MemoryRegion *pci_address_space,
>>  MemoryRegion *ram_memory)
>>  {
>> -DeviceState *dev;
>>  PCIBus *b;
>>  PCIDevice *d;
>>  PCIHostState *s;
>> @@ -255,7 +255,6 @@ PCIBus *i440fx_init(const char *host_type, const char 
>> *pci_type,
>>  unsigned i;
>>  I440FXState *i440fx;
>>  
>> -dev = qdev_new(host_type);
>>  s = PCI_HOST_BRIDGE(dev);
>>  b = pci_root_bus_new(dev, NULL, pci_address_space,
>>   address_space_io, 0, TYPE_PCI_BUS);
>> diff --git a/include/hw/pci-host/i440fx.h b/include/hw/pci-host/i440fx.h
>> index f068aaba8fda..c4710445e30a 100644
>> --- a/include/hw/pci-host/i440fx.h
>> +++ b/include/hw/pci-host/i440fx.h
>> @@ -36,7 +36,7 @@ struct PCII440FXState {
>>  #define TYPE_IGD_PASSTHROUGH_I440FX_PCI_DEVICE "igd-passthrough-i440FX"
>>  
>>  PCIBus *i440fx_init(const char *host_type, 

Re: [PATCH 3/7] crypto: remove "qemu/osdep.h" in rsakey.h

2022-06-17 Thread Daniel P . Berrangé
On Mon, Jun 13, 2022 at 04:45:27PM +0800, Lei He wrote:
> Move 'include "qemu/osdep.h"' from rsakey.h to rsakey.c.
> 
> Signed-off-by: lei he 
> ---
>  crypto/rsakey.c | 1 +
>  crypto/rsakey.h | 1 -
>  2 files changed, 1 insertion(+), 1 deletion(-)

Reviewed-by: Daniel P. Berrangé 


With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




Re: [PATCH v5 1/5] hw/i386: add 4g boundary start to X86MachineState

2022-06-17 Thread Joao Martins
On 6/16/22 14:05, Igor Mammedov wrote:
> On Fri, 20 May 2022 11:45:28 +0100
> Joao Martins  wrote:
>> Rather than hardcoding the 4G boundary everywhere, introduce a
>> X86MachineState property @above_4g_mem_start and use it
> so far it's just field not a property /fix commit message/
> 
Fixed.

>> accordingly.
>>
>> This is in preparation for relocating ram-above-4g to be
>> dynamically start at 1T on AMD platforms.
> 
> possibly needs to be rebased on top of current master to include cxl_base
> 
Yep. I fixed the cxl_base as follows:

diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 82cfafc1c3b6..a9d1bf95649a 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -930,7 +930,7 @@ void pc_memory_init(PCMachineState *pcms,
 } else if (pcms->sgx_epc.size != 0) {
 cxl_base = sgx_epc_above_4g_end(>sgx_epc);
 } else {
-cxl_base = 0x1ULL + x86ms->above_4g_mem_size;
+cxl_base = x86ms->above_4g_mem_start + x86ms->above_4g_mem_size;
 }

 e820_add_entry(cxl_base, cxl_size, E820_RESERVED);


> with comments fixed
> 
> Reviewed-by: Igor Mammedov 
> 

I added this -- Thanks a lot!

>>
>> Signed-off-by: Joao Martins 
>> ---
>>  hw/i386/acpi-build.c  | 2 +-
>>  hw/i386/pc.c  | 9 +
>>  hw/i386/sgx.c | 2 +-
>>  hw/i386/x86.c | 1 +
>>  include/hw/i386/x86.h | 3 +++
>>  5 files changed, 11 insertions(+), 6 deletions(-)
>>
>> diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
>> index c125939ed6f9..3160b20c9574 100644
>> --- a/hw/i386/acpi-build.c
>> +++ b/hw/i386/acpi-build.c
>> @@ -2120,7 +2120,7 @@ build_srat(GArray *table_data, BIOSLinker *linker, 
>> MachineState *machine)
>>  build_srat_memory(table_data, mem_base, mem_len, i - 1,
>>MEM_AFFINITY_ENABLED);
>>  }
>> -mem_base = 1ULL << 32;
>> +mem_base = x86ms->above_4g_mem_start;
>>  mem_len = next_base - x86ms->below_4g_mem_size;
>>  next_base = mem_base + mem_len;
>>  }
>> diff --git a/hw/i386/pc.c b/hw/i386/pc.c
>> index 7c39c913355b..f7da1d5dd40d 100644
>> --- a/hw/i386/pc.c
>> +++ b/hw/i386/pc.c
>> @@ -832,9 +832,10 @@ void pc_memory_init(PCMachineState *pcms,
>>   machine->ram,
>>   x86ms->below_4g_mem_size,
>>   x86ms->above_4g_mem_size);
>> -memory_region_add_subregion(system_memory, 0x1ULL,
>> +memory_region_add_subregion(system_memory, 
>> x86ms->above_4g_mem_start,
>>  ram_above_4g);
>> -e820_add_entry(0x1ULL, x86ms->above_4g_mem_size, E820_RAM);
>> +e820_add_entry(x86ms->above_4g_mem_start, x86ms->above_4g_mem_size,
>> +   E820_RAM);
>>  }
>>  
>>  if (pcms->sgx_epc.size != 0) {
>> @@ -875,7 +876,7 @@ void pc_memory_init(PCMachineState *pcms,
>>  machine->device_memory->base = 
>> sgx_epc_above_4g_end(>sgx_epc);
>>  } else {
>>  machine->device_memory->base =
>> -0x1ULL + x86ms->above_4g_mem_size;
>> +x86ms->above_4g_mem_start + x86ms->above_4g_mem_size;
>>  }
>>  
>>  machine->device_memory->base =
>> @@ -1019,7 +1020,7 @@ uint64_t pc_pci_hole64_start(void)
>>  } else if (pcms->sgx_epc.size != 0) {
>>  hole64_start = sgx_epc_above_4g_end(>sgx_epc);
>>  } else {
>> -hole64_start = 0x1ULL + x86ms->above_4g_mem_size;
>> +hole64_start = x86ms->above_4g_mem_start + x86ms->above_4g_mem_size;
>>  }
>>  
>>  return ROUND_UP(hole64_start, 1 * GiB);
>> diff --git a/hw/i386/sgx.c b/hw/i386/sgx.c
>> index a44d66ba2afc..09d9c7c73d9f 100644
>> --- a/hw/i386/sgx.c
>> +++ b/hw/i386/sgx.c
>> @@ -295,7 +295,7 @@ void pc_machine_init_sgx_epc(PCMachineState *pcms)
>>  return;
>>  }
>>  
>> -sgx_epc->base = 0x1ULL + x86ms->above_4g_mem_size;
>> +sgx_epc->base = x86ms->above_4g_mem_start + x86ms->above_4g_mem_size;
>>  
>>  memory_region_init(_epc->mr, OBJECT(pcms), "sgx-epc", UINT64_MAX);
>>  memory_region_add_subregion(get_system_memory(), sgx_epc->base,
>> diff --git a/hw/i386/x86.c b/hw/i386/x86.c
>> index 78b05ab7a2d1..af3c790a2830 100644
>> --- a/hw/i386/x86.c
>> +++ b/hw/i386/x86.c
>> @@ -1373,6 +1373,7 @@ static void x86_machine_initfn(Object *obj)
>>  x86ms->oem_id = g_strndup(ACPI_BUILD_APPNAME6, 6);
>>  x86ms->oem_table_id = g_strndup(ACPI_BUILD_APPNAME8, 8);
>>  x86ms->bus_lock_ratelimit = 0;
>> +x86ms->above_4g_mem_start = 0x1ULL;
> 
> s/0x.../4 * GiB/
> 
Fixed.

>>  }
>>  
>>  static void x86_machine_class_init(ObjectClass *oc, void *data)
>> diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h
>> index 9089bdd99c3a..df82c5fd4252 100644
>> --- a/include/hw/i386/x86.h
>> +++ b/include/hw/i386/x86.h
>> @@ -56,6 +56,9 @@ struct 

Re: [PATCH 2/7] crypto: Support more ASN.1 types

2022-06-17 Thread Daniel P . Berrangé
On Mon, Jun 13, 2022 at 04:45:26PM +0800, Lei He wrote:
> 1. support decoding of 'bit string','octet string',
> 'object id', and 'context specific tag' for DER decoder.
> 2. support encoding of int and sequence for DER decoder.
> 3. add test suites for the above changes.
> 
> Signed-off-by: lei he 
> ---
>  crypto/der.c | 174 
> +--
>  crypto/der.h | 128 ++-
>  tests/unit/test-crypto-der.c | 126 +--
>  3 files changed, 382 insertions(+), 46 deletions(-)
> 
> diff --git a/crypto/der.c b/crypto/der.c
> index f877390bbb..edf2c6c313 100644
> --- a/crypto/der.c
> +++ b/crypto/der.c
> @@ -27,15 +27,68 @@ enum QCryptoDERTypeTag {
>  QCRYPTO_DER_TYPE_TAG_INT = 0x2,
>  QCRYPTO_DER_TYPE_TAG_BIT_STR = 0x3,
>  QCRYPTO_DER_TYPE_TAG_OCT_STR = 0x4,
> -QCRYPTO_DER_TYPE_TAG_OCT_NULL = 0x5,
> -QCRYPTO_DER_TYPE_TAG_OCT_OID = 0x6,
> +QCRYPTO_DER_TYPE_TAG_NULL = 0x5,
> +QCRYPTO_DER_TYPE_TAG_OID = 0x6,
>  QCRYPTO_DER_TYPE_TAG_SEQ = 0x10,
>  QCRYPTO_DER_TYPE_TAG_SET = 0x11,
>  };
>  
> -#define QCRYPTO_DER_CONSTRUCTED_MASK 0x20
> +enum QCryptoDERTagClass {
> +QCRYPTO_DER_TAG_CLASS_UNIV = 0x0,
> +QCRYPTO_DER_TAG_CLASS_APPL = 0x1,
> +QCRYPTO_DER_TAG_CLASS_CONT = 0x2,
> +QCRYPTO_DER_TAG_CLASS_PRIV = 0x3,
> +};
> +
> +enum QCryptoDERTagEnc {
> +QCRYPTO_DER_TAG_ENC_PRIM = 0x0,
> +QCRYPTO_DER_TAG_ENC_CONS = 0x1,
> +};
> +
> +#define QCRYPTO_DER_TAG_ENC_MASK 0x20
> +#define QCRYPTO_DER_TAG_ENC_SHIFT 5
> +
> +#define QCRYPTO_DER_TAG_CLASS_MASK 0xc0
> +#define QCRYPTO_DER_TAG_CLASS_SHIFT 6
> +
> +#define QCRYPTO_DER_TAG_VAL_MASK 0x1f
>  #define QCRYPTO_DER_SHORT_LEN_MASK 0x80
>  
> +#define QCRYPTO_DER_TAG(class, enc, val)\
> +(((class) << QCRYPTO_DER_TAG_CLASS_SHIFT) | \
> + ((enc) << QCRYPTO_DER_TAG_ENC_SHIFT) | val)
> +
> +static void qcrypto_der_encode_data(const uint8_t *src, size_t src_len,
> +uint8_t *dst, size_t *dst_len)
> +{
> +size_t max_length = 0xFF;
> +uint8_t length_bytes = 0, header_byte;
> +
> +if (src_len < QCRYPTO_DER_SHORT_LEN_MASK) {
> +header_byte = src_len;
> +*dst_len = src_len + 1;
> +} else {
> +for (length_bytes = 1; max_length < src_len;) {
> +length_bytes++;
> +max_length = (max_length << 8) + max_length;
> +}

Can't length_bytes++ be in the for clause like:

 for (length_bytes = 1; max_length < src_len; length_bytes++) {
     max_length = (max_length << 8) + max_length;
 }

?


Aside from that minor nitpick

Reviewed-by: Daniel P. Berrangé 


With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




Re: [PATCH v5 3/5] i386/pc: pass pci_hole64_size to pc_memory_init()

2022-06-17 Thread Joao Martins
On 6/16/22 14:30, Igor Mammedov wrote:
> On Fri, 20 May 2022 11:45:30 +0100
> Joao Martins  wrote:
> 
>> Use the pre-initialized pci-host qdev and fetch the
>> pci-hole64-size into pc_memory_init() newly added argument.
>> piix needs a bit of care given all the !pci_enabled()
>> and that the pci_hole64_size is private to i440fx.
>>
>> This is in preparation to determine that host-phys-bits are
>> enough and for pci-hole64-size to be considered to relocate
>> ram-above-4g to be at 1T (on AMD platforms).
> 
> modulo nit below
> 
> Reviewed-by: Igor Mammedov 
> 

I haven't tackled the initialization nit below but I would assume
you agree with the rest of the patch. Let me know if I should still
add the Rb tag.

>>
>> Signed-off-by: Joao Martins 
>> ---
>>  hw/i386/pc.c | 3 ++-
>>  hw/i386/pc_piix.c| 5 -
>>  hw/i386/pc_q35.c | 8 +++-
>>  hw/pci-host/i440fx.c | 7 +++
>>  include/hw/i386/pc.h | 3 ++-
>>  include/hw/pci-host/i440fx.h | 1 +
>>  6 files changed, 23 insertions(+), 4 deletions(-)
>>
>> diff --git a/hw/i386/pc.c b/hw/i386/pc.c
>> index f7da1d5dd40d..af52d4ff89ef 100644
>> --- a/hw/i386/pc.c
>> +++ b/hw/i386/pc.c
>> @@ -799,7 +799,8 @@ void xen_load_linux(PCMachineState *pcms)
>>  void pc_memory_init(PCMachineState *pcms,
>>  MemoryRegion *system_memory,
>>  MemoryRegion *rom_memory,
>> -MemoryRegion **ram_memory)
>> +MemoryRegion **ram_memory,
>> +uint64_t pci_hole64_size)
>>  {
>>  int linux_boot, i;
>>  MemoryRegion *option_rom_mr;
>> diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
>> index 12d4a279c793..57bb5b8f2aea 100644
>> --- a/hw/i386/pc_piix.c
>> +++ b/hw/i386/pc_piix.c
>> @@ -91,6 +91,7 @@ static void pc_init1(MachineState *machine,
>>  MemoryRegion *pci_memory;
>>  MemoryRegion *rom_memory;
>>  ram_addr_t lowmem;
>> +uint64_t hole64_size;
> 
> init it to 0 right here to avoid chance of run amok uninitialized variable?
> 
I haven't done this given that mst disagreed, plus the fact that the code style
of the function seems to leave the NULL initialization mostly to the else
conditional clause. That is part of the reason I haven't inited @i440fx_dev to
NULL here as well (now i440fx_host). The location where we use hole64_size is
also the same location where we are using @i440fx_host.

>>  DeviceState *i440fx_dev;
>>  
>>  /*
>> @@ -166,10 +167,12 @@ static void pc_init1(MachineState *machine,
>>  memory_region_init(pci_memory, NULL, "pci", UINT64_MAX);
>>  rom_memory = pci_memory;
>>  i440fx_dev = qdev_new(host_type);
>> +hole64_size = i440fx_pci_hole64_size(i440fx_dev);
>>  } else {
>>  pci_memory = NULL;
>>  rom_memory = system_memory;
>>  i440fx_dev = NULL;
>> +hole64_size = 0;
>>  }
>>  
>>  pc_guest_info_init(pcms);
>> @@ -186,7 +189,7 @@ static void pc_init1(MachineState *machine,
>>  /* allocate ram and load rom/bios */
>>  if (!xen_enabled()) {
>>  pc_memory_init(pcms, system_memory,
>> -   rom_memory, &ram_memory);
>> +   rom_memory, &ram_memory, hole64_size);
>>  } else {
>>  pc_system_flash_cleanup_unused(pcms);
>>  if (machine->kernel_filename != NULL) {
>> diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
>> index 8d867bdb274a..4d5c2fbd976b 100644
>> --- a/hw/i386/pc_q35.c
>> +++ b/hw/i386/pc_q35.c
>> @@ -138,6 +138,7 @@ static void pc_q35_init(MachineState *machine)
>>  MachineClass *mc = MACHINE_GET_CLASS(machine);
>>  bool acpi_pcihp;
>>  bool keep_pci_slot_hpc;
>> +uint64_t pci_hole64_size = 0;
>>  
>>  /* Check whether RAM fits below 4G (leaving 1/2 GByte for IO memory
>>   * and 256 Mbytes for PCI Express Enhanced Configuration Access Mapping
>> @@ -206,8 +207,13 @@ static void pc_q35_init(MachineState *machine)
>>  /* create pci host bus */
>>  q35_host = Q35_HOST_DEVICE(qdev_new(TYPE_Q35_HOST_DEVICE));
>>  
>> +if (pcmc->pci_enabled) {
>> +pci_hole64_size = q35_host->mch.pci_hole64_size;
>> +}
>> +
>>  /* allocate ram and load rom/bios */
>> -pc_memory_init(pcms, get_system_memory(), rom_memory, &ram_memory);
>> +pc_memory_init(pcms, get_system_memory(), rom_memory, &ram_memory,
>> +   pci_hole64_size);
>>  
>>  object_property_add_child(qdev_get_machine(), "q35", OBJECT(q35_host));
>>  object_property_set_link(OBJECT(q35_host), MCH_HOST_PROP_RAM_MEM,
>> diff --git a/hw/pci-host/i440fx.c b/hw/pci-host/i440fx.c
>> index 5c1bab5c58ed..c5cc28250d5c 100644
>> --- a/hw/pci-host/i440fx.c
>> +++ b/hw/pci-host/i440fx.c
>> @@ -237,6 +237,13 @@ static void i440fx_realize(PCIDevice *dev, Error **errp)
>>  }
>>  }
>>  
>> +uint64_t i440fx_pci_hole64_size(DeviceState *i440fx_dev)
>> +{
>> +I440FXState *i440fx = I440FX_PCI_HOST_BRIDGE(i440fx_dev);
>> +
>> +return 

Re: [PATCH 1/7] crypto: Introduce ECDSA algorithm API

2022-06-17 Thread Daniel P . Berrangé
On Mon, Jun 13, 2022 at 04:45:25PM +0800, Lei He wrote:
> Introduce ECDSA related structures to qapi-crypto.
> 
> Signed-off-by: lei he 
> ---
>  qapi/crypto.json | 28 ++--
>  1 file changed, 26 insertions(+), 2 deletions(-)

Reviewed-by: Daniel P. Berrangé 


With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




Re: [PULL v2 25/86] hw/cxl/component: Implement host bridge MMIO (8.2.5, table 142)

2022-06-17 Thread Jonathan Cameron via
On Thu, 16 Jun 2022 16:45:00 +0200
Igor Mammedov  wrote:

> On Mon, 16 May 2022 16:51:34 -0400
> "Michael S. Tsirkin"  wrote:
> 
> > From: Ben Widawsky 
> > 
> > CXL host bridges themselves may have MMIO. Since host bridges don't have
> > a BAR they are treated as special for MMIO.  This patch includes
> > i386/pc support.
> > Also hook up the device reset now that we have the MMIO
> > space in which the results are visible.
> > 
> > Note that we duplicate the PCI express case for the aml_build but
> > the implementations will diverge when the CXL specific _OSC is
> > introduced.
> > 
> > Signed-off-by: Ben Widawsky 
> > Co-developed-by: Jonathan Cameron 
> > Signed-off-by: Jonathan Cameron 
> > Reviewed-by: Alex Bennée 
> > Message-Id: <20220429144110.25167-24-jonathan.came...@huawei.com>
> > Reviewed-by: Michael S. Tsirkin 
> > Signed-off-by: Michael S. Tsirkin 
> > ---
> >  include/hw/cxl/cxl.h| 14 ++
> >  hw/i386/acpi-build.c| 25 ++-
> >  hw/i386/pc.c| 27 +++-
> >  hw/pci-bridge/pci_expander_bridge.c | 66 ++---
> >  4 files changed, 122 insertions(+), 10 deletions(-)
> > 
> > diff --git a/include/hw/cxl/cxl.h b/include/hw/cxl/cxl.h
> > index 31af92fd5e..8d1a7245d0 100644
> > --- a/include/hw/cxl/cxl.h
> > +++ b/include/hw/cxl/cxl.h
> > @@ -10,6 +10,7 @@
> >  #ifndef CXL_H
> >  #define CXL_H
> >  
> > +#include "hw/pci/pci_host.h"
> >  #include "cxl_pci.h"
> >  #include "cxl_component.h"
> >  #include "cxl_device.h"
> > @@ -17,8 +18,21 @@
> >  #define CXL_COMPONENT_REG_BAR_IDX 0
> >  #define CXL_DEVICE_REG_BAR_IDX 2
> >  
> > +#define CXL_WINDOW_MAX 10
> > +
> >  typedef struct CXLState {
> >  bool is_enabled;
> > +MemoryRegion host_mr;
> > +unsigned int next_mr_idx;
> >  } CXLState;
> >  
> > +struct CXLHost {
> > +PCIHostState parent_obj;
> > +
> > +CXLComponentState cxl_cstate;
> > +};
> > +
> > +#define TYPE_PXB_CXL_HOST "pxb-cxl-host"
> > +OBJECT_DECLARE_SIMPLE_TYPE(CXLHost, PXB_CXL_HOST)
> > +
> >  #endif
> > diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
> > index dcf6ece3d0..2d81b0f40c 100644
> > --- a/hw/i386/acpi-build.c
> > +++ b/hw/i386/acpi-build.c
> > @@ -28,6 +28,7 @@
> >  #include "qemu/bitmap.h"
> >  #include "qemu/error-report.h"
> >  #include "hw/pci/pci.h"
> > +#include "hw/cxl/cxl.h"
> >  #include "hw/core/cpu.h"
> >  #include "target/i386/cpu.h"
> >  #include "hw/misc/pvpanic.h"
> > @@ -1572,10 +1573,21 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
> >  }
> >  
> >  scope = aml_scope("\\_SB");
> > -dev = aml_device("PC%.02X", bus_num);
> > +
> > +if (pci_bus_is_cxl(bus)) {
> > +dev = aml_device("CL%.02X", bus_num);
> > +} else {
> > +dev = aml_device("PC%.02X", bus_num);
> > +}
> >  aml_append(dev, aml_name_decl("_UID", aml_int(bus_num)));
> >  aml_append(dev, aml_name_decl("_BBN", aml_int(bus_num)));
> > -if (pci_bus_is_express(bus)) {
> > +if (pci_bus_is_cxl(bus)) {
> > +aml_append(dev, aml_name_decl("_HID", 
> > aml_eisaid("PNP0A08")));
> > +aml_append(dev, aml_name_decl("_CID", 
> > aml_eisaid("PNP0A03")));
> > +
> > +/* Expander bridges do not have ACPI PCI Hot-plug enabled 
> > */
> > +aml_append(dev, build_q35_osc_method(true));
> > +} else if (pci_bus_is_express(bus)) {
> >  aml_append(dev, aml_name_decl("_HID", 
> > aml_eisaid("PNP0A08")));
> >  aml_append(dev, aml_name_decl("_CID", 
> > aml_eisaid("PNP0A03")));
> >  
> > @@ -1595,6 +1607,15 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
> >  aml_append(dev, aml_name_decl("_CRS", crs));
> >  aml_append(scope, dev);
> >  aml_append(dsdt, scope);
> > +
> > +/* Handle the ranges for the PXB expanders */
> > +if (pci_bus_is_cxl(bus)) {
> > +MemoryRegion *mr = &machine->cxl_devices_state->host_mr;
> > +uint64_t base = mr->addr;
> > +
> > +crs_range_insert(crs_range_set.mem_ranges, base,
> > + base + memory_region_size(mr) - 1);
> > +}
> >  }
> >  }
> >  
> > diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> > index 45e2d6092f..03d14f6564 100644
> > --- a/hw/i386/pc.c
> > +++ b/hw/i386/pc.c
> > @@ -75,6 +75,7 @@
> >  #include "acpi-build.h"
> >  #include "hw/mem/pc-dimm.h"
> >  #include "hw/mem/nvdimm.h"
> > +#include "hw/cxl/cxl.h"
> >  #include "qapi/error.h"
> >  #include "qapi/qapi-visit-common.h"
> >  #include "qapi/qapi-visit-machine.h"
> > @@ -816,6 +817,7 @@ void pc_memory_init(PCMachineState *pcms,
> >  MachineClass *mc = MACHINE_GET_CLASS(machine);
> >  PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
> >  X86MachineState *x86ms = X86_MACHINE(pcms);
> > +   

Re: misaligned-pointer-use libslirp/src/tcp_input.c

2022-06-17 Thread Thomas Huth

On 16/06/2022 21.03, Alexander Bulekov wrote:

On 220616 0930, Patrick Venture wrote:

On Thu, Jun 16, 2022 at 6:31 AM Alexander Bulekov  wrote:


Is this an --enable-sanitizers build? The virtual-device fuzzer catches



Yeah - it should be reproducible with a sanitizers build from HEAD -- I can
try to get a manual instance going again without automation to try and
reproduce it.  We're testing on v7.0.0 which is when we started seeing
this, I don't think we saw it in 6.2.0.


Here are a few reproducers (run with --enable-sanitizers):

This one complains about misalignments in ip_header, ipasfrag, qlink,
ip...

cat << EOF | ./qemu-system-i386 -display none -machine accel=qtest, -m \
512M,slots=4,maxmem=0x -machine q35 -nodefaults -device \
vmxnet3,netdev=net0 -netdev user,id=net0 -object \
memory-backend-ram,id=mem1,size=10M -device \
pc-dimm,id=nv1,memdev=mem1,addr=0xba19ff -object \
memory-backend-ram,id=mem2,size=10M -device \
pc-dimm,id=nv2,memdev=mem2,addr=0xbe53e14abaa0 -object \
memory-backend-ram,id=mem3,size=10M -device \
pc-dimm,id=nv3,memdev=mem3,addr=0xfee9cae0 -object \
memory-backend-ram,id=mem4,size=10M -device \
pc-dimm,id=nv4,memdev=mem4,addr=0xf0f0f0f -qtest stdio
outl 0xcf8 0x8810
outl 0xcfc 0xe000
outl 0xcf8 0x8814
outl 0xcfc 0xe0001000
outl 0xcf8 0x8804
outw 0xcfc 0x06
write 0x3e 0x1 0x02
write 0x39 0x1 0x20
write 0x29 0x1 0x10
write 0x2c 0x1 0x0f
write 0x2d 0x1 0x0f
write 0x2e 0x1 0x0f
write 0x2f 0x1 0x0f
write 0xf0f0f0f1012 0x1 0xfe
write 0xf0f0f0f1013 0x1 0xca
write 0xf0f0f0f1014 0x1 0xe9
write 0xf0f0f0f1017 0x1 0xfe
write 0xf0f0f0f103a 0x1 0x01
write 0xfee9cafe0009 0x1 0x40
write 0xfee9cafe0019 0x1 0x40
write 0x0 0x1 0xe1
write 0x1 0x1 0xfe
write 0x2 0x1 0xbe
write 0x3 0x1 0xba
writel 0xe0001020 0xcafe
write 0xfee9cafe0029 0x1 0x40
write 0xfee9cafe0039 0x1 0x40
write 0xfee9cafe0049 0x1 0x40
write 0xfee9cafe0059 0x1 0x40
write 0x1f65190b 0x1 0x08
write 0x1f65190d 0x1 0x46
write 0x1f65190e 0x1 0x03
write 0x1f651915 0x1 0x01
write 0xfee9cafe0069 0x1 0x40
write 0xfee9cafe0079 0x1 0x40
write 0xfee9cafe0089 0x1 0x40
write 0xfee9cafe0099 0x1 0x40
write 0xfee9cafe009d 0x1 0x10
write 0xfee9cafe00a0 0x1 0xff
write 0xfee9cafe00a1 0x1 0x18
write 0xfee9cafe00a2 0x1 0x65
write 0xfee9cafe00a3 0x1 0x1f
write 0xfee9cafe00a9 0x1 0x40
write 0xfee9cafe00ad 0x1 0x1c
write 0xe602 0x1 0x00
EOF

This one complains about misalignments in ip6_header, ip6_hdrctl...

cat << EOF | ./qemu-system-i386 -display none -machine accel=qtest, -m \
512M,slots=1,maxmem=0x -machine q35 -nodefaults -device \
vmxnet3,netdev=net0 -netdev user,id=net0 -object \
memory-backend-ram,id=mem1,size=4M -device \
pc-dimm,id=nv1,memdev=mem1,addr=0x1dd8600 -qtest stdio
outl 0xcf8 0x8810
outl 0xcfc 0xe000
outl 0xcf8 0x8814
outl 0xcfc 0xe0001000
outl 0xcf8 0x8804
outw 0xcfc 0x06
write 0x0 0x1 0xe1
write 0x1 0x1 0xfe
write 0x2 0x1 0xbe
write 0x3 0x1 0xba
write 0x3e 0x1 0x01
write 0x39 0x1 0x01
write 0x28 0x1 0x01
write 0x29 0x1 0x01
write 0x2d 0x1 0x86
write 0x2e 0x1 0xdd
write 0x2f 0x1 0x01
write 0x1dd86000112 0x1 0x10
write 0x1dd8600013c 0x1 0x02
writel 0xe0001020 0xcafe
write 0x1009 0x1 0x40
write 0x100c 0x1 0x86
write 0x100d 0x1 0xdd
write 0x1011 0x1 0x10
write 0x1019 0x1 0x7e
write 0x101d 0x1 0x10
write 0x4d56 0x1 0x02
write 0xe603 0x1 0x00
EOF


Could you please open bugs on 
https://gitlab.freedesktop.org/slirp/libslirp/-/issues so that this 
information does not get lost?


 Thomas




these periodically while fuzzing network-devices. However I don't think
OSS-Fuzz creates reports for them for some reason. I can create qtest
reproducers, if that is useful.
-Alex

On 220615 0942, Patrick Venture wrote:

Hey - I wanted to ask if someone else has seen this or has suggestions on
how to fix it in libslirp / qemu.

libslirp version: 3ad1710a96678fe79066b1469cead4058713a1d9

The blow-up is at this line:


https://gitlab.freedesktop.org/slirp/libslirp/-/blob/master/src/tcp_input.c#L310


I0614 13:44:44.3040872040 bytestream.cc:22] QEMU:
third_party/libslirp/src/tcp_input.c:310:56: runtime error: member access
within misaligned address 0x9a4000f4 for type 'struct qlink', which
requires 8 byte alignment
I0614 13:44:44.3041562040 bytestream.cc:22] QEMU: 0x9a4000f4:

note:

pointer points here
I0614 13:44:44.3041842040 bytestream.cc:22] QEMU:   00 00 00 00 00 00
00 02  20 02 0a 00 00 01 42 01  0a 00 02 02 42 01 0a 00  00 01 86 dd 60

02

dd 79
I0614 13:44:44.3042042040 bytestream.cc:22] QEMU:   ^
I0614 13:44:44.6411732040 bytestream.cc:22] QEMU: #0

0xcbe34bd8

in tcp_input third_party/libslirp/src/tcp_input.c:310:56
I0614 13:44:44.6412392040 bytestream.cc:22] QEMU: #1

0xcbe22a94

in ip6_input third_party/libslirp/src/ip6_input.c:74:9
I0614 13:44:44.6412622040 bytestream.cc:22] 

Re: [PULL 20/33] configure: handle host compiler in probe_target_compiler

2022-06-17 Thread Paolo Bonzini
Hi Matheus,

could you please test the tests-tcg-next branch at
https://gitlab.com/bonzini/qemu?

Thanks,

Paolo

On Thu, Jun 16, 2022 at 3:23 AM Alex Bennée  wrote:
>
>
> Matheus Kowalczuk Ferst  writes:
>
> > On 01/06/2022 15:05, Alex Bennée wrote:
> >> From: Paolo Bonzini 
> >>
> >> In preparation for handling more binaries than just cc, handle
> >> the case of "probe_target_compiler $cpu" directly in the function,
> >> setting the target_* variables based on the ones that are used to
> >> build QEMU.  The clang check also needs to be moved after this
> >> fallback.
> >>
> >> Signed-off-by: Paolo Bonzini 
> >> Reviewed-by: Richard Henderson 
> >> Message-Id: <20220517092616.1272238-10-pbonz...@redhat.com>
> >> Signed-off-by: Alex Bennée 
> >> Message-Id: <20220527153603.887929-21-alex.ben...@linaro.org>
> >
> > Hi,
> >
> > After this patch, a clean build in ppc64le hosts will not build
> > ppc64{,le}-linux-user tests with "make check-tcg"
> >
> >>
> >> diff --git a/configure b/configure
> >> index fbf6d39f96..217c8b3cac 100755
> >> --- a/configure
> >> +++ b/configure
> >> @@ -954,10 +954,6 @@ case $git_submodules_action in
> >>   ;;
> >>   esac
> >>
> >> -if eval test -z "\${cross_cc_$cpu}"; then
> >> -eval "cross_cc_${cpu}=\$cc"
> >> -fi
> >> -
> >>   default_target_list=""
> >>   mak_wilds=""
> >>
> >> @@ -2008,13 +2004,6 @@ probe_target_compiler() {
> >> if eval test -n "\"\${cross_cc_$1}\""; then
> >>   if eval has "\"\${cross_cc_$1}\""; then
> >> eval "target_cc=\"\${cross_cc_$1}\""
> >> -  case $1 in
> >> -i386|x86_64)
> >> -  if $target_cc --version | grep -qi "clang"; then
> >> -unset target_cc
> >> -  fi
> >> -  ;;
> >> -  esac
> >>   fi
> >> fi
> >> if eval test -n "\"\${cross_as_$1}\""; then
> >> @@ -2027,6 +2016,20 @@ probe_target_compiler() {
> >> eval "target_ld=\"\${cross_ld_$1}\""
> >>   fi
> >> fi
> >> +  if test "$1" = $cpu; then
> >> +: ${target_cc:=$cc}
> >> +: ${target_as:=$as}
> >> +: ${target_ld:=$ld}
> >> +  fi
> >
> > $cpu is normalized[1] to ppc64 on little-endian hosts, so
> > ppc64le-linux-user will not have $target_{cc,as,ld} set, and
> > ppc64-linux-user will have them set to a toolchain that may not support
> > -mbig-endian. I suppose we have a similar problem with MIPS targets on
> > MIPS hosts.
>
> For now you can always explicitly tell configure about the host compiler
> with:
>
>  --cross-cc-ppc64le=gcc
>
> but we should fix the broken detection. It seems the var cpu has an
> overloaded meaning so I wonder if we just need an explicit host_cpu
> setting when we normalize cpu?
>
> >
> > [1]
> > https://gitlab.com/qemu-project/qemu/-/blob/2ad60f6f8c12ca0acd8834fdd70e088361b8791f/configure#L611
>
>
> --
> Alex Bennée
>

On Wed, Jun 15, 2022 at 1:57 PM Matheus Kowalczuk Ferst
 wrote:
>
> On 01/06/2022 15:05, Alex Bennée wrote:
> > From: Paolo Bonzini 
> >
> > In preparation for handling more binaries than just cc, handle
> > the case of "probe_target_compiler $cpu" directly in the function,
> > setting the target_* variables based on the ones that are used to
> > build QEMU.  The clang check also needs to be moved after this
> > fallback.
> >
> > Signed-off-by: Paolo Bonzini 
> > Reviewed-by: Richard Henderson 
> > Message-Id: <20220517092616.1272238-10-pbonz...@redhat.com>
> > Signed-off-by: Alex Bennée 
> > Message-Id: <20220527153603.887929-21-alex.ben...@linaro.org>
>
> Hi,
>
> After this patch, a clean build in ppc64le hosts will not build
> ppc64{,le}-linux-user tests with "make check-tcg"
>
> >
> > diff --git a/configure b/configure
> > index fbf6d39f96..217c8b3cac 100755
> > --- a/configure
> > +++ b/configure
> > @@ -954,10 +954,6 @@ case $git_submodules_action in
> >   ;;
> >   esac
> >
> > -if eval test -z "\${cross_cc_$cpu}"; then
> > -eval "cross_cc_${cpu}=\$cc"
> > -fi
> > -
> >   default_target_list=""
> >   mak_wilds=""
> >
> > @@ -2008,13 +2004,6 @@ probe_target_compiler() {
> > if eval test -n "\"\${cross_cc_$1}\""; then
> >   if eval has "\"\${cross_cc_$1}\""; then
> > eval "target_cc=\"\${cross_cc_$1}\""
> > -  case $1 in
> > -i386|x86_64)
> > -  if $target_cc --version | grep -qi "clang"; then
> > -unset target_cc
> > -  fi
> > -  ;;
> > -  esac
> >   fi
> > fi
> > if eval test -n "\"\${cross_as_$1}\""; then
> > @@ -2027,6 +2016,20 @@ probe_target_compiler() {
> > eval "target_ld=\"\${cross_ld_$1}\""
> >   fi
> > fi
> > +  if test "$1" = $cpu; then
> > +: ${target_cc:=$cc}
> > +: ${target_as:=$as}
> > +: ${target_ld:=$ld}
> > +  fi
>
> $cpu is normalized[1] to ppc64 on little-endian hosts, so
> ppc64le-linux-user will not have $target_{cc,as,ld} set, and
> ppc64-linux-user will have them set to a toolchain that may not support
> -mbig-endian. I suppose we have a similar problem with MIPS targets on
> MIPS hosts.
>
> [1]
> 

Re: Corrupted display changing screen colour depth in qemu-system-ppc/MacOS

2022-06-17 Thread Gerd Hoffmann
  Hi,

> > Can you try ditch the QEMU_ALLOCATED_FLAG check added by the commit?
> 
> Commit cb8962c146 drops the QEMU_ALLOCATED_FLAG check: if I add it back in
> with the following diff on top then everything works again:

Ah, the other way around.

> diff --git a/ui/console.c b/ui/console.c
> index 365a2c14b8..decae4287f 100644
> --- a/ui/console.c
> +++ b/ui/console.c
> @@ -2400,11 +2400,12 @@ static void vc_chr_open(Chardev *chr,
> 
>  void qemu_console_resize(QemuConsole *s, int width, int height)
>  {
> -DisplaySurface *surface;
> +DisplaySurface *surface = qemu_console_surface(s);
> 
>  assert(s->console_type == GRAPHIC_CONSOLE);
> 
> -if (qemu_console_get_width(s, -1) == width &&
> +if (surface && (surface->flags & QEMU_ALLOCATED_FLAG) &&
> +qemu_console_get_width(s, -1) == width &&
>  qemu_console_get_height(s, -1) == height) {
>  return;
>  }
> 
> > Which depth changes triggers this?  Going from direct color to a
> > paletted mode?
> 
> A quick test suggests anything that isn't 32-bit colour is affected.

Hmm, I think the commit should simply be reverted.

Short-cutting the qemu_console_resize() call is only valid in case the
current surface was created by qemu_console_resize() too.  When it is
something else -- typically a surface backed by vga vram -- it's not.
Looking at the QEMU_ALLOCATED_FLAG checks exactly that ...

take care,
  Gerd




[PATCH v2] target/ppc: cpu_init: Clean up stop state on cpu reset

2022-06-17 Thread Frederic Barrat
The 'resume_as_sreset' attribute of a cpu is set when a thread is
entering a stop state on ppc books. It causes the thread to be
re-routed to vector 0x100 when woken up by an exception. So it must be
cleared on reset or a thread might be re-routed unexpectedly after a
reset, when it was not in a stop state and/or when the appropriate
exception handler isn't set up yet.

Using skiboot, it can be tested by resetting the system when it is
quiet and most threads are idle and in stop state.

After the reset occurs, skiboot elects a primary thread and all the
others wait in secondary_wait. The primary thread does all the system
initialization from main_cpu_entry() and at some point, the
decrementer interrupt starts ticking. The exception vector for the
decrementer interrupt is in place, so that shouldn't be a
problem. However, if that primary thread was in stop state prior to
the reset, and because the resume_as_sreset parameter is still set,
it is re-routed to exception vector 0x100. Which, at that time, is
still defined as the entry point for BML. So that primary thread
restarts as new and ends up being treated like any other secondary
thread. All threads are now waiting in secondary_wait.

It results in a full system hang with no message on the console, as
the uart hasn't been init'ed yet. It's actually not obvious to realise
what's happening if not tracing reset (-d cpu_reset). The fix is
simply to clear the 'resume_as_sreset' attribute on reset.

Signed-off-by: Frederic Barrat 
---
Changelog:
v2: rework commit message


 target/ppc/cpu_init.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
index 0f891afa04..c16cb8dbe7 100644
--- a/target/ppc/cpu_init.c
+++ b/target/ppc/cpu_init.c
@@ -7186,6 +7186,9 @@ static void ppc_cpu_reset(DeviceState *dev)
 }
 pmu_update_summaries(env);
 }
+
+/* clean any pending stop state */
+env->resume_as_sreset = 0;
 #endif
 hreg_compute_hflags(env);
 env->reserve_addr = (target_ulong)-1ULL;
-- 
2.35.3




Re: [PATCH v2 03/10] qga: treat get-guest-fsinfo as "best effort"

2022-06-17 Thread Daniel P . Berrangé
On Thu, Jun 16, 2022 at 06:35:44PM +0400, Marc-André Lureau wrote:
> Hi
> 
> On Thu, Jun 16, 2022 at 6:27 PM John Snow  wrote:
> 
> > In some container environments, there may be references to block devices
> > witnessable from a container through /proc/self/mountinfo that reference
> > devices we simply don't have access to in the container, and could not
> > provide information about.
> >
> > Instead of failing the entire fsinfo command, return stub information
> > for these failed lookups.
> >
> > This allows test-qga to pass under docker tests, which are in turn used
> > by the CentOS VM tests.
> >
> > Signed-off-by: John Snow 
> > ---
> >  qga/commands-posix.c | 8 +++-
> >  1 file changed, 7 insertions(+), 1 deletion(-)
> >
> > diff --git a/qga/commands-posix.c b/qga/commands-posix.c
> > index 0469dc409d4..5989d4dca9d 100644
> > --- a/qga/commands-posix.c
> > +++ b/qga/commands-posix.c
> > @@ -1207,7 +1207,13 @@ static void build_guest_fsinfo_for_device(char
> > const *devpath,
> >
> >  syspath = realpath(devpath, NULL);
> >  if (!syspath) {
> > -error_setg_errno(errp, errno, "realpath(\"%s\")", devpath);
> > +if (errno == ENOENT) {
> > +/* This devpath may not exist because of container config,
> > etc. */
> > +fprintf(stderr, "realpath(%s) returned NULL/ENOENT\n",
> > devpath);
> >
> 
> qga uses g_critical() (except for some win32 code paths atm)
> 
> 
> > +fs->name = g_strdup("??\?-ENOENT");
> >
> 
> Hmm, maybe we should make the field optional instead.

In my own testing, this method is called in various scenarios.
Some example:

  devpath==/sys/dev/block/253:0
  syspath==/sys/devices/virtual/block/dm-0

=> fs->name == dm-0

  devpath==/sys/devices/virtual/block/dm-0/slaves/nvme0n1p4
  
syspath==/sys/devices/pci:00/:00:1d.0/:02:00.0/nvme/nvme0/nvme0n1/nvme0n1p4

=> fs->name == nvme0n1p4

  devpath==/sys/dev/block/259:2
  
syspath==/sys/devices/pci:00/:00:1d.0/:02:00.0/nvme/nvme0/nvme0n1/nvme0n1p2

=> fs->name == nvme0n1p2

We set fs->name from  basename(syspath)

If the realpath call fails, we could use  basename(devpath). That
would sometimes give the correct answer, and in other cases it
would at least give the major:minor number, which an admin can
manually correlate if desired via /proc/partitions.

If we want to be really advanced, we could just open /proc/partitions
and resolve the proper name ourselves, but that's probably overkill

  basename(sysfspath)

is better than g_strdup("??\?-ENOENT")  IMHO

With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




Re: [RFC PATCH 12/12] test/qtest: Add ubpf basic test case

2022-06-17 Thread Thomas Huth

On 17/06/2022 09.36, Zhang Chen wrote:

TODO: This test case does not work. We need to add the ubpf.h header to the
qtest compile flags: "-I ../ubpf/vm -I ../ubpf/vm/inc".
I'm not sure if we need it in qtest, because the normal tests/qtest do not
include test cases for external modules like fdt. Or maybe we just
need a qtest case for the filter-ubpf module.
This test will load pre-compiled ebpf binary and run it in QEMU.

Signed-off-by: Zhang Chen 
---

[...]

diff --git a/tests/qtest/ubpf-test.c b/tests/qtest/ubpf-test.c
new file mode 100644
index 00..6e70a99320
--- /dev/null
+++ b/tests/qtest/ubpf-test.c
@@ -0,0 +1,64 @@
+/*
+ * QEMU Userspace eBPF test case
+ *
+ * Copyright(C) 2022 Intel Corporation.
+ *
+ * Author:
+ *  Zhang Chen 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "libqtest.h"
+#include "ebpf/ubpf.h"
+
+/*
+ * Demo userspace ebpf program
+ * The test binary use clang to build this source code:
+ * demo_ubpf.c
+ *
+ * #include 
+ *
+ * static uint32_t double_it(uint32_t a)
+ * {
+ *  return (a * 2);
+ * }
+ *
+ * uint32_t bpf_prog(int32_t *arg) {
+ *   uint32_t result = 0;
+ *   result = double_it(*arg);
+ *
+ *   return result;
+ * }
+ *
+ * Build the userspace ebpf program binary file:
+ * clang -O2 -target bpf -c demo_ubpf.c -o demo_ubpf.o
+ *
+ * The external target source:
+ * printf "%b" '\x05\x00\x00\x00' > integer_5.mem
+ *
+ */
+
+int main(int argc, char **argc)
+{
+UbpfState u_ebpf;
+char program_path[] = "demo_ubpf.o";
+/* uBPF can read target from internal source or external source*/
+char target_path[] = "integer_5.mem";
+
+qemu_ubpf_init_jit(&u_ebpf, true);
+
+g_assert_cmpuint(qemu_ubpf_prepare(&u_ebpf, program_path), ==, 0);
+
+g_assert_true(qemu_ubpf_read_target(&u_ebpf, target_path));
+
+g_assert_cmpuint(qemu_run_ubpf_once(&u_ebpf, u_ebpf.target,
+u_ebpf.target_len), ==, 10);
+
+ubpf_destroy(u_ebpf.vm);
+
+return 0;
+}


Apart from the #include "libqtest.h" there is nothing related to qtest in 
here ... should this maybe rather go into test/unit/ instead?


 Thomas




Re: [PATCH v2 09/10] tests/vm: remove duplicate 'centos' VM test

2022-06-17 Thread Thomas Huth

On 16/06/2022 16.26, John Snow wrote:

This is listed twice by accident; we require genisoimage to run the
test, so remove the unconditional entry.

Signed-off-by: John Snow 
---
  tests/vm/Makefile.include | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/vm/Makefile.include b/tests/vm/Makefile.include
index 70eee2510c6..f3b7a9d299d 100644
--- a/tests/vm/Makefile.include
+++ b/tests/vm/Makefile.include
@@ -6,7 +6,7 @@ HOST_ARCH = $(if $(ARCH),$(ARCH),$(shell uname -m))
  
  EFI_AARCH64 = $(wildcard $(BUILD_DIR)/pc-bios/edk2-aarch64-code.fd)
  
-X86_IMAGES := freebsd netbsd openbsd centos fedora haiku.x86_64

+X86_IMAGES := freebsd netbsd openbsd fedora haiku.x86_64
  ifneq ($(GENISOIMAGE),)
  X86_IMAGES += centos
  ifneq ($(EFI_AARCH64),)


Reviewed-by: Thomas Huth 




Re: [PATCH v2 08/10] tests/vm: remove ubuntu.i386 VM test

2022-06-17 Thread Thomas Huth

On 16/06/2022 16.26, John Snow wrote:

Ubuntu 18.04 is out of our support window, and Ubuntu 20.04 does not
support i386 anymore. The debian project does, but they do not provide
any cloud images for it, a new expect-style script would have to be
written.

Since we have i386 cross-compiler tests hosted on GitLab CI, we don't
need to support this VM test anymore.

Signed-off-by: John Snow 
---
  tests/vm/Makefile.include |  3 +--
  tests/vm/ubuntu.i386  | 40 ---
  2 files changed, 1 insertion(+), 42 deletions(-)
  delete mode 100755 tests/vm/ubuntu.i386


Reviewed-by: Thomas Huth 




Re: [PATCH v2 02/10] tests/qemu-iotests: skip 108 when FUSE is not loaded

2022-06-17 Thread Thomas Huth

On 16/06/2022 16.26, John Snow wrote:

In certain container environments we may not have FUSE at all, so skip
the test in this circumstance too.

Signed-off-by: John Snow 
---
  tests/qemu-iotests/108 | 5 +
  1 file changed, 5 insertions(+)

diff --git a/tests/qemu-iotests/108 b/tests/qemu-iotests/108
index 9e923d6a59f..54e935acf28 100755
--- a/tests/qemu-iotests/108
+++ b/tests/qemu-iotests/108
@@ -60,6 +60,11 @@ if sudo -n losetup &>/dev/null; then
  else
  loopdev=false
  
+# Check for usable FUSE in the host environment:

+if test ! -c "/dev/fuse"; then
+_notrun 'No passwordless sudo nor usable /dev/fuse'
+fi
+
  # QSD --export fuse will either yield "Parameter 'id' is missing"
  # or "Invalid parameter 'fuse'", depending on whether there is
  # FUSE support or not.


Reviewed-by: Thomas Huth 




Re: [PATCH v2 03/10] qga: treat get-guest-fsinfo as "best effort"

2022-06-17 Thread Thomas Huth

On 16/06/2022 16.43, John Snow wrote:

On Thu, Jun 16, 2022 at 10:36 AM Marc-André Lureau
 wrote:


Hi

On Thu, Jun 16, 2022 at 6:27 PM John Snow  wrote:


In some container environments, there may be references to block devices
witnessable from a container through /proc/self/mountinfo that reference
devices we simply don't have access to in the container, and could not
provide information about.

Instead of failing the entire fsinfo command, return stub information
for these failed lookups.

This allows test-qga to pass under docker tests, which are in turn used
by the CentOS VM tests.

Signed-off-by: John Snow 
---
  qga/commands-posix.c | 8 +++-
  1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/qga/commands-posix.c b/qga/commands-posix.c
index 0469dc409d4..5989d4dca9d 100644
--- a/qga/commands-posix.c
+++ b/qga/commands-posix.c
@@ -1207,7 +1207,13 @@ static void build_guest_fsinfo_for_device(char const 
*devpath,

  syspath = realpath(devpath, NULL);
  if (!syspath) {
-error_setg_errno(errp, errno, "realpath(\"%s\")", devpath);
+if (errno == ENOENT) {
+/* This devpath may not exist because of container config, etc. */
+fprintf(stderr, "realpath(%s) returned NULL/ENOENT\n", devpath);



qga uses g_critical() (except for some win32 code paths atm)


Whoops, this is a debugging thing that I left in by accident. I was
just so excited that after testing overnight, everything worked. :)





+fs->name = g_strdup("??\?-ENOENT");



Hmm, maybe we should make the field optional instead.


Does that harm compatibility in a meaningful way? I'm happy to do
whatever QGA maintainers want me to do. I just did something quick and
dirty to get it working at all as a conversation starter. O:-)


Should the device get ignored instead of returning a dummy device? ... at 
least that's what I'd expect at a quick glance at the problem...


 Thomas




Re: [PATCH] block/rbd: support driver-specific reopen

2022-06-17 Thread Raphael Pour

Hello everyone,

what do you think? Please tell me if something needs to be clarified or 
improved.


Raphael

PS: Hopefully this second reply attempt isn't messed up (first: 
https://lists.nongnu.org/archive/html/qemu-block/2022-06/msg00344.html)


On 4/13/22 14:26, Raphael Pour wrote:

This patch completes the reopen functionality for an attached RBD where altered
driver options can be passed to. This is necessary to move RBDs between ceph
clusters without interrupting QEMU, where some ceph settings need to be 
adjusted.

The reopen_prepare method drops all generic block layer options and returns
early if no rbd-specific driver options are given, to remain compatible with
the previous behavior. Otherwise the reopen acts similarly to qemu_rbd_open.

The reopen_commit tears down the old state and replaces it with the new
one.

The reopen_abort drops an ongoing reopen.

Signed-off-by: Raphael Pour 
---
  block/rbd.c | 206 ++--
  1 file changed, 201 insertions(+), 5 deletions(-)

diff --git a/block/rbd.c b/block/rbd.c
index 6caf35cbba..e7b45d1c50 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -1029,19 +1029,213 @@ out:
  static int qemu_rbd_reopen_prepare(BDRVReopenState *state,
 BlockReopenQueue *queue, Error **errp)
  {
-BDRVRBDState *s = state->bs->opaque;
-int ret = 0;
+BDRVRBDState *new_s = state->bs->opaque;
+BlockdevOptionsRbd *opts = NULL;
+const QDictEntry *e;
+Error *local_err = NULL;
+char *keypairs, *secretid;
+rbd_image_info_t info;
+int r = 0;
  
-if (s->snap && state->flags & BDRV_O_RDWR) {

+if (new_s->snap && state->flags & BDRV_O_RDWR) {
  error_setg(errp,
 "Cannot change node '%s' to r/w when using RBD snapshot",
 bdrv_get_device_or_node_name(state->bs));
-ret = -EINVAL;
+r = -EINVAL;
  }
  
-return ret;

+/*
+ * Remove all keys from the generic layer which
+ * can't be converted by rbd
+ */
+qdict_del(state->options, "driver");
+qdict_del(state->options, "node-name");
+qdict_del(state->options, "auto-read-only");
+qdict_del(state->options, "discard");
+qdict_del(state->options, "cache");
+
+/*
+ * To maintain the compatibility prior the rbd-reopen,
+ * where the generic layer can be altered without any
+ * rbd argument given, we must early return if there
+ * aren't any rbd-specific options left.
+ */
+if (qdict_size(state->options) == 0) {
+return r;
+}
+
+new_s = state->opaque = g_new0(BDRVReopenState, 1);
+
+keypairs = g_strdup(qdict_get_try_str(state->options, "=keyvalue-pairs"));
+if (keypairs) {
+qdict_del(state->options, "=keyvalue-pairs");
+}
+
+secretid = g_strdup(qdict_get_try_str(state->options, "password-secret"));
+if (secretid) {
+qdict_del(state->options, "password-secret");
+}
+
+r = qemu_rbd_convert_options(state->options, &opts, &local_err);
+if (local_err) {
+/*
+ * If keypairs are present, that means some options are present in
+ * the modern option format.  Don't attempt to parse legacy option
+ * formats, as we won't support mixed usage.
+ */
+if (keypairs) {
+error_propagate(errp, local_err);
+goto out;
+}
+
+/*
+ * If the initial attempt to convert and process the options failed,
+ * we may be attempting to open an image file that has the rbd options
+ * specified in the older format consisting of all key/value pairs
+ * encoded in the filename.  Go ahead and attempt to parse the
+ * filename, and see if we can pull out the required options.
+ */
+r = qemu_rbd_attempt_legacy_options(state->options, &opts, &keypairs);
+if (r < 0) {
+/*
+ * Propagate the original error, not the legacy parsing fallback
+ * error, as the latter was just a best-effort attempt.
+ */
+error_propagate(errp, local_err);
+goto out;
+}
+/*
+ * Take care whenever deciding to actually deprecate; once this ability
+ * is removed, we will not be able to open any images with legacy-styled
+ * backing image strings.
+ */
+warn_report("RBD options encoded in the filename as keyvalue pairs "
+"is deprecated");
+}
+
+/*
+ * Remove the processed options from the QDict (the visitor processes
+ * _all_ options in the QDict)
+ */
+while ((e = qdict_first(state->options))) {
+qdict_del(state->options, e->key);
+}
+
+r = qemu_rbd_connect(&new_s->cluster, &new_s->io_ctx, opts,
+ !(state->flags & BDRV_O_NOCACHE), keypairs,
+ secretid, errp);
+if (r < 0) {
+goto out;
+}
+
+new_s->snap = g_strdup(opts->snapshot);
+

Re: [PATCH] tests/vm: do not specify -bios option

2022-06-17 Thread Thomas Huth

On 16/06/2022 10.30, Paolo Bonzini wrote:

When running from the build tree, the executable is able to find
the BIOS on its own; when running from the source tree, a firmware
blob should already be installed and there is no guarantee that
the one in the source tree works with the QEMU that is being used for
the installation.

Just remove the -bios option, since it is unnecessary and in fact
there are other x86 VM tests that do not bother specifying it.

Signed-off-by: Paolo Bonzini 
---
  tests/vm/fedora  | 1 -
  tests/vm/freebsd | 1 -
  tests/vm/netbsd  | 1 -
  tests/vm/openbsd | 1 -
  4 files changed, 4 deletions(-)

diff --git a/tests/vm/fedora b/tests/vm/fedora
index 92b78d6e2c..12eca919a0 100755
--- a/tests/vm/fedora
+++ b/tests/vm/fedora
@@ -79,7 +79,6 @@ class FedoraVM(basevm.BaseVM):
  self.exec_qemu_img("create", "-f", "qcow2", img_tmp, self.size)
  self.print_step("Booting installer")
  self.boot(img_tmp, extra_args = [
-"-bios", "pc-bios/bios-256k.bin",
  "-machine", "graphics=off",
  "-device", "VGA",
  "-cdrom", iso
diff --git a/tests/vm/freebsd b/tests/vm/freebsd
index 805db759d6..cd1fabde52 100755
--- a/tests/vm/freebsd
+++ b/tests/vm/freebsd
@@ -95,7 +95,6 @@ class FreeBSDVM(basevm.BaseVM):
  
  self.print_step("Booting installer")

  self.boot(img_tmp, extra_args = [
-"-bios", "pc-bios/bios-256k.bin",
  "-machine", "graphics=off",
  "-device", "VGA",
  "-cdrom", iso
diff --git a/tests/vm/netbsd b/tests/vm/netbsd
index 45aa9a7fda..aa883ec23c 100755
--- a/tests/vm/netbsd
+++ b/tests/vm/netbsd
@@ -86,7 +86,6 @@ class NetBSDVM(basevm.BaseVM):
  
  self.print_step("Booting installer")

  self.boot(img_tmp, extra_args = [
-"-bios", "pc-bios/bios-256k.bin",
  "-machine", "graphics=off",
  "-cdrom", iso
  ])
diff --git a/tests/vm/openbsd b/tests/vm/openbsd
index 13c8254214..6f1b6f5b98 100755
--- a/tests/vm/openbsd
+++ b/tests/vm/openbsd
@@ -82,7 +82,6 @@ class OpenBSDVM(basevm.BaseVM):
  
  self.print_step("Booting installer")

  self.boot(img_tmp, extra_args = [
-"-bios", "pc-bios/bios-256k.bin",
  "-machine", "graphics=off",
  "-device", "VGA",
  "-cdrom", iso


Reviewed-by: Thomas Huth 




Re: [RFC PATCH 01/12] configure: Add iovisor/ubpf project as a submodule for QEMU

2022-06-17 Thread Daniel P . Berrangé
On Fri, Jun 17, 2022 at 03:36:19PM +0800, Zhang Chen wrote:
> Make iovisor/ubpf project be a git submodule for QEMU.
> It will auto clone ubpf project when configure QEMU.

I don't think we need to do this. As it is brand new functionality we
don't have any back compat issues. We should just expect the distros
to ship ubpf if they want their QEMU builds to take advantage of it.


With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




Re: [RFC PATCH v8 00/21] Net Control VQ support with asid in vDPA SVQ

2022-06-17 Thread Eugenio Perez Martin
On Fri, Jun 17, 2022 at 3:29 AM Jason Wang  wrote:
>
> On Wed, Jun 15, 2022 at 6:03 PM Eugenio Perez Martin
>  wrote:
> >
> > On Wed, Jun 15, 2022 at 5:04 AM Jason Wang  wrote:
> > >
> > > On Tue, Jun 14, 2022 at 5:32 PM Eugenio Perez Martin
> > >  wrote:
> > > >
> > > > On Tue, Jun 14, 2022 at 10:20 AM Jason Wang  wrote:
> > > > >
> > > > > On Tue, Jun 14, 2022 at 4:14 PM Eugenio Perez Martin
> > > > >  wrote:
> > > > > >
> > > > > > On Tue, Jun 14, 2022 at 10:02 AM Jason Wang  
> > > > > > wrote:
> > > > > > >
> > > > > > > On Tue, Jun 14, 2022 at 12:32 AM Eugenio Perez Martin
> > > > > > >  wrote:
> > > > > > > >
> > > > > > > > On Wed, Jun 8, 2022 at 9:28 PM Eugenio Perez Martin 
> > > > > > > >  wrote:
> > > > > > > > >
> > > > > > > > > On Wed, Jun 8, 2022 at 7:51 AM Jason Wang 
> > > > > > > > >  wrote:
> > > > > > > > > >
> > > > > > > > > >
> > > > > > > > > > 在 2022/5/20 03:12, Eugenio Pérez 写道:
> > > > > > > > > > > Control virtqueue is used by networking device for 
> > > > > > > > > > > accepting various
> > > > > > > > > > > commands from the driver. It's a must to support 
> > > > > > > > > > > multiqueue and other
> > > > > > > > > > > configurations.
> > > > > > > > > > >
> > > > > > > > > > > Shadow VirtQueue (SVQ) already makes possible migration 
> > > > > > > > > > > of virtqueue
> > > > > > > > > > > states, effectively intercepting them so qemu can track 
> > > > > > > > > > > what regions of memory
> > > > > > > > > > > are dirty because device action and needs migration. 
> > > > > > > > > > > However, this does not
> > > > > > > > > > > solve networking device state seen by the driver because 
> > > > > > > > > > > CVQ messages, like
> > > > > > > > > > > changes on MAC addresses from the driver.
> > > > > > > > > > >
> > > > > > > > > > > > To solve that, this series uses SVQ infrastructure 
> > > > > > > > > > > proposed to intercept
> > > > > > > > > > > networking control messages used by the device. This way, 
> > > > > > > > > > > qemu is able to
> > > > > > > > > > > update VirtIONet device model and to migrate it.
> > > > > > > > > > >
> > > > > > > > > > > However, to intercept all queues would slow device data 
> > > > > > > > > > > forwarding. To solve
> > > > > > > > > > > that, only the CVQ must be intercepted all the time. This 
> > > > > > > > > > > is achieved using
> > > > > > > > > > > > the ASID infrastructure, that allows different 
> > > > > > > > > > > translations for different
> > > > > > > > > > > virtqueues. The most updated kernel part of ASID is 
> > > > > > > > > > > proposed at [1].
> > > > > > > > > > >
> > > > > > > > > > > You can run qemu in two modes after applying this series: 
> > > > > > > > > > > only intercepting
> > > > > > > > > > > cvq with x-cvq-svq=on or intercept all the virtqueues 
> > > > > > > > > > > adding cmdline x-svq=on:
> > > > > > > > > > >
> > > > > > > > > > > -netdev 
> > > > > > > > > > > type=vhost-vdpa,vhostdev=/dev/vhost-vdpa-0,id=vhost-vdpa0,x-cvq-svq=on,x-svq=on
> > > > > > > > > > >
> > > > > > > > > > > First three patches enable the update of the virtio-net 
> > > > > > > > > > > device model for each
> > > > > > > > > > > > CVQ message acknowledged by the device.
> > > > > > > > > > >
> > > > > > > > > > > Patches from 5 to 9 enables individual SVQ to copy the 
> > > > > > > > > > > buffers to QEMU's VA.
> > > > > > > > > > > > This allows simplifying the memory mapping, instead of map 
> > > > > > > > > > > all the guest's
> > > > > > > > > > > memory like in the data virtqueues.
> > > > > > > > > > >
> > > > > > > > > > > Patch 10 allows to inject control messages to the device. 
> > > > > > > > > > > This allows to set
> > > > > > > > > > > state to the device both at QEMU startup and at live 
> > > > > > > > > > > migration destination. In
> > > > > > > > > > > the future, this may also be used to emulate _F_ANNOUNCE.
> > > > > > > > > > >
> > > > > > > > > > > Patch 11 updates kernel headers, but it assign random 
> > > > > > > > > > > numbers to needed ioctls
> > > > > > > > > > > because they are still not accepted in the kernel.
> > > > > > > > > > >
> > > > > > > > > > > Patches 12-16 enables the set of the features of the net 
> > > > > > > > > > > device model to the
> > > > > > > > > > > vdpa device at device start.
> > > > > > > > > > >
> > > > > > > > > > > > Last ones enables the separated ASID and SVQ.
> > > > > > > > > > >
> > > > > > > > > > > Comments are welcomed.
> > > > > > > > > >
> > > > > > > > > >
> > > > > > > > > > As discussed, I think we need to split this huge series 
> > > > > > > > > > into smaller ones:
> > > > > > > > > >
> > > > > > > > > > 1) shadow CVQ only, this makes rx-filter-event work
> > > > > > > > > > 2) ASID support for CVQ
> > > > > > > > > >
> > > > > > > > > > And for 1) we need consider whether or not it could be 
> > > > > > > > > > simplified.
> > > > > > > > > >
> > > > > > > > > > Or do it in reverse order, since if we do 1) first, we may 
> > > > > > > > > > have 

[RFC PATCH 12/12] test/qtest: Add ubpf basic test case

2022-06-17 Thread Zhang Chen
TODO: This test case does not work yet. The qtest build needs the ubpf.h
header on its include path ("-I ../ubpf/vm -I ../ubpf/vm/inc").
I'm not sure whether we need it in qtest, because tests/qtest normally
does not include test cases for external modules such as fdt. Alternatively,
we may just need a qtest case for the filter-ubpf module.
This test loads a pre-compiled eBPF binary and runs it in QEMU.

Signed-off-by: Zhang Chen 
---
 tests/qtest/demo_ubpf.o   | Bin 0 -> 544 bytes
 tests/qtest/integer_5.mem | Bin 0 -> 4 bytes
 tests/qtest/meson.build   |   3 +-
 tests/qtest/ubpf-test.c   |  64 ++
 4 files changed, 66 insertions(+), 1 deletion(-)
 create mode 100644 tests/qtest/demo_ubpf.o
 create mode 100644 tests/qtest/integer_5.mem
 create mode 100644 tests/qtest/ubpf-test.c

diff --git a/tests/qtest/demo_ubpf.o b/tests/qtest/demo_ubpf.o
new file mode 100644
index 
..960a411c224348548db42d9ae2716ae3ef4ea249
GIT binary patch
literal 544
zcmb<-^>JfjWMqH=MuzVU2p#Csy$>0EHJ20>URVE5RB+`KtNZ(Wl7bhtPlwo1`
z_#a~fe9`w0b}Wvq*jzLBo(B^7Zl~EGw9{yl;y@Jrlb@VXQnfh0>yPpQj1IU
zk{R@hONvSolYn$(E{LWM&;lC6jK!!0P%$esIrOjt@j;jkO`QXj5BDdO{66uitn
o|MP)V1G3ZtWDbzc0_CIIZv+%agepQ)1eEE4qz|MHW
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "libqtest.h"
+#include "ebpf/ubpf.h"
+
+/*
+ * Demo userspace ebpf program
+ * The test binary use clang to build this source code:
+ * demo_ubpf.c
+ *
+ * #include <stdint.h>
+ *
+ * static uint32_t double_it(uint32_t a)
+ * {
+ *  return (a * 2);
+ * }
+ *
+ * uint32_t bpf_prog(int32_t *arg) {
+ *   uint32_t result = 0;
+ *   result = double_it(*arg);
+ *
+ *   return result;
+ * }
+ *
+ * Build the userspace ebpf program binary file:
+ * clang -O2 -target bpf -c demo_ubpf.c -o demo_ubpf.o
+ *
+ * The external target source:
+ * printf "%b" '\x05\x00\x00\x00' > integer_5.mem
+ *
+ */
+
+/*
+ * Load the pre-compiled demo program, feed it the 4-byte integer stored in
+ * integer_5.mem (value 5) and check that the program returns twice that
+ * value.
+ */
+int main(int argc, char **argv)
+{
+    /* Zero-initialise so that no field is read before a helper sets it. */
+    UbpfState u_ebpf = { 0 };
+    char program_path[] = "demo_ubpf.o";
+    /* uBPF can read target from internal source or external source */
+    char target_path[] = "integer_5.mem";
+
+    qemu_ubpf_init_jit(&u_ebpf, true);
+
+    g_assert_cmpuint(qemu_ubpf_prepare(&u_ebpf, program_path), ==, 0);
+
+    g_assert_true(qemu_ubpf_read_target(&u_ebpf, target_path));
+
+    /* double_it(5) in the demo program must yield 10. */
+    g_assert_cmpuint(qemu_run_ubpf_once(&u_ebpf, u_ebpf.target,
+                                        u_ebpf.target_len), ==, 10);
+
+    ubpf_destroy(u_ebpf.vm);
+
+    return 0;
+}
-- 
2.25.1




[RFC PATCH 11/12] docs/devel: Add userspace-ebpf.rst

2022-06-17 Thread Zhang Chen
Introduce userspace ebpf basic knowledge.

Signed-off-by: Zhang Chen 
---
 docs/devel/userspace-ebpf.rst | 106 ++
 1 file changed, 106 insertions(+)
 create mode 100644 docs/devel/userspace-ebpf.rst

diff --git a/docs/devel/userspace-ebpf.rst b/docs/devel/userspace-ebpf.rst
new file mode 100644
index 00..41eb9b04d6
--- /dev/null
+++ b/docs/devel/userspace-ebpf.rst
@@ -0,0 +1,106 @@
+======================
+Userspace eBPF support
+======================
+
+eBPF is a revolutionary technology with origins in the Linux kernel that
+can run sandboxed programs in an operating system kernel. It is used to
+safely and efficiently extend the capabilities of the kernel without
+requiring to change kernel source code or load kernel
+modules.(from https://ebpf.io/)
+
+Recently, I worked on QEMU net filter related jobs, similar to
+netfilter/iptables in the kernel. We noticed that the kernel extended the
+original netfilter cBPF to eBPF.
+
+This gives the Linux kernel the ability to load code dynamically. Why not
+enable user space eBPF in QEMU? It can load a binary eBPF program even
+while the VM is running. Add some hooks in QEMU as the user space eBPF load
+points and do the work on different layers. The original idea is from
+Jason Wang.
+
+
+Below are the advantages of kernel eBPF. Most of these functions can be
+implemented in QEMU. The Power of Programmability.
+
+1). Safety:
+
+Building on the foundation of seeing and understanding all system
+calls and combining that with a packet and socket-level view of all
+networking operations allows for revolutionary new approaches to
+securing systems.
+
+2). Tracing & Profiling:
+
+The ability to attach eBPF programs to trace points as well as kernel
+and user application probe points allows unprecedented visibility into
+the runtime behavior of applications and the system itself.
+
+3). Networking:
+
+The combination of programmability and efficiency makes eBPF a natural
+fit for all packet processing requirements of networking solutions.
+
+4). Observability & Monitoring:
+
+Instead of relying on static counters and gauges exposed by the
+operating system, eBPF enables the collection & in-kernel aggregation
+of custom metrics and generation of visibility events based on a wide
+range of possible sources.
+
+The QEMU userspace eBPF design is based on the ubpf project
+(https://github.com/iovisor/ubpf), the most mature userspace eBPF
+implementation. This project is officially supported by iovisor (like BCC
+and bpftrace). QEMU userspace eBPF uses the ubpf project as a git submodule.
+
+The current implementation supports loading an eBPF program and running it
+in the filter-ubpf module. Developers can easily reuse the ubpf functions in
+QEMU's other modules via the functions in /ebpf/ubpf.c, and JIT is supported.
+The uBPF license is Apache License 2.0, which is compatible with
+QEMU's GPLv2 license, the same as meson.
+
+How to use it:
+1. Write your ebpf C program. For example filter dst IP:
+
+bpf_filter.c
+
+#include <stdint.h>
+#include <arpa/inet.h>
+
+#define ONE_ONE_ONE_ONE 0x01010101
+
+struct ipv4_header {
+uint8_t ver_ihl;
+uint8_t tos;
+uint16_t total_length;
+uint16_t id;
+uint16_t frag;
+uint8_t ttl;
+uint8_t proto;
+uint16_t csum;
+uint32_t src;
+uint32_t dst;
+};
+
+int is_dst_one_one_one_one(void *opaque) {
+struct ipv4_header *ipv4_header = (struct ipv4_header*)opaque;
+
+if (ntohl(ipv4_header->dst) == ONE_ONE_ONE_ONE) {
+return 1;
+}
+
+return 0;
+}
+
+2. Build it with clang:
+  clang -O2 -target bpf -c bpf_filter.c -o ip_dst.o
+
+3. Load it with Qemu filter-ubpf:
+  -object filter-ubpf,netdev=hn0,id=ubpf1,queue=tx,ip-mode=on,
+   ubpf-handler=ip_dst.o
+
+4. Boot the VM and it will filter packets whose destination IP is 1.1.1.1.
+
+
+TODO: Need to add more comments and test cases for ubpf. The current
+  implementation does not include an eBPF verifier. QEMU is a userspace
+  program, unlike kernel eBPF, which runs code in kernel space. I think
+  that someone who wants to hack QEMU has no need to load a malicious
+  ubpf program; they can hack the QEMU code directly.
-- 
2.25.1




[RFC PATCH 07/12] net/filter: Introduce filter-ubpf module

2022-06-17 Thread Zhang Chen
The filter-ubpf module is able to load a user-defined eBPF program
to handle network packets based on the filter framework.

Signed-off-by: Zhang Chen 
---
 net/filter-ubpf.c | 149 ++
 net/meson.build   |   1 +
 2 files changed, 150 insertions(+)
 create mode 100644 net/filter-ubpf.c

diff --git a/net/filter-ubpf.c b/net/filter-ubpf.c
new file mode 100644
index 00..c63a021759
--- /dev/null
+++ b/net/filter-ubpf.c
@@ -0,0 +1,149 @@
+/*
+ * QEMU Userspace eBPF Support
+ *
+ * Copyright(C) 2022 Intel Corporation.
+ *
+ * Author:
+ *  Zhang Chen 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "net/filter.h"
+#include "net/net.h"
+#include "qapi/error.h"
+#include "qom/object.h"
+#include "qemu/main-loop.h"
+#include "qemu/error-report.h"
+#include "trace.h"
+#include "ebpf/ubpf.h"
+
+#define TYPE_FILTER_UBPF "filter-ubpf"
+OBJECT_DECLARE_SIMPLE_TYPE(FiliterUbpfState, FILTER_UBPF)
+
+/* Per-instance state of a filter-ubpf object. */
+struct FiliterUbpfState {
+NetFilterState parent_obj;
+/* Backs the "ip-mode" property; set to true by filter_ubpf_init(). */
+bool ip_mode;
+/* Backs the "ubpf-handler" property; g_strdup()'d by the setter. */
+char *handler;
+/* uBPF state handed to the qemu_ubpf_*() helpers in filter_ubpf_setup(). */
+UbpfState ubpf;
+};
+
+/*
+ * Packet hook installed as nfc->receive_iov in filter_ubpf_class_init().
+ *
+ * Not implemented yet: all arguments are ignored and 0 is returned.
+ * NOTE(review): 0 presumably tells the filter framework to keep passing the
+ * packet along - confirm against net/filter.h.
+ */
+static ssize_t filter_ubpf_receive_iov(NetFilterState *nf,
+   NetClientState *sender,
+   unsigned flags,
+   const struct iovec *iov,
+   int iovcnt,
+   NetPacketSent *sent_cb)
+{
+/* TODO: handle packet by loaded userspace ebpf program */
+
+return 0;
+}
+
+/*
+ * Cleanup hook installed as nfc->cleanup; currently an empty placeholder
+ * that releases nothing.
+ */
+static void filter_ubpf_cleanup(NetFilterState *nf)
+{
+/* cleanup */
+}
+
+/*
+ * Setup hook installed as nfc->setup: load the eBPF program named by the
+ * 'ubpf-handler' property into this filter's uBPF state.
+ *
+ * Sets @errp and returns early when the property is unset or when
+ * qemu_ubpf_prepare() reports a failure (non-zero return).
+ */
+static void filter_ubpf_setup(NetFilterState *nf, Error **errp)
+{
+    FiliterUbpfState *s = FILTER_UBPF(nf);
+
+    if (s->handler == NULL) {
+        error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND, "filter-ubpf parameter"
+                  " 'ubpf-handler' cannot be empty");
+        return;
+    }
+
+    /* JIT compilation is always requested for filter programs. */
+    qemu_ubpf_init_jit(&s->ubpf, true);
+
+    if (qemu_ubpf_prepare(&s->ubpf, s->handler)) {
+        error_set(errp, ERROR_CLASS_DEVICE_NOT_ACTIVE, "filter-ubpf parameter"
+                  " 'ubpf-handler' cannot be load");
+    }
+}
+
+/*
+ * Getter for the "ubpf-handler" property.
+ *
+ * Returns a newly g_strdup()'d copy of the stored handler file name.
+ */
+static char *filter_ubpf_get_handler(Object *obj, Error **errp)
+{
+FiliterUbpfState *s = FILTER_UBPF(obj);
+
+return g_strdup(s->handler);
+}
+
+/*
+ * Setter for the "ubpf-handler" property.
+ *
+ * Frees the previous value and stores a private copy of @value.  Sets @errp
+ * when the stored copy ends up NULL.
+ */
+static void filter_ubpf_set_handler(Object *obj,
+const char *value,
+Error **errp)
+{
+FiliterUbpfState *s = FILTER_UBPF(obj);
+
+/* Drop the old string before taking a copy of the new one. */
+g_free(s->handler);
+s->handler = g_strdup(value);
+if (!s->handler) {
+error_setg(errp, "filter ubpf needs 'ubpf-handler' "
+   "property set");
+return;
+}
+}
+
+/* Getter for the "ip-mode" property: returns the stored ip_mode flag. */
+static bool filter_ubpf_get_mode(Object *obj, Error **errp)
+{
+FiliterUbpfState *s = FILTER_UBPF(obj);
+
+return s->ip_mode;
+}
+
+/* Setter for the "ip-mode" property: stores @value in the ip_mode flag. */
+static void filter_ubpf_set_mode(Object *obj, bool value, Error **errp)
+{
+FiliterUbpfState *s = FILTER_UBPF(obj);
+
+s->ip_mode = value;
+}
+
+/*
+ * Class initializer: registers the "ubpf-handler" (string) and "ip-mode"
+ * (bool) properties and installs the NetFilterClass callbacks.
+ */
+static void filter_ubpf_class_init(ObjectClass *oc, void *data)
+{
+NetFilterClass *nfc = NETFILTER_CLASS(oc);
+
+object_class_property_add_str(oc, "ubpf-handler",
+  filter_ubpf_get_handler,
+  filter_ubpf_set_handler);
+object_class_property_add_bool(oc, "ip-mode",
+   filter_ubpf_get_mode,
+   filter_ubpf_set_mode);
+
+/* Hook this filter into the generic netfilter framework. */
+nfc->setup = filter_ubpf_setup;
+nfc->cleanup = filter_ubpf_cleanup;
+nfc->receive_iov = filter_ubpf_receive_iov;
+}
+
+/* Instance initializer: enables ip_mode as the default for a new object. */
+static void filter_ubpf_init(Object *obj)
+{
+FiliterUbpfState *s = FILTER_UBPF(obj);
+
+/* Filter-ubpf default is ip_mode */
+s->ip_mode = true;
+}
+
+/*
+ * Instance finalizer: release the 'ubpf-handler' string owned by the object
+ * so it is not leaked when the filter is destroyed.
+ */
+static void filter_ubpf_fini(Object *obj)
+{
+    FiliterUbpfState *s = FILTER_UBPF(obj);
+
+    /* Allocated with g_strdup() in filter_ubpf_set_handler(). */
+    g_free(s->handler);
+    s->handler = NULL;
+}
+
+/*
+ * QOM type description for "filter-ubpf": a TYPE_NETFILTER subclass whose
+ * instances are FiliterUbpfState objects.
+ */
+static const TypeInfo filter_ubpf_info = {
+.name = TYPE_FILTER_UBPF,
+.parent = TYPE_NETFILTER,
+.class_init = filter_ubpf_class_init,
+.instance_init = filter_ubpf_init,
+.instance_finalize = filter_ubpf_fini,
+.instance_size = sizeof(FiliterUbpfState),
+};
+
+/* Register the filter-ubpf type with QOM; invoked through type_init(). */
+static void register_types(void)
+{
+    type_register_static(&filter_ubpf_info);
+}
+
+type_init(register_types);
diff --git a/net/meson.build b/net/meson.build
index 754e2d1d40..177078fa7a 100644
--- a/net/meson.build
+++ b/net/meson.build
@@ -14,6 +14,7 @@ softmmu_ss.add(files(
   'queue.c',
   'socket.c',
   'util.c',
+  'filter-ubpf.c',
 ))
 
 softmmu_ss.add(when: 'CONFIG_TCG', if_true: files('filter-replay.c'))
-- 
2.25.1




[RFC PATCH 08/12] qapi: Add FilterUbpfProperties and qemu-options

2022-06-17 Thread Zhang Chen
Add filter-ubpf related QOM and qemu-options.

Signed-off-by: Zhang Chen 
---
 qapi/qom.json   | 18 ++
 qemu-options.hx |  6 ++
 2 files changed, 24 insertions(+)

diff --git a/qapi/qom.json b/qapi/qom.json
index 6a653c6636..820a5218e8 100644
--- a/qapi/qom.json
+++ b/qapi/qom.json
@@ -444,6 +444,22 @@
   'base': 'NetfilterProperties',
   'data': { '*vnet_hdr_support': 'bool' } }
 
+##
+# @FilterUbpfProperties:
+#
+# Properties for filter-ubpf objects.
+#
+# @ip-mode: if true, IP packet handling mode is enabled (default: true).
+#
+# @ubpf-handler: The filename of the userspace ebpf packet handler.
+#
+# Since: 7.1
+##
+{ 'struct': 'FilterUbpfProperties',
+  'base': 'NetfilterProperties',
+  'data': { '*ip-mode': 'bool',
+'*ubpf-handler': 'str' } }
+
 ##
 # @InputBarrierProperties:
 #
@@ -845,6 +861,7 @@
 'filter-redirector',
 'filter-replay',
 'filter-rewriter',
+'filter-ubpf',
 'input-barrier',
 { 'name': 'input-linux',
   'if': 'CONFIG_LINUX' },
@@ -911,6 +928,7 @@
   'filter-redirector':  'FilterRedirectorProperties',
   'filter-replay':  'NetfilterProperties',
   'filter-rewriter':'FilterRewriterProperties',
+  'filter-ubpf':'FilterUbpfProperties',
   'input-barrier':  'InputBarrierProperties',
   'input-linux':{ 'type': 'InputLinuxProperties',
   'if': 'CONFIG_LINUX' },
diff --git a/qemu-options.hx b/qemu-options.hx
index 60cf188da4..3dfb858867 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -5080,6 +5080,12 @@ SRST
 stored. The file format is libpcap, so it can be analyzed with
 tools such as tcpdump or Wireshark.
 
+``-object filter-ubpf,id=id,netdev=dev,ubpf-handler=filename[,ip-mode][,position=head|tail|id=<id>][,insert=behind|before]``
+filter-ubpf handles the network traffic on netdev dev with the userspace
+ebpf program loaded from the handler file specified by filename.
+If ip-mode is disabled, the loaded ebpf program will handle raw
+network packets.
+
 ``-object 
colo-compare,id=id,primary_in=chardevid,secondary_in=chardevid,outdev=chardevid,iothread=id[,vnet_hdr_support][,notify_dev=id][,compare_timeout=@var{ms}][,expired_scan_cycle=@var{ms}][,max_queue_size=@var{size}]``
 Colo-compare gets packet from primary\_in chardevid and
 secondary\_in, then compare whether the payload of primary packet
-- 
2.25.1




[RFC PATCH 05/12] ebpf/uBPF: Add qemu_prepare_ubpf to load ebpf binary

2022-06-17 Thread Zhang Chen
qemu_ubpf_prepare() loads a user-defined userspace eBPF binary file
into the QEMU userspace eBPF VM but does not run it. The eBPF program
will be triggered at the hook point.

Signed-off-by: Zhang Chen 
---
 ebpf/ubpf-stub.c |   5 +++
 ebpf/ubpf.c  | 100 +++
 ebpf/ubpf.h  |   1 +
 3 files changed, 106 insertions(+)

diff --git a/ebpf/ubpf-stub.c b/ebpf/ubpf-stub.c
index 2e8bf15b91..885bd954b7 100644
--- a/ebpf/ubpf-stub.c
+++ b/ebpf/ubpf-stub.c
@@ -22,3 +22,8 @@ bool qemu_ubpf_read_target(UbpfState *u_ebpf, char *path)
 }
 
 void qemu_ubpf_init_jit(UbpfState *u_ebpf, bool jit) {}
+
+/*
+ * Stub version of qemu_ubpf_prepare(): does nothing and returns 0.
+ * NOTE(review): presumably built when uBPF support is unavailable - confirm
+ * against the meson rules for ubpf-stub.c.
+ */
+int qemu_ubpf_prepare(UbpfState *u_ebpf, char *code_path)
+{
+return 0;
+}
diff --git a/ebpf/ubpf.c b/ebpf/ubpf.c
index 38a6530903..d65fffeda3 100644
--- a/ebpf/ubpf.c
+++ b/ebpf/ubpf.c
@@ -99,3 +99,103 @@ void qemu_ubpf_init_jit(UbpfState *u_ebpf, bool jit)
 {
 u_ebpf->jit = jit;
 }
+
+/*
+ * Pack five bytes into one 64-bit value, most significant first:
+ * @a ends up in bits 39..32 and @e in bits 7..0.
+ */
+static uint64_t gather_bytes(uint8_t a, uint8_t b, uint8_t c,
+                             uint8_t d, uint8_t e)
+{
+    const uint8_t bytes[] = { a, b, c, d, e };
+    uint64_t packed = 0;
+    unsigned int idx;
+
+    /* Shift the accumulator up one byte and append the next input byte. */
+    for (idx = 0; idx < sizeof(bytes); idx++) {
+        packed = (packed << 8) | bytes[idx];
+    }
+
+    return packed;
+}
+
+static void trash_registers(void)
+{
+/* Overwrite all caller-save registers */
+asm(
+"mov $0xf0, %rax;"
+"mov $0xf1, %rcx;"
+"mov $0xf2, %rdx;"
+"mov $0xf3, %rsi;"
+"mov $0xf4, %rdi;"
+"mov $0xf5, %r8;"
+"mov $0xf6, %r9;"
+"mov $0xf7, %r10;"
+"mov $0xf8, %r11;"
+);
+}
+
+/*
+ * Helper exported to eBPF programs: square root of @x computed with sqrt()
+ * and converted back to uint32_t (the fractional part is discarded).
+ */
+static uint32_t sqrti(uint32_t x)
+{
+return sqrt(x);
+}
+
+/*
+ * Helper exported to eBPF programs: returns its argument unchanged.
+ * register_functions() registers it as the VM's unwind function.
+ */
+static uint64_t unwind(uint64_t i)
+{
+return i;
+}
+
+/*
+ * Register the external helper functions that a loaded program may call,
+ * using indices 0..5, and mark index 5 ("unwind") as the unwind function.
+ */
+static void register_functions(struct ubpf_vm *vm)
+{
+ubpf_register(vm, 0, "gather_bytes", gather_bytes);
+ubpf_register(vm, 1, "memfrob", memfrob);
+ubpf_register(vm, 2, "trash_registers", trash_registers);
+ubpf_register(vm, 3, "sqrti", sqrti);
+ubpf_register(vm, 4, "strcmp_ext", strcmp);
+ubpf_register(vm, 5, "unwind", unwind);
+/* Must match the index used for "unwind" above. */
+ubpf_set_unwind_function_index(vm, 5);
+}
+
+/*
+ * Load the eBPF program stored in @code_path into a new uBPF VM.
+ *
+ * The file is read with qemu_ubpf_read_code(), a VM is created and the
+ * helper functions are registered.  The code is loaded as ELF when it starts
+ * with the ELF magic and as raw bytecode otherwise.  If u_ebpf->jit is set,
+ * the program is also JIT-compiled into u_ebpf->fn.  The program is only
+ * prepared here; it is not run.
+ *
+ * Returns 0 on success and -1 on failure.  Failures are reported with
+ * error_report(); once the VM exists, a failure also destroys it and resets
+ * u_ebpf->vm to NULL.
+ */
+int qemu_ubpf_prepare(UbpfState *u_ebpf, char *code_path)
+{
+    bool is_elf;
+    char *errmsg = NULL;
+    int ret;
+
+    if (!qemu_ubpf_read_code(u_ebpf, code_path)) {
+        error_report("Ubpf failed to read code");
+        return -1;
+    }
+
+    u_ebpf->vm = ubpf_create();
+    if (!u_ebpf->vm) {
+        error_report("Failed to create ubpf VM");
+        return -1;
+    }
+
+    register_functions(u_ebpf->vm);
+
+    /*
+     * The ELF magic corresponds to an RSH instruction with an offset,
+     * which is invalid.
+     */
+    is_elf = u_ebpf->code_len >= SELFMAG &&
+             !memcmp(u_ebpf->code, ELFMAG, SELFMAG);
+
+    if (is_elf) {
+        ret = ubpf_load_elf(u_ebpf->vm, u_ebpf->code,
+                            u_ebpf->code_len, &errmsg);
+    } else {
+        ret = ubpf_load(u_ebpf->vm, u_ebpf->code,
+                        u_ebpf->code_len, &errmsg);
+    }
+
+    if (ret < 0) {
+        error_report("Failed to load ubpf code: %s ", errmsg);
+        goto fail;
+    }
+
+    if (u_ebpf->jit) {
+        u_ebpf->fn = ubpf_compile(u_ebpf->vm, &errmsg);
+        if (u_ebpf->fn == NULL) {
+            error_report("Failed to ubpf compile: %s", errmsg);
+            goto fail;
+        }
+    }
+
+    return 0;
+
+fail:
+    /* Shared cleanup: never leave a half-initialised VM behind. */
+    free(errmsg);
+    ubpf_destroy(u_ebpf->vm);
+    u_ebpf->vm = NULL;
+    return -1;
+}
diff --git a/ebpf/ubpf.h b/ebpf/ubpf.h
index 808c02565c..9a35efbeb6 100644
--- a/ebpf/ubpf.h
+++ b/ebpf/ubpf.h
@@ -37,5 +37,6 @@ typedef struct UbpfState {
 bool qemu_ubpf_read_code(UbpfState *u_ebpf, char *path);
 bool qemu_ubpf_read_target(UbpfState *u_ebpf, char *path);
 void qemu_ubpf_init_jit(UbpfState *u_ebpf, bool jit);
+int qemu_ubpf_prepare(UbpfState *u_ebpf, char *code_path);
 
 #endif /* QEMU_UBPF_H */
-- 
2.25.1




  1   2   >