[PATCH v2 10/12] hw/sd/sdcard: Simplify sd_inactive_state handling

2024-06-24 Thread Philippe Mathieu-Daudé
A card entering sd_inactive_state powers off and won't respond
anymore. Handle that once, on entry to sd_do_command().

Remove the now always-true condition in sd_cmd_GO_IDLE_STATE().

Signed-off-by: Philippe Mathieu-Daudé 
---
 hw/sd/sd.c | 12 +++-
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/hw/sd/sd.c b/hw/sd/sd.c
index 36955189e8..fce99d655d 100644
--- a/hw/sd/sd.c
+++ b/hw/sd/sd.c
@@ -1072,10 +1072,8 @@ static sd_rsp_type_t sd_cmd_unimplemented(SDState *sd, 
SDRequest req)
 /* CMD0 */
 static sd_rsp_type_t sd_cmd_GO_IDLE_STATE(SDState *sd, SDRequest req)
 {
-if (sd->state != sd_inactive_state) {
-sd->state = sd_idle_state;
-sd_reset(DEVICE(sd));
-}
+sd->state = sd_idle_state;
+sd_reset(DEVICE(sd));
 
 return sd_is_spi(sd) ? sd_r1 : sd_r0;
 }
@@ -1570,7 +1568,6 @@ static sd_rsp_type_t sd_normal_command(SDState *sd, 
SDRequest req)
 switch (sd->state) {
 case sd_ready_state:
 case sd_identification_state:
-case sd_inactive_state:
 return sd_illegal;
 case sd_idle_state:
 if (rca) {
@@ -1791,6 +1788,11 @@ int sd_do_command(SDState *sd, SDRequest *req,
 return 0;
 }
 
+if (sd->state == sd_inactive_state) {
+rtype = sd_illegal;
+goto send_response;
+}
+
 if (sd_req_crc_validate(req)) {
 sd->card_status |= COM_CRC_ERROR;
 rtype = sd_illegal;
-- 
2.41.0




[PATCH v2 09/12] hw/sd/sdcard: Assign SDCardStates enum values

2024-06-24 Thread Philippe Mathieu-Daudé
SDCardStates enum values are specified, so assign them
accordingly. This will be useful later when we add
states from later specs, which might not be contiguous.

See CURRENT_STATE bits in section 4.10.1 "Card Status".

Signed-off-by: Philippe Mathieu-Daudé 
---
 hw/sd/sd.c | 20 ++--
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/hw/sd/sd.c b/hw/sd/sd.c
index 8816bd6671..36955189e8 100644
--- a/hw/sd/sd.c
+++ b/hw/sd/sd.c
@@ -76,16 +76,16 @@ enum SDCardModes {
 };
 
 enum SDCardStates {
-sd_inactive_state = -1,
-sd_idle_state = 0,
-sd_ready_state,
-sd_identification_state,
-sd_standby_state,
-sd_transfer_state,
-sd_sendingdata_state,
-sd_receivingdata_state,
-sd_programming_state,
-sd_disconnect_state,
+sd_inactive_state   = -1,
+sd_idle_state   = 0,
+sd_ready_state  = 1,
+sd_identification_state = 2,
+sd_standby_state= 3,
+sd_transfer_state   = 4,
+sd_sendingdata_state= 5,
+sd_receivingdata_state  = 6,
+sd_programming_state= 7,
+sd_disconnect_state = 8,
 };
 
 typedef sd_rsp_type_t (*sd_cmd_handler)(SDState *sd, SDRequest req);
-- 
2.41.0




[PATCH v2 08/12] hw/sd/sdcard: Use READY_FOR_DATA definition instead of magic value

2024-06-24 Thread Philippe Mathieu-Daudé
Signed-off-by: Philippe Mathieu-Daudé 
---
 hw/sd/sd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/sd/sd.c b/hw/sd/sd.c
index 0742ba8b38..8816bd6671 100644
--- a/hw/sd/sd.c
+++ b/hw/sd/sd.c
@@ -557,7 +557,7 @@ FIELD(CSR, OUT_OF_RANGE,   31,  1)
 
 static void sd_set_cardstatus(SDState *sd)
 {
-sd->card_status = 0x0100;
+sd->card_status = READY_FOR_DATA;
 }
 
 static void sd_set_sdstatus(SDState *sd)
-- 
2.41.0




[PATCH v2 06/12] hw/sd/sdcard: Send WRITE_PROT bits MSB first (CMD30)

2024-06-24 Thread Philippe Mathieu-Daudé
Per sections 3.6.1 (SD Bus Protocol) and 7.3.2 (Responses):

  In the CMD line the Most Significant Bit is transmitted first.

Use the stl_be_p() helper to store the value in big-endian.

Signed-off-by: Philippe Mathieu-Daudé 
Tested-by: Cédric Le Goater 
---
RFC because I'm surprised this has gone unnoticed for 17 years
(commit a1bb27b1e9 "initial SD card emulation", April 2007).

Cc: Peter Maydell 
---
 hw/sd/sd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/sd/sd.c b/hw/sd/sd.c
index 0f8440efcc..b604b8e71f 100644
--- a/hw/sd/sd.c
+++ b/hw/sd/sd.c
@@ -1498,7 +1498,7 @@ static sd_rsp_type_t sd_normal_command(SDState *sd, 
SDRequest req)
 }
 
 sd->state = sd_sendingdata_state;
-*(uint32_t *) sd->data = sd_wpbits(sd, req.arg);
+stl_be_p(sd->data, sd_wpbits(sd, req.arg));
 sd->data_start = addr;
 sd->data_offset = 0;
 return sd_r1;
-- 
2.41.0




[PATCH v2 04/12] hw/sd/sdcard: Trace block offset in READ/WRITE data accesses

2024-06-24 Thread Philippe Mathieu-Daudé
Useful to detect out-of-bounds accesses.

Signed-off-by: Philippe Mathieu-Daudé 
Tested-by: Cédric Le Goater 
---
 hw/sd/sd.c | 4 ++--
 hw/sd/trace-events | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/hw/sd/sd.c b/hw/sd/sd.c
index 14bfcc5d6b..e4587a0a37 100644
--- a/hw/sd/sd.c
+++ b/hw/sd/sd.c
@@ -1913,7 +1913,7 @@ void sd_write_byte(SDState *sd, uint8_t value)
 
 trace_sdcard_write_data(sd_proto(sd)->name,
 sd->last_cmd_name,
-sd->current_cmd, value);
+sd->current_cmd, sd->data_offset, value);
 switch (sd->current_cmd) {
 case 24:  /* CMD24:  WRITE_SINGLE_BLOCK */
 sd->data[sd->data_offset ++] = value;
@@ -2069,7 +2069,7 @@ uint8_t sd_read_byte(SDState *sd)
 
 trace_sdcard_read_data(sd_proto(sd)->name,
sd->last_cmd_name,
-   sd->current_cmd, io_len);
+   sd->current_cmd, sd->data_offset, io_len);
 switch (sd->current_cmd) {
 case 6:  /* CMD6:   SWITCH_FUNCTION */
 ret = sd->data[sd->data_offset ++];
diff --git a/hw/sd/trace-events b/hw/sd/trace-events
index 724365efc3..0eee98a646 100644
--- a/hw/sd/trace-events
+++ b/hw/sd/trace-events
@@ -52,8 +52,8 @@ sdcard_lock(void) ""
 sdcard_unlock(void) ""
 sdcard_read_block(uint64_t addr, uint32_t len) "addr 0x%" PRIx64 " size 0x%x"
 sdcard_write_block(uint64_t addr, uint32_t len) "addr 0x%" PRIx64 " size 0x%x"
-sdcard_write_data(const char *proto, const char *cmd_desc, uint8_t cmd, 
uint8_t value) "%s %20s/ CMD%02d value 0x%02x"
-sdcard_read_data(const char *proto, const char *cmd_desc, uint8_t cmd, 
uint32_t length) "%s %20s/ CMD%02d len %" PRIu32
+sdcard_write_data(const char *proto, const char *cmd_desc, uint8_t cmd, 
uint32_t offset, uint8_t value) "%s %20s/ CMD%02d ofs %"PRIu32" value 0x%02x"
+sdcard_read_data(const char *proto, const char *cmd_desc, uint8_t cmd, 
uint32_t offset, uint32_t length) "%s %20s/ CMD%02d ofs %"PRIu32" len %" PRIu32
 sdcard_set_voltage(uint16_t millivolts) "%u mV"
 
 # pxa2xx_mmci.c
-- 
2.41.0




[PATCH v2 12/12] hw/sd/sdcard: Add direct reference to SDProto in SDState

2024-06-24 Thread Philippe Mathieu-Daudé
Keep a direct reference to SDProto in SDState, then
remove the now-unnecessary sd_proto() helper.

Signed-off-by: Philippe Mathieu-Daudé 
---
 hw/sd/sd.c | 37 +
 1 file changed, 17 insertions(+), 20 deletions(-)

diff --git a/hw/sd/sd.c b/hw/sd/sd.c
index 3b885ba8a0..6685fba4bb 100644
--- a/hw/sd/sd.c
+++ b/hw/sd/sd.c
@@ -117,6 +117,8 @@ struct SDState {
 uint8_t spec_version;
 BlockBackend *blk;
 
+const SDProto *proto;
+
 /* Runtime changeables */
 
 uint32_t mode;/* current card mode, one of SDCardModes */
@@ -155,18 +157,11 @@ struct SDState {
 
 static void sd_realize(DeviceState *dev, Error **errp);
 
-static const struct SDProto *sd_proto(SDState *sd)
-{
-SDCardClass *sc = SD_CARD_GET_CLASS(sd);
-
-return sc->proto;
-}
-
 static const SDProto sd_proto_spi;
 
 static bool sd_is_spi(SDState *sd)
 {
-return sd_proto(sd) == &sd_proto_spi;
+return sd->proto == &sd_proto_spi;
 }
 
 static const char *sd_version_str(enum SDPhySpecificationVersion version)
@@ -1035,7 +1030,7 @@ static bool address_in_range(SDState *sd, const char 
*desc,
 static sd_rsp_type_t sd_invalid_state_for_cmd(SDState *sd, SDRequest req)
 {
 qemu_log_mask(LOG_GUEST_ERROR, "%s: CMD%i in a wrong state: %s (spec 
%s)\n",
-  sd_proto(sd)->name, req.cmd, sd_state_name(sd->state),
+  sd->proto->name, req.cmd, sd_state_name(sd->state),
   sd_version_str(sd->spec_version));
 
 return sd_illegal;
@@ -1044,7 +1039,7 @@ static sd_rsp_type_t sd_invalid_state_for_cmd(SDState 
*sd, SDRequest req)
 static sd_rsp_type_t sd_invalid_mode_for_cmd(SDState *sd, SDRequest req)
 {
 qemu_log_mask(LOG_GUEST_ERROR, "%s: CMD%i in a wrong mode: %s (spec %s)\n",
-  sd_proto(sd)->name, req.cmd, sd_mode_name(sd->mode),
+  sd->proto->name, req.cmd, sd_mode_name(sd->mode),
   sd_version_str(sd->spec_version));
 
 return sd_illegal;
@@ -1053,7 +1048,7 @@ static sd_rsp_type_t sd_invalid_mode_for_cmd(SDState *sd, 
SDRequest req)
 static sd_rsp_type_t sd_cmd_illegal(SDState *sd, SDRequest req)
 {
 qemu_log_mask(LOG_GUEST_ERROR, "%s: Unknown CMD%i for spec %s\n",
-  sd_proto(sd)->name, req.cmd,
+  sd->proto->name, req.cmd,
   sd_version_str(sd->spec_version));
 
 return sd_illegal;
@@ -1064,7 +1059,7 @@ __attribute__((unused))
 static sd_rsp_type_t sd_cmd_unimplemented(SDState *sd, SDRequest req)
 {
 qemu_log_mask(LOG_UNIMP, "%s: CMD%i not implemented\n",
-  sd_proto(sd)->name, req.cmd);
+  sd->proto->name, req.cmd);
 
 return sd_illegal;
 }
@@ -1157,7 +1152,7 @@ static sd_rsp_type_t sd_normal_command(SDState *sd, 
SDRequest req)
  * However there is no ACMD55, so we want to trace this particular case.
  */
 if (req.cmd != 55 || sd->expecting_acmd) {
-trace_sdcard_normal_command(sd_proto(sd)->name,
+trace_sdcard_normal_command(sd->proto->name,
 sd->last_cmd_name, req.cmd,
 req.arg, sd_state_name(sd->state));
 }
@@ -1176,8 +1171,8 @@ static sd_rsp_type_t sd_normal_command(SDState *sd, 
SDRequest req)
 return sd_illegal;
 }
 
-if (sd_proto(sd)->cmd[req.cmd]) {
-return sd_proto(sd)->cmd[req.cmd](sd, req);
+if (sd->proto->cmd[req.cmd]) {
+return sd->proto->cmd[req.cmd](sd, req);
 }
 
 switch (req.cmd) {
@@ -1623,12 +1618,12 @@ static sd_rsp_type_t sd_app_command(SDState *sd,
 SDRequest req)
 {
 sd->last_cmd_name = sd_acmd_name(req.cmd);
-trace_sdcard_app_command(sd_proto(sd)->name, sd->last_cmd_name,
+trace_sdcard_app_command(sd->proto->name, sd->last_cmd_name,
  req.cmd, req.arg, sd_state_name(sd->state));
 sd->card_status |= APP_CMD;
 
-if (sd_proto(sd)->acmd[req.cmd]) {
-return sd_proto(sd)->acmd[req.cmd](sd, req);
+if (sd->proto->acmd[req.cmd]) {
+return sd->proto->acmd[req.cmd](sd, req);
 }
 
 switch (req.cmd) {
@@ -1919,7 +1914,7 @@ void sd_write_byte(SDState *sd, uint8_t value)
 if (sd->card_status & (ADDRESS_ERROR | WP_VIOLATION))
 return;
 
-trace_sdcard_write_data(sd_proto(sd)->name,
+trace_sdcard_write_data(sd->proto->name,
 sd->last_cmd_name,
 sd->current_cmd, sd->data_offset, value);
 switch (sd->current_cmd) {
@@ -2074,7 +2069,7 @@ uint8_t sd_read_byte(SDState *sd)
 
 io_len = (sd->ocr & (1 << 30)) ? 512 : sd->blk_len;
 
-trace_sdcard_read_data(sd_proto(sd)->name,
+trace_sdcard_read_data(sd->proto->name,
sd->last_cmd_name,
sd->current_cmd, sd->data_offset, io_len);
 switch (sd->current_cmd) {
@@ -2218,7 +2213,9 @@ static const SDProto sd_proto_sd = {
 static void sd_instance_init(Object *obj)
 {
 

[PATCH v2 07/12] hw/sd/sdcard: Send NUM_WR_BLOCKS bits MSB first (ACMD22)

2024-06-24 Thread Philippe Mathieu-Daudé
Per sections 3.6.1 (SD Bus Protocol), 4.3.4 "Data Write"
and 7.3.2 (Responses):

  In the CMD line the Most Significant Bit is transmitted first.

Use the stl_be_p() helper to store the value in big-endian.

Signed-off-by: Philippe Mathieu-Daudé 
Tested-by: Cédric Le Goater 
---
RFC because I'm surprised this has gone unnoticed for 17 years
(commit a1bb27b1e9 "initial SD card emulation", April 2007).

Cc: Peter Maydell 
---
 hw/sd/sd.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/hw/sd/sd.c b/hw/sd/sd.c
index b604b8e71f..0742ba8b38 100644
--- a/hw/sd/sd.c
+++ b/hw/sd/sd.c
@@ -1659,8 +1659,7 @@ static sd_rsp_type_t sd_app_command(SDState *sd,
 case 22:  /* ACMD22: SEND_NUM_WR_BLOCKS */
 switch (sd->state) {
 case sd_transfer_state:
-*(uint32_t *) sd->data = sd->blk_written;
-
+stl_be_p(sd->data, sd->blk_written);
 sd->state = sd_sendingdata_state;
 sd->data_start = 0;
 sd->data_offset = 0;
-- 
2.41.0




[PATCH v2 01/12] tests/qtest: Disable npcm7xx_sdhci tests using hardcoded RCA

2024-06-24 Thread Philippe Mathieu-Daudé
Disable the tests using the hardcoded 0x4567 RCA; otherwise,
when using a random RCA, we get:

  ERROR:../../tests/qtest/npcm7xx_sdhci-test.c:69:write_sdread: assertion 
failed: (ret == len)
  not ok /arm/npcm7xx_sdhci/read_sd - 
ERROR:../../tests/qtest/npcm7xx_sdhci-test.c:69:write_sdread: assertion failed: 
(ret == len)
  Bail out!

See 
https://lore.kernel.org/qemu-devel/37f83be9-deb5-42a1-b704-14984351d...@linaro.org/

Signed-off-by: Philippe Mathieu-Daudé 
---
Cc: Hao Wu 
Cc: Shengtan Mao 
Cc: Tyrone Ting 
---
 tests/qtest/npcm7xx_sdhci-test.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/tests/qtest/npcm7xx_sdhci-test.c b/tests/qtest/npcm7xx_sdhci-test.c
index 5d68540e52..6a42b142ad 100644
--- a/tests/qtest/npcm7xx_sdhci-test.c
+++ b/tests/qtest/npcm7xx_sdhci-test.c
@@ -44,6 +44,7 @@ static QTestState *setup_sd_card(void)
 sdhci_cmd_regs(qts, NPCM7XX_MMC_BA, 0, 0, 0x4120, 0, (41 << 8));
 sdhci_cmd_regs(qts, NPCM7XX_MMC_BA, 0, 0, 0, 0, SDHC_ALL_SEND_CID);
 sdhci_cmd_regs(qts, NPCM7XX_MMC_BA, 0, 0, 0, 0, SDHC_SEND_RELATIVE_ADDR);
+g_test_skip("hardcoded 0x4567 card address");
 sdhci_cmd_regs(qts, NPCM7XX_MMC_BA, 0, 0, 0x4567, 0,
SDHC_SELECT_DESELECT_CARD);
 
@@ -76,6 +77,9 @@ static void test_read_sd(void)
 {
 QTestState *qts = setup_sd_card();
 
+g_test_skip("hardcoded 0x4567 card address used in setup_sd_card()");
+return;
+
 write_sdread(qts, "hello world");
 write_sdread(qts, "goodbye");
 
@@ -108,6 +112,9 @@ static void test_write_sd(void)
 {
 QTestState *qts = setup_sd_card();
 
+g_test_skip("hardcoded 0x4567 card address used in setup_sd_card()");
+return;
+
 sdwrite_read(qts, "hello world");
 sdwrite_read(qts, "goodbye");
 
-- 
2.41.0




[PATCH v2 11/12] hw/sd/sdcard: Restrict SWITCH_FUNCTION to sd_transfer_state (CMD6)

2024-06-24 Thread Philippe Mathieu-Daudé
SWITCH_FUNCTION is only allowed in TRANSFER state
(see 4.8 "Card State Transition Table").

Signed-off-by: Philippe Mathieu-Daudé 
---
 hw/sd/sd.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/hw/sd/sd.c b/hw/sd/sd.c
index fce99d655d..3b885ba8a0 100644
--- a/hw/sd/sd.c
+++ b/hw/sd/sd.c
@@ -1196,6 +1196,10 @@ static sd_rsp_type_t sd_normal_command(SDState *sd, 
SDRequest req)
 if (sd->mode != sd_data_transfer_mode) {
 return sd_invalid_mode_for_cmd(sd, req);
 }
+if (sd->state != sd_transfer_state) {
+return sd_invalid_state_for_cmd(sd, req);
+}
+
 sd_function_switch(sd, req.arg);
 sd->state = sd_sendingdata_state;
 sd->data_start = 0;
-- 
2.41.0




[PATCH v2 03/12] hw/sd/sdcard: Track last command used to help logging

2024-06-24 Thread Philippe Mathieu-Daudé
The command is selected on the I/O lines, and further
processing might be done on the DAT lines via the
sd_read_byte() and sd_write_byte() handlers. Since
these methods can't distinguish between normal and APP
commands, keep the name of the current command in
the SDState and use it in the DAT handlers. This
fixes a bug where all normal commands were displayed
as APP commands.

Fixes: 2ed61fb57b ("sdcard: Display command name when tracing CMD/ACMD")
Signed-off-by: Philippe Mathieu-Daudé 
Tested-by: Cédric Le Goater 
---
 hw/sd/sd.c | 12 
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/hw/sd/sd.c b/hw/sd/sd.c
index ec58c5e2a6..14bfcc5d6b 100644
--- a/hw/sd/sd.c
+++ b/hw/sd/sd.c
@@ -134,6 +134,7 @@ struct SDState {
 uint32_t pwd_len;
 uint8_t function_group[6];
 uint8_t current_cmd;
+const char *last_cmd_name;
 /* True if we will handle the next command as an ACMD. Note that this does
  * *not* track the APP_CMD status bit!
  */
@@ -1150,12 +1151,13 @@ static sd_rsp_type_t sd_normal_command(SDState *sd, 
SDRequest req)
 uint16_t rca;
 uint64_t addr;
 
+sd->last_cmd_name = sd_cmd_name(req.cmd);
 /* CMD55 precedes an ACMD, so we are not interested in tracing it.
  * However there is no ACMD55, so we want to trace this particular case.
  */
 if (req.cmd != 55 || sd->expecting_acmd) {
 trace_sdcard_normal_command(sd_proto(sd)->name,
-sd_cmd_name(req.cmd), req.cmd,
+sd->last_cmd_name, req.cmd,
 req.arg, sd_state_name(sd->state));
 }
 
@@ -1616,7 +1618,8 @@ static sd_rsp_type_t sd_normal_command(SDState *sd, 
SDRequest req)
 static sd_rsp_type_t sd_app_command(SDState *sd,
 SDRequest req)
 {
-trace_sdcard_app_command(sd_proto(sd)->name, sd_acmd_name(req.cmd),
+sd->last_cmd_name = sd_acmd_name(req.cmd);
+trace_sdcard_app_command(sd_proto(sd)->name, sd->last_cmd_name,
  req.cmd, req.arg, sd_state_name(sd->state));
 sd->card_status |= APP_CMD;
 
@@ -1909,7 +1912,7 @@ void sd_write_byte(SDState *sd, uint8_t value)
 return;
 
 trace_sdcard_write_data(sd_proto(sd)->name,
-sd_acmd_name(sd->current_cmd),
+sd->last_cmd_name,
 sd->current_cmd, value);
 switch (sd->current_cmd) {
 case 24:  /* CMD24:  WRITE_SINGLE_BLOCK */
@@ -2065,7 +2068,7 @@ uint8_t sd_read_byte(SDState *sd)
 io_len = (sd->ocr & (1 << 30)) ? 512 : sd->blk_len;
 
 trace_sdcard_read_data(sd_proto(sd)->name,
-   sd_acmd_name(sd->current_cmd),
+   sd->last_cmd_name,
sd->current_cmd, io_len);
 switch (sd->current_cmd) {
 case 6:  /* CMD6:   SWITCH_FUNCTION */
@@ -2210,6 +2213,7 @@ static void sd_instance_init(Object *obj)
 {
 SDState *sd = SD_CARD(obj);
 
+sd->last_cmd_name = "UNSET";
 sd->enable = true;
 sd->ocr_power_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, sd_ocr_powerup, sd);
 }
-- 
2.41.0




[PATCH v2 02/12] hw/sd/sdcard: Generate random RCA value

2024-06-24 Thread Philippe Mathieu-Daudé
Rather than using the obscure 0x4567 magic value,
use a real random one.

Signed-off-by: Philippe Mathieu-Daudé 
Tested-by: Cédric Le Goater 
---
 hw/sd/sd.c | 8 ++--
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/hw/sd/sd.c b/hw/sd/sd.c
index a48010cfc1..ec58c5e2a6 100644
--- a/hw/sd/sd.c
+++ b/hw/sd/sd.c
@@ -46,6 +46,7 @@
 #include "qemu/error-report.h"
 #include "qemu/timer.h"
 #include "qemu/log.h"
+#include "qemu/guest-random.h"
 #include "qemu/module.h"
 #include "sdmmc-internal.h"
 #include "trace.h"
@@ -490,11 +491,6 @@ static void sd_set_csd(SDState *sd, uint64_t size)
 
 /* Relative Card Address register */
 
-static void sd_set_rca(SDState *sd)
-{
-sd->rca += 0x4567;
-}
-
 static uint16_t sd_req_get_rca(SDState *s, SDRequest req)
 {
 if (sd_cmd_type[req.cmd] == sd_ac || sd_cmd_type[req.cmd] == sd_adtc) {
@@ -1107,7 +1103,7 @@ static sd_rsp_type_t sd_cmd_SEND_RELATIVE_ADDR(SDState 
*sd, SDRequest req)
 case sd_identification_state:
 case sd_standby_state:
 sd->state = sd_standby_state;
-sd_set_rca(sd);
+qemu_guest_getrandom_nofail(&sd->rca, sizeof(sd->rca));
 return sd_r6;
 
 default:
-- 
2.41.0




[PATCH v2 05/12] hw/sd/sdcard: Do not store vendor data on block drive (CMD56)

2024-06-24 Thread Philippe Mathieu-Daudé
"General command" (GEN_CMD, CMD56) is described as:

  GEN_CMD is the same as the single block read or write
  commands (CMD24 or CMD17). The difference is that [...]
  the data block is not a memory payload data but has a
  vendor specific format and meaning.

Thus this block must not be written to the underlying storage
drive, where it would overwrite a data block. Keep it in a
dedicated 'vendor_data[]' array instead.

Signed-off-by: Philippe Mathieu-Daudé 
Tested-by: Cédric Le Goater 
---
RFC: Is it safe to reuse VMSTATE_UNUSED_V() (which happens
to be the same size)?

Cc: Peter Xu 
Cc: Fabiano Rosas 
---
 hw/sd/sd.c | 17 +
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/hw/sd/sd.c b/hw/sd/sd.c
index e4587a0a37..0f8440efcc 100644
--- a/hw/sd/sd.c
+++ b/hw/sd/sd.c
@@ -143,6 +143,8 @@ struct SDState {
 uint64_t data_start;
 uint32_t data_offset;
 uint8_t data[512];
+uint8_t vendor_data[512];
+
 qemu_irq readonly_cb;
 qemu_irq inserted_cb;
 QEMUTimer *ocr_power_timer;
@@ -647,6 +649,7 @@ static void sd_reset(DeviceState *dev)
 sd->wp_switch = sd->blk ? !blk_is_writable(sd->blk) : false;
 sd->wp_group_bits = sect;
 sd->wp_group_bmap = bitmap_new(sd->wp_group_bits);
+memset(sd->vendor_data, 0xec, sizeof(sd->vendor_data));
 memset(sd->function_group, 0, sizeof(sd->function_group));
 sd->erase_start = INVALID_ADDRESS;
 sd->erase_end = INVALID_ADDRESS;
@@ -762,7 +765,7 @@ static const VMStateDescription sd_vmstate = {
 VMSTATE_UINT64(data_start, SDState),
 VMSTATE_UINT32(data_offset, SDState),
 VMSTATE_UINT8_ARRAY(data, SDState, 512),
-VMSTATE_UNUSED_V(1, 512),
+VMSTATE_UINT8_ARRAY(vendor_data, SDState, 512),
 VMSTATE_BOOL(enable, SDState),
 VMSTATE_END_OF_LIST()
 },
@@ -2020,9 +2023,8 @@ void sd_write_byte(SDState *sd, uint8_t value)
 break;
 
 case 56:  /* CMD56:  GEN_CMD */
-sd->data[sd->data_offset ++] = value;
-if (sd->data_offset >= sd->blk_len) {
-APP_WRITE_BLOCK(sd->data_start, sd->data_offset);
+sd->vendor_data[sd->data_offset ++] = value;
+if (sd->data_offset >= sizeof(sd->vendor_data)) {
 sd->state = sd_transfer_state;
 }
 break;
@@ -2156,12 +2158,11 @@ uint8_t sd_read_byte(SDState *sd)
 break;
 
 case 56:  /* CMD56:  GEN_CMD */
-if (sd->data_offset == 0)
-APP_READ_BLOCK(sd->data_start, sd->blk_len);
-ret = sd->data[sd->data_offset ++];
+ret = sd->vendor_data[sd->data_offset ++];
 
-if (sd->data_offset >= sd->blk_len)
+if (sd->data_offset >= sizeof(sd->vendor_data)) {
 sd->state = sd_transfer_state;
+}
 break;
 
 default:
-- 
2.41.0




[PATCH v2 00/12] hw/sd/sdcard: Accumulation of cleanups and fixes

2024-06-24 Thread Philippe Mathieu-Daudé
Since v1:
- various patches merged, few more added

Various SD card cleanups and fixes accumulated over
the years. Several have been useful in integrating
eMMC support (which will come later).

Philippe Mathieu-Daudé (12):
  tests/qtest: Disable npcm7xx_sdhci tests using hardcoded RCA
  hw/sd/sdcard: Generate random RCA value
  hw/sd/sdcard: Track last command used to help logging
  hw/sd/sdcard: Trace block offset in READ/WRITE data accesses
  hw/sd/sdcard: Do not store vendor data on block drive (CMD56)
  hw/sd/sdcard: Send WRITE_PROT bits MSB first (CMD30)
  hw/sd/sdcard: Send NUM_WR_BLOCKS bits MSB first (ACMD22)
  hw/sd/sdcard: Use READY_FOR_DATA definition instead of magic value
  hw/sd/sdcard: Assign SDCardStates enum values
  hw/sd/sdcard: Simplify sd_inactive_state handling
  hw/sd/sdcard: Restrict SWITCH_FUNCTION to sd_transfer_state (CMD6)
  hw/sd/sdcard: Add direct reference to SDProto in SDState

 hw/sd/sd.c   | 119 ---
 tests/qtest/npcm7xx_sdhci-test.c |   7 ++
 hw/sd/trace-events   |   4 +-
 3 files changed, 70 insertions(+), 60 deletions(-)

-- 
2.41.0




[PATCH 07/13] target/arm: Convert BFDOT to decodetree

2024-06-24 Thread Richard Henderson
Signed-off-by: Richard Henderson 
---
 target/arm/tcg/a64.decode  |  2 ++
 target/arm/tcg/translate-a64.c | 20 +---
 2 files changed, 7 insertions(+), 15 deletions(-)

diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode
index 8a0251f83c..6819fd2587 100644
--- a/target/arm/tcg/a64.decode
+++ b/target/arm/tcg/a64.decode
@@ -950,6 +950,7 @@ SQRDMLSH_v  0.10 1110 ..0 . 10001 1 . . 
@qrrr_e
 SDOT_v  0.00 1110 100 . 10010 1 . . @qrrr_s
 UDOT_v  0.10 1110 100 . 10010 1 . . @qrrr_s
 USDOT_v 0.00 1110 100 . 10011 1 . . @qrrr_s
+BFDOT_v 0.10 1110 010 . 1 1 . . @qrrr_s
 
 ### Advanced SIMD scalar x indexed element
 
@@ -1029,6 +1030,7 @@ SDOT_vi 0.00  10 ..  1110 . 0 . . 
  @qrrx_s
 UDOT_vi 0.10  10 ..  1110 . 0 . .   @qrrx_s
 SUDOT_vi0.00  00 ..   . 0 . .   @qrrx_s
 USDOT_vi0.00  10 ..   . 0 . .   @qrrx_s
+BFDOT_vi0.00  01 ..   . 0 . .   @qrrx_s
 
 # Floating-point conditional select
 
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index 9a658ca876..0f44cd5aee 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -5604,6 +5604,7 @@ static bool do_dot_vector(DisasContext *s, arg_qrrr_e *a,
 TRANS_FEAT(SDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_sdot_b)
 TRANS_FEAT(UDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_udot_b)
 TRANS_FEAT(USDOT_v, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_usdot_b)
+TRANS_FEAT(BFDOT_v, aa64_bf16, do_dot_vector, a, gen_helper_gvec_bfdot)
 
 /*
  * Advanced SIMD scalar/vector x indexed element
@@ -5942,6 +5943,8 @@ TRANS_FEAT(SUDOT_vi, aa64_i8mm, do_dot_vector_idx, a,
gen_helper_gvec_sudot_idx_b)
 TRANS_FEAT(USDOT_vi, aa64_i8mm, do_dot_vector_idx, a,
gen_helper_gvec_usdot_idx_b)
+TRANS_FEAT(BFDOT_vi, aa64_bf16, do_dot_vector_idx, a,
+   gen_helper_gvec_bfdot_idx)
 
 /*
  * Advanced SIMD scalar pairwise
@@ -10951,11 +10954,11 @@ static void 
disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn)
 break;
 case 0x1f:
 switch (size) {
-case 1: /* BFDOT */
 case 3: /* BFMLAL{B,T} */
 feature = dc_isar_feature(aa64_bf16, s);
 break;
 default:
+case 1: /* BFDOT */
 unallocated_encoding(s);
 return;
 }
@@ -11036,9 +11039,6 @@ static void 
disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn)
 return;
 case 0xf:
 switch (size) {
-case 1: /* BFDOT */
-gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, 
gen_helper_gvec_bfdot);
-break;
 case 3: /* BFMLAL{B,T} */
 gen_gvec_op4_fpst(s, 1, rd, rn, rm, rd, false, is_q,
   gen_helper_gvec_bfmlal);
@@ -12053,13 +12053,6 @@ static void disas_simd_indexed(DisasContext *s, 
uint32_t insn)
 break;
 case 0x0f:
 switch (size) {
-case 1: /* BFDOT */
-if (is_scalar || !dc_isar_feature(aa64_bf16, s)) {
-unallocated_encoding(s);
-return;
-}
-size = MO_32;
-break;
 case 3: /* BFMLAL{B,T} */
 if (is_scalar || !dc_isar_feature(aa64_bf16, s)) {
 unallocated_encoding(s);
@@ -12070,6 +12063,7 @@ static void disas_simd_indexed(DisasContext *s, 
uint32_t insn)
 break;
 default:
 case 0: /* SUDOT */
+case 1: /* BFDOT */
 case 2: /* USDOT */
 unallocated_encoding(s);
 return;
@@ -12179,10 +12173,6 @@ static void disas_simd_indexed(DisasContext *s, 
uint32_t insn)
 switch (16 * u + opcode) {
 case 0x0f:
 switch (extract32(insn, 22, 2)) {
-case 1: /* BFDOT */
-gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index,
- gen_helper_gvec_bfdot_idx);
-return;
 case 3: /* BFMLAL{B,T} */
 gen_gvec_op4_fpst(s, 1, rd, rn, rm, rd, 0, (index << 1) | is_q,
   gen_helper_gvec_bfmlal_idx);
-- 
2.34.1




[PATCH 06/13] target/arm: Convert SUDOT, USDOT to decodetree

2024-06-24 Thread Richard Henderson
Signed-off-by: Richard Henderson 
---
 target/arm/tcg/a64.decode  |  3 +++
 target/arm/tcg/translate-a64.c | 35 --
 2 files changed, 11 insertions(+), 27 deletions(-)

diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode
index 7411d4ba97..8a0251f83c 100644
--- a/target/arm/tcg/a64.decode
+++ b/target/arm/tcg/a64.decode
@@ -949,6 +949,7 @@ SQRDMLSH_v  0.10 1110 ..0 . 10001 1 . . 
@qrrr_e
 
 SDOT_v  0.00 1110 100 . 10010 1 . . @qrrr_s
 UDOT_v  0.10 1110 100 . 10010 1 . . @qrrr_s
+USDOT_v 0.00 1110 100 . 10011 1 . . @qrrr_s
 
 ### Advanced SIMD scalar x indexed element
 
@@ -1026,6 +1027,8 @@ SQRDMLSH_vi 0.10  10 ..   . 0 . . 
  @qrrx_s
 
 SDOT_vi 0.00  10 ..  1110 . 0 . .   @qrrx_s
 UDOT_vi 0.10  10 ..  1110 . 0 . .   @qrrx_s
+SUDOT_vi0.00  00 ..   . 0 . .   @qrrx_s
+USDOT_vi0.00  10 ..   . 0 . .   @qrrx_s
 
 # Floating-point conditional select
 
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index f2e7d8d75c..9a658ca876 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -5603,6 +5603,7 @@ static bool do_dot_vector(DisasContext *s, arg_qrrr_e *a,
 
 TRANS_FEAT(SDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_sdot_b)
 TRANS_FEAT(UDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_udot_b)
+TRANS_FEAT(USDOT_v, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_usdot_b)
 
 /*
  * Advanced SIMD scalar/vector x indexed element
@@ -5937,6 +5938,10 @@ static bool do_dot_vector_idx(DisasContext *s, 
arg_qrrx_e *a,
 
 TRANS_FEAT(SDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_sdot_idx_b)
 TRANS_FEAT(UDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_udot_idx_b)
+TRANS_FEAT(SUDOT_vi, aa64_i8mm, do_dot_vector_idx, a,
+   gen_helper_gvec_sudot_idx_b)
+TRANS_FEAT(USDOT_vi, aa64_i8mm, do_dot_vector_idx, a,
+   gen_helper_gvec_usdot_idx_b)
 
 /*
  * Advanced SIMD scalar pairwise
@@ -10914,13 +10919,6 @@ static void 
disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn)
 int rot;
 
 switch (u * 16 + opcode) {
-case 0x03: /* USDOT */
-if (size != MO_32) {
-unallocated_encoding(s);
-return;
-}
-feature = dc_isar_feature(aa64_i8mm, s);
-break;
 case 0x04: /* SMMLA */
 case 0x14: /* UMMLA */
 case 0x05: /* USMMLA */
@@ -10964,6 +10962,7 @@ static void 
disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn)
 break;
 default:
 case 0x02: /* SDOT (vector) */
+case 0x03: /* USDOT */
 case 0x10: /* SQRDMLAH (vector) */
 case 0x11: /* SQRDMLSH (vector) */
 case 0x12: /* UDOT (vector) */
@@ -10979,10 +10978,6 @@ static void 
disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn)
 }
 
 switch (opcode) {
-case 0x3: /* USDOT */
-gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_usdot_b);
-return;
-
 case 0x04: /* SMMLA, UMMLA */
 gen_gvec_op4_ool(s, 1, rd, rn, rm, rd, 0,
  u ? gen_helper_gvec_ummla_b
@@ -12058,14 +12053,6 @@ static void disas_simd_indexed(DisasContext *s, 
uint32_t insn)
 break;
 case 0x0f:
 switch (size) {
-case 0: /* SUDOT */
-case 2: /* USDOT */
-if (is_scalar || !dc_isar_feature(aa64_i8mm, s)) {
-unallocated_encoding(s);
-return;
-}
-size = MO_32;
-break;
 case 1: /* BFDOT */
 if (is_scalar || !dc_isar_feature(aa64_bf16, s)) {
 unallocated_encoding(s);
@@ -12082,6 +12069,8 @@ static void disas_simd_indexed(DisasContext *s, 
uint32_t insn)
 size = MO_16;
 break;
 default:
+case 0: /* SUDOT */
+case 2: /* USDOT */
 unallocated_encoding(s);
 return;
 }
@@ -12190,18 +12179,10 @@ static void disas_simd_indexed(DisasContext *s, 
uint32_t insn)
 switch (16 * u + opcode) {
 case 0x0f:
 switch (extract32(insn, 22, 2)) {
-case 0: /* SUDOT */
-gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index,
- gen_helper_gvec_sudot_idx_b);
-return;
 case 1: /* BFDOT */
 gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index,
  gen_helper_gvec_bfdot_idx);
 return;
-case 2: /* USDOT */
-gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index,
- gen_helper_gvec_usdot_idx_b);
-return;
 case 3: /* BFMLAL{B,T} */
 gen_gvec_op4_fpst(s, 1, rd, rn, rm, rd, 0, (index << 1) | is_q,
   gen_helper_gvec_bfmlal_idx);
-- 
2.34.1




Re: [PATCH V13 1/8] accel/kvm: Extract common KVM vCPU {creation,parking} code

2024-06-24 Thread Harsh Prateek Bora

+qemu-devel, qemu-ppc

Ping!

On 6/17/24 15:18, Harsh Prateek Bora wrote:


+ MST, Igor - to help with early review/merge. TIA.

On 6/14/24 16:06, Salil Mehta wrote:

Hello


  From: Harsh Prateek Bora 
  Sent: Friday, June 14, 2024 6:24 AM
  Hi Paolo, Nick,
  Can this patch 1/8 be merged earlier provided we have got 
sufficient R-bys

  for it and the review of entire series may take a longer time?
  We have some ppc64 patches based on it, hence the ask.
  Hi Salil,
  I am hoping we are not expecting anymore changes to this patch, please
  confirm.



I do not expect any change. I had requested Michael to merge the complete
series as it is stranding other users. He then requested Igor to take 
a final look but
he has not reverted yet. I'll remind Michael again. BTW, can you reply 
to below
patch explicitly indicating your interest in the series so that MST 
knows who else

are the stake holders here

https://lore.kernel.org/qemu-devel/20240605160327.3c71f...@imammedo.users.ipa.redhat.com/


Hi Paolo,

A request, would it be possible to skim through this series from KVM 
perspective?
(although nothing has changed which will affect the KVM and this is 
architecture

agnostic patch-set)

Many thanks!

Best
Salil.



  regards,
  Harsh
  On 6/7/24 17:26, Salil Mehta wrote:
  > KVM vCPU creation is done once during the vCPU realization when Qemu
  > vCPU thread is spawned. This is common to all the architectures 
as of now.

  >
  > Hot-unplug of vCPU results in destruction of the vCPU object in QOM
  > but the corresponding KVM vCPU object in the Host KVM is not 
destroyed
  > as KVM doesn't support vCPU removal. Therefore, its 
representative KVM

  > vCPU object/context in Qemu is parked.
  >
  > Refactor architecture common logic so that some APIs could be reused
  > by vCPU Hotplug code of some architectures likes ARM, Loongson etc.
  > Update new/old APIs with trace events. No functional change is 
intended

  here.
  >
  > Signed-off-by: Salil Mehta 
  > Reviewed-by: Gavin Shan 
  > Tested-by: Vishnu Pajjuri 
  > Reviewed-by: Jonathan Cameron 
  > Tested-by: Xianglai Li 
  > Tested-by: Miguel Luis 
  > Reviewed-by: Shaoqin Huang 
  > Reviewed-by: Vishnu Pajjuri 
  > Reviewed-by: Nicholas Piggin 
  > Tested-by: Zhao Liu 
  > Reviewed-by: Zhao Liu 
  > Reviewed-by: Harsh Prateek Bora 
  > ---
  >   accel/kvm/kvm-all.c    | 95 


  --
  >   accel/kvm/kvm-cpus.h   |  1 -
  >   accel/kvm/trace-events |  5 ++-
  >   include/sysemu/kvm.h   | 25 +++
  >   4 files changed, 92 insertions(+), 34 deletions(-)
  >
  > diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index
  > c0be9f5eed..8f9128bb92 100644
  > --- a/accel/kvm/kvm-all.c
  > +++ b/accel/kvm/kvm-all.c
  > @@ -340,14 +340,71 @@ err:
  >   return ret;
  >   }
  >
  > +void kvm_park_vcpu(CPUState *cpu)
  > +{
  > +    struct KVMParkedVcpu *vcpu;
  > +
  > +    trace_kvm_park_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
  > +
  > +    vcpu = g_malloc0(sizeof(*vcpu));
  > +    vcpu->vcpu_id = kvm_arch_vcpu_id(cpu);
  > +    vcpu->kvm_fd = cpu->kvm_fd;
  > +    QLIST_INSERT_HEAD(_state->kvm_parked_vcpus, vcpu, node); }
  > +
  > +int kvm_unpark_vcpu(KVMState *s, unsigned long vcpu_id) {
  > +    struct KVMParkedVcpu *cpu;
  > +    int kvm_fd = -ENOENT;
  > +
  > +    QLIST_FOREACH(cpu, >kvm_parked_vcpus, node) {
  > +    if (cpu->vcpu_id == vcpu_id) {
  > +    QLIST_REMOVE(cpu, node);
  > +    kvm_fd = cpu->kvm_fd;
  > +    g_free(cpu);
  > +    }
  > +    }
  > +
  > +    trace_kvm_unpark_vcpu(vcpu_id, kvm_fd > 0 ? "unparked" : "not
  > + found parked");
  > +
  > +    return kvm_fd;
  > +}
  > +
  > +int kvm_create_vcpu(CPUState *cpu)
  > +{
  > +    unsigned long vcpu_id = kvm_arch_vcpu_id(cpu);
  > +    KVMState *s = kvm_state;
  > +    int kvm_fd;
  > +
  > +    /* check if the KVM vCPU already exist but is parked */
  > +    kvm_fd = kvm_unpark_vcpu(s, vcpu_id);
  > +    if (kvm_fd < 0) {
  > +    /* vCPU not parked: create a new KVM vCPU */
  > +    kvm_fd = kvm_vm_ioctl(s, KVM_CREATE_VCPU, vcpu_id);
  > +    if (kvm_fd < 0) {
  > +    error_report("KVM_CREATE_VCPU IOCTL failed for vCPU 
%lu",

  vcpu_id);
  > +    return kvm_fd;
  > +    }
  > +    }
  > +
  > +    cpu->kvm_fd = kvm_fd;
  > +    cpu->kvm_state = s;
  > +    cpu->vcpu_dirty = true;
  > +    cpu->dirty_pages = 0;
  > +    cpu->throttle_us_per_full = 0;
  > +
  > +    trace_kvm_create_vcpu(cpu->cpu_index, vcpu_id, kvm_fd);
  > +
  > +    return 0;
  > +}
  > +
  >   static int do_kvm_destroy_vcpu(CPUState *cpu)
  >   {
  >   KVMState *s = kvm_state;
  >   long mmap_size;
  > -    struct KVMParkedVcpu *vcpu = NULL;
  >   int ret = 0;
  >
  > -    trace_kvm_destroy_vcpu();
  > +    trace_kvm_destroy_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
  >
  >   ret = kvm_arch_destroy_vcpu(cpu);
  >   if (ret < 0) {
  > @@ -373,10 +430,7 @@ static int 

[PATCH 12/13] target/arm: Convert FCMLA to decodetree

2024-06-24 Thread Richard Henderson
Signed-off-by: Richard Henderson 
---
 target/arm/tcg/a64.decode  |   5 +
 target/arm/tcg/translate-a64.c | 241 ++---
 2 files changed, 76 insertions(+), 170 deletions(-)

diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode
index f330919851..4b2a6ba302 100644
--- a/target/arm/tcg/a64.decode
+++ b/target/arm/tcg/a64.decode
@@ -960,6 +960,8 @@ USMMLA  0100 1110 100 . 10101 1 . . 
@rrr_q1e0
 FCADD_900.10 1110 ..0 . 11100 1 . . @qrrr_e
 FCADD_270   0.10 1110 ..0 . 0 1 . . @qrrr_e
 
+FCMLA_v 0 q:1 10 1110 esz:2 0 rm:5 110 rot:2 1 rn:5 rd:5
+
 ### Advanced SIMD scalar x indexed element
 
 FMUL_si 0101  00 ..  1001 . 0 . .   @rrx_h
@@ -1041,6 +1043,9 @@ USDOT_vi0.00  10 ..   . 0 . . 
  @qrrx_s
 BFDOT_vi0.00  01 ..   . 0 . .   @qrrx_s
 BFMLAL_vi   0.00  11 ..   . 0 . .   @qrrx_h
 
+FCMLA_vi0 q:1 10  10 . rm:5 0 rot:2 1 . 0 rn:5 rd:5 esz=1 idx=%hl
+FCMLA_vi0 q:1 10  01 0 rm:5 0 rot:2 1 idx:1 0 rn:5 rd:5 esz=2
+
 # Floating-point conditional select
 
 FCSEL   0001 1110 .. 1 rm:5 cond:4 11 rn:5 rd:5 esz=%esz_hsd
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index a1b338263f..0a54a9ef8f 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -5631,6 +5631,39 @@ static gen_helper_gvec_3_ptr * const f_vector_fcadd[3] = 
{
 TRANS_FEAT(FCADD_90, aa64_fcma, do_fp3_vector, a, 0, f_vector_fcadd)
 TRANS_FEAT(FCADD_270, aa64_fcma, do_fp3_vector, a, 1, f_vector_fcadd)
 
+static bool trans_FCMLA_v(DisasContext *s, arg_FCMLA_v *a)
+{
+gen_helper_gvec_4_ptr *fn;
+
+if (!dc_isar_feature(aa64_fcma, s)) {
+return false;
+}
+switch (a->esz) {
+case MO_64:
+if (!a->q) {
+return false;
+}
+fn = gen_helper_gvec_fcmlad;
+break;
+case MO_32:
+fn = gen_helper_gvec_fcmlas;
+break;
+case MO_16:
+if (!dc_isar_feature(aa64_fp16, s)) {
+return false;
+}
+fn = gen_helper_gvec_fcmlah;
+break;
+default:
+return false;
+}
+if (fp_access_check(s)) {
+gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
+  a->esz == MO_16, a->rot, fn);
+}
+return true;
+}
+
 /*
  * Advanced SIMD scalar/vector x indexed element
  */
@@ -5985,6 +6018,36 @@ static bool trans_BFMLAL_vi(DisasContext *s, arg_qrrx_e 
*a)
 return true;
 }
 
+static bool trans_FCMLA_vi(DisasContext *s, arg_FCMLA_vi *a)
+{
+gen_helper_gvec_4_ptr *fn;
+
+if (!dc_isar_feature(aa64_fcma, s)) {
+return false;
+}
+switch (a->esz) {
+case MO_16:
+if (!dc_isar_feature(aa64_fp16, s)) {
+return false;
+}
+fn = gen_helper_gvec_fcmlah_idx;
+break;
+case MO_32:
+if (!a->q && a->idx) {
+return false;
+}
+fn = gen_helper_gvec_fcmlas_idx;
+break;
+default:
+g_assert_not_reached();
+}
+if (fp_access_check(s)) {
+gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
+  a->esz == MO_16, (a->idx << 2) | a->rot, fn);
+}
+return true;
+}
+
 /*
  * Advanced SIMD scalar pairwise
  */
@@ -10942,90 +11005,6 @@ static void disas_simd_three_reg_diff(DisasContext *s, 
uint32_t insn)
 }
 }
 
-/* AdvSIMD three same extra
- *  31   30  29 28   24 23  22  21 20  16  15 1411  10 9  5 4  0
- * +---+---+---+---+--+---+--+---++---+++
- * | 0 | Q | U | 0 1 1 1 0 | size | 0 |  Rm  | 1 | opcode | 1 | Rn | Rd |
- * +---+---+---+---+--+---+--+---++---+++
- */
-static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn)
-{
-int rd = extract32(insn, 0, 5);
-int rn = extract32(insn, 5, 5);
-int opcode = extract32(insn, 11, 4);
-int rm = extract32(insn, 16, 5);
-int size = extract32(insn, 22, 2);
-bool u = extract32(insn, 29, 1);
-bool is_q = extract32(insn, 30, 1);
-bool feature;
-int rot;
-
-switch (u * 16 + opcode) {
-case 0x18: /* FCMLA, #0 */
-case 0x19: /* FCMLA, #90 */
-case 0x1a: /* FCMLA, #180 */
-case 0x1b: /* FCMLA, #270 */
-if (size == 0
-|| (size == 1 && !dc_isar_feature(aa64_fp16, s))
-|| (size == 3 && !is_q)) {
-unallocated_encoding(s);
-return;
-}
-feature = dc_isar_feature(aa64_fcma, s);
-break;
-default:
-case 0x02: /* SDOT (vector) */
-case 0x03: /* USDOT */
-case 0x04: /* SMMLA */
-case 0x05: /* USMMLA */
-case 0x10: /* SQRDMLAH (vector) */
-case 0x11: /* SQRDMLSH (vector) */
-case 0x12: /* UDOT (vector) */
-case 0x14: /* UMMLA */
-case 0x1c: /* FCADD, #90 */
-

[PATCH 05/13] target/arm: Convert SDOT, UDOT to decodetree

2024-06-24 Thread Richard Henderson
Signed-off-by: Richard Henderson 
---
 target/arm/tcg/a64.decode  |  7 +
 target/arm/tcg/translate-a64.c | 54 ++
 2 files changed, 35 insertions(+), 26 deletions(-)

diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode
index 613cc9365c..7411d4ba97 100644
--- a/target/arm/tcg/a64.decode
+++ b/target/arm/tcg/a64.decode
@@ -61,6 +61,7 @@
 
 @qrrr_b . q:1 .. ... rm:5 .. rn:5 rd:5  _e esz=0
 @qrrr_h . q:1 .. ... rm:5 .. rn:5 rd:5  _e esz=1
+@qrrr_s . q:1 .. ... rm:5 .. rn:5 rd:5  _e esz=2
 @qrrr_sd. q:1 .. ... rm:5 .. rn:5 rd:5  _e esz=%esz_sd
 @qrrr_e . q:1 .. esz:2 . rm:5 .. rn:5 rd:5  _e
 @qr2r_e . q:1 .. esz:2 . . .. rm:5 rd:5 _e rn=%rd
@@ -946,6 +947,9 @@ SQRDMULH_v  0.10 1110 ..1 . 10110 1 . . 
@qrrr_e
 SQRDMLAH_v  0.10 1110 ..0 . 1 1 . . @qrrr_e
 SQRDMLSH_v  0.10 1110 ..0 . 10001 1 . . @qrrr_e
 
+SDOT_v  0.00 1110 100 . 10010 1 . . @qrrr_s
+UDOT_v  0.10 1110 100 . 10010 1 . . @qrrr_s
+
 ### Advanced SIMD scalar x indexed element
 
 FMUL_si 0101  00 ..  1001 . 0 . .   @rrx_h
@@ -1020,6 +1024,9 @@ SQRDMLAH_vi 0.10  10 ..  1101 . 0 . . 
  @qrrx_s
 SQRDMLSH_vi 0.10  01 ..   . 0 . .   @qrrx_h
 SQRDMLSH_vi 0.10  10 ..   . 0 . .   @qrrx_s
 
+SDOT_vi 0.00  10 ..  1110 . 0 . .   @qrrx_s
+UDOT_vi 0.10  10 ..  1110 . 0 . .   @qrrx_s
+
 # Floating-point conditional select
 
 FCSEL   0001 1110 .. 1 rm:5 cond:4 11 rn:5 rd:5 esz=%esz_hsd
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index 32c24c7422..f2e7d8d75c 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -5592,6 +5592,18 @@ TRANS(SQRDMULH_v, do_gvec_fn3_no8_no64, a, 
gen_gvec_sqrdmulh_qc)
 TRANS_FEAT(SQRDMLAH_v, aa64_rdm, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmlah_qc)
 TRANS_FEAT(SQRDMLSH_v, aa64_rdm, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmlsh_qc)
 
+static bool do_dot_vector(DisasContext *s, arg_qrrr_e *a,
+  gen_helper_gvec_4 *fn)
+{
+if (fp_access_check(s)) {
+gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd, 0, fn);
+}
+return true;
+}
+
+TRANS_FEAT(SDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_sdot_b)
+TRANS_FEAT(UDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_udot_b)
+
 /*
  * Advanced SIMD scalar/vector x indexed element
  */
@@ -5914,6 +5926,18 @@ static gen_helper_gvec_4 * const 
f_vector_idx_sqrdmlsh[2] = {
 TRANS_FEAT(SQRDMLSH_vi, aa64_rdm, do_int3_qc_vector_idx, a,
f_vector_idx_sqrdmlsh)
 
+static bool do_dot_vector_idx(DisasContext *s, arg_qrrx_e *a,
+  gen_helper_gvec_4 *fn)
+{
+if (fp_access_check(s)) {
+gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd, a->idx, fn);
+}
+return true;
+}
+
+TRANS_FEAT(SDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_sdot_idx_b)
+TRANS_FEAT(UDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_udot_idx_b)
+
 /*
  * Advanced SIMD scalar pairwise
  */
@@ -10890,14 +10914,6 @@ static void 
disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn)
 int rot;
 
 switch (u * 16 + opcode) {
-case 0x02: /* SDOT (vector) */
-case 0x12: /* UDOT (vector) */
-if (size != MO_32) {
-unallocated_encoding(s);
-return;
-}
-feature = dc_isar_feature(aa64_dp, s);
-break;
 case 0x03: /* USDOT */
 if (size != MO_32) {
 unallocated_encoding(s);
@@ -10947,8 +10963,10 @@ static void 
disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn)
 }
 break;
 default:
+case 0x02: /* SDOT (vector) */
 case 0x10: /* SQRDMLAH (vector) */
 case 0x11: /* SQRDMLSH (vector) */
+case 0x12: /* UDOT (vector) */
 unallocated_encoding(s);
 return;
 }
@@ -10961,11 +10979,6 @@ static void 
disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn)
 }
 
 switch (opcode) {
-case 0x2: /* SDOT / UDOT */
-gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0,
- u ? gen_helper_gvec_udot_b : gen_helper_gvec_sdot_b);
-return;
-
 case 0x3: /* USDOT */
 gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_usdot_b);
 return;
@@ -12043,13 +12056,6 @@ static void disas_simd_indexed(DisasContext *s, 
uint32_t insn)
 case 0x0b: /* SQDMULL, SQDMULL2 */
 is_long = true;
 break;
-case 0x0e: /* SDOT */
-case 0x1e: /* UDOT */
-if (is_scalar || size != MO_32 || !dc_isar_feature(aa64_dp, s)) {
-unallocated_encoding(s);
-return;
-}
-break;
 case 0x0f:
 switch (size) {
 case 

[PATCH 04/13] target/arm: Convert SQRDMLAH, SQRDMLSH to decodetree

2024-06-24 Thread Richard Henderson
Signed-off-by: Richard Henderson 
---
 target/arm/helper.h|  10 ++
 target/arm/tcg/a64.decode  |  16 +++
 target/arm/tcg/translate-a64.c | 206 +
 target/arm/tcg/vec_helper.c|  72 
 4 files changed, 180 insertions(+), 124 deletions(-)

diff --git a/target/arm/helper.h b/target/arm/helper.h
index eca2043fc2..970d059dec 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -979,6 +979,16 @@ DEF_HELPER_FLAGS_5(neon_sqrdmulh_idx_h, TCG_CALL_NO_RWG,
 DEF_HELPER_FLAGS_5(neon_sqrdmulh_idx_s, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
 
+DEF_HELPER_FLAGS_5(neon_sqrdmlah_idx_h, TCG_CALL_NO_RWG,
+   void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(neon_sqrdmlah_idx_s, TCG_CALL_NO_RWG,
+   void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(neon_sqrdmlsh_idx_h, TCG_CALL_NO_RWG,
+   void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(neon_sqrdmlsh_idx_s, TCG_CALL_NO_RWG,
+   void, ptr, ptr, ptr, ptr, i32)
+
 DEF_HELPER_FLAGS_4(sve2_sqdmulh_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(sve2_sqdmulh_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(sve2_sqdmulh_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode
index 2b7a3254a0..613cc9365c 100644
--- a/target/arm/tcg/a64.decode
+++ b/target/arm/tcg/a64.decode
@@ -781,6 +781,8 @@ CMEQ_s  0111 1110 111 . 10001 1 . . 
@rrr_d
 
 SQDMULH_s   0101 1110 ..1 . 10110 1 . . @rrr_e
 SQRDMULH_s  0111 1110 ..1 . 10110 1 . . @rrr_e
+SQRDMLAH_s  0111 1110 ..0 . 1 1 . . @rrr_e
+SQRDMLSH_s  0111 1110 ..0 . 10001 1 . . @rrr_e
 
 ### Advanced SIMD scalar pairwise
 
@@ -941,6 +943,8 @@ MLS_v   0.10 1110 ..1 . 10010 1 . . 
@qrrr_e
 
 SQDMULH_v   0.00 1110 ..1 . 10110 1 . . @qrrr_e
 SQRDMULH_v  0.10 1110 ..1 . 10110 1 . . @qrrr_e
+SQRDMLAH_v  0.10 1110 ..0 . 1 1 . . @qrrr_e
+SQRDMLSH_v  0.10 1110 ..0 . 10001 1 . . @qrrr_e
 
 ### Advanced SIMD scalar x indexed element
 
@@ -966,6 +970,12 @@ SQDMULH_si  0101  10 ..  1100 . 0 . .  
 @rrx_s
 SQRDMULH_si 0101  01 ..  1101 . 0 . .   @rrx_h
 SQRDMULH_si 0101  10 . . 1101 . 0 . .   @rrx_s
 
+SQRDMLAH_si 0111  01 ..  1101 . 0 . .   @rrx_h
+SQRDMLAH_si 0111  10 ..  1101 . 0 . .   @rrx_s
+
+SQRDMLSH_si 0111  01 ..   . 0 . .   @rrx_h
+SQRDMLSH_si 0111  10 ..   . 0 . .   @rrx_s
+
 ### Advanced SIMD vector x indexed element
 
 FMUL_vi 0.00  00 ..  1001 . 0 . .   @qrrx_h
@@ -1004,6 +1014,12 @@ SQDMULH_vi  0.00  10 . . 1100 . 0 . 
.   @qrrx_s
 SQRDMULH_vi 0.00  01 ..  1101 . 0 . .   @qrrx_h
 SQRDMULH_vi 0.00  10 . . 1101 . 0 . .   @qrrx_s
 
+SQRDMLAH_vi 0.10  01 ..  1101 . 0 . .   @qrrx_h
+SQRDMLAH_vi 0.10  10 ..  1101 . 0 . .   @qrrx_s
+
+SQRDMLSH_vi 0.10  01 ..   . 0 . .   @qrrx_h
+SQRDMLSH_vi 0.10  10 ..   . 0 . .   @qrrx_s
+
 # Floating-point conditional select
 
 FCSEL   0001 1110 .. 1 rm:5 cond:4 11 rn:5 rd:5 esz=%esz_hsd
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index 93543da39c..32c24c7422 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -5235,6 +5235,43 @@ static const ENVScalar2 f_scalar_sqrdmulh = {
 };
 TRANS(SQRDMULH_s, do_env_scalar2_hs, a, &f_scalar_sqrdmulh)
 
+typedef struct ENVScalar3 {
+NeonGenThreeOpEnvFn *gen_hs[2];
+} ENVScalar3;
+
+static bool do_env_scalar3_hs(DisasContext *s, arg_rrr_e *a,
+  const ENVScalar3 *f)
+{
+TCGv_i32 t0, t1, t2;
+
+if (a->esz != MO_16 && a->esz != MO_32) {
+return false;
+}
+if (!fp_access_check(s)) {
+return true;
+}
+
+t0 = tcg_temp_new_i32();
+t1 = tcg_temp_new_i32();
+t2 = tcg_temp_new_i32();
+read_vec_element_i32(s, t0, a->rn, 0, a->esz);
+read_vec_element_i32(s, t1, a->rm, 0, a->esz);
+read_vec_element_i32(s, t2, a->rd, 0, a->esz);
+f->gen_hs[a->esz - 1](t0, tcg_env, t0, t1, t2);
+write_fp_sreg(s, a->rd, t0);
+return true;
+}
+
+static const ENVScalar3 f_scalar_sqrdmlah = {
+{ gen_helper_neon_qrdmlah_s16, gen_helper_neon_qrdmlah_s32 }
+};
+TRANS_FEAT(SQRDMLAH_s, aa64_rdm, do_env_scalar3_hs, a, &f_scalar_sqrdmlah)
+
+static const ENVScalar3 f_scalar_sqrdmlsh = {
+{ gen_helper_neon_qrdmlsh_s16, gen_helper_neon_qrdmlsh_s32 }
+};
+TRANS_FEAT(SQRDMLSH_s, aa64_rdm, do_env_scalar3_hs, a, &f_scalar_sqrdmlsh)
+
 static bool do_cmop_d(DisasContext *s, arg_rrr_e *a, TCGCond cond)
 {
   

[PATCH 02/13] target/arm: Fix SQDMULH (by element) with Q=0

2024-06-24 Thread Richard Henderson
The inner loop, bounded by eltspersegment, must not be
larger than the outer loop, bounded by elements.

Cc: qemu-sta...@nongnu.org
Signed-off-by: Richard Henderson 
---
 target/arm/tcg/vec_helper.c | 24 
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
index 7b34cc98af..d477479bb1 100644
--- a/target/arm/tcg/vec_helper.c
+++ b/target/arm/tcg/vec_helper.c
@@ -317,10 +317,12 @@ void HELPER(neon_sqdmulh_idx_h)(void *vd, void *vn, void 
*vm,
 intptr_t i, j, opr_sz = simd_oprsz(desc);
 int idx = simd_data(desc);
 int16_t *d = vd, *n = vn, *m = (int16_t *)vm + H2(idx);
+intptr_t elements = opr_sz / 2;
+intptr_t eltspersegment = MIN(16 / 2, elements);
 
-for (i = 0; i < opr_sz / 2; i += 16 / 2) {
+for (i = 0; i < elements; i += 16 / 2) {
 int16_t mm = m[i];
-for (j = 0; j < 16 / 2; ++j) {
+for (j = 0; j < eltspersegment; ++j) {
 d[i + j] = do_sqrdmlah_h(n[i + j], mm, 0, false, false, vq);
 }
 }
@@ -333,10 +335,12 @@ void HELPER(neon_sqrdmulh_idx_h)(void *vd, void *vn, void 
*vm,
 intptr_t i, j, opr_sz = simd_oprsz(desc);
 int idx = simd_data(desc);
 int16_t *d = vd, *n = vn, *m = (int16_t *)vm + H2(idx);
+intptr_t elements = opr_sz / 2;
+intptr_t eltspersegment = MIN(16 / 2, elements);
 
-for (i = 0; i < opr_sz / 2; i += 16 / 2) {
+for (i = 0; i < elements; i += 16 / 2) {
 int16_t mm = m[i];
-for (j = 0; j < 16 / 2; ++j) {
+for (j = 0; j < eltspersegment; ++j) {
 d[i + j] = do_sqrdmlah_h(n[i + j], mm, 0, false, true, vq);
 }
 }
@@ -512,10 +516,12 @@ void HELPER(neon_sqdmulh_idx_s)(void *vd, void *vn, void 
*vm,
 intptr_t i, j, opr_sz = simd_oprsz(desc);
 int idx = simd_data(desc);
 int32_t *d = vd, *n = vn, *m = (int32_t *)vm + H4(idx);
+intptr_t elements = opr_sz / 4;
+intptr_t eltspersegment = MIN(16 / 4, elements);
 
-for (i = 0; i < opr_sz / 4; i += 16 / 4) {
+for (i = 0; i < elements; i += 16 / 4) {
 int32_t mm = m[i];
-for (j = 0; j < 16 / 4; ++j) {
+for (j = 0; j < eltspersegment; ++j) {
 d[i + j] = do_sqrdmlah_s(n[i + j], mm, 0, false, false, vq);
 }
 }
@@ -528,10 +534,12 @@ void HELPER(neon_sqrdmulh_idx_s)(void *vd, void *vn, void 
*vm,
 intptr_t i, j, opr_sz = simd_oprsz(desc);
 int idx = simd_data(desc);
 int32_t *d = vd, *n = vn, *m = (int32_t *)vm + H4(idx);
+intptr_t elements = opr_sz / 4;
+intptr_t eltspersegment = MIN(16 / 4, elements);
 
-for (i = 0; i < opr_sz / 4; i += 16 / 4) {
+for (i = 0; i < elements; i += 16 / 4) {
 int32_t mm = m[i];
-for (j = 0; j < 16 / 4; ++j) {
+for (j = 0; j < eltspersegment; ++j) {
 d[i + j] = do_sqrdmlah_s(n[i + j], mm, 0, false, true, vq);
 }
 }
-- 
2.34.1




[PATCH 10/13] target/arm: Add data argument to do_fp3_vector

2024-06-24 Thread Richard Henderson
Signed-off-by: Richard Henderson 
---
 target/arm/tcg/translate-a64.c | 52 +-
 1 file changed, 26 insertions(+), 26 deletions(-)

diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index 2697c4b305..57cdde008e 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -5290,7 +5290,7 @@ TRANS(CMHS_s, do_cmop_d, a, TCG_COND_GEU)
 TRANS(CMEQ_s, do_cmop_d, a, TCG_COND_EQ)
 TRANS(CMTST_s, do_cmop_d, a, TCG_COND_TSTNE)
 
-static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a,
+static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a, int data,
   gen_helper_gvec_3_ptr * const fns[3])
 {
 MemOp esz = a->esz;
@@ -5313,7 +5313,7 @@ static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a,
 }
 if (fp_access_check(s)) {
 gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm,
-  esz == MO_16, 0, fns[esz - 1]);
+  esz == MO_16, data, fns[esz - 1]);
 }
 return true;
 }
@@ -5323,168 +5323,168 @@ static gen_helper_gvec_3_ptr * const f_vector_fadd[3] 
= {
 gen_helper_gvec_fadd_s,
 gen_helper_gvec_fadd_d,
 };
-TRANS(FADD_v, do_fp3_vector, a, f_vector_fadd)
+TRANS(FADD_v, do_fp3_vector, a, 0, f_vector_fadd)
 
 static gen_helper_gvec_3_ptr * const f_vector_fsub[3] = {
 gen_helper_gvec_fsub_h,
 gen_helper_gvec_fsub_s,
 gen_helper_gvec_fsub_d,
 };
-TRANS(FSUB_v, do_fp3_vector, a, f_vector_fsub)
+TRANS(FSUB_v, do_fp3_vector, a, 0, f_vector_fsub)
 
 static gen_helper_gvec_3_ptr * const f_vector_fdiv[3] = {
 gen_helper_gvec_fdiv_h,
 gen_helper_gvec_fdiv_s,
 gen_helper_gvec_fdiv_d,
 };
-TRANS(FDIV_v, do_fp3_vector, a, f_vector_fdiv)
+TRANS(FDIV_v, do_fp3_vector, a, 0, f_vector_fdiv)
 
 static gen_helper_gvec_3_ptr * const f_vector_fmul[3] = {
 gen_helper_gvec_fmul_h,
 gen_helper_gvec_fmul_s,
 gen_helper_gvec_fmul_d,
 };
-TRANS(FMUL_v, do_fp3_vector, a, f_vector_fmul)
+TRANS(FMUL_v, do_fp3_vector, a, 0, f_vector_fmul)
 
 static gen_helper_gvec_3_ptr * const f_vector_fmax[3] = {
 gen_helper_gvec_fmax_h,
 gen_helper_gvec_fmax_s,
 gen_helper_gvec_fmax_d,
 };
-TRANS(FMAX_v, do_fp3_vector, a, f_vector_fmax)
+TRANS(FMAX_v, do_fp3_vector, a, 0, f_vector_fmax)
 
 static gen_helper_gvec_3_ptr * const f_vector_fmin[3] = {
 gen_helper_gvec_fmin_h,
 gen_helper_gvec_fmin_s,
 gen_helper_gvec_fmin_d,
 };
-TRANS(FMIN_v, do_fp3_vector, a, f_vector_fmin)
+TRANS(FMIN_v, do_fp3_vector, a, 0, f_vector_fmin)
 
 static gen_helper_gvec_3_ptr * const f_vector_fmaxnm[3] = {
 gen_helper_gvec_fmaxnum_h,
 gen_helper_gvec_fmaxnum_s,
 gen_helper_gvec_fmaxnum_d,
 };
-TRANS(FMAXNM_v, do_fp3_vector, a, f_vector_fmaxnm)
+TRANS(FMAXNM_v, do_fp3_vector, a, 0, f_vector_fmaxnm)
 
 static gen_helper_gvec_3_ptr * const f_vector_fminnm[3] = {
 gen_helper_gvec_fminnum_h,
 gen_helper_gvec_fminnum_s,
 gen_helper_gvec_fminnum_d,
 };
-TRANS(FMINNM_v, do_fp3_vector, a, f_vector_fminnm)
+TRANS(FMINNM_v, do_fp3_vector, a, 0, f_vector_fminnm)
 
 static gen_helper_gvec_3_ptr * const f_vector_fmulx[3] = {
 gen_helper_gvec_fmulx_h,
 gen_helper_gvec_fmulx_s,
 gen_helper_gvec_fmulx_d,
 };
-TRANS(FMULX_v, do_fp3_vector, a, f_vector_fmulx)
+TRANS(FMULX_v, do_fp3_vector, a, 0, f_vector_fmulx)
 
 static gen_helper_gvec_3_ptr * const f_vector_fmla[3] = {
 gen_helper_gvec_vfma_h,
 gen_helper_gvec_vfma_s,
 gen_helper_gvec_vfma_d,
 };
-TRANS(FMLA_v, do_fp3_vector, a, f_vector_fmla)
+TRANS(FMLA_v, do_fp3_vector, a, 0, f_vector_fmla)
 
 static gen_helper_gvec_3_ptr * const f_vector_fmls[3] = {
 gen_helper_gvec_vfms_h,
 gen_helper_gvec_vfms_s,
 gen_helper_gvec_vfms_d,
 };
-TRANS(FMLS_v, do_fp3_vector, a, f_vector_fmls)
+TRANS(FMLS_v, do_fp3_vector, a, 0, f_vector_fmls)
 
 static gen_helper_gvec_3_ptr * const f_vector_fcmeq[3] = {
 gen_helper_gvec_fceq_h,
 gen_helper_gvec_fceq_s,
 gen_helper_gvec_fceq_d,
 };
-TRANS(FCMEQ_v, do_fp3_vector, a, f_vector_fcmeq)
+TRANS(FCMEQ_v, do_fp3_vector, a, 0, f_vector_fcmeq)
 
 static gen_helper_gvec_3_ptr * const f_vector_fcmge[3] = {
 gen_helper_gvec_fcge_h,
 gen_helper_gvec_fcge_s,
 gen_helper_gvec_fcge_d,
 };
-TRANS(FCMGE_v, do_fp3_vector, a, f_vector_fcmge)
+TRANS(FCMGE_v, do_fp3_vector, a, 0, f_vector_fcmge)
 
 static gen_helper_gvec_3_ptr * const f_vector_fcmgt[3] = {
 gen_helper_gvec_fcgt_h,
 gen_helper_gvec_fcgt_s,
 gen_helper_gvec_fcgt_d,
 };
-TRANS(FCMGT_v, do_fp3_vector, a, f_vector_fcmgt)
+TRANS(FCMGT_v, do_fp3_vector, a, 0, f_vector_fcmgt)
 
 static gen_helper_gvec_3_ptr * const f_vector_facge[3] = {
 gen_helper_gvec_facge_h,
 gen_helper_gvec_facge_s,
 gen_helper_gvec_facge_d,
 };
-TRANS(FACGE_v, do_fp3_vector, a, f_vector_facge)
+TRANS(FACGE_v, do_fp3_vector, a, 0, f_vector_facge)
 
 static gen_helper_gvec_3_ptr * const f_vector_facgt[3] = {
 gen_helper_gvec_facgt_h,
 gen_helper_gvec_facgt_s,

[PATCH 13/13] target/arm: Delete dead code from disas_simd_indexed

2024-06-24 Thread Richard Henderson
The last insns in this block, MLA and MLS, were converted
with f80701cb44d, and this code should have been removed then.

Signed-off-by: Richard Henderson 
---
 target/arm/tcg/translate-a64.c | 93 --
 1 file changed, 93 deletions(-)

diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index 0a54a9ef8f..11955c0c36 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -11979,7 +11979,6 @@ static void disas_simd_indexed(DisasContext *s, 
uint32_t insn)
 int h = extract32(insn, 11, 1);
 int rn = extract32(insn, 5, 5);
 int rd = extract32(insn, 0, 5);
-bool is_long = false;
 int index;
 
 switch (16 * u + opcode) {
@@ -11993,12 +11992,10 @@ static void disas_simd_indexed(DisasContext *s, 
uint32_t insn)
 unallocated_encoding(s);
 return;
 }
-is_long = true;
 break;
 case 0x03: /* SQDMLAL, SQDMLAL2 */
 case 0x07: /* SQDMLSL, SQDMLSL2 */
 case 0x0b: /* SQDMULL, SQDMULL2 */
-is_long = true;
 break;
 default:
 case 0x00: /* FMLAL */
@@ -12050,96 +12047,6 @@ static void disas_simd_indexed(DisasContext *s, 
uint32_t insn)
 
 if (size == 3) {
 g_assert_not_reached();
-} else if (!is_long) {
-/* 32 bit floating point, or 16 or 32 bit integer.
- * For the 16 bit scalar case we use the usual Neon helpers and
- * rely on the fact that 0 op 0 == 0 with no side effects.
- */
-TCGv_i32 tcg_idx = tcg_temp_new_i32();
-int pass, maxpasses;
-
-if (is_scalar) {
-maxpasses = 1;
-} else {
-maxpasses = is_q ? 4 : 2;
-}
-
-read_vec_element_i32(s, tcg_idx, rm, index, size);
-
-if (size == 1 && !is_scalar) {
-/* The simplest way to handle the 16x16 indexed ops is to duplicate
- * the index into both halves of the 32 bit tcg_idx and then use
- * the usual Neon helpers.
- */
-tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
-}
-
-for (pass = 0; pass < maxpasses; pass++) {
-TCGv_i32 tcg_op = tcg_temp_new_i32();
-TCGv_i32 tcg_res = tcg_temp_new_i32();
-
-read_vec_element_i32(s, tcg_op, rn, pass, is_scalar ? size : 
MO_32);
-
-switch (16 * u + opcode) {
-case 0x10: /* MLA */
-case 0x14: /* MLS */
-{
-static NeonGenTwoOpFn * const fns[2][2] = {
-{ gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
-{ tcg_gen_add_i32, tcg_gen_sub_i32 },
-};
-NeonGenTwoOpFn *genfn;
-bool is_sub = opcode == 0x4;
-
-if (size == 1) {
-gen_helper_neon_mul_u16(tcg_res, tcg_op, tcg_idx);
-} else {
-tcg_gen_mul_i32(tcg_res, tcg_op, tcg_idx);
-}
-if (opcode == 0x8) {
-break;
-}
-read_vec_element_i32(s, tcg_op, rd, pass, MO_32);
-genfn = fns[size - 1][is_sub];
-genfn(tcg_res, tcg_op, tcg_res);
-break;
-}
-case 0x0c: /* SQDMULH */
-if (size == 1) {
-gen_helper_neon_qdmulh_s16(tcg_res, tcg_env,
-   tcg_op, tcg_idx);
-} else {
-gen_helper_neon_qdmulh_s32(tcg_res, tcg_env,
-   tcg_op, tcg_idx);
-}
-break;
-case 0x0d: /* SQRDMULH */
-if (size == 1) {
-gen_helper_neon_qrdmulh_s16(tcg_res, tcg_env,
-tcg_op, tcg_idx);
-} else {
-gen_helper_neon_qrdmulh_s32(tcg_res, tcg_env,
-tcg_op, tcg_idx);
-}
-break;
-default:
-case 0x01: /* FMLA */
-case 0x05: /* FMLS */
-case 0x09: /* FMUL */
-case 0x19: /* FMULX */
-case 0x1d: /* SQRDMLAH */
-case 0x1f: /* SQRDMLSH */
-g_assert_not_reached();
-}
-
-if (is_scalar) {
-write_fp_sreg(s, rd, tcg_res);
-} else {
-write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
-}
-}
-
-clear_vec_high(s, is_q, rd);
 } else {
 /* long ops: 16x16->32 or 32x32->64 */
 TCGv_i64 tcg_res[2];
-- 
2.34.1




[PATCH 03/13] target/arm: Fix FJCVTZS vs flush-to-zero

2024-06-24 Thread Richard Henderson
With flush-to-zero enabled, input denormals cause the JavaScript
inexact bit (output to Z) to be set.

Cc: qemu-sta...@nongnu.org
Fixes: 6c1f6f2733a ("target/arm: Implement ARMv8.3-JSConv")
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2375
Signed-off-by: Richard Henderson 
---
 target/arm/vfp_helper.c   | 18 +-
 tests/tcg/aarch64/test-2375.c | 20 
 tests/tcg/aarch64/Makefile.target |  3 ++-
 3 files changed, 31 insertions(+), 10 deletions(-)
 create mode 100644 tests/tcg/aarch64/test-2375.c

diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
index ce26b8a71a..50d7042fa9 100644
--- a/target/arm/vfp_helper.c
+++ b/target/arm/vfp_helper.c
@@ -1091,8 +1091,8 @@ const FloatRoundMode arm_rmode_to_sf_map[] = {
 uint64_t HELPER(fjcvtzs)(float64 value, void *vstatus)
 {
 float_status *status = vstatus;
-uint32_t inexact, frac;
-uint32_t e_old, e_new;
+uint32_t frac, e_old, e_new;
+bool inexact;
 
 e_old = get_float_exception_flags(status);
 set_float_exception_flags(0, status);
@@ -1100,13 +1100,13 @@ uint64_t HELPER(fjcvtzs)(float64 value, void *vstatus)
 e_new = get_float_exception_flags(status);
 set_float_exception_flags(e_old | e_new, status);
 
-if (value == float64_chs(float64_zero)) {
-/* While not inexact for IEEE FP, -0.0 is inexact for JavaScript. */
-inexact = 1;
-} else {
-/* Normal inexact or overflow or NaN */
-inexact = e_new & (float_flag_inexact | float_flag_invalid);
-}
+/* Normal inexact, denormal with flush-to-zero, or overflow or NaN */
+inexact = e_new & (float_flag_inexact |
+   float_flag_input_denormal |
+   float_flag_invalid);
+
+/* While not inexact for IEEE FP, -0.0 is inexact for JavaScript. */
+inexact |= value == float64_chs(float64_zero);
 
 /* Pack the result and the env->ZF representation of Z together.  */
 return deposit64(frac, 32, 32, inexact);
diff --git a/tests/tcg/aarch64/test-2375.c b/tests/tcg/aarch64/test-2375.c
new file mode 100644
index 00..f83af8b3ea
--- /dev/null
+++ b/tests/tcg/aarch64/test-2375.c
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/* See https://gitlab.com/qemu-project/qemu/-/issues/2375 */
+
+#include <assert.h>
+
+int main()
+{
+   int r, z;
+
+   asm("msr fpcr, %2\n\t"
+   "fjcvtzs %w0, %d3\n\t"
+   "cset %1, eq"
+   : "=r"(r), "=r"(z)
+   : "r"(0x01000000L), /* FZ = 1 */
+ "w"(0xfcff00L));   /* denormal */
+
+assert(r == 0);
+assert(z == 0);
+return 0;
+}
diff --git a/tests/tcg/aarch64/Makefile.target 
b/tests/tcg/aarch64/Makefile.target
index 70d728ae9a..4ecbca6a41 100644
--- a/tests/tcg/aarch64/Makefile.target
+++ b/tests/tcg/aarch64/Makefile.target
@@ -41,8 +41,9 @@ endif
 
 # Pauth Tests
 ifneq ($(CROSS_CC_HAS_ARMV8_3),)
-AARCH64_TESTS += pauth-1 pauth-2 pauth-4 pauth-5
+AARCH64_TESTS += pauth-1 pauth-2 pauth-4 pauth-5 test-2375
 pauth-%: CFLAGS += -march=armv8.3-a
+test-2375: CFLAGS += -march=armv8.3-a
 run-pauth-1: QEMU_OPTS += -cpu max
 run-pauth-2: QEMU_OPTS += -cpu max
 # Choose a cpu with FEAT_Pauth but without FEAT_FPAC for pauth-[45].
-- 
2.34.1




[PATCH 09/13] target/arm: Convert BFMMLA, SMMLA, UMMLA, USMMLA to decodetree

2024-06-24 Thread Richard Henderson
Signed-off-by: Richard Henderson 
---
 target/arm/tcg/a64.decode  |  4 
 target/arm/tcg/translate-a64.c | 36 --
 2 files changed, 12 insertions(+), 28 deletions(-)

diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode
index 15344a73de..b2c7e36969 100644
--- a/target/arm/tcg/a64.decode
+++ b/target/arm/tcg/a64.decode
@@ -952,6 +952,10 @@ UDOT_v  0.10 1110 100 . 10010 1 . . 
@qrrr_s
 USDOT_v 0.00 1110 100 . 10011 1 . . @qrrr_s
 BFDOT_v 0.10 1110 010 . 1 1 . . @qrrr_s
 BFMLAL_v0.10 1110 110 . 1 1 . . @qrrr_h
+BFMMLA  0110 1110 010 . 11101 1 . . @rrr_q1e0
+SMMLA   0100 1110 100 . 10100 1 . . @rrr_q1e0
+UMMLA   0110 1110 100 . 10100 1 . . @rrr_q1e0
+USMMLA  0100 1110 100 . 10101 1 . . @rrr_q1e0
 
 ### Advanced SIMD scalar x indexed element
 
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index 95be862dde..2697c4b305 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -5605,6 +5605,10 @@ TRANS_FEAT(SDOT_v, aa64_dp, do_dot_vector, a, 
gen_helper_gvec_sdot_b)
 TRANS_FEAT(UDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_udot_b)
 TRANS_FEAT(USDOT_v, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_usdot_b)
 TRANS_FEAT(BFDOT_v, aa64_bf16, do_dot_vector, a, gen_helper_gvec_bfdot)
+TRANS_FEAT(BFMMLA, aa64_bf16, do_dot_vector, a, gen_helper_gvec_bfmmla)
+TRANS_FEAT(SMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_smmla_b)
+TRANS_FEAT(UMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_ummla_b)
+TRANS_FEAT(USMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_usmmla_b)
 
 static bool trans_BFMLAL_v(DisasContext *s, arg_qrrr_e *a)
 {
@@ -10949,15 +10953,6 @@ static void 
disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn)
 int rot;
 
 switch (u * 16 + opcode) {
-case 0x04: /* SMMLA */
-case 0x14: /* UMMLA */
-case 0x05: /* USMMLA */
-if (!is_q || size != MO_32) {
-unallocated_encoding(s);
-return;
-}
-feature = dc_isar_feature(aa64_i8mm, s);
-break;
 case 0x18: /* FCMLA, #0 */
 case 0x19: /* FCMLA, #90 */
 case 0x1a: /* FCMLA, #180 */
@@ -10972,19 +10967,16 @@ static void 
disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn)
 }
 feature = dc_isar_feature(aa64_fcma, s);
 break;
-case 0x1d: /* BFMMLA */
-if (size != MO_16 || !is_q) {
-unallocated_encoding(s);
-return;
-}
-feature = dc_isar_feature(aa64_bf16, s);
-break;
 default:
 case 0x02: /* SDOT (vector) */
 case 0x03: /* USDOT */
+case 0x04: /* SMMLA */
+case 0x05: /* USMMLA */
 case 0x10: /* SQRDMLAH (vector) */
 case 0x11: /* SQRDMLSH (vector) */
 case 0x12: /* UDOT (vector) */
+case 0x14: /* UMMLA */
+case 0x1d: /* BFMMLA */
 case 0x1f: /* BFDOT / BFMLAL */
 unallocated_encoding(s);
 return;
@@ -10998,15 +10990,6 @@ static void 
disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn)
 }
 
 switch (opcode) {
-case 0x04: /* SMMLA, UMMLA */
-gen_gvec_op4_ool(s, 1, rd, rn, rm, rd, 0,
- u ? gen_helper_gvec_ummla_b
- : gen_helper_gvec_smmla_b);
-return;
-case 0x05: /* USMMLA */
-gen_gvec_op4_ool(s, 1, rd, rn, rm, rd, 0, gen_helper_gvec_usmmla_b);
-return;
-
 case 0x8: /* FCMLA, #0 */
 case 0x9: /* FCMLA, #90 */
 case 0xa: /* FCMLA, #180 */
@@ -11051,9 +11034,6 @@ static void 
disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn)
 }
 return;
 
-case 0xd: /* BFMMLA */
-gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_bfmmla);
-return;
 default:
 g_assert_not_reached();
 }
-- 
2.34.1




[PATCH 11/13] target/arm: Convert FCADD to decodetree

2024-06-24 Thread Richard Henderson
Signed-off-by: Richard Henderson 
---
 target/arm/tcg/a64.decode  |  3 +++
 target/arm/tcg/translate-a64.c | 33 ++---
 2 files changed, 13 insertions(+), 23 deletions(-)

diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode
index b2c7e36969..f330919851 100644
--- a/target/arm/tcg/a64.decode
+++ b/target/arm/tcg/a64.decode
@@ -957,6 +957,9 @@ SMMLA   0100 1110 100 . 10100 1 . . 
@rrr_q1e0
 UMMLA   0110 1110 100 . 10100 1 . . @rrr_q1e0
 USMMLA  0100 1110 100 . 10101 1 . . @rrr_q1e0
 
+FCADD_900.10 1110 ..0 . 11100 1 . . @qrrr_e
+FCADD_270   0.10 1110 ..0 . 0 1 . . @qrrr_e
+
 ### Advanced SIMD scalar x indexed element
 
 FMUL_si 0101  00 ..  1001 . 0 . .   @rrx_h
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index 57cdde008e..a1b338263f 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -5623,6 +5623,14 @@ static bool trans_BFMLAL_v(DisasContext *s, arg_qrrr_e 
*a)
 return true;
 }
 
+static gen_helper_gvec_3_ptr * const f_vector_fcadd[3] = {
+gen_helper_gvec_fcaddh,
+gen_helper_gvec_fcadds,
+gen_helper_gvec_fcaddd,
+};
+TRANS_FEAT(FCADD_90, aa64_fcma, do_fp3_vector, a, 0, f_vector_fcadd)
+TRANS_FEAT(FCADD_270, aa64_fcma, do_fp3_vector, a, 1, f_vector_fcadd)
+
 /*
  * Advanced SIMD scalar/vector x indexed element
  */
@@ -10957,8 +10965,6 @@ static void 
disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn)
 case 0x19: /* FCMLA, #90 */
 case 0x1a: /* FCMLA, #180 */
 case 0x1b: /* FCMLA, #270 */
-case 0x1c: /* FCADD, #90 */
-case 0x1e: /* FCADD, #270 */
 if (size == 0
 || (size == 1 && !dc_isar_feature(aa64_fp16, s))
 || (size == 3 && !is_q)) {
@@ -10976,7 +10982,9 @@ static void 
disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn)
 case 0x11: /* SQRDMLSH (vector) */
 case 0x12: /* UDOT (vector) */
 case 0x14: /* UMMLA */
+case 0x1c: /* FCADD, #90 */
 case 0x1d: /* BFMMLA */
+case 0x1e: /* FCADD, #270 */
 case 0x1f: /* BFDOT / BFMLAL */
 unallocated_encoding(s);
 return;
@@ -11013,27 +11021,6 @@ static void 
disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn)
 }
 return;
 
-case 0xc: /* FCADD, #90 */
-case 0xe: /* FCADD, #270 */
-rot = extract32(opcode, 1, 1);
-switch (size) {
-case 1:
-gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
-  gen_helper_gvec_fcaddh);
-break;
-case 2:
-gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
-  gen_helper_gvec_fcadds);
-break;
-case 3:
-gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
-  gen_helper_gvec_fcaddd);
-break;
-default:
-g_assert_not_reached();
-}
-return;
-
 default:
 g_assert_not_reached();
 }
-- 
2.34.1




[PATCH 08/13] target/arm: Convert BFMLALB, BFMLALT to decodetree

2024-06-24 Thread Richard Henderson
Signed-off-by: Richard Henderson 
---
 target/arm/tcg/a64.decode  |  2 +
 target/arm/tcg/translate-a64.c | 77 +-
 2 files changed, 31 insertions(+), 48 deletions(-)

diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode
index 6819fd2587..15344a73de 100644
--- a/target/arm/tcg/a64.decode
+++ b/target/arm/tcg/a64.decode
@@ -951,6 +951,7 @@ SDOT_v  0.00 1110 100 . 10010 1 . . 
@qrrr_s
 UDOT_v  0.10 1110 100 . 10010 1 . . @qrrr_s
 USDOT_v 0.00 1110 100 . 10011 1 . . @qrrr_s
 BFDOT_v 0.10 1110 010 . 1 1 . . @qrrr_s
+BFMLAL_v0.10 1110 110 . 1 1 . . @qrrr_h
 
 ### Advanced SIMD scalar x indexed element
 
@@ -1031,6 +1032,7 @@ UDOT_vi 0.10  10 ..  1110 . 0 . . 
  @qrrx_s
 SUDOT_vi0.00  00 ..   . 0 . .   @qrrx_s
 USDOT_vi0.00  10 ..   . 0 . .   @qrrx_s
 BFDOT_vi0.00  01 ..   . 0 . .   @qrrx_s
+BFMLAL_vi   0.00  11 ..   . 0 . .   @qrrx_h
 
 # Floating-point conditional select
 
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index 0f44cd5aee..95be862dde 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -5606,6 +5606,19 @@ TRANS_FEAT(UDOT_v, aa64_dp, do_dot_vector, a, 
gen_helper_gvec_udot_b)
 TRANS_FEAT(USDOT_v, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_usdot_b)
 TRANS_FEAT(BFDOT_v, aa64_bf16, do_dot_vector, a, gen_helper_gvec_bfdot)
 
+static bool trans_BFMLAL_v(DisasContext *s, arg_qrrr_e *a)
+{
+if (!dc_isar_feature(aa64_bf16, s)) {
+return false;
+}
+if (fp_access_check(s)) {
+/* Q bit selects BFMLALB vs BFMLALT. */
+gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, false, a->q,
+  gen_helper_gvec_bfmlal);
+}
+return true;
+}
+
 /*
  * Advanced SIMD scalar/vector x indexed element
  */
@@ -5946,6 +5959,20 @@ TRANS_FEAT(USDOT_vi, aa64_i8mm, do_dot_vector_idx, a,
 TRANS_FEAT(BFDOT_vi, aa64_bf16, do_dot_vector_idx, a,
gen_helper_gvec_bfdot_idx)
 
+static bool trans_BFMLAL_vi(DisasContext *s, arg_qrrx_e *a)
+{
+if (!dc_isar_feature(aa64_bf16, s)) {
+return false;
+}
+if (fp_access_check(s)) {
+/* Q bit selects BFMLALB vs BFMLALT. */
+gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, 0,
+  (a->idx << 1) | a->q,
+  gen_helper_gvec_bfmlal_idx);
+}
+return true;
+}
+
 /*
  * Advanced SIMD scalar pairwise
  */
@@ -10952,23 +10979,13 @@ static void 
disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn)
 }
 feature = dc_isar_feature(aa64_bf16, s);
 break;
-case 0x1f:
-switch (size) {
-case 3: /* BFMLAL{B,T} */
-feature = dc_isar_feature(aa64_bf16, s);
-break;
-default:
-case 1: /* BFDOT */
-unallocated_encoding(s);
-return;
-}
-break;
 default:
 case 0x02: /* SDOT (vector) */
 case 0x03: /* USDOT */
 case 0x10: /* SQRDMLAH (vector) */
 case 0x11: /* SQRDMLSH (vector) */
 case 0x12: /* UDOT (vector) */
+case 0x1f: /* BFDOT / BFMLAL */
 unallocated_encoding(s);
 return;
 }
@@ -11037,17 +11054,6 @@ static void 
disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn)
 case 0xd: /* BFMMLA */
 gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_bfmmla);
 return;
-case 0xf:
-switch (size) {
-case 3: /* BFMLAL{B,T} */
-gen_gvec_op4_fpst(s, 1, rd, rn, rm, rd, false, is_q,
-  gen_helper_gvec_bfmlal);
-break;
-default:
-g_assert_not_reached();
-}
-return;
-
 default:
 g_assert_not_reached();
 }
@@ -12051,24 +12057,6 @@ static void disas_simd_indexed(DisasContext *s, 
uint32_t insn)
 case 0x0b: /* SQDMULL, SQDMULL2 */
 is_long = true;
 break;
-case 0x0f:
-switch (size) {
-case 3: /* BFMLAL{B,T} */
-if (is_scalar || !dc_isar_feature(aa64_bf16, s)) {
-unallocated_encoding(s);
-return;
-}
-/* can't set is_fp without other incorrect size checks */
-size = MO_16;
-break;
-default:
-case 0: /* SUDOT */
-case 1: /* BFDOT */
-case 2: /* USDOT */
-unallocated_encoding(s);
-return;
-}
-break;
 case 0x11: /* FCMLA #0 */
 case 0x13: /* FCMLA #90 */
 case 0x15: /* FCMLA #180 */
@@ -12089,6 +12077,7 @@ static void disas_simd_indexed(DisasContext *s, 
uint32_t insn)
 case 0x0c: /* SQDMULH */
 case 0x0d: /* SQRDMULH */
 case 0x0e: /* SDOT */
+case 0x0f: /* 

[PATCH 01/13] target/arm: Fix VCMLA Dd, Dn, Dm[idx]

2024-06-24 Thread Richard Henderson
The inner loop, bounded by eltspersegment, must not be
larger than the outer loop, bounded by elements.

Cc: qemu-sta...@nongnu.org
Fixes: 18fc2405781 ("target/arm: Implement SVE fp complex multiply add 
(indexed)")
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2376
Signed-off-by: Richard Henderson 
---
 target/arm/tcg/vec_helper.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
index b05922b425..7b34cc98af 100644
--- a/target/arm/tcg/vec_helper.c
+++ b/target/arm/tcg/vec_helper.c
@@ -907,7 +907,7 @@ void HELPER(gvec_fcmlah_idx)(void *vd, void *vn, void *vm, 
void *va,
 intptr_t index = extract32(desc, SIMD_DATA_SHIFT + 2, 2);
 uint32_t neg_real = flip ^ neg_imag;
 intptr_t elements = opr_sz / sizeof(float16);
-intptr_t eltspersegment = 16 / sizeof(float16);
+intptr_t eltspersegment = MIN(16 / sizeof(float16), elements);
 intptr_t i, j;
 
 /* Shift boolean to the sign bit so we can xor to negate.  */
@@ -969,7 +969,7 @@ void HELPER(gvec_fcmlas_idx)(void *vd, void *vn, void *vm, 
void *va,
 intptr_t index = extract32(desc, SIMD_DATA_SHIFT + 2, 2);
 uint32_t neg_real = flip ^ neg_imag;
 intptr_t elements = opr_sz / sizeof(float32);
-intptr_t eltspersegment = 16 / sizeof(float32);
+intptr_t eltspersegment = MIN(16 / sizeof(float32), elements);
 intptr_t i, j;
 
 /* Shift boolean to the sign bit so we can xor to negate.  */
-- 
2.34.1




[PATCH 00/13] target/arm: AdvSIMD conversion, part 2

2024-06-24 Thread Richard Henderson
Convert another handful of instructions, plus fixes
for two issues that are related.


r~


Richard Henderson (13):
  target/arm: Fix VCMLA Dd, Dn, Dm[idx]
  target/arm: Fix SQDMULH (by element) with Q=0
  target/arm: Fix FJCVTZS vs flush-to-zero
  target/arm: Convert SQRDMLAH, SQRDMLSH to decodetree
  target/arm: Convert SDOT, UDOT to decodetree
  target/arm: Convert SUDOT, USDOT to decodetree
  target/arm: Convert BFDOT to decodetree
  target/arm: Convert BFMLALB, BFMLALT to decodetree
  target/arm: Convert BFMMLA, SMMLA, UMMLA, USMMLA to decodetree
  target/arm: Add data argument to do_fp3_vector
  target/arm: Convert FCADD to decodetree
  target/arm: Convert FCMLA to decodetree
  target/arm: Delete dead code from disas_simd_indexed

 target/arm/helper.h   |  10 +
 target/arm/tcg/a64.decode |  42 ++
 target/arm/tcg/translate-a64.c| 811 +-
 target/arm/tcg/vec_helper.c   | 100 +++-
 target/arm/vfp_helper.c   |  18 +-
 tests/tcg/aarch64/test-2375.c |  20 +
 tests/tcg/aarch64/Makefile.target |   3 +-
 7 files changed, 422 insertions(+), 582 deletions(-)
 create mode 100644 tests/tcg/aarch64/test-2375.c

-- 
2.34.1




Re: [PATCH v4 5/5] blockdev: mirror: check for target's cluster size when using bitmap

2024-06-24 Thread Vladimir Sementsov-Ogievskiy

On 21.05.24 15:20, Fiona Ebner wrote:

When using mirror with a bitmap and the target does not do COW and is
a diff image, i.e. one that should only contain the delta and was
not synced to previously, a too large cluster size for the target can
be problematic. In particular, when the mirror sends data to the
target aligned to the jobs granularity, but not aligned to the larger
target image's cluster size, the target's cluster would be allocated
but only be filled partially. When rebasing such a diff image later,
the corresponding cluster of the base image would get "masked" and the
part of the cluster not in the diff image is not accessible anymore.

Unfortunately, it is not always possible to check for the target
image's cluster size, e.g. when it's NBD. Because the limitation is
already documented in the QAPI description for the @bitmap parameter
and it's only required for special diff image use-case, simply skip
the check then.

Signed-off-by: Fiona Ebner 
---
  blockdev.c | 57 ++
  tests/qemu-iotests/tests/mirror-bitmap |  6 +++
  tests/qemu-iotests/tests/mirror-bitmap.out |  7 +++
  3 files changed, 70 insertions(+)

diff --git a/blockdev.c b/blockdev.c
index 4f72a72dc7..468974108e 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2769,6 +2769,59 @@ void qmp_blockdev_backup(BlockdevBackup *backup, Error 
**errp)
  blockdev_do_action(, errp);
  }
  
+static int blockdev_mirror_check_bitmap_granularity(BlockDriverState *target,

+BdrvDirtyBitmap *bitmap,
+Error **errp)
+{
+int ret;
+BlockDriverInfo bdi;
+uint32_t bitmap_granularity;
+
+GLOBAL_STATE_CODE();
+GRAPH_RDLOCK_GUARD_MAINLOOP();
+
+if (bdrv_backing_chain_next(target)) {
+/*
+ * No need to worry about creating clusters with partial data when the
+ * target does COW.
+ */
+return 0;
+}
+
+/*
+ * If there is no backing file on the target, we cannot rely on COW if our
+ * backup cluster size is smaller than the target cluster size. Even for
+ * targets with a backing file, try to avoid COW if possible.


"Even for targets with" - I don't follow. We do "return 0" already above for 
such targets?


+ */
+ret = bdrv_get_info(target, &bdi);
+if (ret == -ENOTSUP) {
+/*
+ * Ignore if unable to get the info, e.g. when target is NBD. It's only
+ * relevant for syncing to a diff image and the documentation already
+ * states that the target's cluster size needs to be small enough then.
+ */
+return 0;
+} else if (ret < 0) {
+error_setg_errno(errp, -ret,
+"Couldn't determine the cluster size of the target image, "
+"which has no backing file");
+return ret;
+}
+
+bitmap_granularity = bdrv_dirty_bitmap_granularity(bitmap);
+if (bitmap_granularity < bdi.cluster_size ||
+bitmap_granularity % bdi.cluster_size != 0) {
+error_setg(errp, "Bitmap granularity %u is not a multiple of the "
+   "target image's cluster size %u and the target image has "
+   "no backing file",
+   bitmap_granularity, bdi.cluster_size);
+return -EINVAL;
+}
+
+return 0;
+}
+
+
  /* Parameter check and block job starting for drive mirroring.
   * Caller should hold @device and @target's aio context (must be the same).
   **/
@@ -2863,6 +2916,10 @@ static void blockdev_mirror_common(const char *job_id, 
BlockDriverState *bs,
  return;
  }
  
+if (blockdev_mirror_check_bitmap_granularity(target, bitmap, errp)) {

+return;
+}
+
  if (bdrv_dirty_bitmap_check(bitmap, BDRV_BITMAP_DEFAULT, errp)) {
  return;
  }
diff --git a/tests/qemu-iotests/tests/mirror-bitmap 
b/tests/qemu-iotests/tests/mirror-bitmap
index 37bbe0f241..e8cd482a19 100755
--- a/tests/qemu-iotests/tests/mirror-bitmap
+++ b/tests/qemu-iotests/tests/mirror-bitmap
@@ -584,6 +584,12 @@ def test_mirror_api():
  bitmap=bitmap)
  log('')
  
+log("-- Test bitmap with too small granularity to non-COW target --\n")

+vm.qmp_log("block-dirty-bitmap-add", node=drive0.node,
+   name="bitmap-small", granularity=GRANULARITY)
+blockdev_mirror(drive0.vm, drive0.node, "mirror_target", "full",
+job_id='api_job', bitmap="bitmap-small")
+log('')
  
  def main():

  for bsync_mode in ("never", "on-success", "always"):
diff --git a/tests/qemu-iotests/tests/mirror-bitmap.out 
b/tests/qemu-iotests/tests/mirror-bitmap.out
index 5c8acc1d69..af605f3803 100644
--- a/tests/qemu-iotests/tests/mirror-bitmap.out
+++ b/tests/qemu-iotests/tests/mirror-bitmap.out
@@ -3189,3 +3189,10 @@ qemu_img compare "TEST_DIR/PID-img" 

Re: [PATCH v4 4/5] iotests: add test for bitmap mirror

2024-06-24 Thread Vladimir Sementsov-Ogievskiy

On 21.05.24 15:20, Fiona Ebner wrote:

From: Fabian Grünbichler

heavily based on/practically forked off iotest 257 for bitmap backups,
but:


really, heavily. Making a duplication is always a bad idea. Could we instead just 
add test-cases to 257?



- no writes to filter node 'mirror-top' between completion and
finalization, as those seem to deadlock?


Could you be a bit more specific? If guest writes may lead to a deadlock, 
that's a bug, isn't it?



--
Best regards,
Vladimir




Re: [PATCH v4 3/5] mirror: allow specifying working bitmap

2024-06-24 Thread Vladimir Sementsov-Ogievskiy

On 21.05.24 15:20, Fiona Ebner wrote:

From: John Snow 

for the mirror job. The bitmap's granularity is used as the job's
granularity.

The new @bitmap parameter is marked unstable in the QAPI and can
currently only be used for @sync=full mode.

Clusters initially dirty in the bitmap as well as new writes are
copied to the target.

Using block-dirty-bitmap-clear and block-dirty-bitmap-merge API,
callers can simulate the three kinds of @BitmapSyncMode (which is used
by backup):
1. always: default, just pass bitmap as working bitmap.
2. never: copy bitmap and pass copy to the mirror job.
3. on-success: copy bitmap and pass copy to the mirror job and if
successful, merge bitmap into original afterwards.

When the target image is a non-COW "diff image", i.e. one that was not
used as the target of a previous mirror and the target image's cluster
size is larger than the bitmap's granularity, or when
@copy-mode=write-blocking is used, there is a pitfall, because the
cluster in the target image will be allocated, but not contain all the
data corresponding to the same region in the source image.

An idea to avoid the limitation would be to mark clusters which are
affected by unaligned writes and are not allocated in the target image
dirty, so they would be copied fully later. However, for migration,
the invariant that an actively synced mirror stays actively synced
(unless an error happens) is useful, because without that invariant,
migration might inactivate block devices when mirror still got work
to do and run into an assertion failure [0].

Another approach would be to read the missing data from the source
upon unaligned writes to be able to write the full target cluster
instead.

But certain targets like NBD do not allow querying the cluster size.
To avoid limiting/breaking the use case of syncing to an existing
target, which is arguably more common than the diff image use case,
document the limitation in QAPI.

This patch was originally based on one by Ma Haocong, but it has since
been modified pretty heavily, first by John and then again by Fiona.

[0]: 
https://lore.kernel.org/qemu-devel/1db7f571-cb7f-c293-04cc-cd856e060...@proxmox.com/

Suggested-by: Ma Haocong 
Signed-off-by: Ma Haocong 
Signed-off-by: John Snow 
[FG: switch to bdrv_dirty_bitmap_merge_internal]
Signed-off-by: Fabian Grünbichler 
Signed-off-by: Thomas Lamprecht 
[FE: rebase for 9.1
  get rid of bitmap mode parameter
  use caller-provided bitmap as working bitmap
  turn bitmap parameter experimental]
Signed-off-by: Fiona Ebner 
Acked-by: Markus Armbruster 
---
  block/mirror.c | 80 +-
  blockdev.c | 44 +++---
  include/block/block_int-global-state.h |  5 +-
  qapi/block-core.json   | 35 ++-
  tests/unit/test-block-iothread.c   |  2 +-
  5 files changed, 141 insertions(+), 25 deletions(-)

diff --git a/block/mirror.c b/block/mirror.c
index ca23d6ef65..d3d0698116 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -73,6 +73,11 @@ typedef struct MirrorBlockJob {
  size_t buf_size;
  int64_t bdev_length;
  unsigned long *cow_bitmap;
+/*
+ * Whether the bitmap is created locally or provided by the caller (for
+ * incremental sync).
+ */
+bool dirty_bitmap_is_local;
  BdrvDirtyBitmap *dirty_bitmap;
  BdrvDirtyBitmapIter *dbi;
  uint8_t *buf;
@@ -691,7 +696,11 @@ static int mirror_exit_common(Job *job)
  bdrv_unfreeze_backing_chain(mirror_top_bs, target_bs);
  }
  
-bdrv_release_dirty_bitmap(s->dirty_bitmap);

+if (s->dirty_bitmap_is_local) {
+bdrv_release_dirty_bitmap(s->dirty_bitmap);
+} else {
+bdrv_enable_dirty_bitmap(s->dirty_bitmap);
+}
  
  /* Make sure that the source BDS doesn't go away during bdrv_replace_node,

   * before we can call bdrv_drained_end */
@@ -820,6 +829,16 @@ static void mirror_abort(Job *job)
  assert(ret == 0);
  }
  
+/* Always called after commit/abort. */

+static void mirror_clean(Job *job)
+{
+MirrorBlockJob *s = container_of(job, MirrorBlockJob, common.job);
+
+if (!s->dirty_bitmap_is_local && s->dirty_bitmap) {
+bdrv_dirty_bitmap_set_busy(s->dirty_bitmap, false);
+}


why not do that in existing mirror_exit_common, where we already do 
release/enable?


+}
+



--
Best regards,
Vladimir




Re: [PULL 00/19] SD/MMC patches for 2024-06-24

2024-06-24 Thread Richard Henderson

On 6/24/24 06:14, Philippe Mathieu-Daudé wrote:

The following changes since commit c9ba79baca7c673098361e3a687f72d458e0d18a:

   Merge tag 'pull-target-arm-20240622' 
of https://git.linaro.org/people/pmaydell/qemu-arm into staging (2024-06-22 
09:56:49 -0700)

are available in the Git repository at:

   https://github.com/philmd/qemu.git  tags/sdmmc-20240624

for you to fetch changes up to 76ae9a231487a2b127c90bcb657fd42a1f6c06f8:

   hw/sd/sdcard: Add comments around registers and commands (2024-06-24 
15:08:40 +0200)


SD/MMC patches queue

One fix and various cleanups for the SD card model.


Applied, thanks.  Please update https://wiki.qemu.org/ChangeLog/9.1 as 
appropriate.


r~




[v2 1/1] hw/i386/acpi-build: add OSHP method support for SHPC driver load

2024-06-24 Thread Shiyuan Gao via
The SHPC driver fails to load on the i440fx machine. The dmesg output shows
that the OS cannot get control of SHPC hotplug, and hotplugging a device to
the PCI bridge fails when the SHPC Native type is used:

  [3.336059] shpchp :00:03.0: Requesting control of SHPC hotplug via OSHP 
(\_SB_.PCI0.S28_)
  [3.337408] shpchp :00:03.0: Requesting control of SHPC hotplug via OSHP 
(\_SB_.PCI0)
  [3.338710] shpchp :00:03.0: Cannot get control of SHPC hotplug

Add OSHP method support to transfer control to the operating system.
After this, the SHPC driver loads successfully and hotplugging a device to
the PCI bridge succeeds when the SHPC Native type is used.

  [1.703975] shpchp :00:03.0: Requesting control of SHPC hotplug via OSHP 
(\_SB_.PCI0.S18_)
  [1.704934] shpchp :00:03.0: Requesting control of SHPC hotplug via OSHP 
(\_SB_.PCI0)
  [1.705855] shpchp :00:03.0: Gained control of SHPC hotplug (\_SB_.PCI0)
  [1.707054] shpchp :00:03.0: HPC vendor_id 1b36 device_id 1 ss_vid 0 
ss_did 0

Signed-off-by: Shiyuan Gao 
---
v1 -> v2:
* add quote PCI firmware spec 3.0
* explain why an empty method is enough
---

 hw/i386/acpi-build.c | 19 +++
 1 file changed, 19 insertions(+)

diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index f4e366f64f..00f8abedf6 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -1412,6 +1412,23 @@ static void build_acpi0017(Aml *table)
 aml_append(table, scope);
 }
 
+/*
+ * PCI Firmware Specification 3.0
+ * 4.8. The OSHP Control Method
+ */
+static Aml *build_oshp_method(void)
+{
+Aml *method;
+
+/*
+ * We don't use ACPI to control the SHPC, so just return
+ * success is enough.
+ */
+method = aml_method("OSHP", 0, AML_NOTSERIALIZED);
+aml_append(method, aml_return(aml_int(0x0)));
+return method;
+}
+
 static void
 build_dsdt(GArray *table_data, BIOSLinker *linker,
AcpiPmInfo *pm, AcpiMiscInfo *misc,
@@ -1452,6 +1469,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
 aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0A03")));
 aml_append(dev, aml_name_decl("_UID", aml_int(pcmc->pci_root_uid)));
 aml_append(dev, aml_pci_edsm());
+aml_append(dev, build_oshp_method());
 aml_append(sb_scope, dev);
 aml_append(dsdt, sb_scope);
 
@@ -1586,6 +1604,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
 aml_append(dev, build_q35_osc_method(true));
 } else {
 aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0A03")));
+aml_append(dev, build_oshp_method());
 }
 
 if (numa_node != NUMA_NODE_UNASSIGNED) {
-- 
2.36.1




Re: [PATCH 1/1] include/qemu: Provide a C++ compatible version of typeof_strip_qual

2024-06-24 Thread Roman Kiryanov
Hi Philippe, thank you for looking.

On Mon, Jun 24, 2024 at 7:27 PM Philippe Mathieu-Daudé
 wrote:
> In particular this patch seems contained well enough
> to be carried in forks where C++ _is_ used.

Will you agree to take #ifdef __cplusplus  and #error to the QEMU side
in atomic.h and
we will keep atomic.hpp on our side? The error message looks better
when atomic.hpp
is somewhere near.

Regards,
Roman.



Re: [PATCH 1/1] include/qemu: Provide a C++ compatible version of typeof_strip_qual

2024-06-24 Thread Philippe Mathieu-Daudé

Hi Felix,

On 24/6/24 22:56, Felix Wu wrote:

From: Roman Kiryanov 

to use the QEMU headers with a C++ compiler.

Signed-off-by: Felix Wu 
Signed-off-by: Roman Kiryanov 
---
  include/qemu/atomic.h   |  8 
  include/qemu/atomic.hpp | 38 ++
  2 files changed, 46 insertions(+)
  create mode 100644 include/qemu/atomic.hpp

diff --git a/include/qemu/atomic.h b/include/qemu/atomic.h
index 99110abefb..aeaecc440a 100644
--- a/include/qemu/atomic.h
+++ b/include/qemu/atomic.h
@@ -20,6 +20,13 @@
  /* Compiler barrier */
  #define barrier()   ({ asm volatile("" ::: "memory"); (void)0; })
  
+#ifdef __cplusplus

+
+#ifndef typeof_strip_qual
+#error Use the typeof_strip_qual(expr) definition from atomic.hpp on C++ 
builds.
+#endif
+
+#else  /* __cplusplus */
  /* The variable that receives the old value of an atomically-accessed
   * variable must be non-qualified, because atomic builtins return values
   * through a pointer-type argument as in __atomic_load(, , MODEL).
@@ -61,6 +68,7 @@
  __builtin_types_compatible_p(typeof(expr), const volatile unsigned 
short), \
  (unsigned short)1,
 \
(expr)+0))
+#endif  /* __cplusplus */
  
  #ifndef __ATOMIC_RELAXED

  #error "Expecting C11 atomic ops"
diff --git a/include/qemu/atomic.hpp b/include/qemu/atomic.hpp
new file mode 100644
index 00..5844e3d427
--- /dev/null
+++ b/include/qemu/atomic.hpp
@@ -0,0 +1,38 @@
+/*
+ * The C++ definition for typeof_strip_qual used in atomic.h.
+ *
+ * Copyright (C) 2024 Google, Inc.
+ *
+ * Author: Roman Kiryanov 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ * See docs/devel/atomics.rst for discussion about the guarantees each
+ * atomic primitive is meant to provide.
+ */
+
+#ifndef QEMU_ATOMIC_HPP
+#define QEMU_ATOMIC_HPP
+
+#include <type_traits>
+
+/* Match the integer promotion behavior of typeof_strip_qual, see atomic.h */
+template <typename T> struct typeof_strip_qual_cpp { using result = 
decltype(+T(0)); };
+
+template <> struct typeof_strip_qual_cpp<bool> { using result = bool; };
+template <> struct typeof_strip_qual_cpp<signed char> { using result = signed 
char; };
+template <> struct typeof_strip_qual_cpp<unsigned char> { using result = 
unsigned char; };
+template <> struct typeof_strip_qual_cpp<signed short> { using result = signed 
short; };
+template <> struct typeof_strip_qual_cpp<unsigned short> { using result = 
unsigned short; };
+
+#define typeof_strip_qual(expr) \
+typeof_strip_qual_cpp< \
+std::remove_cv< \
+std::remove_reference< \
+decltype(expr) \
+>::type \
+>::type \
+>::result
+
+#endif /* QEMU_ATOMIC_HPP */


As mentioned previously by Thomas, Daniel and Peter, mainstream QEMU
doesn't use C++ and isn't being built-tested for it. I'm not against
trying to keep the code C++ compatible, but I don't see the point of
adding C++ files in the code base. In particular this patch seems
contained well enough to be carried in forks where C++ _is_ used.

Regards,

Phil.



Re: [PATCH 1/2] qom: Rename Object::class into Object::klass

2024-06-24 Thread Philippe Mathieu-Daudé

Hi Felix,

On 24/6/24 22:43, Felix Wu wrote:

From: Roman Kiryanov 

'class' is a C++ keyword and it prevents
using the QEMU headers with a C++ compiler.

Google-Bug-Id: 331190993


I asked Roman twice about this tag meaning:
https://lore.kernel.org/qemu-devel/e865d8e3-e768-4b1f-86d3-aeabe8f1d...@linaro.org/
https://lore.kernel.org/qemu-devel/09b7e7e1-30a6-49d0-a5f8-9cfc62884...@linaro.org/
Since you are taking his work, do you mind clarifying?

Please include a cover letter for your series:
https://www.qemu.org/docs/master/devel/submitting-a-patch.html#include-a-meaningful-cover-letter

Also for headers refactors, enabling scripts/git.orderfile helps
reviewers.

Since you are posting different C++ enablement cleanups,
I suggest you add a section in our docs/devel/style.rst
requesting to keep headers C++ compatible, by not using
C++ reserved keywords, etc...
In particular because the mainstream project is not build-testing
for C++, thus we will likely merge patches breaking C++ and
make your life harder. That said, a C++ header smoke-build job
in our CI could help.


Change-Id: I9ab7d2d77edef654a9c7b7cb9cd01795a6ed65a2
Signed-off-by: Felix Wu 
Signed-off-by: Roman Kiryanov 
---
  hw/core/qdev-properties-system.c |  2 +-
  include/exec/memory.h|  2 +-
  include/qom/object.h |  2 +-
  qom/object.c | 90 
  4 files changed, 48 insertions(+), 48 deletions(-)




diff --git a/include/qom/object.h b/include/qom/object.h
index 13d3a655dd..7afdb261a8 100644
--- a/include/qom/object.h
+++ b/include/qom/object.h
@@ -153,7 +153,7 @@ struct ObjectClass
  struct Object
  {
  /* private: */
-ObjectClass *class;
+ObjectClass *klass;
  ObjectFree *free;
  GHashTable *properties;
  uint32_t ref;


I suppose the OBJECT_CLASS / OBJECT_CLASS_CHECK / OBJECT_GET_CLASS
macros aren't compiled so "class" isn't a problem there.

Since it isn't worse than our INTERFACE_CLASS() use:

Reviewed-by: Philippe Mathieu-Daudé 

Regards,

Phil.



Re: [PATCH v2] scsi-disk: Fix crash for VM configured with USB CDROM after live migration

2024-06-24 Thread Yong Huang
On Mon, Jun 24, 2024 at 6:06 PM Thomas Huth  wrote:

> On 10/06/2024 19.02, Paolo Bonzini wrote:
> > From: Hyman Huang 
> >
> > For VMs configured with the USB CDROM device:
> >
> > -drive
> file=/path/to/local/file,id=drive-usb-disk0,media=cdrom,readonly=on...
> > -device usb-storage,drive=drive-usb-disk0,id=usb-disk0...
> >
> > QEMU process may crash after live migration, to reproduce the issue,
> > configure VM (Guest OS ubuntu 20.04 or 21.10) with the following XML:
> >
> > 
> >
> >
> >
> >
> >
> > 
> > 
> >
> > Do the live migration repeatedly, crash may happen after live migration,
> > trace log at the source before live migration is as follows:
> >
> > 324808@1711972823.521945:usb_uhci_frame_start nr 319
> > 324808@1711972823.521978:usb_uhci_qh_load qh 0x35cb5400
> > 324808@1711972823.521989:usb_uhci_qh_load qh 0x35cb5480
> > 324808@1711972823.521997:usb_uhci_td_load qh 0x35cb5480, td 0x35cbe000,
> ctrl 0x0, token 0xffe07f69
> > 324808@1711972823.522010:usb_uhci_td_nextqh qh 0x35cb5480, td 0x35cbe000
> > 324808@1711972823.522022:usb_uhci_qh_load qh 0x35cb5680
> > 324808@1711972823.522030:usb_uhci_td_load qh 0x35cb5680, td 0x75ac5180,
> ctrl 0x1980, token 0x3c903e1
> > 324808@1711972823.522045:usb_uhci_packet_add token 0x103e1, td
> 0x75ac5180
> > 324808@1711972823.522056:usb_packet_state_change bus 0, port 2, ep 2,
> packet 0x559f9ba14b00, state undef -> setup
> > 324808@1711972823.522079:usb_msd_cmd_submit lun 0, tag 0x472, flags
> 0x0080, len 10, data-len 8
> > 324808@1711972823.522107:scsi_req_parsed target 0 lun 0 tag 1138
> command 74 dir 1 length 8
> > 324808@1711972823.522124:scsi_req_parsed_lba target 0 lun 0 tag 1138
> command 74 lba 4096
> > 324808@1711972823.522139:scsi_req_alloc target 0 lun 0 tag 1138
> > 324808@1711972823.522169:scsi_req_continue target 0 lun 0 tag 1138
> > 324808@1711972823.522181:scsi_req_data target 0 lun 0 tag 1138 len 8
> > 324808@1711972823.522194:usb_packet_state_change bus 0, port 2, ep 2,
> packet 0x559f9ba14b00, state setup -> complete
> > 324808@1711972823.522209:usb_uhci_packet_complete_success token
> 0x103e1, td 0x75ac5180
> > 324808@1711972823.522219:usb_uhci_packet_del token 0x103e1, td
> 0x75ac5180
> > 324808@1711972823.522232:usb_uhci_td_complete qh 0x35cb5680, td
> 0x75ac5180
> >
> > trace log at the destination after live migration is as follows:
> >
> > 3286206@1711972823.951646:usb_uhci_frame_start nr 320
> > 3286206@1711972823.951663:usb_uhci_qh_load qh 0x35cb5100
> > 3286206@1711972823.951671:usb_uhci_qh_load qh 0x35cb5480
> > 3286206@1711972823.951680:usb_uhci_td_load qh 0x35cb5480, td
> 0x35cbe000, ctrl 0x100, token 0xffe07f69
> > 3286206@1711972823.951693:usb_uhci_td_nextqh qh 0x35cb5480, td
> 0x35cbe000
> > 3286206@1711972823.951702:usb_uhci_qh_load qh 0x35cb5700
> > 3286206@1711972823.951709:usb_uhci_td_load qh 0x35cb5700, td
> 0x75ac5240, ctrl 0x3980, token 0xe08369
> > 3286206@1711972823.951727:usb_uhci_queue_add token 0x8369
> > 3286206@1711972823.951735:usb_uhci_packet_add token 0x8369, td
> 0x75ac5240
> > 3286206@1711972823.951746:usb_packet_state_change bus 0, port 2, ep 1,
> packet 0x56066b2fb5a0, state undef -> setup
> > 3286206@1711972823.951766:usb_msd_data_in 8/8 (scsi 8)
> > 2024-04-01 12:00:24.665+: shutting down, reason=crashed
> >
> > The backtrace reveals the following:
> >
> > Program terminated with signal SIGSEGV, Segmentation fault.
> > 0  __memmove_sse2_unaligned_erms () at
> ../sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S:312
> > 312movq-8(%rsi,%rdx), %rcx
> > [Current thread is 1 (Thread 0x7f0a9025fc00 (LWP 3286206))]
> > (gdb) bt
> > 0  __memmove_sse2_unaligned_erms () at
> ../sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S:312
> > 1  memcpy (__len=8, __src=, __dest=) at
> /usr/include/bits/string_fortified.h:34
> > 2  iov_from_buf_full (iov=, iov_cnt=,
> offset=, buf=0x0, bytes=bytes@entry=8) at ../util/iov.c:33
> > 3  iov_from_buf (bytes=8, buf=, offset=,
> iov_cnt=, iov=)
> > at
> /usr/src/debug/qemu-6-6.2.0-75.7.oe1.smartx.git.40.x86_64/include/qemu/iov.h:49
> > 4  usb_packet_copy (p=p@entry=0x56066b2fb5a0, ptr=,
> bytes=bytes@entry=8) at ../hw/usb/core.c:636
> > 5  usb_msd_copy_data (s=s@entry=0x56066c62c770, p=p@entry=0x56066b2fb5a0)
> at ../hw/usb/dev-storage.c:186
> > 6  usb_msd_handle_data (dev=0x56066c62c770, p=0x56066b2fb5a0) at
> ../hw/usb/dev-storage.c:496
> > 7  usb_handle_packet (dev=0x56066c62c770, p=p@entry=0x56066b2fb5a0) at
> ../hw/usb/core.c:455
> > 8  uhci_handle_td (s=s@entry=0x56066bd5f210, q=0x56066bb7fbd0, q@entry=0x0,
> qh_addr=qh_addr@entry=902518530, td=td@entry=0x7fffe6e788f0,
> td_addr=,
> > int_mask=int_mask@entry=0x7fffe6e788e4) at ../hw/usb/hcd-uhci.c:885
> > 9  uhci_process_frame (s=s@entry=0x56066bd5f210) at
> ../hw/usb/hcd-uhci.c:1061
> > 10 uhci_frame_timer (opaque=opaque@entry=0x56066bd5f210) at
> ../hw/usb/hcd-uhci.c:1159
> > 11 timerlist_run_timers (timer_list=0x56066af26bd0) at
> 

RE: [PATCH v1 1/2] aspeed/soc: fix coverity issue

2024-06-24 Thread Jamin Lin
Hi Cedric,
> -Original Message-
> From: Cédric Le Goater 
> Sent: Monday, June 24, 2024 9:58 PM
> To: Peter Maydell ; Jamin Lin
> 
> Cc: Steven Lee ; Troy Lee
> ; Andrew Jeffery ; Joel
> Stanley ; open list:ASPEED BMCs ;
> open list:All patches CC here ; Troy Lee
> ; Yunlin Tang 
> Subject: Re: [PATCH v1 1/2] aspeed/soc: fix coverity issue
> 
> On 6/24/24 2:18 PM, Peter Maydell wrote:
> > On Wed, 19 Jun 2024 at 10:35, Jamin Lin 
> wrote:
> >>
> >> Fix coverity defect: DIVIDE_BY_ZERO.
> >>
> >> Signed-off-by: Jamin Lin 
> >> ---
> >>   hw/arm/aspeed_ast27x0.c | 6 ++
> >>   1 file changed, 6 insertions(+)
> >>
> >> diff --git a/hw/arm/aspeed_ast27x0.c b/hw/arm/aspeed_ast27x0.c index
> >> b6876b4862..d14a46df6f 100644
> >> --- a/hw/arm/aspeed_ast27x0.c
> >> +++ b/hw/arm/aspeed_ast27x0.c
> >> @@ -211,6 +211,12 @@ static void aspeed_ram_capacity_write(void
> *opaque, hwaddr addr, uint64_t data,
> >>   ram_size = object_property_get_uint(OBJECT(>sdmc),
> "ram-size",
> >>   _abort);
> >>
> >> +if (!ram_size) {
> >> +qemu_log_mask(LOG_GUEST_ERROR,
> >> +  "%s: ram_size is zero",  __func__);
> >> +return;
> >> +}
> >> +
> >
> > Isn't this a QEMU bug rather than a guest error? The RAM size
> > presumably should never be zero unless the board set the ram-size
> > property on the SDMC incorrectly. So the SDMC device should check (and
> > return an error from its realize
> > method) that the ram-size property is valid,
> 
> That's the case in aspeed_sdmc_set_ram_size() which is called from the
> aspeed machine init routine when the ram size is set.
> 
> Setting the machine ram size to zero on the command line doesn't report an
> error though and the size is the default.
> 
> > and then here we can just assert(ram_size != 0).
> 
> Yes.
> 
> Jamin, could you please send a v2 with the commit logs update I proposed ?
> See the patches on my aspeed-9.1 branch.
I resent the v2 patch with your commit log, 
https://www.mail-archive.com/qemu-devel@nongnu.org/msg1050302.html
Do we need to drop this patch, 
https://www.mail-archive.com/qemu-devel@nongnu.org/msg1050301.html? 

Thanks-Jamin
> 
> Thanks,
> 
> C.


[PATCH v2 2/2] aspeed/sdmc: Remove extra R_MAIN_STATUS case

2024-06-24 Thread Jamin Lin via
Coverity reports that the newly added 'case R_MAIN_STATUS' is DEADCODE
because it can not be reached. This is because R_MAIN_STATUS is handled
before in the "Unprotected registers" switch statement. Remove it.

Fixes: Coverity CID 1547112
Signed-off-by: Jamin Lin 
Reviewed-by: Cédric Le Goater 
[ clg: Rewrote commit log ]
Signed-off-by: Cédric Le Goater 
---
 hw/misc/aspeed_sdmc.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/hw/misc/aspeed_sdmc.c b/hw/misc/aspeed_sdmc.c
index 93e2e29ead..94eed9264d 100644
--- a/hw/misc/aspeed_sdmc.c
+++ b/hw/misc/aspeed_sdmc.c
@@ -589,7 +589,6 @@ static void aspeed_2700_sdmc_write(AspeedSDMCState *s, 
uint32_t reg,
 case R_INT_STATUS:
 case R_INT_CLEAR:
 case R_INT_MASK:
-case R_MAIN_STATUS:
 case R_ERR_STATUS:
 case R_ECC_FAIL_STATUS:
 case R_ECC_FAIL_ADDR:
-- 
2.25.1




[PATCH v2 0/2] Fix coverity issues for AST2700

2024-06-24 Thread Jamin Lin via
change from v1:
aspeed/soc: coverity defect: DIVIDE_BY_ZERO
aspeed/sdmc: coverity defect: Control flow issues (DEADCODE)

change from v2:
add more commit log from reviewer, Cédric.

Jamin Lin (2):
  aspeed/soc: Fix possible divide by zero
  aspeed/sdmc: Remove extra R_MAIN_STATUS case

 hw/arm/aspeed_ast27x0.c | 6 ++
 hw/misc/aspeed_sdmc.c   | 1 -
 2 files changed, 6 insertions(+), 1 deletion(-)

-- 
2.25.1




[PATCH v2 1/2] aspeed/soc: Fix possible divide by zero

2024-06-24 Thread Jamin Lin via
Coverity reports a possible DIVIDE_BY_ZERO issue regarding the
"ram_size" object property. This can not happen because RAM has
predefined valid sizes per SoC. Nevertheless, add a test to
close the issue.

Fixes: Coverity CID 1547113
Signed-off-by: Jamin Lin 
Reviewed-by: Cédric Le Goater 
[ clg: Rewrote commit log ]
Signed-off-by: Cédric Le Goater 
---
 hw/arm/aspeed_ast27x0.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/hw/arm/aspeed_ast27x0.c b/hw/arm/aspeed_ast27x0.c
index b6876b4862..d14a46df6f 100644
--- a/hw/arm/aspeed_ast27x0.c
+++ b/hw/arm/aspeed_ast27x0.c
@@ -211,6 +211,12 @@ static void aspeed_ram_capacity_write(void *opaque, hwaddr 
addr, uint64_t data,
 ram_size = object_property_get_uint(OBJECT(>sdmc), "ram-size",
 _abort);
 
+if (!ram_size) {
+qemu_log_mask(LOG_GUEST_ERROR,
+  "%s: ram_size is zero",  __func__);
+return;
+}
+
 /*
  * Emulate ddr capacity hardware behavior.
  * If writes the data to the address which is beyond the ram size,
-- 
2.25.1




Re: [PATCH v2] target/riscv: fix instructions count handling in icount mode

2024-06-24 Thread Alistair Francis
On Tue, Jun 18, 2024 at 9:28 PM Clément Léger  wrote:
>
> When icount is enabled, rather than returning the virtual CPU time, we
> should return the instruction count itself. Add an instructions bool
> parameter to get_ticks() to correctly return icount_get_raw() when
> icount_enabled() == 1 and instruction count is queried. This will modify
> the existing behavior which was returning an instructions count close to
> the number of cycles (CPI ~= 1).
>
> Signed-off-by: Clément Léger 

Thanks!

Applied to riscv-to-apply.next

Alistair

>
> ---
>
> v2:
>  - Apply checkpatch and fixed missing braces
>
> ---
>  target/riscv/csr.c | 30 +-
>  1 file changed, 17 insertions(+), 13 deletions(-)
>
> diff --git a/target/riscv/csr.c b/target/riscv/csr.c
> index 58ef7079dc..b8915e32a2 100644
> --- a/target/riscv/csr.c
> +++ b/target/riscv/csr.c
> @@ -762,14 +762,18 @@ static RISCVException write_vcsr(CPURISCVState *env, 
> int csrno,
>  }
>
>  /* User Timers and Counters */
> -static target_ulong get_ticks(bool shift)
> +static target_ulong get_ticks(bool shift, bool instructions)
>  {
>  int64_t val;
>  target_ulong result;
>
>  #if !defined(CONFIG_USER_ONLY)
>  if (icount_enabled()) {
> -val = icount_get();
> +if (instructions) {
> +val = icount_get_raw();
> +} else {
> +val = icount_get();
> +}
>  } else {
>  val = cpu_get_host_ticks();
>  }
> @@ -804,14 +808,14 @@ static RISCVException read_timeh(CPURISCVState *env, 
> int csrno,
>  static RISCVException read_hpmcounter(CPURISCVState *env, int csrno,
>target_ulong *val)
>  {
> -*val = get_ticks(false);
> +*val = get_ticks(false, (csrno == CSR_INSTRET));
>  return RISCV_EXCP_NONE;
>  }
>
>  static RISCVException read_hpmcounterh(CPURISCVState *env, int csrno,
> target_ulong *val)
>  {
> -*val = get_ticks(true);
> +*val = get_ticks(true, (csrno == CSR_INSTRETH));
>  return RISCV_EXCP_NONE;
>  }
>
> @@ -875,11 +879,11 @@ static RISCVException write_mhpmcounter(CPURISCVState 
> *env, int csrno,
>  int ctr_idx = csrno - CSR_MCYCLE;
>  PMUCTRState *counter = >pmu_ctrs[ctr_idx];
>  uint64_t mhpmctr_val = val;
> +bool instr = riscv_pmu_ctr_monitor_instructions(env, ctr_idx);
>
>  counter->mhpmcounter_val = val;
> -if (riscv_pmu_ctr_monitor_cycles(env, ctr_idx) ||
> -riscv_pmu_ctr_monitor_instructions(env, ctr_idx)) {
> -counter->mhpmcounter_prev = get_ticks(false);
> +if (riscv_pmu_ctr_monitor_cycles(env, ctr_idx) || instr) {
> +counter->mhpmcounter_prev = get_ticks(false, instr);
>  if (ctr_idx > 2) {
>  if (riscv_cpu_mxl(env) == MXL_RV32) {
>  mhpmctr_val = mhpmctr_val |
> @@ -902,12 +906,12 @@ static RISCVException write_mhpmcounterh(CPURISCVState 
> *env, int csrno,
>  PMUCTRState *counter = >pmu_ctrs[ctr_idx];
>  uint64_t mhpmctr_val = counter->mhpmcounter_val;
>  uint64_t mhpmctrh_val = val;
> +bool instr = riscv_pmu_ctr_monitor_instructions(env, ctr_idx);
>
>  counter->mhpmcounterh_val = val;
>  mhpmctr_val = mhpmctr_val | (mhpmctrh_val << 32);
> -if (riscv_pmu_ctr_monitor_cycles(env, ctr_idx) ||
> -riscv_pmu_ctr_monitor_instructions(env, ctr_idx)) {
> -counter->mhpmcounterh_prev = get_ticks(true);
> +if (riscv_pmu_ctr_monitor_cycles(env, ctr_idx) || instr) {
> +counter->mhpmcounterh_prev = get_ticks(true, instr);
>  if (ctr_idx > 2) {
>  riscv_pmu_setup_timer(env, mhpmctr_val, ctr_idx);
>  }
> @@ -926,6 +930,7 @@ static RISCVException riscv_pmu_read_ctr(CPURISCVState 
> *env, target_ulong *val,
>   counter->mhpmcounter_prev;
>  target_ulong ctr_val = upper_half ? counter->mhpmcounterh_val :
>  counter->mhpmcounter_val;
> +bool instr = riscv_pmu_ctr_monitor_instructions(env, ctr_idx);
>
>  if (get_field(env->mcountinhibit, BIT(ctr_idx))) {
>  /*
> @@ -946,9 +951,8 @@ static RISCVException riscv_pmu_read_ctr(CPURISCVState 
> *env, target_ulong *val,
>   * The kernel computes the perf delta by subtracting the current value 
> from
>   * the value it initialized previously (ctr_val).
>   */
> -if (riscv_pmu_ctr_monitor_cycles(env, ctr_idx) ||
> -riscv_pmu_ctr_monitor_instructions(env, ctr_idx)) {
> -*val = get_ticks(upper_half) - ctr_prev + ctr_val;
> +if (riscv_pmu_ctr_monitor_cycles(env, ctr_idx) || instr) {
> +*val = get_ticks(upper_half, instr) - ctr_prev + ctr_val;
>  } else {
>  *val = ctr_val;
>  }
> --
> 2.45.2
>
>



Re: [PATCH] i386: revert defaults to 'legacy-vm-type=true' for SEV(-ES) guests

2024-06-24 Thread Michael Roth via
On Fri, Jun 14, 2024 at 11:39:24AM +0100, Daniel P. Berrangé wrote:
> The KVM_SEV_INIT2 ioctl was only introduced in Linux 6.10, which will
> only have been released for a bit over a month when QEMU 9.1 is
> released.
> 
> The SEV(-ES) support in QEMU has been present since 2.12 dating back
> to 2018. With this in mind, the overwhelming majority of users of
> SEV(-ES) are unlikely to be running Linux >= 6.10, any time in the
> foreseeable future.
> 
> IOW, defaulting new QEMU to 'legacy-vm-type=false' means latest QEMU
> machine types will be broken out of the box for most SEV(-ES) users.
> Even if the kernel is new enough, it also affects the guest measurement,
> which means that their existing tools for validating measurements will
> also be broken by the new default.
> 
> This is not a sensible default choice at this point in time. Revert to
> the historical behaviour which is compatible with what most users are
> currently running.

Part of the reason for the change is that SEV-ES measurements are
already affected by some short-comings of the legacy KVM_SEV_ES_INIT
API. Namely, if the kvm_amd.debug-swap module param is used to enable
that SEV-ES feature, then that feature will get enabled on the KVM side
and change the initial guest measurement (due to VMSA_FEATURES field
of the vCPU's VMSA changing), and userspace has no way to control that
on a per-VM basis, so measurement for any particular invocation will
be somewhat random depending on the system configuration and kernel
level.

I think that's why users of newer QEMU machine types are highly
encouraged to switch to the new KVM_SEV_INIT2 interface. I do see this
causing issues for older QEMU machine types that previously relied on
the legacy interface, since we do want to avoid measurement changing
for an existing guest that was previously working on an older kernel,
which is why this flag defaults to true for pre-9.1 machine types. But
on newer kernels there is still potential for issues relating to
debug-swap (and other VMSA features that get added to KVM in the future)
and how they may cause measurement changes underneath the covers if we
don't allow userspace the ability to control what is/isn't disabled.

Because of that I think it's less headache for userspace to have to
opt-in to legacy interface when using newer machine models. It should be
a conscious decision to keep using this deprecated interface with known
limitations that could affect measurement in unexpected ways.

I was actually planning to go the other direction on this because
currently for 9.1+, QEMU will try to use KVM_SEV_INIT2 if
KVM_CAP_VM_TYPES advertises its availability, but otherwise fall back to
the above KVM_SEV_ES_INIT interface and potentially inherit the issues
noted above. So I was planning on getting rid of the fallback, and
basically only allowing legacy KVM_SEV_ES_INIT for 9.1+ if the user
manually sets sev_guest->legacy_vm_type via cmdline.

-Mike

> 
> This can be re-evaluated a few years down the line, though it is more
> likely that all attention will be on SEV-SNP by this time. Distro
> vendors may still choose to change this default downstream to align
> with their new major releases where they can guarantee the kernel
> will always provide the required functionality.
> 
> Signed-off-by: Daniel P. Berrangé 
> ---
>  hw/i386/pc.c  |  1 -
>  qapi/qom.json | 12 ++--
>  target/i386/sev.c |  7 +++
>  3 files changed, 13 insertions(+), 7 deletions(-)
> 
> diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> index 0469af00a7..b65843c559 100644
> --- a/hw/i386/pc.c
> +++ b/hw/i386/pc.c
> @@ -82,7 +82,6 @@
>  GlobalProperty pc_compat_9_0[] = {
>  { TYPE_X86_CPU, "x-l1-cache-per-thread", "false" },
>  { TYPE_X86_CPU, "guest-phys-bits", "0" },
> -{ "sev-guest", "legacy-vm-type", "true" },
>  { TYPE_X86_CPU, "legacy-multi-node", "on" },
>  };
>  const size_t pc_compat_9_0_len = G_N_ELEMENTS(pc_compat_9_0);
> diff --git a/qapi/qom.json b/qapi/qom.json
> index 8bd299265e..714ebeec8b 100644
> --- a/qapi/qom.json
> +++ b/qapi/qom.json
> @@ -912,12 +912,12 @@
>  # @handle: SEV firmware handle (default: 0)
>  #
>  # @legacy-vm-type: Use legacy KVM_SEV_INIT KVM interface for creating the VM.
> -#  The newer KVM_SEV_INIT2 interface syncs additional vCPU
> -#  state when initializing the VMSA structures, which will
> -#  result in a different guest measurement. Set this to
> -#  maintain compatibility with older QEMU or kernel versions
> -#  that rely on legacy KVM_SEV_INIT behavior.
> -#  (default: false) (since 9.1)
> +#  The newer KVM_SEV_INIT2 interface, from Linux >= 6.10, 
> syncs
> +#  additional vCPU state when initializing the VMSA 
> structures,
> +#  which will result in a different guest measurement. Toggle
> +#  this to control compatibility with older QEMU or kernel
> +# 

Re: [PULL 00/12] maintainer updates (plugins, gdbstub)

2024-06-24 Thread Richard Henderson

On 6/24/24 03:18, Alex Bennée wrote:

The following changes since commit c9ba79baca7c673098361e3a687f72d458e0d18a:

   Merge tag 'pull-target-arm-20240622' 
of https://git.linaro.org/people/pmaydell/qemu-arm into staging (2024-06-22 
09:56:49 -0700)

are available in the Git repository at:

   https://gitlab.com/stsquad/qemu.git  tags/pull-maintainer-june24-240624-1

for you to fetch changes up to fce3d48038e9f38e3e342a59f76c7f9f9b043ed2:

   accel/tcg: Avoid unnecessary call overhead from qemu_plugin_vcpu_mem_cb 
(2024-06-24 10:15:23 +0100)


maintainer updates (plugins, gdbstub):

   - add missing include guard comment to gdbstub.h
   - move gdbstub enums into separate header
   - move qtest_[get|set]_virtual_clock functions
   - allow plugins to manipulate the virtual clock
   - introduce an Instructions Per Second plugin
   - fix inject_mem_cb rw mask tests
   - allow qemu_plugin_vcpu_mem_cb to shortcut when no memory cbs


Applied, thanks.  Please update https://wiki.qemu.org/ChangeLog/9.1 as 
appropriate.


r~




Help improve 32-bit testing

2024-06-24 Thread Richard Henderson

Hiya,

I've just discovered a 32-bit build issue that is probably 3 weeks old.

While we still support 32-bit builds at all, I would request that we improve our 
cross-i686 testing.  For instance: we have cross-i686-user and cross-i686-tci.  There is 
some system build testing in the tci job, but (rightfully) not everything.


I would like a full cross-i686-system target that builds all targets, and I would like the 
debian-i686-cross image on which we base these to be more complete -- ideally, exactly 
matching x86_64.  In particular, CONFIG_SEV is not detected within the current docker 
image, which is where the current build error is located.


Do you have time to look at this?


r~



Re: [PULL 30/45] i386/sev: Add handling to encrypt/finalize guest launch data

2024-06-24 Thread Richard Henderson

On 6/3/24 23:43, Paolo Bonzini wrote:

From: Brijesh Singh 

Process any queued up launch data and encrypt/measure it into the SNP
guest instance prior to initial guest launch.

This also updates the KVM_SEV_SNP_LAUNCH_UPDATE call to handle partial
update responses.

Signed-off-by: Brijesh Singh 
Co-developed-by: Michael Roth 
Signed-off-by: Michael Roth 
Co-developed-by: Pankaj Gupta 
Signed-off-by: Pankaj Gupta 
Message-ID: <20240530111643.1091816-17-pankaj.gu...@amd.com>
Signed-off-by: Paolo Bonzini 
---
  target/i386/sev.c| 112 ++-
  target/i386/trace-events |   2 +
  2 files changed, 113 insertions(+), 1 deletion(-)

diff --git a/target/i386/sev.c b/target/i386/sev.c
index e89b87d2f55..ef2e592ca76 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -756,6 +756,76 @@ out:
  return ret;
  }
  
+static const char *

+snp_page_type_to_str(int type)
+{
+switch (type) {
+case KVM_SEV_SNP_PAGE_TYPE_NORMAL: return "Normal";
+case KVM_SEV_SNP_PAGE_TYPE_ZERO: return "Zero";
+case KVM_SEV_SNP_PAGE_TYPE_UNMEASURED: return "Unmeasured";
+case KVM_SEV_SNP_PAGE_TYPE_SECRETS: return "Secrets";
+case KVM_SEV_SNP_PAGE_TYPE_CPUID: return "Cpuid";
+default: return "unknown";
+}
+}
+
+static int
+sev_snp_launch_update(SevSnpGuestState *sev_snp_guest,
+  SevLaunchUpdateData *data)
+{
+int ret, fw_error;
+struct kvm_sev_snp_launch_update update = {0};
+
+if (!data->hva || !data->len) {
+error_report("SNP_LAUNCH_UPDATE called with invalid address"
+ "/ length: %p / %lx",
+ data->hva, data->len);


This patch does not compile on 32-bit x86:

../src/target/i386/sev.c: In function 'sev_snp_launch_update':
../src/target/i386/sev.c:886:22: error: format '%lx' expects argument of type 'long 
unsigned int', but argument 3 has type 'uint64_t' {aka 'long long unsigned int'} 
[-Werror=format=]

  886 | error_report("SNP_LAUNCH_UPDATE called with invalid address"
  |  ^~~
  887 |  "/ length: %p / %lx",
  888 |  data->hva, data->len);
  | ~
  | |
  | uint64_t {aka long long unsigned 
int}
../src/target/i386/sev.c:935:22: error: format '%lx' expects argument of type 'long 
unsigned int', but argument 2 has type 'hwaddr' {aka 'long long unsigned int'} 
[-Werror=format=]

  935 | error_report("SEV-SNP: expected update of GPA range %lx-%lx,"
  |  ^~~~
  936 |  "got GPA range %lx-%llx",
  937 |  data->gpa, data->gpa + data->len, data->gpa,
  |  ~
  |  |
  |  hwaddr {aka long long unsigned int}
../src/target/i386/sev.c:935:22: error: format '%lx' expects argument of type 'long 
unsigned int', but argument 3 has type 'long long unsigned int' [-Werror=format=]

  935 | error_report("SEV-SNP: expected update of GPA range %lx-%lx,"
  |  ^~~~
  936 |  "got GPA range %lx-%llx",
  937 |  data->gpa, data->gpa + data->len, data->gpa,
  | ~
  |   |
  |   long long unsigned int
../src/target/i386/sev.c:935:22: error: format '%lx' expects argument of type 'long 
unsigned int', but argument 4 has type 'hwaddr' {aka 'long long unsigned int'} 
[-Werror=format=]

  935 | error_report("SEV-SNP: expected update of GPA range %lx-%lx,"
  |  ^~~~
  936 |  "got GPA range %lx-%llx",
  937 |  data->gpa, data->gpa + data->len, data->gpa,
  |~
  ||
  |hwaddr {aka long long 
unsigned int}

In file included from ../src/target/i386/sev.c:22:
../src/target/i386/sev.c: In function 
'sev_snp_guest_set_guest_visible_workarounds':
/home/rth/qemu/src/include/qapi/error.h:319:25: error: format '%lu' expects argument of 
type 'long unsigned int', but argument 6 has type 'gsize' {aka 'unsigned int'} 
[-Werror=format=]

  319 | (fmt), ## __VA_ARGS__)
  | ^
../src/target/i386/sev.c:2149:9: note: in expansion of macro 'error_setg'
 2149 | error_setg(errp, "parameter length of %lu exceeds max of %lu",
  | ^~

[PULL 23/42] virtio-iommu: Remove the implementation of iommu_set_iova_range

2024-06-24 Thread Cédric Le Goater
From: Eric Auger 

Now that we use PCIIOMMUOps to convey information about usable IOVA
ranges we no longer need to implement the iommu_set_iova_ranges IOMMU MR
callback.

Signed-off-by: Eric Auger 
Reviewed-by: Zhenzhong Duan 
Reviewed-by: Michael S. Tsirkin 
---
 hw/virtio/virtio-iommu.c | 67 
 1 file changed, 67 deletions(-)

diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c
index 
a4c0cceb65f2452de186da10be2f449ec45fe672..b9a7ddcd142553f525fae7bdb6ed3adaae30ffed
 100644
--- a/hw/virtio/virtio-iommu.c
+++ b/hw/virtio/virtio-iommu.c
@@ -1351,72 +1351,6 @@ static int 
virtio_iommu_set_page_size_mask(IOMMUMemoryRegion *mr,
 return 0;
 }
 
-/**
- * virtio_iommu_set_iova_ranges: Conveys the usable IOVA ranges
- *
- * The function turns those into reserved ranges. Once some
- * reserved ranges have been set, new reserved regions cannot be
- * added outside of the original ones.
- *
- * @mr: IOMMU MR
- * @iova_ranges: list of usable IOVA ranges
- * @errp: error handle
- */
-static int virtio_iommu_set_iova_ranges(IOMMUMemoryRegion *mr,
-GList *iova_ranges,
-Error **errp)
-{
-IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr);
-GList *current_ranges = sdev->host_resv_ranges;
-GList *l, *tmp, *new_ranges = NULL;
-int ret = -EINVAL;
-
-/* check that each new resv region is included in an existing one */
-if (sdev->host_resv_ranges) {
-range_inverse_array(iova_ranges,
-_ranges,
-0, UINT64_MAX);
-
-for (tmp = new_ranges; tmp; tmp = tmp->next) {
-Range *newr = (Range *)tmp->data;
-bool included = false;
-
-for (l = current_ranges; l; l = l->next) {
-Range * r = (Range *)l->data;
-
-if (range_contains_range(r, newr)) {
-included = true;
-break;
-}
-}
-if (!included) {
-goto error;
-}
-}
-/* all new reserved ranges are included in existing ones */
-ret = 0;
-goto out;
-}
-
-if (sdev->probe_done) {
-warn_report("%s: Notified about new host reserved regions after probe",
-mr->parent_obj.name);
-}
-
-range_inverse_array(iova_ranges,
->host_resv_ranges,
-0, UINT64_MAX);
-rebuild_resv_regions(sdev);
-
-return 0;
-error:
-error_setg(errp, "IOMMU mr=%s Conflicting host reserved ranges set!",
-   mr->parent_obj.name);
-out:
-g_list_free_full(new_ranges, g_free);
-return ret;
-}
-
 static void virtio_iommu_system_reset(void *opaque)
 {
 VirtIOIOMMU *s = opaque;
@@ -1742,7 +1676,6 @@ static void 
virtio_iommu_memory_region_class_init(ObjectClass *klass,
 imrc->replay = virtio_iommu_replay;
 imrc->notify_flag_changed = virtio_iommu_notify_flag_changed;
 imrc->iommu_set_page_size_mask = virtio_iommu_set_page_size_mask;
-imrc->iommu_set_iova_ranges = virtio_iommu_set_iova_ranges;
 }
 
 static const TypeInfo virtio_iommu_info = {
-- 
2.45.2




[PULL 12/42] hw/pci: Introduce helper function pci_device_get_iommu_bus_devfn()

2024-06-24 Thread Cédric Le Goater
From: Zhenzhong Duan 

Extract out pci_device_get_iommu_bus_devfn() from
pci_device_iommu_address_space() to facilitate
implementation of pci_device_[set|unset]_iommu_device()
in following patch.

No functional change intended.

Signed-off-by: Yi Liu 
Signed-off-by: Yi Sun 
Signed-off-by: Nicolin Chen 
Signed-off-by: Zhenzhong Duan 
Reviewed-by: Eric Auger 
Reviewed-by: Michael S. Tsirkin 
---
 hw/pci/pci.c | 48 +---
 1 file changed, 45 insertions(+), 3 deletions(-)

diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 
324c1302d25f89c716d2fff06b3cd59471881a4f..02a4bb2af63feabd954ce23e9b02dd931c81ae9a
 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -2648,11 +2648,27 @@ static void pci_device_class_base_init(ObjectClass 
*klass, void *data)
 }
 }
 
-AddressSpace *pci_device_iommu_address_space(PCIDevice *dev)
+/*
+ * Get IOMMU root bus, aliased bus and devfn of a PCI device
+ *
+ * IOMMU root bus is needed by all call sites to call into iommu_ops.
+ * For call sites which don't need aliased BDF, passing NULL to
+ * aliased_[bus|devfn] is allowed.
+ *
+ * @piommu_bus: return root #PCIBus backed by an IOMMU for the PCI device.
+ *
+ * @aliased_bus: return aliased #PCIBus of the PCI device, optional.
+ *
+ * @aliased_devfn: return aliased devfn of the PCI device, optional.
+ */
+static void pci_device_get_iommu_bus_devfn(PCIDevice *dev,
+   PCIBus **piommu_bus,
+   PCIBus **aliased_bus,
+   int *aliased_devfn)
 {
 PCIBus *bus = pci_get_bus(dev);
 PCIBus *iommu_bus = bus;
-uint8_t devfn = dev->devfn;
+int devfn = dev->devfn;
 
 while (iommu_bus && !iommu_bus->iommu_ops && iommu_bus->parent_dev) {
 PCIBus *parent_bus = pci_get_bus(iommu_bus->parent_dev);
@@ -2693,7 +2709,33 @@ AddressSpace *pci_device_iommu_address_space(PCIDevice 
*dev)
 
 iommu_bus = parent_bus;
 }
-if (!pci_bus_bypass_iommu(bus) && iommu_bus->iommu_ops) {
+
+assert(0 <= devfn && devfn < PCI_DEVFN_MAX);
+assert(iommu_bus);
+
+if (pci_bus_bypass_iommu(bus) || !iommu_bus->iommu_ops) {
+iommu_bus = NULL;
+}
+
+*piommu_bus = iommu_bus;
+
+if (aliased_bus) {
+*aliased_bus = bus;
+}
+
+if (aliased_devfn) {
+*aliased_devfn = devfn;
+}
+}
+
+AddressSpace *pci_device_iommu_address_space(PCIDevice *dev)
+{
+PCIBus *bus;
+PCIBus *iommu_bus;
+int devfn;
+
+pci_device_get_iommu_bus_devfn(dev, &iommu_bus, &bus, &devfn);
+if (iommu_bus) {
 return iommu_bus->iommu_ops->get_address_space(bus,
  iommu_bus->iommu_opaque, devfn);
 }
-- 
2.45.2




[PULL 28/42] vfio/common: Move dirty tracking ranges update to helper

2024-06-24 Thread Cédric Le Goater
From: Joao Martins 

Separate the changes that update the ranges from the listener, to
make it reusable in preparation to expand its use to vIOMMU support.

Signed-off-by: Joao Martins 
Reviewed-by: Zhenzhong Duan 
Reviewed-by: Eric Auger 
Tested-by: Eric Auger 
[ clg: - Rebased on upstream
   - Introduced vfio_dirty_tracking_update_range()
   - Fixed typo in commit log ]
Signed-off-by: Cédric Le Goater 
---
 hw/vfio/common.c | 38 ++
 1 file changed, 22 insertions(+), 16 deletions(-)

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 
d48cd9b9361a92d184e423ffc60aabaff40fb487..fe215918bdf66ddbe3c5db803e10ce1aa9756b90
 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -839,20 +839,11 @@ static bool vfio_section_is_vfio_pci(MemoryRegionSection 
*section,
 return false;
 }
 
-static void vfio_dirty_tracking_update(MemoryListener *listener,
-   MemoryRegionSection *section)
+static void vfio_dirty_tracking_update_range(VFIODirtyRanges *range,
+ hwaddr iova, hwaddr end,
+ bool update_pci)
 {
-VFIODirtyRangesListener *dirty = container_of(listener,
-  VFIODirtyRangesListener,
-  listener);
-VFIODirtyRanges *range = >ranges;
-hwaddr iova, end, *min, *max;
-
-if (!vfio_listener_valid_section(section, "tracking_update") ||
-!vfio_get_section_iova_range(dirty->bcontainer, section,
- , , NULL)) {
-return;
-}
+hwaddr *min, *max;
 
 /*
  * The address space passed to the dirty tracker is reduced to three 
ranges:
@@ -873,8 +864,7 @@ static void vfio_dirty_tracking_update(MemoryListener 
*listener,
  * The alternative would be an IOVATree but that has a much bigger runtime
  * overhead and unnecessary complexity.
  */
-if (vfio_section_is_vfio_pci(section, dirty->bcontainer) &&
-iova >= UINT32_MAX) {
+if (update_pci && iova >= UINT32_MAX) {
 min = >minpci64;
 max = >maxpci64;
 } else {
@@ -889,7 +879,23 @@ static void vfio_dirty_tracking_update(MemoryListener 
*listener,
 }
 
 trace_vfio_device_dirty_tracking_update(iova, end, *min, *max);
-return;
+}
+
+static void vfio_dirty_tracking_update(MemoryListener *listener,
+   MemoryRegionSection *section)
+{
+VFIODirtyRangesListener *dirty =
+container_of(listener, VFIODirtyRangesListener, listener);
+hwaddr iova, end;
+
+if (!vfio_listener_valid_section(section, "tracking_update") ||
+!vfio_get_section_iova_range(dirty->bcontainer, section,
+ &iova, &end, NULL)) {
+return;
+}
+
+vfio_dirty_tracking_update_range(&dirty->ranges, iova, end,
+  vfio_section_is_vfio_pci(section, dirty->bcontainer));
 }
 
 static const MemoryListener vfio_dirty_tracking_listener = {
-- 
2.45.2




[PULL 20/42] HostIOMMUDevice: Introduce get_iova_ranges callback

2024-06-24 Thread Cédric Le Goater
From: Eric Auger 

Introduce a new HostIOMMUDevice callback that allows retrieving
retrieve the usable IOVA ranges.

Implement this callback in the legacy VFIO and IOMMUFD VFIO
host iommu devices. This relies on the VFIODevice agent's
base container iova_ranges resource.

Signed-off-by: Eric Auger 
Reviewed-by: Zhenzhong Duan 
Reviewed-by: Michael S. Tsirkin 
---
 include/sysemu/host_iommu_device.h |  8 
 hw/vfio/container.c| 16 
 hw/vfio/iommufd.c  | 16 
 3 files changed, 40 insertions(+)

diff --git a/include/sysemu/host_iommu_device.h 
b/include/sysemu/host_iommu_device.h
index 
3e5f058e7ba80491eae04dc73c6957f7269150cf..40e0fa13efb5c023bc9b46fc99bf553cb93adf24
 100644
--- a/include/sysemu/host_iommu_device.h
+++ b/include/sysemu/host_iommu_device.h
@@ -80,6 +80,14 @@ struct HostIOMMUDeviceClass {
  * i.e., HOST_IOMMU_DEVICE_CAP_AW_BITS.
  */
 int (*get_cap)(HostIOMMUDevice *hiod, int cap, Error **errp);
+/**
+ * @get_iova_ranges: Return the list of usable iova_ranges along with
+ * @hiod Host IOMMU device
+ *
+ * @hiod: handle to the host IOMMU device
+ * @errp: error handle
+ */
+GList* (*get_iova_ranges)(HostIOMMUDevice *hiod, Error **errp);
 };
 
 /*
diff --git a/hw/vfio/container.c b/hw/vfio/container.c
index 
b728b978a26d49b5a2895fd4d1add8f0a57787ad..c48749c089a67ee4d0e6b8dd975562e2938500cd
 100644
--- a/hw/vfio/container.c
+++ b/hw/vfio/container.c
@@ -1164,12 +1164,28 @@ static int hiod_legacy_vfio_get_cap(HostIOMMUDevice 
*hiod, int cap,
 }
 }
 
+static GList *
+hiod_legacy_vfio_get_iova_ranges(HostIOMMUDevice *hiod, Error **errp)
+{
+VFIODevice *vdev = hiod->agent;
+GList *l = NULL;
+
+g_assert(vdev);
+
+if (vdev->bcontainer) {
+l = g_list_copy(vdev->bcontainer->iova_ranges);
+}
+
+return l;
+}
+
 static void hiod_legacy_vfio_class_init(ObjectClass *oc, void *data)
 {
 HostIOMMUDeviceClass *hioc = HOST_IOMMU_DEVICE_CLASS(oc);
 
 hioc->realize = hiod_legacy_vfio_realize;
 hioc->get_cap = hiod_legacy_vfio_get_cap;
+hioc->get_iova_ranges = hiod_legacy_vfio_get_iova_ranges;
 };
 
 static const TypeInfo types[] = {
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
index 
dbdae1adbb66f9c8547659320ce4436825efe1a1..e502081c2ad9eda31769176f875fef60a77e2b43
 100644
--- a/hw/vfio/iommufd.c
+++ b/hw/vfio/iommufd.c
@@ -645,11 +645,27 @@ static bool hiod_iommufd_vfio_realize(HostIOMMUDevice 
*hiod, void *opaque,
 return true;
 }
 
+static GList *
+hiod_iommufd_vfio_get_iova_ranges(HostIOMMUDevice *hiod, Error **errp)
+{
+VFIODevice *vdev = hiod->agent;
+GList *l = NULL;
+
+g_assert(vdev);
+
+if (vdev->bcontainer) {
+l = g_list_copy(vdev->bcontainer->iova_ranges);
+}
+
+return l;
+}
+
 static void hiod_iommufd_vfio_class_init(ObjectClass *oc, void *data)
 {
 HostIOMMUDeviceClass *hiodc = HOST_IOMMU_DEVICE_CLASS(oc);
 
 hiodc->realize = hiod_iommufd_vfio_realize;
+hiodc->get_iova_ranges = hiod_iommufd_vfio_get_iova_ranges;
 };
 
 static const TypeInfo types[] = {
-- 
2.45.2




[PULL 04/42] backends/iommufd: Introduce TYPE_HOST_IOMMU_DEVICE_IOMMUFD[_VFIO] devices

2024-06-24 Thread Cédric Le Goater
From: Zhenzhong Duan 

TYPE_HOST_IOMMU_DEVICE_IOMMUFD represents a host IOMMU device under
iommufd backend. It is abstract, because it is going to be derived
into VFIO or VDPA type'd device.

It will have its own .get_cap() implementation.

TYPE_HOST_IOMMU_DEVICE_IOMMUFD_VFIO is a sub-class of
TYPE_HOST_IOMMU_DEVICE_IOMMUFD and represents a VFIO type'd host IOMMU
device under iommufd backend. It will be created during VFIO device
attaching and passed to vIOMMU.

It will have its own .realize() implementation.

Opportunistically, add missed header to include/sysemu/iommufd.h.

Suggested-by: Cédric Le Goater 
Signed-off-by: Yi Liu 
Signed-off-by: Zhenzhong Duan 
Reviewed-by: Eric Auger 
Reviewed-by: Michael S. Tsirkin 
---
 include/hw/vfio/vfio-common.h |  3 +++
 include/sysemu/iommufd.h  | 16 
 backends/iommufd.c| 35 ++-
 hw/vfio/iommufd.c |  5 -
 4 files changed, 41 insertions(+), 18 deletions(-)

diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 
75b167979ac221e59b2681b2704c03778823fbb0..56d171721164991b408073488330bf1d79104970
 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -32,6 +32,7 @@
 #include "sysemu/sysemu.h"
 #include "hw/vfio/vfio-container-base.h"
 #include "sysemu/host_iommu_device.h"
+#include "sysemu/iommufd.h"
 
 #define VFIO_MSG_PREFIX "vfio %s: "
 
@@ -173,6 +174,8 @@ typedef struct VFIOGroup {
 } VFIOGroup;
 
 #define TYPE_HOST_IOMMU_DEVICE_LEGACY_VFIO TYPE_HOST_IOMMU_DEVICE 
"-legacy-vfio"
+#define TYPE_HOST_IOMMU_DEVICE_IOMMUFD_VFIO \
+TYPE_HOST_IOMMU_DEVICE_IOMMUFD "-vfio"
 
 typedef struct VFIODMABuf {
 QemuDmaBuf *buf;
diff --git a/include/sysemu/iommufd.h b/include/sysemu/iommufd.h
index 
293bfbe967215381532b8267227dde61fa9157b7..f6e6d6e1f9f4e9b1c581100912e4864b08c9ed7d
 100644
--- a/include/sysemu/iommufd.h
+++ b/include/sysemu/iommufd.h
@@ -1,9 +1,23 @@
+/*
+ * iommufd container backend declaration
+ *
+ * Copyright (C) 2024 Intel Corporation.
+ * Copyright Red Hat, Inc. 2024
+ *
+ * Authors: Yi Liu 
+ *  Eric Auger 
+ *  Zhenzhong Duan 
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
 #ifndef SYSEMU_IOMMUFD_H
 #define SYSEMU_IOMMUFD_H
 
 #include "qom/object.h"
 #include "exec/hwaddr.h"
 #include "exec/cpu-common.h"
+#include "sysemu/host_iommu_device.h"
 
 #define TYPE_IOMMUFD_BACKEND "iommufd"
 OBJECT_DECLARE_TYPE(IOMMUFDBackend, IOMMUFDBackendClass, IOMMUFD_BACKEND)
@@ -33,4 +47,6 @@ int iommufd_backend_map_dma(IOMMUFDBackend *be, uint32_t 
ioas_id, hwaddr iova,
 ram_addr_t size, void *vaddr, bool readonly);
 int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id,
   hwaddr iova, ram_addr_t size);
+
+#define TYPE_HOST_IOMMU_DEVICE_IOMMUFD TYPE_HOST_IOMMU_DEVICE "-iommufd"
 #endif
diff --git a/backends/iommufd.c b/backends/iommufd.c
index 
c506afbdac4beddb7dac88f74f10544f7a083e58..012f18d8d802aea40798fe3368bbdca52634f95d
 100644
--- a/backends/iommufd.c
+++ b/backends/iommufd.c
@@ -208,23 +208,24 @@ int iommufd_backend_unmap_dma(IOMMUFDBackend *be, 
uint32_t ioas_id,
 return ret;
 }
 
-static const TypeInfo iommufd_backend_info = {
-.name = TYPE_IOMMUFD_BACKEND,
-.parent = TYPE_OBJECT,
-.instance_size = sizeof(IOMMUFDBackend),
-.instance_init = iommufd_backend_init,
-.instance_finalize = iommufd_backend_finalize,
-.class_size = sizeof(IOMMUFDBackendClass),
-.class_init = iommufd_backend_class_init,
-.interfaces = (InterfaceInfo[]) {
-{ TYPE_USER_CREATABLE },
-{ }
+static const TypeInfo types[] = {
+{
+.name = TYPE_IOMMUFD_BACKEND,
+.parent = TYPE_OBJECT,
+.instance_size = sizeof(IOMMUFDBackend),
+.instance_init = iommufd_backend_init,
+.instance_finalize = iommufd_backend_finalize,
+.class_size = sizeof(IOMMUFDBackendClass),
+.class_init = iommufd_backend_class_init,
+.interfaces = (InterfaceInfo[]) {
+{ TYPE_USER_CREATABLE },
+{ }
+}
+}, {
+.name = TYPE_HOST_IOMMU_DEVICE_IOMMUFD,
+.parent = TYPE_HOST_IOMMU_DEVICE,
+.abstract = true,
 }
 };
 
-static void register_types(void)
-{
-type_register_static(_backend_info);
-}
-
-type_init(register_types);
+DEFINE_TYPES(types)
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
index 
554f9a6292454f51015ab3a56df3fab6a482ccb7..e4a507d55c4df972fb4c43d31571022910bae493
 100644
--- a/hw/vfio/iommufd.c
+++ b/hw/vfio/iommufd.c
@@ -624,7 +624,10 @@ static const TypeInfo types[] = {
 .name = TYPE_VFIO_IOMMU_IOMMUFD,
 .parent = TYPE_VFIO_IOMMU,
 .class_init = vfio_iommu_iommufd_class_init,
-},
+}, {
+.name = TYPE_HOST_IOMMU_DEVICE_IOMMUFD_VFIO,
+.parent = TYPE_HOST_IOMMU_DEVICE_IOMMUFD,
+}
 };
 
 DEFINE_TYPES(types)
-- 
2.45.2




[PULL 10/42] backends/iommufd: Implement HostIOMMUDeviceClass::get_cap() handler

2024-06-24 Thread Cédric Le Goater
From: Zhenzhong Duan 

Suggested-by: Cédric Le Goater 
Signed-off-by: Zhenzhong Duan 
Reviewed-by: Eric Auger 
Reviewed-by: Michael S. Tsirkin 
---
 backends/iommufd.c | 23 +++
 1 file changed, 23 insertions(+)

diff --git a/backends/iommufd.c b/backends/iommufd.c
index 
c7e969d6f76dff8780efedde56b2015b3b8d616e..84fefbc9ee7a7228b0ed803132199fef5b56b1d7
 100644
--- a/backends/iommufd.c
+++ b/backends/iommufd.c
@@ -230,6 +230,28 @@ bool iommufd_backend_get_device_info(IOMMUFDBackend *be, 
uint32_t devid,
 return true;
 }
 
+static int hiod_iommufd_get_cap(HostIOMMUDevice *hiod, int cap, Error **errp)
+{
+HostIOMMUDeviceCaps *caps = &hiod->caps;
+
+switch (cap) {
+case HOST_IOMMU_DEVICE_CAP_IOMMU_TYPE:
+return caps->type;
+case HOST_IOMMU_DEVICE_CAP_AW_BITS:
+return caps->aw_bits;
+default:
+error_setg(errp, "%s: unsupported capability %x", hiod->name, cap);
+return -EINVAL;
+}
+}
+
+static void hiod_iommufd_class_init(ObjectClass *oc, void *data)
+{
+HostIOMMUDeviceClass *hioc = HOST_IOMMU_DEVICE_CLASS(oc);
+
+hioc->get_cap = hiod_iommufd_get_cap;
+};
+
 static const TypeInfo types[] = {
 {
 .name = TYPE_IOMMUFD_BACKEND,
@@ -246,6 +268,7 @@ static const TypeInfo types[] = {
 }, {
 .name = TYPE_HOST_IOMMU_DEVICE_IOMMUFD,
 .parent = TYPE_HOST_IOMMU_DEVICE,
+.class_init = hiod_iommufd_class_init,
 .abstract = true,
 }
 };
-- 
2.45.2




[PULL 21/42] HostIOMMUDevice: Store the aliased bus and devfn

2024-06-24 Thread Cédric Le Goater
From: Eric Auger 

Store the aliased bus and devfn in the HostIOMMUDevice.
This will be useful to handle information that is IOMMU group
specific rather than device specific (such as reserved
IOVA ranges).

Signed-off-by: Eric Auger 
Reviewed-by: Zhenzhong Duan 
Reviewed-by: Michael S. Tsirkin 
---
 include/sysemu/host_iommu_device.h | 2 ++
 hw/pci/pci.c   | 8 ++--
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/include/sysemu/host_iommu_device.h 
b/include/sysemu/host_iommu_device.h
index 
40e0fa13efb5c023bc9b46fc99bf553cb93adf24..ee6c813c8b2299ed1d1d3b34d143c20a8ec27400
 100644
--- a/include/sysemu/host_iommu_device.h
+++ b/include/sysemu/host_iommu_device.h
@@ -35,6 +35,8 @@ struct HostIOMMUDevice {
 
 char *name;
 void *agent; /* pointer to agent device, ie. VFIO or VDPA device */
+PCIBus *aliased_bus;
+int aliased_devfn;
 HostIOMMUDeviceCaps caps;
 };
 
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 
c8a8aab30646c5e37816f49f6ef9d1bdf8be241f..50b86d57907411ee7646b436dda53503eca33c13
 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -2745,11 +2745,15 @@ AddressSpace *pci_device_iommu_address_space(PCIDevice 
*dev)
 bool pci_device_set_iommu_device(PCIDevice *dev, HostIOMMUDevice *hiod,
  Error **errp)
 {
-PCIBus *iommu_bus;
+PCIBus *iommu_bus, *aliased_bus;
+int aliased_devfn;
 
 /* set_iommu_device requires device's direct BDF instead of aliased BDF */
-pci_device_get_iommu_bus_devfn(dev, &iommu_bus, NULL, NULL);
+pci_device_get_iommu_bus_devfn(dev, &iommu_bus,
+   &aliased_bus, &aliased_devfn);
 if (iommu_bus && iommu_bus->iommu_ops->set_iommu_device) {
+hiod->aliased_bus = aliased_bus;
+hiod->aliased_devfn = aliased_devfn;
 return iommu_bus->iommu_ops->set_iommu_device(pci_get_bus(dev),
   iommu_bus->iommu_opaque,
   dev->devfn, hiod, errp);
-- 
2.45.2




[PULL 34/42] vfio/container: Introduce vfio_create_container()

2024-06-24 Thread Cédric Le Goater
This routine allocates the QEMU struct type representing the VFIO
container. It is minimal currently and future changes will do more
initialization.

Reviewed-by: Zhenzhong Duan 
Reviewed-by: Eric Auger 
Tested-by: Eric Auger 
Signed-off-by: Cédric Le Goater 
---
 hw/vfio/container.c | 19 +++
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/hw/vfio/container.c b/hw/vfio/container.c
index 
bb6abe60ee29d5b69b494523c9002f53e1b2a3c8..a8691942791006f44f7a3c34b32c67ca51766182
 100644
--- a/hw/vfio/container.c
+++ b/hw/vfio/container.c
@@ -430,6 +430,16 @@ static bool vfio_set_iommu(VFIOContainer *container, int 
group_fd,
 return true;
 }
 
+static VFIOContainer *vfio_create_container(int fd, VFIOGroup *group,
+Error **errp)
+{
+VFIOContainer *container;
+
+container = g_malloc0(sizeof(*container));
+container->fd = fd;
+return container;
+}
+
 static int vfio_get_iommu_info(VFIOContainer *container,
struct vfio_iommu_type1_info **info)
 {
@@ -604,13 +614,14 @@ static bool vfio_connect_container(VFIOGroup *group, 
AddressSpace *as,
 goto close_fd_exit;
 }
 
-container = g_malloc0(sizeof(*container));
-container->fd = fd;
-bcontainer = >bcontainer;
-
+container = vfio_create_container(fd, group, errp);
+if (!container) {
+goto close_fd_exit;
+}
 if (!vfio_set_iommu(container, group->fd, errp)) {
 goto free_container_exit;
 }
+bcontainer = >bcontainer;
 
 if (!vfio_cpr_register_container(bcontainer, errp)) {
 goto free_container_exit;
-- 
2.45.2




[PULL 01/42] backends: Introduce HostIOMMUDevice abstract

2024-06-24 Thread Cédric Le Goater
From: Zhenzhong Duan 

A HostIOMMUDevice is an abstraction for an assigned device that is protected
by a physical IOMMU (aka host IOMMU). The userspace interaction with this
physical IOMMU can be done either through the VFIO IOMMU type 1 legacy
backend or the new iommufd backend. The assigned device can be a VFIO device
or a VDPA device. The HostIOMMUDevice is needed to interact with the host
IOMMU that protects the assigned device. It is especially useful when the
device is also protected by a virtual IOMMU, as the latter uses the translation
services of the physical IOMMU and is constrained by it. In that context the
HostIOMMUDevice can be passed to the virtual IOMMU to collect physical IOMMU
capabilities such as the supported address width. In the future, the virtual
IOMMU will use the HostIOMMUDevice to program the guest page tables in the
first translation stage of the physical IOMMU.

Introduce .realize() to initialize HostIOMMUDevice further after instance init.

Suggested-by: Cédric Le Goater 
Signed-off-by: Zhenzhong Duan 
Reviewed-by: Eric Auger 
Reviewed-by: Michael S. Tsirkin 
---
 MAINTAINERS|  2 ++
 include/sysemu/host_iommu_device.h | 53 ++
 backends/host_iommu_device.c   | 33 +++
 backends/meson.build   |  1 +
 4 files changed, 89 insertions(+)
 create mode 100644 include/sysemu/host_iommu_device.h
 create mode 100644 backends/host_iommu_device.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 
f144b5af449e786ce0d132bb4dc509b46da2e82b..19f67dc5d2153bbf994821ad49cc3a4d662192ef
 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2198,6 +2198,8 @@ M: Zhenzhong Duan 
 S: Supported
 F: backends/iommufd.c
 F: include/sysemu/iommufd.h
+F: backends/host_iommu_device.c
+F: include/sysemu/host_iommu_device.h
 F: include/qemu/chardev_open.h
 F: util/chardev_open.c
 F: docs/devel/vfio-iommufd.rst
diff --git a/include/sysemu/host_iommu_device.h 
b/include/sysemu/host_iommu_device.h
new file mode 100644
index 
..db47a16189a142a25887a835be9f8a8137fe00ee
--- /dev/null
+++ b/include/sysemu/host_iommu_device.h
@@ -0,0 +1,53 @@
+/*
+ * Host IOMMU device abstract declaration
+ *
+ * Copyright (C) 2024 Intel Corporation.
+ *
+ * Authors: Zhenzhong Duan 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#ifndef HOST_IOMMU_DEVICE_H
+#define HOST_IOMMU_DEVICE_H
+
+#include "qom/object.h"
+#include "qapi/error.h"
+
+#define TYPE_HOST_IOMMU_DEVICE "host-iommu-device"
+OBJECT_DECLARE_TYPE(HostIOMMUDevice, HostIOMMUDeviceClass, HOST_IOMMU_DEVICE)
+
+struct HostIOMMUDevice {
+Object parent_obj;
+
+char *name;
+};
+
+/**
+ * struct HostIOMMUDeviceClass - The base class for all host IOMMU devices.
+ *
+ * Different types of host devices (e.g., VFIO or VDPA device) or devices
+ * with different backend (e.g., VFIO legacy container or IOMMUFD backend)
+ * will have different implementations of the HostIOMMUDeviceClass.
+ */
+struct HostIOMMUDeviceClass {
+ObjectClass parent_class;
+
+/**
+ * @realize: initialize host IOMMU device instance further.
+ *
+ * Mandatory callback.
+ *
+ * @hiod: pointer to a host IOMMU device instance.
+ *
+ * @opaque: pointer to agent device of this host IOMMU device,
+ *  e.g., VFIO base device or VDPA device.
+ *
+ * @errp: pass an Error out when realize fails.
+ *
+ * Returns: true on success, false on failure.
+ */
+bool (*realize)(HostIOMMUDevice *hiod, void *opaque, Error **errp);
+};
+#endif
diff --git a/backends/host_iommu_device.c b/backends/host_iommu_device.c
new file mode 100644
index 
..8f2dda1beb9bbea27a61c17d439aeb19ec26cc90
--- /dev/null
+++ b/backends/host_iommu_device.c
@@ -0,0 +1,33 @@
+/*
+ * Host IOMMU device abstract
+ *
+ * Copyright (C) 2024 Intel Corporation.
+ *
+ * Authors: Zhenzhong Duan 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "sysemu/host_iommu_device.h"
+
+OBJECT_DEFINE_ABSTRACT_TYPE(HostIOMMUDevice,
+host_iommu_device,
+HOST_IOMMU_DEVICE,
+OBJECT)
+
+static void host_iommu_device_class_init(ObjectClass *oc, void *data)
+{
+}
+
+static void host_iommu_device_init(Object *obj)
+{
+}
+
+static void host_iommu_device_finalize(Object *obj)
+{
+HostIOMMUDevice *hiod = HOST_IOMMU_DEVICE(obj);
+
+g_free(hiod->name);
+}
diff --git a/backends/meson.build b/backends/meson.build
index 
8b2b111497f7c6cd5cb6ca50ec6d1474a543fc9f..106312f0c8b6d76f6aa6b9cc046b85e540c10f35
 100644
--- a/backends/meson.build
+++ b/backends/meson.build
@@ -16,6 +16,7 @@ if host_os != 'windows'
 endif
 if host_os == 'linux'
   system_ss.add(files('hostmem-memfd.c'))

[PULL 27/42] vfio: Remove unused declarations from vfio-common.h

2024-06-24 Thread Cédric Le Goater
These were forgotten in the recent cleanups.

Reviewed-by: Zhenzhong Duan 
Reviewed-by: Eric Auger 
Tested-by: Eric Auger 
Signed-off-by: Cédric Le Goater 
---
 include/hw/vfio/vfio-common.h | 4 
 1 file changed, 4 deletions(-)

diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 
776de8064f740784f95cab0311c5f15f50d60ffe..c19572f90b277193491020af28e8b5587f15bfd1
 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -207,10 +207,6 @@ typedef struct VFIODisplay {
 VFIOAddressSpace *vfio_get_address_space(AddressSpace *as);
 void vfio_put_address_space(VFIOAddressSpace *space);
 
-/* SPAPR specific */
-int vfio_spapr_container_init(VFIOContainer *container, Error **errp);
-void vfio_spapr_container_deinit(VFIOContainer *container);
-
 void vfio_disable_irqindex(VFIODevice *vbasedev, int index);
 void vfio_unmask_single_irqindex(VFIODevice *vbasedev, int index);
 void vfio_mask_single_irqindex(VFIODevice *vbasedev, int index);
-- 
2.45.2




[PULL 33/42] vfio/container: Introduce vfio_get_iommu_class_name()

2024-06-24 Thread Cédric Le Goater
Rework vfio_get_iommu_class() to return a literal class name instead
of a class object. We will need this name to instantiate the object
later on. Since the default case asserts, remove the error report, as
QEMU would abort before ever reaching it.

Reviewed-by: Zhenzhong Duan 
Reviewed-by: Eric Auger 
Tested-by: Eric Auger 
Signed-off-by: Cédric Le Goater 
---
 hw/vfio/container.c | 18 ++
 1 file changed, 6 insertions(+), 12 deletions(-)

diff --git a/hw/vfio/container.c b/hw/vfio/container.c
index 
589f37bc6d68dae18f9e46371f14d6952b2240c0..bb6abe60ee29d5b69b494523c9002f53e1b2a3c8
 100644
--- a/hw/vfio/container.c
+++ b/hw/vfio/container.c
@@ -373,24 +373,20 @@ static int vfio_get_iommu_type(int container_fd,
 /*
  * vfio_get_iommu_ops - get a VFIOIOMMUClass associated with a type
  */
-static const VFIOIOMMUClass *vfio_get_iommu_class(int iommu_type, Error **errp)
+static const char *vfio_get_iommu_class_name(int iommu_type)
 {
-ObjectClass *klass = NULL;
-
 switch (iommu_type) {
 case VFIO_TYPE1v2_IOMMU:
 case VFIO_TYPE1_IOMMU:
-klass = object_class_by_name(TYPE_VFIO_IOMMU_LEGACY);
+return TYPE_VFIO_IOMMU_LEGACY;
 break;
 case VFIO_SPAPR_TCE_v2_IOMMU:
 case VFIO_SPAPR_TCE_IOMMU:
-klass = object_class_by_name(TYPE_VFIO_IOMMU_SPAPR);
+return TYPE_VFIO_IOMMU_SPAPR;
 break;
 default:
 g_assert_not_reached();
 };
-
-return VFIO_IOMMU_CLASS(klass);
 }
 
 static bool vfio_set_iommu(VFIOContainer *container, int group_fd,
@@ -398,6 +394,7 @@ static bool vfio_set_iommu(VFIOContainer *container, int 
group_fd,
 {
 int iommu_type;
 const VFIOIOMMUClass *vioc;
+const char *vioc_name;
 
 iommu_type = vfio_get_iommu_type(container->fd, errp);
 if (iommu_type < 0) {
@@ -426,11 +423,8 @@ static bool vfio_set_iommu(VFIOContainer *container, int 
group_fd,
 
 container->iommu_type = iommu_type;
 
-vioc = vfio_get_iommu_class(iommu_type, errp);
-if (!vioc) {
-error_setg(errp, "No available IOMMU models");
-return false;
-}
+vioc_name = vfio_get_iommu_class_name(iommu_type);
+vioc = VFIO_IOMMU_CLASS(object_class_by_name(vioc_name));
 
 vfio_container_init(>bcontainer, vioc);
 return true;
-- 
2.45.2




[PULL 30/42] vfio/container: Introduce vfio_address_space_insert()

2024-06-24 Thread Cédric Le Goater
It prepares ground for a future change initializing the 'space' pointer
of VFIOContainerBase. The goal is to replace vfio_container_init() by
an .instance_init() handler when VFIOContainerBase is QOMified.

Reviewed-by: Zhenzhong Duan 
Reviewed-by: Eric Auger 
Tested-by: Eric Auger 
Signed-off-by: Cédric Le Goater 
---
 include/hw/vfio/vfio-common.h | 2 ++
 hw/vfio/common.c  | 6 ++
 hw/vfio/container.c   | 2 +-
 hw/vfio/iommufd.c | 2 +-
 4 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 
c19572f90b277193491020af28e8b5587f15bfd1..825d80130bd435fe50830c8ae5b7905d18104dd6
 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -206,6 +206,8 @@ typedef struct VFIODisplay {
 
 VFIOAddressSpace *vfio_get_address_space(AddressSpace *as);
 void vfio_put_address_space(VFIOAddressSpace *space);
+void vfio_address_space_insert(VFIOAddressSpace *space,
+   VFIOContainerBase *bcontainer);
 
 void vfio_disable_irqindex(VFIODevice *vbasedev, int index);
 void vfio_unmask_single_irqindex(VFIODevice *vbasedev, int index);
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 
f28641bad5cf4b71fcdc0a6c9d42b24c8d786248..8cdf26c6f5a490cfa02bdf1087a91948709aaa33
 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -1508,6 +1508,12 @@ void vfio_put_address_space(VFIOAddressSpace *space)
 }
 }
 
+void vfio_address_space_insert(VFIOAddressSpace *space,
+   VFIOContainerBase *bcontainer)
+{
+QLIST_INSERT_HEAD(&space->containers, bcontainer, next);
+}
+
 struct vfio_device_info *vfio_get_device_info(int fd)
 {
 struct vfio_device_info *info;
diff --git a/hw/vfio/container.c b/hw/vfio/container.c
index 
c48749c089a67ee4d0e6b8dd975562e2938500cd..0237c216987ff64a6d11bef8688bb000d93a7f09
 100644
--- a/hw/vfio/container.c
+++ b/hw/vfio/container.c
@@ -637,7 +637,7 @@ static bool vfio_connect_container(VFIOGroup *group, 
AddressSpace *as,
 vfio_kvm_device_add_group(group);
 
 QLIST_INIT(>group_list);
-QLIST_INSERT_HEAD(>containers, bcontainer, next);
+vfio_address_space_insert(space, bcontainer);
 
 group->container = container;
 QLIST_INSERT_HEAD(>group_list, group, container_next);
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
index 
e502081c2ad9eda31769176f875fef60a77e2b43..9f8f33e383a38827ceca0f73cb77f5ca6b123198
 100644
--- a/hw/vfio/iommufd.c
+++ b/hw/vfio/iommufd.c
@@ -358,7 +358,7 @@ static bool iommufd_cdev_attach(const char *name, 
VFIODevice *vbasedev,
 
 bcontainer = >bcontainer;
 vfio_container_init(bcontainer, space, iommufd_vioc);
-QLIST_INSERT_HEAD(>containers, bcontainer, next);
+vfio_address_space_insert(space, bcontainer);
 
 if (!iommufd_cdev_attach_container(vbasedev, container, errp)) {
 goto err_attach_container;
-- 
2.45.2




[PULL 22/42] virtio-iommu: Compute host reserved regions

2024-06-24 Thread Cédric Le Goater
From: Eric Auger 

Compute the host reserved regions in virtio_iommu_set_iommu_device().
The usable IOVA regions are retrieved from the HostIOMMUDevice.
The virtio_iommu_set_host_iova_ranges() helper turns usable regions
into complementary reserved regions while testing the inclusion
into existing ones. virtio_iommu_set_host_iova_ranges() reuses the
implementation of virtio_iommu_set_iova_ranges(), which will be
removed in subsequent patches. rebuild_resv_regions() is just moved.

Signed-off-by: Eric Auger 
Reviewed-by: Zhenzhong Duan 
Reviewed-by: Michael S. Tsirkin 
---
 hw/virtio/virtio-iommu.c | 147 ++-
 1 file changed, 113 insertions(+), 34 deletions(-)

diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c
index 
16c8ec3ca460a6d70e83b28787398f94dd16cc99..a4c0cceb65f2452de186da10be2f449ec45fe672
 100644
--- a/hw/virtio/virtio-iommu.c
+++ b/hw/virtio/virtio-iommu.c
@@ -498,11 +498,108 @@ get_host_iommu_device(VirtIOIOMMU *viommu, PCIBus *bus, 
int devfn) {
 return g_hash_table_lookup(viommu->host_iommu_devices, );
 }
 
+/**
+ * rebuild_resv_regions: rebuild resv regions with both the
+ * info of host resv ranges and property set resv ranges
+ */
+static int rebuild_resv_regions(IOMMUDevice *sdev)
+{
+GList *l;
+int i = 0;
+
+/* free the existing list and rebuild it from scratch */
+g_list_free_full(sdev->resv_regions, g_free);
+sdev->resv_regions = NULL;
+
+/* First add host reserved regions if any, all tagged as RESERVED */
+for (l = sdev->host_resv_ranges; l; l = l->next) {
+ReservedRegion *reg = g_new0(ReservedRegion, 1);
+Range *r = (Range *)l->data;
+
+reg->type = VIRTIO_IOMMU_RESV_MEM_T_RESERVED;
+range_set_bounds(>range, range_lob(r), range_upb(r));
+sdev->resv_regions = resv_region_list_insert(sdev->resv_regions, reg);
+trace_virtio_iommu_host_resv_regions(sdev->iommu_mr.parent_obj.name, i,
+ range_lob(>range),
+ range_upb(>range));
+i++;
+}
+/*
+ * then add higher priority reserved regions set by the machine
+ * through properties
+ */
+add_prop_resv_regions(sdev);
+return 0;
+}
+
+static int virtio_iommu_set_host_iova_ranges(VirtIOIOMMU *s, PCIBus *bus,
+ int devfn, GList *iova_ranges,
+ Error **errp)
+{
+IOMMUPciBus *sbus = g_hash_table_lookup(s->as_by_busptr, bus);
+IOMMUDevice *sdev;
+GList *current_ranges;
+GList *l, *tmp, *new_ranges = NULL;
+int ret = -EINVAL;
+
+if (!sbus) {
+error_report("%s no sbus", __func__);
+}
+
+sdev = sbus->pbdev[devfn];
+
+current_ranges = sdev->host_resv_ranges;
+
+g_assert(!sdev->probe_done);
+
+/* check that each new resv region is included in an existing one */
+if (sdev->host_resv_ranges) {
+range_inverse_array(iova_ranges,
+_ranges,
+0, UINT64_MAX);
+
+for (tmp = new_ranges; tmp; tmp = tmp->next) {
+Range *newr = (Range *)tmp->data;
+bool included = false;
+
+for (l = current_ranges; l; l = l->next) {
+Range * r = (Range *)l->data;
+
+if (range_contains_range(r, newr)) {
+included = true;
+break;
+}
+}
+if (!included) {
+goto error;
+}
+}
+/* all new reserved ranges are included in existing ones */
+ret = 0;
+goto out;
+}
+
+range_inverse_array(iova_ranges,
+>host_resv_ranges,
+0, UINT64_MAX);
+rebuild_resv_regions(sdev);
+
+return 0;
+error:
+error_setg(errp, "%s Conflicting host reserved ranges set!",
+   __func__);
+out:
+g_list_free_full(new_ranges, g_free);
+return ret;
+}
+
 static bool virtio_iommu_set_iommu_device(PCIBus *bus, void *opaque, int devfn,
   HostIOMMUDevice *hiod, Error **errp)
 {
 VirtIOIOMMU *viommu = opaque;
+HostIOMMUDeviceClass *hiodc = HOST_IOMMU_DEVICE_GET_CLASS(hiod);
 struct hiod_key *new_key;
+GList *host_iova_ranges = NULL;
 
 assert(hiod);
 
@@ -511,12 +608,28 @@ static bool virtio_iommu_set_iommu_device(PCIBus *bus, 
void *opaque, int devfn,
 return false;
 }
 
+if (hiodc->get_iova_ranges) {
+int ret;
+host_iova_ranges = hiodc->get_iova_ranges(hiod, errp);
+if (!host_iova_ranges) {
+return true; /* some old kernels may not support that capability */
+}
+ret = virtio_iommu_set_host_iova_ranges(viommu, hiod->aliased_bus,
+hiod->aliased_devfn,
+

[PULL 14/42] vfio/pci: Pass HostIOMMUDevice to vIOMMU

2024-06-24 Thread Cédric Le Goater
From: Zhenzhong Duan 

With HostIOMMUDevice passed, vIOMMU can check compatibility with host
IOMMU, call into IOMMUFD specific methods, etc.

Originally-by: Yi Liu 
Signed-off-by: Nicolin Chen 
Signed-off-by: Yi Sun 
Signed-off-by: Zhenzhong Duan 
Reviewed-by: Eric Auger 
Reviewed-by: Michael S. Tsirkin 
---
 hw/vfio/pci.c | 19 ++-
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 
74a79bdf61f9aeb4860d532b6c076dd3491dd0ab..d8a76c1ee003e6f5669e8390271836fd9d839a8a
 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -3121,10 +3121,15 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
 
 vfio_bars_register(vdev);
 
-if (!vfio_add_capabilities(vdev, errp)) {
+if (!pci_device_set_iommu_device(pdev, vbasedev->hiod, errp)) {
+error_prepend(errp, "Failed to set iommu_device: ");
 goto out_teardown;
 }
 
+if (!vfio_add_capabilities(vdev, errp)) {
+goto out_unset_idev;
+}
+
 if (vdev->vga) {
 vfio_vga_quirk_setup(vdev);
 }
@@ -3141,7 +3146,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
 error_setg(errp,
"cannot support IGD OpRegion feature on hotplugged "
"device");
-goto out_teardown;
+goto out_unset_idev;
 }
 
 ret = vfio_get_dev_region_info(vbasedev,
@@ -3150,11 +3155,11 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
 if (ret) {
 error_setg_errno(errp, -ret,
  "does not support requested IGD OpRegion 
feature");
-goto out_teardown;
+goto out_unset_idev;
 }
 
 if (!vfio_pci_igd_opregion_init(vdev, opregion, errp)) {
-goto out_teardown;
+goto out_unset_idev;
 }
 }
 
@@ -3238,6 +3243,8 @@ out_deregister:
 if (vdev->intx.mmap_timer) {
 timer_free(vdev->intx.mmap_timer);
 }
+out_unset_idev:
+pci_device_unset_iommu_device(pdev);
 out_teardown:
 vfio_teardown_msi(vdev);
 vfio_bars_exit(vdev);
@@ -3266,6 +3273,7 @@ static void vfio_instance_finalize(Object *obj)
 static void vfio_exitfn(PCIDevice *pdev)
 {
 VFIOPCIDevice *vdev = VFIO_PCI(pdev);
+VFIODevice *vbasedev = >vbasedev;
 
 vfio_unregister_req_notifier(vdev);
 vfio_unregister_err_notifier(vdev);
@@ -3280,7 +3288,8 @@ static void vfio_exitfn(PCIDevice *pdev)
 vfio_teardown_msi(vdev);
 vfio_pci_disable_rp_atomics(vdev);
 vfio_bars_exit(vdev);
-vfio_migration_exit(>vbasedev);
+vfio_migration_exit(vbasedev);
+pci_device_unset_iommu_device(pdev);
 }
 
 static void vfio_pci_reset(DeviceState *dev)
-- 
2.45.2




[PULL 32/42] vfio/container: Modify vfio_get_iommu_type() to use a container fd

2024-06-24 Thread Cédric Le Goater
The 'container' pointer has no other use than its 'fd' attribute.
Simplify the prototype to ease future changes.

Reviewed-by: Zhenzhong Duan 
Reviewed-by: Eric Auger 
Tested-by: Eric Auger 
Signed-off-by: Cédric Le Goater 
---
 hw/vfio/container.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/hw/vfio/container.c b/hw/vfio/container.c
index 
dc85a79cb9e62b72312f79da994c53608b6cef48..589f37bc6d68dae18f9e46371f14d6952b2240c0
 100644
--- a/hw/vfio/container.c
+++ b/hw/vfio/container.c
@@ -354,7 +354,7 @@ static void vfio_kvm_device_del_group(VFIOGroup *group)
 /*
  * vfio_get_iommu_type - selects the richest iommu_type (v2 first)
  */
-static int vfio_get_iommu_type(VFIOContainer *container,
+static int vfio_get_iommu_type(int container_fd,
Error **errp)
 {
 int iommu_types[] = { VFIO_TYPE1v2_IOMMU, VFIO_TYPE1_IOMMU,
@@ -362,7 +362,7 @@ static int vfio_get_iommu_type(VFIOContainer *container,
 int i;
 
 for (i = 0; i < ARRAY_SIZE(iommu_types); i++) {
-if (ioctl(container->fd, VFIO_CHECK_EXTENSION, iommu_types[i])) {
+if (ioctl(container_fd, VFIO_CHECK_EXTENSION, iommu_types[i])) {
 return iommu_types[i];
 }
 }
@@ -399,7 +399,7 @@ static bool vfio_set_iommu(VFIOContainer *container, int 
group_fd,
 int iommu_type;
 const VFIOIOMMUClass *vioc;
 
-iommu_type = vfio_get_iommu_type(container, errp);
+iommu_type = vfio_get_iommu_type(container->fd, errp);
 if (iommu_type < 0) {
 return false;
 }
-- 
2.45.2




[PULL 38/42] vfio/container: Introduce an instance_init() handler

2024-06-24 Thread Cédric Le Goater
This allows us to move the initialization code from vfio_container_init(),
which we will soon remove.

Reviewed-by: Zhenzhong Duan 
Reviewed-by: Eric Auger 
Tested-by: Eric Auger 
Signed-off-by: Cédric Le Goater 
---
 hw/vfio/container-base.c | 19 +--
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c
index 
98c15e174dd78df5146ee83c05c98f3ea9c1e52c..3858f5ab1d68e897f9013161d7c5c20c0553029d
 100644
--- a/hw/vfio/container-base.c
+++ b/hw/vfio/container-base.c
@@ -75,12 +75,6 @@ void vfio_container_init(VFIOContainerBase *bcontainer,
  const VFIOIOMMUClass *ops)
 {
 bcontainer->ops = ops;
-bcontainer->error = NULL;
-bcontainer->dirty_pages_supported = false;
-bcontainer->dma_max_mappings = 0;
-bcontainer->iova_ranges = NULL;
-QLIST_INIT(>giommu_list);
-QLIST_INIT(>vrdl_list);
 }
 
 void vfio_container_destroy(VFIOContainerBase *bcontainer)
@@ -99,10 +93,23 @@ void vfio_container_destroy(VFIOContainerBase *bcontainer)
 g_list_free_full(bcontainer->iova_ranges, g_free);
 }
 
+static void vfio_container_instance_init(Object *obj)
+{
+VFIOContainerBase *bcontainer = VFIO_IOMMU(obj);
+
+bcontainer->error = NULL;
+bcontainer->dirty_pages_supported = false;
+bcontainer->dma_max_mappings = 0;
+bcontainer->iova_ranges = NULL;
+QLIST_INIT(>giommu_list);
+QLIST_INIT(>vrdl_list);
+}
+
 static const TypeInfo types[] = {
 {
 .name = TYPE_VFIO_IOMMU,
 .parent = TYPE_OBJECT,
+.instance_init = vfio_container_instance_init,
 .instance_size = sizeof(VFIOContainerBase),
 .class_size = sizeof(VFIOIOMMUClass),
 .abstract = true,
-- 
2.45.2




[PULL 26/42] vfio: Make vfio_devices_dma_logging_start() return bool

2024-06-24 Thread Cédric Le Goater
Since vfio_devices_dma_logging_start() takes an 'Error **' argument,
best practices suggest returning a bool. See the Rules section of
qapi/error.h. It will simplify potential changes coming after.

vfio_container_set_dirty_page_tracking() could be modified in the same
way but the errno value can be saved in the migration stream when
called from vfio_listener_log_global_stop().

Reviewed-by: Zhenzhong Duan 
Reviewed-by: Eric Auger 
Tested-by: Eric Auger 
Signed-off-by: Cédric Le Goater 
---
 hw/vfio/common.c | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 
9e4c0cc95ff90209d3e8184035af0806a2bf890b..d48cd9b9361a92d184e423ffc60aabaff40fb487
 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -1020,7 +1020,7 @@ static void vfio_device_feature_dma_logging_start_destroy(
 g_free(feature);
 }
 
-static int vfio_devices_dma_logging_start(VFIOContainerBase *bcontainer,
+static bool vfio_devices_dma_logging_start(VFIOContainerBase *bcontainer,
   Error **errp)
 {
 struct vfio_device_feature *feature;
@@ -1033,7 +1033,7 @@ static int 
vfio_devices_dma_logging_start(VFIOContainerBase *bcontainer,
);
 if (!feature) {
 error_setg_errno(errp, errno, "Failed to prepare DMA logging");
-return -errno;
+return false;
 }
 
 QLIST_FOREACH(vbasedev, >device_list, container_next) {
@@ -1058,7 +1058,7 @@ out:
 
 vfio_device_feature_dma_logging_start_destroy(feature);
 
-return ret;
+return ret == 0;
 }
 
 static bool vfio_listener_log_global_start(MemoryListener *listener,
@@ -1067,18 +1067,18 @@ static bool 
vfio_listener_log_global_start(MemoryListener *listener,
 ERRP_GUARD();
 VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase,
  listener);
-int ret;
+bool ret;
 
 if (vfio_devices_all_device_dirty_tracking(bcontainer)) {
 ret = vfio_devices_dma_logging_start(bcontainer, errp);
 } else {
-ret = vfio_container_set_dirty_page_tracking(bcontainer, true, errp);
+ret = vfio_container_set_dirty_page_tracking(bcontainer, true, errp) 
== 0;
 }
 
-if (ret) {
+if (!ret) {
 error_prepend(errp, "vfio: Could not start dirty page tracking - ");
 }
-return !ret;
+return ret;
 }
 
 static void vfio_listener_log_global_stop(MemoryListener *listener)
-- 
2.45.2




[PULL 18/42] HostIOMMUDevice: Store the VFIO/VDPA agent

2024-06-24 Thread Cédric Le Goater
From: Eric Auger 

Store the agent device (VFIO or VDPA) in the host IOMMU device.
This will allow easy access to some of its resources.

Signed-off-by: Eric Auger 
Reviewed-by: Zhenzhong Duan 
Reviewed-by: Michael S. Tsirkin 
---
 include/sysemu/host_iommu_device.h | 1 +
 hw/vfio/container.c| 1 +
 hw/vfio/iommufd.c  | 2 ++
 3 files changed, 4 insertions(+)

diff --git a/include/sysemu/host_iommu_device.h 
b/include/sysemu/host_iommu_device.h
index 
a57873958b03e1fcd6c0c8991a2010dde02c566c..3e5f058e7ba80491eae04dc73c6957f7269150cf
 100644
--- a/include/sysemu/host_iommu_device.h
+++ b/include/sysemu/host_iommu_device.h
@@ -34,6 +34,7 @@ struct HostIOMMUDevice {
 Object parent_obj;
 
 char *name;
+void *agent; /* pointer to agent device, ie. VFIO or VDPA device */
 HostIOMMUDeviceCaps caps;
 };
 
diff --git a/hw/vfio/container.c b/hw/vfio/container.c
index 
26e6f7fb4f748162d881cb22c970428f319df3c3..b728b978a26d49b5a2895fd4d1add8f0a57787ad
 100644
--- a/hw/vfio/container.c
+++ b/hw/vfio/container.c
@@ -1145,6 +1145,7 @@ static bool hiod_legacy_vfio_realize(HostIOMMUDevice 
*hiod, void *opaque,
 
 hiod->name = g_strdup(vdev->name);
 hiod->caps.aw_bits = vfio_device_get_aw_bits(vdev);
+hiod->agent = opaque;
 
 return true;
 }
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
index 
409ed3dcc91cde508ac74fa693798b87e82eb9dd..dbdae1adbb66f9c8547659320ce4436825efe1a1
 100644
--- a/hw/vfio/iommufd.c
+++ b/hw/vfio/iommufd.c
@@ -631,6 +631,8 @@ static bool hiod_iommufd_vfio_realize(HostIOMMUDevice 
*hiod, void *opaque,
 struct iommu_hw_info_vtd vtd;
 } data;
 
+hiod->agent = opaque;
+
 if (!iommufd_backend_get_device_info(vdev->iommufd, vdev->devid,
  , , sizeof(data), errp)) {
 return false;
-- 
2.45.2




[PULL 37/42] vfio/container: Switch to QOM

2024-06-24 Thread Cédric Le Goater
Instead of allocating the container struct, create a QOM object of the
appropriate type.

Reviewed-by: Zhenzhong Duan 
Reviewed-by: Eric Auger 
Tested-by: Eric Auger 
Signed-off-by: Cédric Le Goater 
---
 hw/vfio/container.c | 6 +++---
 hw/vfio/iommufd.c   | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/hw/vfio/container.c b/hw/vfio/container.c
index 
3ae52530a9b500bd53ec9f9e66c73253d97c9aba..ff3a6831da83c0fe11060cd57918c4d87b10197c
 100644
--- a/hw/vfio/container.c
+++ b/hw/vfio/container.c
@@ -435,7 +435,7 @@ static VFIOContainer *vfio_create_container(int fd, 
VFIOGroup *group,
 vioc_name = vfio_get_iommu_class_name(iommu_type);
 vioc = VFIO_IOMMU_CLASS(object_class_by_name(vioc_name));
 
-container = g_malloc0(sizeof(*container));
+container = VFIO_IOMMU_LEGACY(object_new(vioc_name));
 container->fd = fd;
 container->iommu_type = iommu_type;
 vfio_container_init(>bcontainer, vioc);
@@ -674,7 +674,7 @@ unregister_container_exit:
 vfio_cpr_unregister_container(bcontainer);
 
 free_container_exit:
-g_free(container);
+object_unref(container);
 
 close_fd_exit:
 close(fd);
@@ -718,7 +718,7 @@ static void vfio_disconnect_container(VFIOGroup *group)
 trace_vfio_disconnect_container(container->fd);
 vfio_cpr_unregister_container(bcontainer);
 close(container->fd);
-g_free(container);
+object_unref(container);
 
 vfio_put_address_space(space);
 }
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
index 
3e9d642034c2d2234ea701952c94a78ab32e9147..d59df858407f3cadb9405386ad673c99cdad61d0
 100644
--- a/hw/vfio/iommufd.c
+++ b/hw/vfio/iommufd.c
@@ -239,7 +239,7 @@ static void 
iommufd_cdev_container_destroy(VFIOIOMMUFDContainer *container)
 memory_listener_unregister(>listener);
 vfio_container_destroy(bcontainer);
 iommufd_backend_free_id(container->be, container->ioas_id);
-g_free(container);
+object_unref(container);
 }
 
 static int iommufd_cdev_ram_block_discard_disable(bool state)
@@ -352,7 +352,7 @@ static bool iommufd_cdev_attach(const char *name, 
VFIODevice *vbasedev,
 
 trace_iommufd_cdev_alloc_ioas(vbasedev->iommufd->fd, ioas_id);
 
-container = g_malloc0(sizeof(*container));
+container = VFIO_IOMMU_IOMMUFD(object_new(TYPE_VFIO_IOMMU_IOMMUFD));
 container->be = vbasedev->iommufd;
 container->ioas_id = ioas_id;
 
-- 
2.45.2




[PULL 40/42] vfio/container: Remove vfio_container_init()

2024-06-24 Thread Cédric Le Goater
It's now empty.

Reviewed-by: Zhenzhong Duan 
Reviewed-by: Eric Auger 
Tested-by: Eric Auger 
Signed-off-by: Cédric Le Goater 
---
 include/hw/vfio/vfio-container-base.h | 2 --
 hw/vfio/container-base.c  | 5 -
 hw/vfio/container.c   | 3 ---
 hw/vfio/iommufd.c | 1 -
 4 files changed, 11 deletions(-)

diff --git a/include/hw/vfio/vfio-container-base.h 
b/include/hw/vfio/vfio-container-base.h
index 
6b57cd8e7f5d7d2817f6e3b96ce4566d2630bb12..6242a62771caa8cf19440a53ad6f4db862ca12d7
 100644
--- a/include/hw/vfio/vfio-container-base.h
+++ b/include/hw/vfio/vfio-container-base.h
@@ -86,8 +86,6 @@ int vfio_container_set_dirty_page_tracking(VFIOContainerBase 
*bcontainer,
 int vfio_container_query_dirty_bitmap(const VFIOContainerBase *bcontainer,
VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error **errp);
 
-void vfio_container_init(VFIOContainerBase *bcontainer,
- const VFIOIOMMUClass *ops);
 void vfio_container_destroy(VFIOContainerBase *bcontainer);
 
 
diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c
index 
24669d4d7472f49ac3adf2618a32bf7d82c5c344..970ae2356a92f87df44e1dd58ff8c67045a24ef1
 100644
--- a/hw/vfio/container-base.c
+++ b/hw/vfio/container-base.c
@@ -83,11 +83,6 @@ int vfio_container_query_dirty_bitmap(const 
VFIOContainerBase *bcontainer,
errp);
 }
 
-void vfio_container_init(VFIOContainerBase *bcontainer,
- const VFIOIOMMUClass *ops)
-{
-}
-
 void vfio_container_destroy(VFIOContainerBase *bcontainer)
 {
 VFIOGuestIOMMU *giommu, *tmp;
diff --git a/hw/vfio/container.c b/hw/vfio/container.c
index 
a2f5fbad00cd228e27a47df5cd683dbb34296113..3f2032d5c496de078c277ebacc49d7db89f4cc65
 100644
--- a/hw/vfio/container.c
+++ b/hw/vfio/container.c
@@ -419,7 +419,6 @@ static VFIOContainer *vfio_create_container(int fd, 
VFIOGroup *group,
 Error **errp)
 {
 int iommu_type;
-const VFIOIOMMUClass *vioc;
 const char *vioc_name;
 VFIOContainer *container;
 
@@ -433,12 +432,10 @@ static VFIOContainer *vfio_create_container(int fd, 
VFIOGroup *group,
 }
 
 vioc_name = vfio_get_iommu_class_name(iommu_type);
-vioc = VFIO_IOMMU_CLASS(object_class_by_name(vioc_name));
 
 container = VFIO_IOMMU_LEGACY(object_new(vioc_name));
 container->fd = fd;
 container->iommu_type = iommu_type;
-vfio_container_init(>bcontainer, vioc);
 return container;
 }
 
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
index 
7bc76f80b48ea5422e68fd4d4cb3f5bca90993f6..09b71a6617807c621275c74b924cfd39eb643961
 100644
--- a/hw/vfio/iommufd.c
+++ b/hw/vfio/iommufd.c
@@ -357,7 +357,6 @@ static bool iommufd_cdev_attach(const char *name, 
VFIODevice *vbasedev,
 container->ioas_id = ioas_id;
 
 bcontainer = >bcontainer;
-vfio_container_init(bcontainer, iommufd_vioc);
 vfio_address_space_insert(space, bcontainer);
 
 if (!iommufd_cdev_attach_container(vbasedev, container, errp)) {
-- 
2.45.2




[PULL 31/42] vfio/container: Simplify vfio_container_init()

2024-06-24 Thread Cédric Le Goater
Assign the base container VFIOAddressSpace 'space' pointer in
vfio_address_space_insert(). The ultimate goal is to remove
vfio_container_init() and instead rely on an .instance_init() handler
to perform the initialization of VFIOContainerBase.

Note that vfio_connect_container() will assign the 'space'
pointer later in the execution flow. This should not have any
consequence.

Reviewed-by: Zhenzhong Duan 
Tested-by: Eric Auger 
Signed-off-by: Cédric Le Goater 
---
 include/hw/vfio/vfio-container-base.h | 1 -
 hw/vfio/common.c  | 1 +
 hw/vfio/container-base.c  | 3 +--
 hw/vfio/container.c   | 6 +++---
 hw/vfio/iommufd.c | 2 +-
 5 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/include/hw/vfio/vfio-container-base.h 
b/include/hw/vfio/vfio-container-base.h
index 
442c0dfc4c1774753c239c2c8360dcd1540d44fa..d505f63607ec40e6aa44aeb3e20848ac780562a1
 100644
--- a/include/hw/vfio/vfio-container-base.h
+++ b/include/hw/vfio/vfio-container-base.h
@@ -87,7 +87,6 @@ int vfio_container_query_dirty_bitmap(const VFIOContainerBase 
*bcontainer,
VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error **errp);
 
 void vfio_container_init(VFIOContainerBase *bcontainer,
- VFIOAddressSpace *space,
  const VFIOIOMMUClass *ops);
 void vfio_container_destroy(VFIOContainerBase *bcontainer);
 
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 
8cdf26c6f5a490cfa02bdf1087a91948709aaa33..1686a0bed23bd95467bfb00a0c39a4d966e49cae
 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -1512,6 +1512,7 @@ void vfio_address_space_insert(VFIOAddressSpace *space,
VFIOContainerBase *bcontainer)
 {
 QLIST_INSERT_HEAD(>containers, bcontainer, next);
+bcontainer->space = space;
 }
 
 struct vfio_device_info *vfio_get_device_info(int fd)
diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c
index 
760d9d0622b2e847ecb3368c88df772efb06043f..280f0dd2db1fc3939fe9925ce00a2c50d0e14196
 100644
--- a/hw/vfio/container-base.c
+++ b/hw/vfio/container-base.c
@@ -71,11 +71,10 @@ int vfio_container_query_dirty_bitmap(const 
VFIOContainerBase *bcontainer,
errp);
 }
 
-void vfio_container_init(VFIOContainerBase *bcontainer, VFIOAddressSpace 
*space,
+void vfio_container_init(VFIOContainerBase *bcontainer,
  const VFIOIOMMUClass *ops)
 {
 bcontainer->ops = ops;
-bcontainer->space = space;
 bcontainer->error = NULL;
 bcontainer->dirty_pages_supported = false;
 bcontainer->dma_max_mappings = 0;
diff --git a/hw/vfio/container.c b/hw/vfio/container.c
index 
0237c216987ff64a6d11bef8688bb000d93a7f09..dc85a79cb9e62b72312f79da994c53608b6cef48
 100644
--- a/hw/vfio/container.c
+++ b/hw/vfio/container.c
@@ -394,7 +394,7 @@ static const VFIOIOMMUClass *vfio_get_iommu_class(int 
iommu_type, Error **errp)
 }
 
 static bool vfio_set_iommu(VFIOContainer *container, int group_fd,
-   VFIOAddressSpace *space, Error **errp)
+   Error **errp)
 {
 int iommu_type;
 const VFIOIOMMUClass *vioc;
@@ -432,7 +432,7 @@ static bool vfio_set_iommu(VFIOContainer *container, int 
group_fd,
 return false;
 }
 
-vfio_container_init(>bcontainer, space, vioc);
+vfio_container_init(>bcontainer, vioc);
 return true;
 }
 
@@ -614,7 +614,7 @@ static bool vfio_connect_container(VFIOGroup *group, 
AddressSpace *as,
 container->fd = fd;
 bcontainer = >bcontainer;
 
-if (!vfio_set_iommu(container, group->fd, space, errp)) {
+if (!vfio_set_iommu(container, group->fd, errp)) {
 goto free_container_exit;
 }
 
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
index 
9f8f33e383a38827ceca0f73cb77f5ca6b123198..e5d9334142418514215528b9523f12c031792c7f
 100644
--- a/hw/vfio/iommufd.c
+++ b/hw/vfio/iommufd.c
@@ -357,7 +357,7 @@ static bool iommufd_cdev_attach(const char *name, 
VFIODevice *vbasedev,
 container->ioas_id = ioas_id;
 
 bcontainer = >bcontainer;
-vfio_container_init(bcontainer, space, iommufd_vioc);
+vfio_container_init(bcontainer, iommufd_vioc);
 vfio_address_space_insert(space, bcontainer);
 
 if (!iommufd_cdev_attach_container(vbasedev, container, errp)) {
-- 
2.45.2




[PULL 41/42] vfio/container: Introduce vfio_iommu_legacy_instance_init()

2024-06-24 Thread Cédric Le Goater
Just as we did for the VFIOContainerBase object, introduce an
instance_init() handler for the legacy VFIOContainer object and do the
specific initialization there.

Reviewed-by: Zhenzhong Duan 
Reviewed-by: Eric Auger 
Tested-by: Eric Auger 
Signed-off-by: Cédric Le Goater 
---
 hw/vfio/container.c | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/hw/vfio/container.c b/hw/vfio/container.c
index 
3f2032d5c496de078c277ebacc49d7db89f4cc65..45123acbdd6a681f4ce7cae7aa2509100ea225ab
 100644
--- a/hw/vfio/container.c
+++ b/hw/vfio/container.c
@@ -639,7 +639,6 @@ static bool vfio_connect_container(VFIOGroup *group, 
AddressSpace *as,
 
 vfio_kvm_device_add_group(group);
 
-QLIST_INIT(>group_list);
 vfio_address_space_insert(space, bcontainer);
 
 group->container = container;
@@ -1183,6 +1182,13 @@ hiod_legacy_vfio_get_iova_ranges(HostIOMMUDevice *hiod, 
Error **errp)
 return l;
 }
 
+static void vfio_iommu_legacy_instance_init(Object *obj)
+{
+VFIOContainer *container = VFIO_IOMMU_LEGACY(obj);
+
+QLIST_INIT(>group_list);
+}
+
 static void hiod_legacy_vfio_class_init(ObjectClass *oc, void *data)
 {
 HostIOMMUDeviceClass *hioc = HOST_IOMMU_DEVICE_CLASS(oc);
@@ -1196,6 +1202,7 @@ static const TypeInfo types[] = {
 {
 .name = TYPE_VFIO_IOMMU_LEGACY,
 .parent = TYPE_VFIO_IOMMU,
+.instance_init = vfio_iommu_legacy_instance_init,
 .instance_size = sizeof(VFIOContainer),
 .class_init = vfio_iommu_legacy_class_init,
 }, {
-- 
2.45.2




[PULL 29/42] vfio/common: Extract vIOMMU code from vfio_sync_dirty_bitmap()

2024-06-24 Thread Cédric Le Goater
From: Avihai Horon 

Extract vIOMMU code from vfio_sync_dirty_bitmap() to a new function and
restructure the code.

This is done in preparation for optimizing vIOMMU device dirty page
tracking. No functional changes intended.

Signed-off-by: Avihai Horon 
Signed-off-by: Joao Martins 
[ clg: - Rebased on upstream
   - Fixed typo in commit log ]
Reviewed-by: Zhenzhong Duan 
Tested-by: Eric Auger 
Signed-off-by: Cédric Le Goater 
---
 hw/vfio/common.c | 63 +---
 1 file changed, 38 insertions(+), 25 deletions(-)

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 
fe215918bdf66ddbe3c5db803e10ce1aa9756b90..f28641bad5cf4b71fcdc0a6c9d42b24c8d786248
 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -1302,37 +1302,50 @@ 
vfio_sync_ram_discard_listener_dirty_bitmap(VFIOContainerBase *bcontainer,
 );
 }
 
+static int vfio_sync_iommu_dirty_bitmap(VFIOContainerBase *bcontainer,
+MemoryRegionSection *section)
+{
+VFIOGuestIOMMU *giommu;
+bool found = false;
+Int128 llend;
+vfio_giommu_dirty_notifier gdn;
+int idx;
+
+QLIST_FOREACH(giommu, >giommu_list, giommu_next) {
+if (MEMORY_REGION(giommu->iommu_mr) == section->mr &&
+giommu->n.start == section->offset_within_region) {
+found = true;
+break;
+}
+}
+
+if (!found) {
+return 0;
+}
+
+gdn.giommu = giommu;
+idx = memory_region_iommu_attrs_to_index(giommu->iommu_mr,
+ MEMTXATTRS_UNSPECIFIED);
+
+llend = int128_add(int128_make64(section->offset_within_region),
+   section->size);
+llend = int128_sub(llend, int128_one());
+
+iommu_notifier_init(, vfio_iommu_map_dirty_notify, 
IOMMU_NOTIFIER_MAP,
+section->offset_within_region, int128_get64(llend),
+idx);
+memory_region_iommu_replay(giommu->iommu_mr, );
+
+return 0;
+}
+
 static int vfio_sync_dirty_bitmap(VFIOContainerBase *bcontainer,
   MemoryRegionSection *section, Error **errp)
 {
 ram_addr_t ram_addr;
 
 if (memory_region_is_iommu(section->mr)) {
-VFIOGuestIOMMU *giommu;
-
-QLIST_FOREACH(giommu, >giommu_list, giommu_next) {
-if (MEMORY_REGION(giommu->iommu_mr) == section->mr &&
-giommu->n.start == section->offset_within_region) {
-Int128 llend;
-vfio_giommu_dirty_notifier gdn = { .giommu = giommu };
-int idx = memory_region_iommu_attrs_to_index(giommu->iommu_mr,
-   MEMTXATTRS_UNSPECIFIED);
-
-llend = 
int128_add(int128_make64(section->offset_within_region),
-   section->size);
-llend = int128_sub(llend, int128_one());
-
-iommu_notifier_init(,
-vfio_iommu_map_dirty_notify,
-IOMMU_NOTIFIER_MAP,
-section->offset_within_region,
-int128_get64(llend),
-idx);
-memory_region_iommu_replay(giommu->iommu_mr, );
-break;
-}
-}
-return 0;
+return vfio_sync_iommu_dirty_bitmap(bcontainer, section);
 } else if (memory_region_has_ram_discard_manager(section->mr)) {
 int ret;
 
-- 
2.45.2




[PULL 15/42] intel_iommu: Extract out vtd_cap_init() to initialize cap/ecap

2024-06-24 Thread Cédric Le Goater
From: Zhenzhong Duan 

Extract cap/ecap initialization into vtd_cap_init() to make the code
cleaner.

No functional change intended.

Reviewed-by: Eric Auger 
Signed-off-by: Zhenzhong Duan 
Reviewed-by: Michael S. Tsirkin 
---
 hw/i386/intel_iommu.c | 93 ---
 1 file changed, 51 insertions(+), 42 deletions(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 
c4350e0ff0eaff7a2eaa8dc88582ce6e2f0368b0..c69c0d285b1fa85da22440934178243942fe42ab
 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -3934,30 +3934,10 @@ static void vtd_iommu_replay(IOMMUMemoryRegion 
*iommu_mr, IOMMUNotifier *n)
 return;
 }
 
-/* Do the initialization. It will also be called when reset, so pay
- * attention when adding new initialization stuff.
- */
-static void vtd_init(IntelIOMMUState *s)
+static void vtd_cap_init(IntelIOMMUState *s)
 {
 X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);
 
-memset(s->csr, 0, DMAR_REG_SIZE);
-memset(s->wmask, 0, DMAR_REG_SIZE);
-memset(s->w1cmask, 0, DMAR_REG_SIZE);
-memset(s->womask, 0, DMAR_REG_SIZE);
-
-s->root = 0;
-s->root_scalable = false;
-s->dmar_enabled = false;
-s->intr_enabled = false;
-s->iq_head = 0;
-s->iq_tail = 0;
-s->iq = 0;
-s->iq_size = 0;
-s->qi_enabled = false;
-s->iq_last_desc_type = VTD_INV_DESC_NONE;
-s->iq_dw = false;
-s->next_frcd_reg = 0;
 s->cap = VTD_CAP_FRO | VTD_CAP_NFR | VTD_CAP_ND |
  VTD_CAP_MAMV | VTD_CAP_PSI | VTD_CAP_SLLPS |
  VTD_CAP_MGAW(s->aw_bits);
@@ -3974,27 +3954,6 @@ static void vtd_init(IntelIOMMUState *s)
 }
 s->ecap = VTD_ECAP_QI | VTD_ECAP_IRO;
 
-/*
- * Rsvd field masks for spte
- */
-vtd_spte_rsvd[0] = ~0ULL;
-vtd_spte_rsvd[1] = VTD_SPTE_PAGE_L1_RSVD_MASK(s->aw_bits,
-  x86_iommu->dt_supported);
-vtd_spte_rsvd[2] = VTD_SPTE_PAGE_L2_RSVD_MASK(s->aw_bits);
-vtd_spte_rsvd[3] = VTD_SPTE_PAGE_L3_RSVD_MASK(s->aw_bits);
-vtd_spte_rsvd[4] = VTD_SPTE_PAGE_L4_RSVD_MASK(s->aw_bits);
-
-vtd_spte_rsvd_large[2] = VTD_SPTE_LPAGE_L2_RSVD_MASK(s->aw_bits,
- 
x86_iommu->dt_supported);
-vtd_spte_rsvd_large[3] = VTD_SPTE_LPAGE_L3_RSVD_MASK(s->aw_bits,
- 
x86_iommu->dt_supported);
-
-if (s->scalable_mode || s->snoop_control) {
-vtd_spte_rsvd[1] &= ~VTD_SPTE_SNP;
-vtd_spte_rsvd_large[2] &= ~VTD_SPTE_SNP;
-vtd_spte_rsvd_large[3] &= ~VTD_SPTE_SNP;
-}
-
 if (x86_iommu_ir_supported(x86_iommu)) {
 s->ecap |= VTD_ECAP_IR | VTD_ECAP_MHMV;
 if (s->intr_eim == ON_OFF_AUTO_ON) {
@@ -4027,6 +3986,56 @@ static void vtd_init(IntelIOMMUState *s)
 if (s->pasid) {
 s->ecap |= VTD_ECAP_PASID;
 }
+}
+
+/*
+ * Do the initialization. It will also be called when reset, so pay
+ * attention when adding new initialization stuff.
+ */
+static void vtd_init(IntelIOMMUState *s)
+{
+X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);
+
+memset(s->csr, 0, DMAR_REG_SIZE);
+memset(s->wmask, 0, DMAR_REG_SIZE);
+memset(s->w1cmask, 0, DMAR_REG_SIZE);
+memset(s->womask, 0, DMAR_REG_SIZE);
+
+s->root = 0;
+s->root_scalable = false;
+s->dmar_enabled = false;
+s->intr_enabled = false;
+s->iq_head = 0;
+s->iq_tail = 0;
+s->iq = 0;
+s->iq_size = 0;
+s->qi_enabled = false;
+s->iq_last_desc_type = VTD_INV_DESC_NONE;
+s->iq_dw = false;
+s->next_frcd_reg = 0;
+
+vtd_cap_init(s);
+
+/*
+ * Rsvd field masks for spte
+ */
+vtd_spte_rsvd[0] = ~0ULL;
+vtd_spte_rsvd[1] = VTD_SPTE_PAGE_L1_RSVD_MASK(s->aw_bits,
+  x86_iommu->dt_supported);
+vtd_spte_rsvd[2] = VTD_SPTE_PAGE_L2_RSVD_MASK(s->aw_bits);
+vtd_spte_rsvd[3] = VTD_SPTE_PAGE_L3_RSVD_MASK(s->aw_bits);
+vtd_spte_rsvd[4] = VTD_SPTE_PAGE_L4_RSVD_MASK(s->aw_bits);
+
+vtd_spte_rsvd_large[2] = VTD_SPTE_LPAGE_L2_RSVD_MASK(s->aw_bits,
+x86_iommu->dt_supported);
+vtd_spte_rsvd_large[3] = VTD_SPTE_LPAGE_L3_RSVD_MASK(s->aw_bits,
+x86_iommu->dt_supported);
+
+if (s->scalable_mode || s->snoop_control) {
+vtd_spte_rsvd[1] &= ~VTD_SPTE_SNP;
+vtd_spte_rsvd_large[2] &= ~VTD_SPTE_SNP;
+vtd_spte_rsvd_large[3] &= ~VTD_SPTE_SNP;
+}
 
 vtd_reset_caches(s);
 
-- 
2.45.2




[PULL 25/42] memory: Remove IOMMU MR iommu_set_iova_range API

2024-06-24 Thread Cédric Le Goater
From: Eric Auger 

Since the host IOVA ranges are now passed through the
PCIIOMMUOps set_host_resv_regions and we have removed
the only implementation of iommu_set_iova_range() in
the virtio-iommu and the only call site in vfio/common,
let's retire the IOMMU MR API and its memory wrapper.

Signed-off-by: Eric Auger 
Reviewed-by: Zhenzhong Duan 
Reviewed-by: Michael S. Tsirkin 
---
 include/exec/memory.h | 32 
 system/memory.c   | 13 -
 2 files changed, 45 deletions(-)

diff --git a/include/exec/memory.h b/include/exec/memory.h
index 
2d7c278b9f43dd9ea6a12ab1d3948dfaa49be1e2..0903513d132840051bc5a02da99216c025669ba4
 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -530,26 +530,6 @@ struct IOMMUMemoryRegionClass {
  int (*iommu_set_page_size_mask)(IOMMUMemoryRegion *iommu,
  uint64_t page_size_mask,
  Error **errp);
-/**
- * @iommu_set_iova_ranges:
- *
- * Propagate information about the usable IOVA ranges for a given IOMMU
- * memory region. Used for example to propagate host physical device
- * reserved memory region constraints to the virtual IOMMU.
- *
- * Optional method: if this method is not provided, then the default IOVA
- * aperture is used.
- *
- * @iommu: the IOMMUMemoryRegion
- *
- * @iova_ranges: list of ordered IOVA ranges (at least one range)
- *
- * Returns 0 on success, or a negative error. In case of failure, the error
- * object must be created.
- */
- int (*iommu_set_iova_ranges)(IOMMUMemoryRegion *iommu,
-  GList *iova_ranges,
-  Error **errp);
 };
 
 typedef struct RamDiscardListener RamDiscardListener;
@@ -1951,18 +1931,6 @@ int 
memory_region_iommu_set_page_size_mask(IOMMUMemoryRegion *iommu_mr,
uint64_t page_size_mask,
Error **errp);
 
-/**
- * memory_region_iommu_set_iova_ranges - Set the usable IOVA ranges
- * for a given IOMMU MR region
- *
- * @iommu: IOMMU memory region
- * @iova_ranges: list of ordered IOVA ranges (at least one range)
- * @errp: pointer to Error*, to store an error if it happens.
- */
-int memory_region_iommu_set_iova_ranges(IOMMUMemoryRegion *iommu,
-GList *iova_ranges,
-Error **errp);
-
 /**
  * memory_region_name: get a memory region's name
  *
diff --git a/system/memory.c b/system/memory.c
index 
47c600df635b133e3791fd6f02397e62510982e7..2d6952136066da696aca911bba530ddc472e5d70
 100644
--- a/system/memory.c
+++ b/system/memory.c
@@ -1914,19 +1914,6 @@ int 
memory_region_iommu_set_page_size_mask(IOMMUMemoryRegion *iommu_mr,
 return ret;
 }
 
-int memory_region_iommu_set_iova_ranges(IOMMUMemoryRegion *iommu_mr,
-GList *iova_ranges,
-Error **errp)
-{
-IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_GET_CLASS(iommu_mr);
-int ret = 0;
-
-if (imrc->iommu_set_iova_ranges) {
-ret = imrc->iommu_set_iova_ranges(iommu_mr, iova_ranges, errp);
-}
-return ret;
-}
-
 int memory_region_register_iommu_notifier(MemoryRegion *mr,
   IOMMUNotifier *n, Error **errp)
 {
-- 
2.45.2




[PULL 07/42] backends/iommufd: Introduce helper function iommufd_backend_get_device_info()

2024-06-24 Thread Cédric Le Goater
From: Zhenzhong Duan 

Introduce a helper function iommufd_backend_get_device_info() to get
host IOMMU related information through iommufd uAPI.

Signed-off-by: Yi Liu 
Signed-off-by: Yi Sun 
Signed-off-by: Zhenzhong Duan 
Reviewed-by: Eric Auger 
Reviewed-by: Michael S. Tsirkin 
---
 include/sysemu/iommufd.h |  3 +++
 backends/iommufd.c   | 22 ++
 2 files changed, 25 insertions(+)

diff --git a/include/sysemu/iommufd.h b/include/sysemu/iommufd.h
index 
f6e6d6e1f9f4e9b1c581100912e4864b08c9ed7d..9edfec604595c7ed0e4032472bb73c9b4d2ea559
 100644
--- a/include/sysemu/iommufd.h
+++ b/include/sysemu/iommufd.h
@@ -47,6 +47,9 @@ int iommufd_backend_map_dma(IOMMUFDBackend *be, uint32_t 
ioas_id, hwaddr iova,
 ram_addr_t size, void *vaddr, bool readonly);
 int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id,
   hwaddr iova, ram_addr_t size);
+bool iommufd_backend_get_device_info(IOMMUFDBackend *be, uint32_t devid,
+ uint32_t *type, void *data, uint32_t len,
+ Error **errp);
 
 #define TYPE_HOST_IOMMU_DEVICE_IOMMUFD TYPE_HOST_IOMMU_DEVICE "-iommufd"
 #endif
diff --git a/backends/iommufd.c b/backends/iommufd.c
index 
012f18d8d802aea40798fe3368bbdca52634f95d..c7e969d6f76dff8780efedde56b2015b3b8d616e
 100644
--- a/backends/iommufd.c
+++ b/backends/iommufd.c
@@ -208,6 +208,28 @@ int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t 
ioas_id,
 return ret;
 }
 
+bool iommufd_backend_get_device_info(IOMMUFDBackend *be, uint32_t devid,
+ uint32_t *type, void *data, uint32_t len,
+ Error **errp)
+{
+struct iommu_hw_info info = {
+.size = sizeof(info),
+.dev_id = devid,
+.data_len = len,
+.data_uptr = (uintptr_t)data,
+};
+
+if (ioctl(be->fd, IOMMU_GET_HW_INFO, )) {
+error_setg_errno(errp, errno, "Failed to get hardware info");
+return false;
+}
+
+g_assert(type);
+*type = info.out_data_type;
+
+return true;
+}
+
 static const TypeInfo types[] = {
 {
 .name = TYPE_IOMMUFD_BACKEND,
-- 
2.45.2




[PULL 13/42] hw/pci: Introduce pci_device_[set|unset]_iommu_device()

2024-06-24 Thread Cédric Le Goater
From: Yi Liu 

pci_device_[set|unset]_iommu_device() call pci_device_get_iommu_bus_devfn()
to get iommu_bus->iommu_ops and call [set|unset]_iommu_device callback to
set/unset HostIOMMUDevice for a given PCI device.

Signed-off-by: Yi Liu 
Signed-off-by: Yi Sun 
Signed-off-by: Nicolin Chen 
Signed-off-by: Zhenzhong Duan 
Reviewed-by: Eric Auger 
Reviewed-by: Michael S. Tsirkin 
---
 include/hw/pci/pci.h | 38 +-
 hw/pci/pci.c | 27 +++
 2 files changed, 64 insertions(+), 1 deletion(-)

diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index 
eaa3fc99d8844d2c92d62194a86cd4f6be2f141f..eb26cac810981e00aa47bf9d9c0a7bf3de7ef456
 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -3,6 +3,7 @@
 
 #include "exec/memory.h"
 #include "sysemu/dma.h"
+#include "sysemu/host_iommu_device.h"
 
 /* PCI includes legacy ISA access.  */
 #include "hw/isa/isa.h"
@@ -383,10 +384,45 @@ typedef struct PCIIOMMUOps {
  *
  * @devfn: device and function number
  */
-   AddressSpace * (*get_address_space)(PCIBus *bus, void *opaque, int devfn);
+AddressSpace * (*get_address_space)(PCIBus *bus, void *opaque, int devfn);
+/**
+ * @set_iommu_device: attach a HostIOMMUDevice to a vIOMMU
+ *
+ * Optional callback, if not implemented in vIOMMU, then vIOMMU can't
+ * retrieve host information from the associated HostIOMMUDevice.
+ *
+ * @bus: the #PCIBus of the PCI device.
+ *
+ * @opaque: the data passed to pci_setup_iommu().
+ *
+ * @devfn: device and function number of the PCI device.
+ *
+ * @dev: the #HostIOMMUDevice to attach.
+ *
+ * @errp: pass an Error out only when return false
+ *
+ * Returns: true if HostIOMMUDevice is attached or else false with errp 
set.
+ */
+bool (*set_iommu_device)(PCIBus *bus, void *opaque, int devfn,
+ HostIOMMUDevice *dev, Error **errp);
+/**
+ * @unset_iommu_device: detach a HostIOMMUDevice from a vIOMMU
+ *
+ * Optional callback.
+ *
+ * @bus: the #PCIBus of the PCI device.
+ *
+ * @opaque: the data passed to pci_setup_iommu().
+ *
+ * @devfn: device and function number of the PCI device.
+ */
+void (*unset_iommu_device)(PCIBus *bus, void *opaque, int devfn);
 } PCIIOMMUOps;
 
 AddressSpace *pci_device_iommu_address_space(PCIDevice *dev);
+bool pci_device_set_iommu_device(PCIDevice *dev, HostIOMMUDevice *hiod,
+ Error **errp);
+void pci_device_unset_iommu_device(PCIDevice *dev);
 
 /**
  * pci_setup_iommu: Initialize specific IOMMU handlers for a PCIBus
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 
02a4bb2af63feabd954ce23e9b02dd931c81ae9a..c8a8aab30646c5e37816f49f6ef9d1bdf8be241f
 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -2742,6 +2742,33 @@ AddressSpace *pci_device_iommu_address_space(PCIDevice 
*dev)
 return _space_memory;
 }
 
+bool pci_device_set_iommu_device(PCIDevice *dev, HostIOMMUDevice *hiod,
+ Error **errp)
+{
+PCIBus *iommu_bus;
+
+/* set_iommu_device requires device's direct BDF instead of aliased BDF */
+pci_device_get_iommu_bus_devfn(dev, _bus, NULL, NULL);
+if (iommu_bus && iommu_bus->iommu_ops->set_iommu_device) {
+return iommu_bus->iommu_ops->set_iommu_device(pci_get_bus(dev),
+  iommu_bus->iommu_opaque,
+  dev->devfn, hiod, errp);
+}
+return true;
+}
+
+void pci_device_unset_iommu_device(PCIDevice *dev)
+{
+PCIBus *iommu_bus;
+
+pci_device_get_iommu_bus_devfn(dev, _bus, NULL, NULL);
+if (iommu_bus && iommu_bus->iommu_ops->unset_iommu_device) {
+return iommu_bus->iommu_ops->unset_iommu_device(pci_get_bus(dev),
+
iommu_bus->iommu_opaque,
+dev->devfn);
+}
+}
+
 void pci_setup_iommu(PCIBus *bus, const PCIIOMMUOps *ops, void *opaque)
 {
 /*
-- 
2.45.2




[PULL 08/42] vfio/iommufd: Implement HostIOMMUDeviceClass::realize() handler

2024-06-24 Thread Cédric Le Goater
From: Zhenzhong Duan 

It calls iommufd_backend_get_device_info() to get host IOMMU
related information and translates it into HostIOMMUDeviceCaps
for query with .get_cap().

For aw_bits, use the same approach as the legacy backend by calling
vfio_device_get_aw_bits(), which is common to the different vendor
IOMMUs.

Signed-off-by: Zhenzhong Duan 
Reviewed-by: Eric Auger 
Reviewed-by: Michael S. Tsirkin 
---
 hw/vfio/iommufd.c | 30 ++
 1 file changed, 30 insertions(+)

diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
index 
e4a507d55c4df972fb4c43d31571022910bae493..1674c61227b69f5de2a32dbb8013f854c199d294
 100644
--- a/hw/vfio/iommufd.c
+++ b/hw/vfio/iommufd.c
@@ -619,6 +619,35 @@ static void vfio_iommu_iommufd_class_init(ObjectClass 
*klass, void *data)
 vioc->pci_hot_reset = iommufd_cdev_pci_hot_reset;
 };
 
+static bool hiod_iommufd_vfio_realize(HostIOMMUDevice *hiod, void *opaque,
+  Error **errp)
+{
+VFIODevice *vdev = opaque;
+HostIOMMUDeviceCaps *caps = >caps;
+enum iommu_hw_info_type type;
+union {
+struct iommu_hw_info_vtd vtd;
+} data;
+
+if (!iommufd_backend_get_device_info(vdev->iommufd, vdev->devid,
+ , , sizeof(data), errp)) {
+return false;
+}
+
+hiod->name = g_strdup(vdev->name);
+caps->type = type;
+caps->aw_bits = vfio_device_get_aw_bits(vdev);
+
+return true;
+}
+
+static void hiod_iommufd_vfio_class_init(ObjectClass *oc, void *data)
+{
+HostIOMMUDeviceClass *hiodc = HOST_IOMMU_DEVICE_CLASS(oc);
+
+hiodc->realize = hiod_iommufd_vfio_realize;
+};
+
 static const TypeInfo types[] = {
 {
 .name = TYPE_VFIO_IOMMU_IOMMUFD,
@@ -627,6 +656,7 @@ static const TypeInfo types[] = {
 }, {
 .name = TYPE_HOST_IOMMU_DEVICE_IOMMUFD_VFIO,
 .parent = TYPE_HOST_IOMMU_DEVICE_IOMMUFD,
+.class_init = hiod_iommufd_vfio_class_init,
 }
 };
 
-- 
2.45.2




[PULL 42/42] vfio/container: Move vfio_container_destroy() to an instance_finalize() handler

2024-06-24 Thread Cédric Le Goater
vfio_container_destroy() clears the resources allocated for the
VFIOContainerBase object. Now that VFIOContainerBase is a QOM object,
add an instance_finalize() handler to do the cleanup. It will be
called through object_unref().

Suggested-by: Zhenzhong Duan 
Reviewed-by: Zhenzhong Duan 
Reviewed-by: Eric Auger 
Tested-by: Eric Auger 
Signed-off-by: Cédric Le Goater 
---
 include/hw/vfio/vfio-container-base.h | 3 ---
 hw/vfio/container-base.c  | 4 +++-
 hw/vfio/container.c   | 2 --
 hw/vfio/iommufd.c | 1 -
 4 files changed, 3 insertions(+), 7 deletions(-)

diff --git a/include/hw/vfio/vfio-container-base.h 
b/include/hw/vfio/vfio-container-base.h
index 
6242a62771caa8cf19440a53ad6f4db862ca12d7..419e45ee7a5ac960dae4a993127fc9ee66d48db2
 100644
--- a/include/hw/vfio/vfio-container-base.h
+++ b/include/hw/vfio/vfio-container-base.h
@@ -86,9 +86,6 @@ int vfio_container_set_dirty_page_tracking(VFIOContainerBase 
*bcontainer,
 int vfio_container_query_dirty_bitmap(const VFIOContainerBase *bcontainer,
VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error **errp);
 
-void vfio_container_destroy(VFIOContainerBase *bcontainer);
-
-
 #define TYPE_VFIO_IOMMU "vfio-iommu"
 #define TYPE_VFIO_IOMMU_LEGACY TYPE_VFIO_IOMMU "-legacy"
 #define TYPE_VFIO_IOMMU_SPAPR TYPE_VFIO_IOMMU "-spapr"
diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c
index 
970ae2356a92f87df44e1dd58ff8c67045a24ef1..50b1664f89a8192cf4021498e59f2a92cd2f6e89
 100644
--- a/hw/vfio/container-base.c
+++ b/hw/vfio/container-base.c
@@ -83,8 +83,9 @@ int vfio_container_query_dirty_bitmap(const VFIOContainerBase 
*bcontainer,
errp);
 }
 
-void vfio_container_destroy(VFIOContainerBase *bcontainer)
+static void vfio_container_instance_finalize(Object *obj)
 {
+VFIOContainerBase *bcontainer = VFIO_IOMMU(obj);
 VFIOGuestIOMMU *giommu, *tmp;
 
 QLIST_REMOVE(bcontainer, next);
@@ -116,6 +117,7 @@ static const TypeInfo types[] = {
 .name = TYPE_VFIO_IOMMU,
 .parent = TYPE_OBJECT,
 .instance_init = vfio_container_instance_init,
+.instance_finalize = vfio_container_instance_finalize,
 .instance_size = sizeof(VFIOContainerBase),
 .class_size = sizeof(VFIOIOMMUClass),
 .abstract = true,
diff --git a/hw/vfio/container.c b/hw/vfio/container.c
index 
45123acbdd6a681f4ce7cae7aa2509100ea225ab..2e7ecdf10edc4d84963a45ae9507096965da64fc
 100644
--- a/hw/vfio/container.c
+++ b/hw/vfio/container.c
@@ -712,8 +712,6 @@ static void vfio_disconnect_container(VFIOGroup *group)
 if (QLIST_EMPTY(>group_list)) {
 VFIOAddressSpace *space = bcontainer->space;
 
-vfio_container_destroy(bcontainer);
-
 trace_vfio_disconnect_container(container->fd);
 vfio_cpr_unregister_container(bcontainer);
 close(container->fd);
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
index 
09b71a6617807c621275c74b924cfd39eb643961..c2f158e60386502eef267769ac9bce1effb67033
 100644
--- a/hw/vfio/iommufd.c
+++ b/hw/vfio/iommufd.c
@@ -237,7 +237,6 @@ static void 
iommufd_cdev_container_destroy(VFIOIOMMUFDContainer *container)
 return;
 }
 memory_listener_unregister(>listener);
-vfio_container_destroy(bcontainer);
 iommufd_backend_free_id(container->be, container->ioas_id);
 object_unref(container);
 }
-- 
2.45.2




[PULL 36/42] vfio/container: Change VFIOContainerBase to use QOM

2024-06-24 Thread Cédric Le Goater
VFIOContainerBase was made a QOM interface because we believed that a
QOM object would expose all the IOMMU backends to the QEMU machine and
human interface. This only applies to user creatable devices or objects.

Change the VFIOContainerBase nature from interface to object and make
the necessary adjustments in the VFIO_IOMMU hierarchy.

Reviewed-by: Zhenzhong Duan 
Reviewed-by: Eric Auger 
Tested-by: Eric Auger 
Signed-off-by: Cédric Le Goater 
---
 include/hw/vfio/vfio-common.h |  4 
 include/hw/vfio/vfio-container-base.h | 12 +++-
 hw/vfio/container-base.c  |  4 +++-
 hw/vfio/container.c   |  1 +
 hw/vfio/iommufd.c |  1 +
 hw/vfio/spapr.c   |  3 +++
 6 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 
825d80130bd435fe50830c8ae5b7905d18104dd6..e8ddf92bb18547f0d3b811b3d757cbae7fec8b8d
 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -84,6 +84,8 @@ typedef struct VFIOContainer {
 QLIST_HEAD(, VFIOGroup) group_list;
 } VFIOContainer;
 
+OBJECT_DECLARE_SIMPLE_TYPE(VFIOContainer, VFIO_IOMMU_LEGACY);
+
 typedef struct VFIOHostDMAWindow {
 hwaddr min_iova;
 hwaddr max_iova;
@@ -99,6 +101,8 @@ typedef struct VFIOIOMMUFDContainer {
 uint32_t ioas_id;
 } VFIOIOMMUFDContainer;
 
+OBJECT_DECLARE_SIMPLE_TYPE(VFIOIOMMUFDContainer, VFIO_IOMMU_IOMMUFD);
+
 typedef struct VFIODeviceOps VFIODeviceOps;
 
 typedef struct VFIODevice {
diff --git a/include/hw/vfio/vfio-container-base.h 
b/include/hw/vfio/vfio-container-base.h
index 
d505f63607ec40e6aa44aeb3e20848ac780562a1..b079b76f68975c5701a289ce9012e912a8e44fc6
 100644
--- a/include/hw/vfio/vfio-container-base.h
+++ b/include/hw/vfio/vfio-container-base.h
@@ -34,6 +34,7 @@ typedef struct VFIOAddressSpace {
  * This is the base object for vfio container backends
  */
 typedef struct VFIOContainerBase {
+Object parent;
 const VFIOIOMMUClass *ops;
 VFIOAddressSpace *space;
 MemoryListener listener;
@@ -96,17 +97,10 @@ void vfio_container_destroy(VFIOContainerBase *bcontainer);
 #define TYPE_VFIO_IOMMU_SPAPR TYPE_VFIO_IOMMU "-spapr"
 #define TYPE_VFIO_IOMMU_IOMMUFD TYPE_VFIO_IOMMU "-iommufd"
 
-/*
- * VFIOContainerBase is not an abstract QOM object because it felt
- * unnecessary to expose all the IOMMU backends to the QEMU machine
- * and human interface. However, we can still abstract the IOMMU
- * backend handlers using a QOM interface class. This provides more
- * flexibility when referencing the various implementations.
- */
-DECLARE_CLASS_CHECKERS(VFIOIOMMUClass, VFIO_IOMMU, TYPE_VFIO_IOMMU)
+OBJECT_DECLARE_TYPE(VFIOContainerBase, VFIOIOMMUClass, VFIO_IOMMU)
 
 struct VFIOIOMMUClass {
-InterfaceClass parent_class;
+ObjectClass parent_class;
 
 /* Properties */
 const char *hiod_typename;
diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c
index 
280f0dd2db1fc3939fe9925ce00a2c50d0e14196..98c15e174dd78df5146ee83c05c98f3ea9c1e52c
 100644
--- a/hw/vfio/container-base.c
+++ b/hw/vfio/container-base.c
@@ -102,8 +102,10 @@ void vfio_container_destroy(VFIOContainerBase *bcontainer)
 static const TypeInfo types[] = {
 {
 .name = TYPE_VFIO_IOMMU,
-.parent = TYPE_INTERFACE,
+.parent = TYPE_OBJECT,
+.instance_size = sizeof(VFIOContainerBase),
 .class_size = sizeof(VFIOIOMMUClass),
+.abstract = true,
 },
 };
 
diff --git a/hw/vfio/container.c b/hw/vfio/container.c
index 
31bdc46a96d1626b237227a25007957e1d472757..3ae52530a9b500bd53ec9f9e66c73253d97c9aba
 100644
--- a/hw/vfio/container.c
+++ b/hw/vfio/container.c
@@ -1196,6 +1196,7 @@ static const TypeInfo types[] = {
 {
 .name = TYPE_VFIO_IOMMU_LEGACY,
 .parent = TYPE_VFIO_IOMMU,
+.instance_size = sizeof(VFIOContainer),
 .class_init = vfio_iommu_legacy_class_init,
 }, {
 .name = TYPE_HOST_IOMMU_DEVICE_LEGACY_VFIO,
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
index 
e5d9334142418514215528b9523f12c031792c7f..3e9d642034c2d2234ea701952c94a78ab32e9147
 100644
--- a/hw/vfio/iommufd.c
+++ b/hw/vfio/iommufd.c
@@ -672,6 +672,7 @@ static const TypeInfo types[] = {
 {
 .name = TYPE_VFIO_IOMMU_IOMMUFD,
 .parent = TYPE_VFIO_IOMMU,
+.instance_size = sizeof(VFIOIOMMUFDContainer),
 .class_init = vfio_iommu_iommufd_class_init,
 }, {
 .name = TYPE_HOST_IOMMU_DEVICE_IOMMUFD_VFIO,
diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c
index 
47b040f1bcca7dd0b5cf052d941b43541e98a3c5..018bd2048194a6a2db83ed740025a7060181698f
 100644
--- a/hw/vfio/spapr.c
+++ b/hw/vfio/spapr.c
@@ -30,6 +30,8 @@ typedef struct VFIOSpaprContainer {
 QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list;
 } VFIOSpaprContainer;
 
+OBJECT_DECLARE_SIMPLE_TYPE(VFIOSpaprContainer, VFIO_IOMMU_SPAPR);
+
 static bool vfio_prereg_listener_skipped_section(MemoryRegionSection 

[PULL 24/42] hw/vfio: Remove memory_region_iommu_set_iova_ranges() call

2024-06-24 Thread Cédric Le Goater
From: Eric Auger 

As we have just removed the only implementation of
iommu_set_iova_ranges IOMMU MR callback in the virtio-iommu,
let's remove the call to the memory wrapper. Usable IOVA ranges
are now conveyed through the PCIIOMMUOps in VFIO-PCI.

Signed-off-by: Eric Auger 
Reviewed-by: Zhenzhong Duan 
Reviewed-by: Michael S. Tsirkin 
---
 hw/vfio/common.c | 10 --
 1 file changed, 10 deletions(-)

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 
f20a7b5bba6b44ea4b181eab12a7ddd5175e8366..9e4c0cc95ff90209d3e8184035af0806a2bf890b
 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -630,16 +630,6 @@ static void vfio_listener_region_add(MemoryListener 
*listener,
 goto fail;
 }
 
-if (bcontainer->iova_ranges) {
-ret = memory_region_iommu_set_iova_ranges(giommu->iommu_mr,
-  bcontainer->iova_ranges,
-  );
-if (ret) {
-g_free(giommu);
-goto fail;
-}
-}
-
 ret = memory_region_register_iommu_notifier(section->mr, >n,
 );
 if (ret) {
-- 
2.45.2




[PULL 35/42] vfio/container: Discover IOMMU type before creating the container

2024-06-24 Thread Cédric Le Goater
Since the QEMU struct type representing the VFIO container is deduced
from the IOMMU type exposed by the host, this type should be well
defined *before* creating the container struct. This will be necessary
to instantiate a QOM object of the correct type in future changes.

Rework vfio_set_iommu() to extract the part doing the container
initialization and move it under vfio_create_container().

Reviewed-by: Zhenzhong Duan 
Reviewed-by: Eric Auger 
Tested-by: Eric Auger 
Signed-off-by: Cédric Le Goater 
---
 hw/vfio/container.c | 47 ++---
 1 file changed, 23 insertions(+), 24 deletions(-)

diff --git a/hw/vfio/container.c b/hw/vfio/container.c
index 
a8691942791006f44f7a3c34b32c67ca51766182..31bdc46a96d1626b237227a25007957e1d472757
 100644
--- a/hw/vfio/container.c
+++ b/hw/vfio/container.c
@@ -389,54 +389,56 @@ static const char *vfio_get_iommu_class_name(int 
iommu_type)
 };
 }
 
-static bool vfio_set_iommu(VFIOContainer *container, int group_fd,
-   Error **errp)
+static bool vfio_set_iommu(int container_fd, int group_fd,
+   int *iommu_type, Error **errp)
 {
-int iommu_type;
-const VFIOIOMMUClass *vioc;
-const char *vioc_name;
-
-iommu_type = vfio_get_iommu_type(container->fd, errp);
-if (iommu_type < 0) {
-return false;
-}
-
-if (ioctl(group_fd, VFIO_GROUP_SET_CONTAINER, >fd)) {
+if (ioctl(group_fd, VFIO_GROUP_SET_CONTAINER, _fd)) {
 error_setg_errno(errp, errno, "Failed to set group container");
 return false;
 }
 
-while (ioctl(container->fd, VFIO_SET_IOMMU, iommu_type)) {
-if (iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) {
+while (ioctl(container_fd, VFIO_SET_IOMMU, *iommu_type)) {
+if (*iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) {
 /*
  * On sPAPR, despite the IOMMU subdriver always advertises v1 and
  * v2, the running platform may not support v2 and there is no
  * way to guess it until an IOMMU group gets added to the 
container.
  * So in case it fails with v2, try v1 as a fallback.
  */
-iommu_type = VFIO_SPAPR_TCE_IOMMU;
+*iommu_type = VFIO_SPAPR_TCE_IOMMU;
 continue;
 }
 error_setg_errno(errp, errno, "Failed to set iommu for container");
 return false;
 }
 
-container->iommu_type = iommu_type;
-
-vioc_name = vfio_get_iommu_class_name(iommu_type);
-vioc = VFIO_IOMMU_CLASS(object_class_by_name(vioc_name));
-
-vfio_container_init(>bcontainer, vioc);
 return true;
 }
 
 static VFIOContainer *vfio_create_container(int fd, VFIOGroup *group,
 Error **errp)
 {
+int iommu_type;
+const VFIOIOMMUClass *vioc;
+const char *vioc_name;
 VFIOContainer *container;
 
+iommu_type = vfio_get_iommu_type(fd, errp);
+if (iommu_type < 0) {
+return NULL;
+}
+
+if (!vfio_set_iommu(fd, group->fd, _type, errp)) {
+return NULL;
+}
+
+vioc_name = vfio_get_iommu_class_name(iommu_type);
+vioc = VFIO_IOMMU_CLASS(object_class_by_name(vioc_name));
+
 container = g_malloc0(sizeof(*container));
 container->fd = fd;
+container->iommu_type = iommu_type;
+vfio_container_init(>bcontainer, vioc);
 return container;
 }
 
@@ -618,9 +620,6 @@ static bool vfio_connect_container(VFIOGroup *group, 
AddressSpace *as,
 if (!container) {
 goto close_fd_exit;
 }
-if (!vfio_set_iommu(container, group->fd, errp)) {
-goto free_container_exit;
-}
 bcontainer = >bcontainer;
 
 if (!vfio_cpr_register_container(bcontainer, errp)) {
-- 
2.45.2




[PULL 17/42] intel_iommu: Check compatibility with host IOMMU capabilities

2024-06-24 Thread Cédric Le Goater
From: Zhenzhong Duan 

If the check fails, the host device (either a VFIO or VDPA device) is
not compatible with the current vIOMMU config and should not be passed
to the guest.

Only aw_bits is checked for now, we don't care about other caps
before scalable modern mode is introduced.

Signed-off-by: Yi Liu 
Signed-off-by: Zhenzhong Duan 
Reviewed-by: Eric Auger 
Reviewed-by: Michael S. Tsirkin 
---
 hw/i386/intel_iommu.c | 29 +
 1 file changed, 29 insertions(+)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 
019d1c9c800cde1d84f50e29eac02bea0e3329f3..37c21a0aec655770f5b0190c9e4edcf63f8057fa
 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -3837,6 +3837,30 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, 
PCIBus *bus,
 return vtd_dev_as;
 }
 
+static bool vtd_check_hiod(IntelIOMMUState *s, HostIOMMUDevice *hiod,
+   Error **errp)
+{
+HostIOMMUDeviceClass *hiodc = HOST_IOMMU_DEVICE_GET_CLASS(hiod);
+int ret;
+
+if (!hiodc->get_cap) {
+error_setg(errp, ".get_cap() not implemented");
+return false;
+}
+
+/* Common checks */
+ret = hiodc->get_cap(hiod, HOST_IOMMU_DEVICE_CAP_AW_BITS, errp);
+if (ret < 0) {
+return false;
+}
+if (s->aw_bits > ret) {
+error_setg(errp, "aw-bits %d > host aw-bits %d", s->aw_bits, ret);
+return false;
+}
+
+return true;
+}
+
 static bool vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int devfn,
  HostIOMMUDevice *hiod, Error **errp)
 {
@@ -3857,6 +3881,11 @@ static bool vtd_dev_set_iommu_device(PCIBus *bus, void 
*opaque, int devfn,
 return false;
 }
 
+if (!vtd_check_hiod(s, hiod, errp)) {
+vtd_iommu_unlock(s);
+return false;
+}
+
 new_key = g_malloc(sizeof(*new_key));
 new_key->bus = bus;
 new_key->devfn = devfn;
-- 
2.45.2




[PULL 06/42] vfio/container: Implement HostIOMMUDeviceClass::realize() handler

2024-06-24 Thread Cédric Le Goater
From: Zhenzhong Duan 

The realize function populates the capabilities. For now only the
aw_bits cap is computed for the legacy backend.

Introduce a helper function vfio_device_get_aw_bits() which calls
range_get_last_bit() to get host aw_bits and package it in
HostIOMMUDeviceCaps for query with .get_cap(). This helper will
also be used by iommufd backend.

Signed-off-by: Zhenzhong Duan 
Reviewed-by: Eric Auger 
Reviewed-by: Michael S. Tsirkin 
---
 include/hw/vfio/vfio-common.h |  1 +
 hw/vfio/container.c   | 19 +++
 hw/vfio/helpers.c | 17 +
 3 files changed, 37 insertions(+)

diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 
56d171721164991b408073488330bf1d79104970..105b8b7e804d3de43868d447e21eb9bedc50808f
 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -289,4 +289,5 @@ bool vfio_device_get_name(VFIODevice *vbasedev, Error 
**errp);
 void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp);
 void vfio_device_init(VFIODevice *vbasedev, int type, VFIODeviceOps *ops,
   DeviceState *dev, bool ram_discard);
+int vfio_device_get_aw_bits(VFIODevice *vdev);
 #endif /* HW_VFIO_VFIO_COMMON_H */
diff --git a/hw/vfio/container.c b/hw/vfio/container.c
index 
c4fca2dfcab32781fb301181a1ef67238015a76f..2f62c13214412618b412240b61efcbe1b1c79ed5
 100644
--- a/hw/vfio/container.c
+++ b/hw/vfio/container.c
@@ -1136,6 +1136,24 @@ static void vfio_iommu_legacy_class_init(ObjectClass 
*klass, void *data)
 vioc->pci_hot_reset = vfio_legacy_pci_hot_reset;
 };
 
+static bool hiod_legacy_vfio_realize(HostIOMMUDevice *hiod, void *opaque,
+ Error **errp)
+{
+VFIODevice *vdev = opaque;
+
+hiod->name = g_strdup(vdev->name);
+hiod->caps.aw_bits = vfio_device_get_aw_bits(vdev);
+
+return true;
+}
+
+static void hiod_legacy_vfio_class_init(ObjectClass *oc, void *data)
+{
+HostIOMMUDeviceClass *hioc = HOST_IOMMU_DEVICE_CLASS(oc);
+
+hioc->realize = hiod_legacy_vfio_realize;
+};
+
 static const TypeInfo types[] = {
 {
 .name = TYPE_VFIO_IOMMU_LEGACY,
@@ -1144,6 +1162,7 @@ static const TypeInfo types[] = {
 }, {
 .name = TYPE_HOST_IOMMU_DEVICE_LEGACY_VFIO,
 .parent = TYPE_HOST_IOMMU_DEVICE,
+.class_init = hiod_legacy_vfio_class_init,
 }
 };
 
diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c
index 
27ea26aa48f67e6518f871ac651ab8d2703cc611..b14edd46edc9069bb148359a1b419253ff4e5ef0
 100644
--- a/hw/vfio/helpers.c
+++ b/hw/vfio/helpers.c
@@ -658,3 +658,20 @@ void vfio_device_init(VFIODevice *vbasedev, int type, 
VFIODeviceOps *ops,
 
 vbasedev->ram_block_discard_allowed = ram_discard;
 }
+
+int vfio_device_get_aw_bits(VFIODevice *vdev)
+{
+/*
+ * iova_ranges is a sorted list. For old kernels that support
+ * VFIO but not support query of iova ranges, iova_ranges is NULL,
+ * in this case HOST_IOMMU_DEVICE_CAP_AW_BITS_MAX(64) is returned.
+ */
+GList *l = g_list_last(vdev->bcontainer->iova_ranges);
+
+if (l) {
+Range *range = l->data;
+return range_get_last_bit(range) + 1;
+}
+
+return HOST_IOMMU_DEVICE_CAP_AW_BITS_MAX;
+}
-- 
2.45.2




[PULL 39/42] vfio/container: Remove VFIOContainerBase::ops

2024-06-24 Thread Cédric Le Goater
Instead, use VFIO_IOMMU_GET_CLASS() to get the class pointer.

Reviewed-by: Zhenzhong Duan 
Reviewed-by: Eric Auger 
Tested-by: Eric Auger 
Signed-off-by: Cédric Le Goater 
---
 include/hw/vfio/vfio-container-base.h |  1 -
 hw/vfio/common.c  |  2 +-
 hw/vfio/container-base.c  | 37 +--
 hw/vfio/container.c   | 15 ++-
 hw/vfio/iommufd.c |  4 +--
 hw/vfio/pci.c |  4 +--
 6 files changed, 38 insertions(+), 25 deletions(-)

diff --git a/include/hw/vfio/vfio-container-base.h 
b/include/hw/vfio/vfio-container-base.h
index 
b079b76f68975c5701a289ce9012e912a8e44fc6..6b57cd8e7f5d7d2817f6e3b96ce4566d2630bb12
 100644
--- a/include/hw/vfio/vfio-container-base.h
+++ b/include/hw/vfio/vfio-container-base.h
@@ -35,7 +35,6 @@ typedef struct VFIOAddressSpace {
  */
 typedef struct VFIOContainerBase {
 Object parent;
-const VFIOIOMMUClass *ops;
 VFIOAddressSpace *space;
 MemoryListener listener;
 Error *error;
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 
1686a0bed23bd95467bfb00a0c39a4d966e49cae..7cdb969fd396ae3815cb175ad631d93d7cca7006
 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -1573,5 +1573,5 @@ void vfio_detach_device(VFIODevice *vbasedev)
 return;
 }
 object_unref(vbasedev->hiod);
-vbasedev->bcontainer->ops->detach_device(vbasedev);
+VFIO_IOMMU_GET_CLASS(vbasedev->bcontainer)->detach_device(vbasedev);
 }
diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c
index 
3858f5ab1d68e897f9013161d7c5c20c0553029d..24669d4d7472f49ac3adf2618a32bf7d82c5c344
 100644
--- a/hw/vfio/container-base.c
+++ b/hw/vfio/container-base.c
@@ -19,62 +19,73 @@ int vfio_container_dma_map(VFIOContainerBase *bcontainer,
hwaddr iova, ram_addr_t size,
void *vaddr, bool readonly)
 {
-g_assert(bcontainer->ops->dma_map);
-return bcontainer->ops->dma_map(bcontainer, iova, size, vaddr, readonly);
+VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
+
+g_assert(vioc->dma_map);
+return vioc->dma_map(bcontainer, iova, size, vaddr, readonly);
 }
 
 int vfio_container_dma_unmap(VFIOContainerBase *bcontainer,
  hwaddr iova, ram_addr_t size,
  IOMMUTLBEntry *iotlb)
 {
-g_assert(bcontainer->ops->dma_unmap);
-return bcontainer->ops->dma_unmap(bcontainer, iova, size, iotlb);
+VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
+
+g_assert(vioc->dma_unmap);
+return vioc->dma_unmap(bcontainer, iova, size, iotlb);
 }
 
 bool vfio_container_add_section_window(VFIOContainerBase *bcontainer,
MemoryRegionSection *section,
Error **errp)
 {
-if (!bcontainer->ops->add_window) {
+VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
+
+if (!vioc->add_window) {
 return true;
 }
 
-return bcontainer->ops->add_window(bcontainer, section, errp);
+return vioc->add_window(bcontainer, section, errp);
 }
 
 void vfio_container_del_section_window(VFIOContainerBase *bcontainer,
MemoryRegionSection *section)
 {
-if (!bcontainer->ops->del_window) {
+VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
+
+if (!vioc->del_window) {
 return;
 }
 
-return bcontainer->ops->del_window(bcontainer, section);
+return vioc->del_window(bcontainer, section);
 }
 
 int vfio_container_set_dirty_page_tracking(VFIOContainerBase *bcontainer,
bool start, Error **errp)
 {
+VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
+
 if (!bcontainer->dirty_pages_supported) {
 return 0;
 }
 
-g_assert(bcontainer->ops->set_dirty_page_tracking);
-return bcontainer->ops->set_dirty_page_tracking(bcontainer, start, errp);
+g_assert(vioc->set_dirty_page_tracking);
+return vioc->set_dirty_page_tracking(bcontainer, start, errp);
 }
 
 int vfio_container_query_dirty_bitmap(const VFIOContainerBase *bcontainer,
VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error **errp)
 {
-g_assert(bcontainer->ops->query_dirty_bitmap);
-return bcontainer->ops->query_dirty_bitmap(bcontainer, vbmap, iova, size,
+VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
+
+g_assert(vioc->query_dirty_bitmap);
+return vioc->query_dirty_bitmap(bcontainer, vbmap, iova, size,
errp);
 }
 
 void vfio_container_init(VFIOContainerBase *bcontainer,
  const VFIOIOMMUClass *ops)
 {
-bcontainer->ops = ops;
 }
 
 void vfio_container_destroy(VFIOContainerBase *bcontainer)
diff --git a/hw/vfio/container.c b/hw/vfio/container.c
index 
ff3a6831da83c0fe11060cd57918c4d87b10197c..a2f5fbad00cd228e27a47df5cd683dbb34296113
 

[PULL 16/42] intel_iommu: Implement [set|unset]_iommu_device() callbacks

2024-06-24 Thread Cédric Le Goater
From: Yi Liu 

Implement [set|unset]_iommu_device() callbacks in Intel vIOMMU.
In set call, we take a reference of HostIOMMUDevice and store it
in hash table indexed by PCI BDF.

Note that this BDF index is the device's real BDF, not the aliased one,
so it differs from the index of VTDAddressSpace. Multiple assigned
devices can be under the same virtual iommu group and share the same
VTDAddressSpace, but each has its own HostIOMMUDevice.

Signed-off-by: Yi Liu 
Signed-off-by: Yi Sun 
Signed-off-by: Zhenzhong Duan 
Reviewed-by: Eric Auger 
Reviewed-by: Michael S. Tsirkin 
---
 include/hw/i386/intel_iommu.h |  2 +
 hw/i386/intel_iommu.c | 81 +++
 2 files changed, 83 insertions(+)

diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
index 
7fa0a695c87bb8569fd6985e299fc0a1cc4b0c0c..1eb05c29fc9c703a61f06d90616694e74fb61c15
 100644
--- a/include/hw/i386/intel_iommu.h
+++ b/include/hw/i386/intel_iommu.h
@@ -292,6 +292,8 @@ struct IntelIOMMUState {
 /* list of registered notifiers */
 QLIST_HEAD(, VTDAddressSpace) vtd_as_with_notifiers;
 
+GHashTable *vtd_host_iommu_dev; /* HostIOMMUDevice */
+
 /* interrupt remapping */
 bool intr_enabled;  /* Whether guest enabled IR */
 dma_addr_t intr_root;   /* Interrupt remapping table pointer */
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 
c69c0d285b1fa85da22440934178243942fe42ab..019d1c9c800cde1d84f50e29eac02bea0e3329f3
 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -61,6 +61,12 @@ struct vtd_as_key {
 uint32_t pasid;
 };
 
+/* bus/devfn is PCI device's real BDF not the aliased one */
+struct vtd_hiod_key {
+PCIBus *bus;
+uint8_t devfn;
+};
+
 struct vtd_iotlb_key {
 uint64_t gfn;
 uint32_t pasid;
@@ -250,6 +256,25 @@ static guint vtd_as_hash(gconstpointer v)
 return (guint)(value << 8 | key->devfn);
 }
 
+/* Same implementation as vtd_as_hash() */
+static guint vtd_hiod_hash(gconstpointer v)
+{
+return vtd_as_hash(v);
+}
+
+static gboolean vtd_hiod_equal(gconstpointer v1, gconstpointer v2)
+{
+const struct vtd_hiod_key *key1 = v1;
+const struct vtd_hiod_key *key2 = v2;
+
+return (key1->bus == key2->bus) && (key1->devfn == key2->devfn);
+}
+
+static void vtd_hiod_destroy(gpointer v)
+{
+object_unref(v);
+}
+
 static gboolean vtd_hash_remove_by_domain(gpointer key, gpointer value,
   gpointer user_data)
 {
@@ -3812,6 +3837,58 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, 
PCIBus *bus,
 return vtd_dev_as;
 }
 
+static bool vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int devfn,
+ HostIOMMUDevice *hiod, Error **errp)
+{
+IntelIOMMUState *s = opaque;
+struct vtd_as_key key = {
+.bus = bus,
+.devfn = devfn,
+};
+struct vtd_as_key *new_key;
+
+assert(hiod);
+
+vtd_iommu_lock(s);
+
+if (g_hash_table_lookup(s->vtd_host_iommu_dev, )) {
+error_setg(errp, "Host IOMMU device already exist");
+vtd_iommu_unlock(s);
+return false;
+}
+
+new_key = g_malloc(sizeof(*new_key));
+new_key->bus = bus;
+new_key->devfn = devfn;
+
+object_ref(hiod);
+g_hash_table_insert(s->vtd_host_iommu_dev, new_key, hiod);
+
+vtd_iommu_unlock(s);
+
+return true;
+}
+
+static void vtd_dev_unset_iommu_device(PCIBus *bus, void *opaque, int devfn)
+{
+IntelIOMMUState *s = opaque;
+struct vtd_as_key key = {
+.bus = bus,
+.devfn = devfn,
+};
+
+vtd_iommu_lock(s);
+
+if (!g_hash_table_lookup(s->vtd_host_iommu_dev, )) {
+vtd_iommu_unlock(s);
+return;
+}
+
+g_hash_table_remove(s->vtd_host_iommu_dev, );
+
+vtd_iommu_unlock(s);
+}
+
 /* Unmap the whole range in the notifier's scope. */
 static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n)
 {
@@ -4116,6 +4193,8 @@ static AddressSpace *vtd_host_dma_iommu(PCIBus *bus, void 
*opaque, int devfn)
 
 static PCIIOMMUOps vtd_iommu_ops = {
 .get_address_space = vtd_host_dma_iommu,
+.set_iommu_device = vtd_dev_set_iommu_device,
+.unset_iommu_device = vtd_dev_unset_iommu_device,
 };
 
 static bool vtd_decide_config(IntelIOMMUState *s, Error **errp)
@@ -4235,6 +4314,8 @@ static void vtd_realize(DeviceState *dev, Error **errp)
  g_free, g_free);
 s->vtd_address_spaces = g_hash_table_new_full(vtd_as_hash, vtd_as_equal,
   g_free, g_free);
+s->vtd_host_iommu_dev = g_hash_table_new_full(vtd_hiod_hash, 
vtd_hiod_equal,
+  g_free, vtd_hiod_destroy);
 vtd_init(s);
 pci_setup_iommu(bus, _iommu_ops, dev);
 /* Pseudo address space under root PCI bus. */
-- 
2.45.2




[PULL 00/42] vfio queue

2024-06-24 Thread Cédric Le Goater
The following changes since commit d89b64beea65f77c21a553cb54cb97b75c53dc21:

  Merge tag 'pull-request-2024-06-24' of https://gitlab.com/thuth/qemu into 
staging (2024-06-24 11:57:11 -0700)

are available in the Git repository at:

  https://github.com/legoater/qemu/ tags/pull-vfio-20240624

for you to fetch changes up to 96b7af4388b38bc1f66467a9c7c8ee9d3bff500f:

  vfio/container: Move vfio_container_destroy() to an instance_finalize() 
handler (2024-06-24 23:15:31 +0200)


vfio queue:

* Add a host IOMMU device abstraction
* VIRTIO-IOMMU/VFIO: Fix host iommu geometry handling
* QOMify VFIOContainer


Avihai Horon (1):
  vfio/common: Extract vIOMMU code from vfio_sync_dirty_bitmap()

Cédric Le Goater (15):
  vfio: Make vfio_devices_dma_logging_start() return bool
  vfio: Remove unused declarations from vfio-common.h
  vfio/container: Introduce vfio_address_space_insert()
  vfio/container: Simplify vfio_container_init()
  vfio/container: Modify vfio_get_iommu_type() to use a container fd
  vfio/container: Introduce vfio_get_iommu_class_name()
  vfio/container: Introduce vfio_create_container()
  vfio/container: Discover IOMMU type before creating the container
  vfio/container: Change VFIOContainerBase to use QOM
  vfio/container: Switch to QOM
  vfio/container: Introduce an instance_init() handler
  vfio/container: Remove VFIOContainerBase::ops
  vfio/container: Remove vfio_container_init()
  vfio/container: Introduce vfio_iommu_legacy_instance_init()
  vfio/container: Move vfio_container_destroy() to an instance_finalize() 
handler

Eric Auger (8):
  HostIOMMUDevice: Store the VFIO/VDPA agent
  virtio-iommu: Implement [set|unset]_iommu_device() callbacks
  HostIOMMUDevice: Introduce get_iova_ranges callback
  HostIOMMUDevice: Store the aliased bus and devfn
  virtio-iommu: Compute host reserved regions
  virtio-iommu: Remove the implementation of iommu_set_iova_range
  hw/vfio: Remove memory_region_iommu_set_iova_ranges() call
  memory: Remove IOMMU MR iommu_set_iova_range API

Joao Martins (1):
  vfio/common: Move dirty tracking ranges update to helper

Yi Liu (2):
  hw/pci: Introduce pci_device_[set|unset]_iommu_device()
  intel_iommu: Implement [set|unset]_iommu_device() callbacks

Zhenzhong Duan (15):
  backends: Introduce HostIOMMUDevice abstract
  backends/host_iommu_device: Introduce HostIOMMUDeviceCaps
  vfio/container: Introduce TYPE_HOST_IOMMU_DEVICE_LEGACY_VFIO device
  backends/iommufd: Introduce TYPE_HOST_IOMMU_DEVICE_IOMMUFD[_VFIO] devices
  range: Introduce range_get_last_bit()
  vfio/container: Implement HostIOMMUDeviceClass::realize() handler
  backends/iommufd: Introduce helper function 
iommufd_backend_get_device_info()
  vfio/iommufd: Implement HostIOMMUDeviceClass::realize() handler
  vfio/container: Implement HostIOMMUDeviceClass::get_cap() handler
  backends/iommufd: Implement HostIOMMUDeviceClass::get_cap() handler
  vfio: Create host IOMMU device instance
  hw/pci: Introduce helper function pci_device_get_iommu_bus_devfn()
  vfio/pci: Pass HostIOMMUDevice to vIOMMU
  intel_iommu: Extract out vtd_cap_init() to initialize cap/ecap
  intel_iommu: Check compatibility with host IOMMU capabilities

 MAINTAINERS   |   2 +
 include/exec/memory.h |  32 
 include/hw/i386/intel_iommu.h |   2 +
 include/hw/pci/pci.h  |  38 -
 include/hw/vfio/vfio-common.h |  18 ++-
 include/hw/vfio/vfio-container-base.h |  22 +--
 include/hw/virtio/virtio-iommu.h  |   2 +
 include/qemu/range.h  |  11 ++
 include/sysemu/host_iommu_device.h| 102 
 include/sysemu/iommufd.h  |  19 +++
 backends/host_iommu_device.c  |  33 
 backends/iommufd.c|  76 +++--
 hw/i386/intel_iommu.c | 203 ++-
 hw/pci/pci.c  |  79 -
 hw/vfio/common.c  | 150 ++---
 hw/vfio/container-base.c  |  70 
 hw/vfio/container.c   | 164 +--
 hw/vfio/helpers.c |  17 ++
 hw/vfio/iommufd.c |  68 +++-
 hw/vfio/pci.c |  23 ++-
 hw/vfio/spapr.c   |   3 +
 hw/virtio/virtio-iommu.c  | 296 ++
 system/memory.c   |  13 --
 backends/meson.build  |   1 +
 24 files changed, 1066 insertions(+), 378 deletions(-)
 create mode 100644 include/sysemu/host_iommu_device.h
 create mode 100644 backends/host_iommu_device.c




[PULL 19/42] virtio-iommu: Implement [set|unset]_iommu_device() callbacks

2024-06-24 Thread Cédric Le Goater
From: Eric Auger 

Implement PCIIOMMUOps [set|unset]_iommu_device() callbacks.
In set(), the HostIOMMUDevice handle is stored in a hash
table indexed by PCI BDF. The object will allow retrieving
information related to the physical IOMMU.

Signed-off-by: Eric Auger 
Reviewed-by: Zhenzhong Duan 
Reviewed-by: Michael S. Tsirkin 
---
 include/hw/virtio/virtio-iommu.h |  2 +
 hw/virtio/virtio-iommu.c | 82 
 2 files changed, 84 insertions(+)

diff --git a/include/hw/virtio/virtio-iommu.h b/include/hw/virtio/virtio-iommu.h
index 
83a52cc446d30443b8c89170e35829047bc24866..bdb3da72d0854272b01736ccc07af0a26009d23c
 100644
--- a/include/hw/virtio/virtio-iommu.h
+++ b/include/hw/virtio/virtio-iommu.h
@@ -25,6 +25,7 @@
 #include "hw/pci/pci.h"
 #include "qom/object.h"
 #include "qapi/qapi-types-virtio.h"
+#include "sysemu/host_iommu_device.h"
 
 #define TYPE_VIRTIO_IOMMU "virtio-iommu-device"
 #define TYPE_VIRTIO_IOMMU_PCI "virtio-iommu-pci"
@@ -57,6 +58,7 @@ struct VirtIOIOMMU {
 struct virtio_iommu_config config;
 uint64_t features;
 GHashTable *as_by_busptr;
+GHashTable *host_iommu_devices;
 IOMMUPciBus *iommu_pcibus_by_bus_num[PCI_BUS_MAX];
 PCIBus *primary_bus;
 ReservedRegion *prop_resv_regions;
diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c
index 
1326c6ec417c33ab5d8fdf22608dcab735f463c4..16c8ec3ca460a6d70e83b28787398f94dd16cc99
 100644
--- a/hw/virtio/virtio-iommu.c
+++ b/hw/virtio/virtio-iommu.c
@@ -69,6 +69,11 @@ typedef struct VirtIOIOMMUMapping {
 uint32_t flags;
 } VirtIOIOMMUMapping;
 
+struct hiod_key {
+PCIBus *bus;
+uint8_t devfn;
+};
+
 static inline uint16_t virtio_iommu_get_bdf(IOMMUDevice *dev)
 {
 return PCI_BUILD_BDF(pci_bus_num(dev->bus), dev->devfn);
@@ -462,8 +467,82 @@ static AddressSpace *virtio_iommu_find_add_as(PCIBus *bus, 
void *opaque,
 return >as;
 }
 
+static gboolean hiod_equal(gconstpointer v1, gconstpointer v2)
+{
+const struct hiod_key *key1 = v1;
+const struct hiod_key *key2 = v2;
+
+return (key1->bus == key2->bus) && (key1->devfn == key2->devfn);
+}
+
+static guint hiod_hash(gconstpointer v)
+{
+const struct hiod_key *key = v;
+guint value = (guint)(uintptr_t)key->bus;
+
+return (guint)(value << 8 | key->devfn);
+}
+
+static void hiod_destroy(gpointer v)
+{
+object_unref(v);
+}
+
+static HostIOMMUDevice *
+get_host_iommu_device(VirtIOIOMMU *viommu, PCIBus *bus, int devfn) {
+struct hiod_key key = {
+.bus = bus,
+.devfn = devfn,
+};
+
+return g_hash_table_lookup(viommu->host_iommu_devices, );
+}
+
+static bool virtio_iommu_set_iommu_device(PCIBus *bus, void *opaque, int devfn,
+  HostIOMMUDevice *hiod, Error **errp)
+{
+VirtIOIOMMU *viommu = opaque;
+struct hiod_key *new_key;
+
+assert(hiod);
+
+if (get_host_iommu_device(viommu, bus, devfn)) {
+error_setg(errp, "Host IOMMU device already exists");
+return false;
+}
+
+new_key = g_malloc(sizeof(*new_key));
+new_key->bus = bus;
+new_key->devfn = devfn;
+
+object_ref(hiod);
+g_hash_table_insert(viommu->host_iommu_devices, new_key, hiod);
+
+return true;
+}
+
+static void
+virtio_iommu_unset_iommu_device(PCIBus *bus, void *opaque, int devfn)
+{
+VirtIOIOMMU *viommu = opaque;
+HostIOMMUDevice *hiod;
+struct hiod_key key = {
+.bus = bus,
+.devfn = devfn,
+};
+
+hiod = g_hash_table_lookup(viommu->host_iommu_devices, );
+if (!hiod) {
+return;
+}
+
+g_hash_table_remove(viommu->host_iommu_devices, );
+}
+
 static const PCIIOMMUOps virtio_iommu_ops = {
 .get_address_space = virtio_iommu_find_add_as,
+.set_iommu_device = virtio_iommu_set_iommu_device,
+.unset_iommu_device = virtio_iommu_unset_iommu_device,
 };
 
 static int virtio_iommu_attach(VirtIOIOMMU *s,
@@ -1357,6 +1436,9 @@ static void virtio_iommu_device_realize(DeviceState *dev, 
Error **errp)
 
 s->as_by_busptr = g_hash_table_new_full(NULL, NULL, NULL, g_free);
 
+s->host_iommu_devices = g_hash_table_new_full(hiod_hash, hiod_equal,
+  g_free, hiod_destroy);
+
 if (s->primary_bus) {
 pci_setup_iommu(s->primary_bus, _iommu_ops, s);
 } else {
-- 
2.45.2




[PULL 02/42] backends/host_iommu_device: Introduce HostIOMMUDeviceCaps

2024-06-24 Thread Cédric Le Goater
From: Zhenzhong Duan 

HostIOMMUDeviceCaps's elements map to the host IOMMU's capabilities.
Different platform IOMMU can support different elements.

Currently there are only two elements, type and aw_bits: type hints the
host platform IOMMU type, e.g., Intel VT-d, ARM SMMU, etc.; aw_bits hints
the host IOMMU address width.

Introduce .get_cap() handler to check if HOST_IOMMU_DEVICE_CAP_XXX
is supported.

Suggested-by: Cédric Le Goater 
Signed-off-by: Zhenzhong Duan 
Reviewed-by: Eric Auger 
Reviewed-by: Michael S. Tsirkin 
---
 include/sysemu/host_iommu_device.h | 38 ++
 1 file changed, 38 insertions(+)

diff --git a/include/sysemu/host_iommu_device.h 
b/include/sysemu/host_iommu_device.h
index 
db47a16189a142a25887a835be9f8a8137fe00ee..a57873958b03e1fcd6c0c8991a2010dde02c566c
 100644
--- a/include/sysemu/host_iommu_device.h
+++ b/include/sysemu/host_iommu_device.h
@@ -15,6 +15,18 @@
 #include "qom/object.h"
 #include "qapi/error.h"
 
+/**
+ * struct HostIOMMUDeviceCaps - Define host IOMMU device capabilities.
+ *
+ * @type: host platform IOMMU type.
+ *
+ * @aw_bits: host IOMMU address width. 0xff if no limitation.
+ */
+typedef struct HostIOMMUDeviceCaps {
+uint32_t type;
+uint8_t aw_bits;
+} HostIOMMUDeviceCaps;
+
 #define TYPE_HOST_IOMMU_DEVICE "host-iommu-device"
 OBJECT_DECLARE_TYPE(HostIOMMUDevice, HostIOMMUDeviceClass, HOST_IOMMU_DEVICE)
 
@@ -22,6 +34,7 @@ struct HostIOMMUDevice {
 Object parent_obj;
 
 char *name;
+HostIOMMUDeviceCaps caps;
 };
 
 /**
@@ -49,5 +62,30 @@ struct HostIOMMUDeviceClass {
  * Returns: true on success, false on failure.
  */
 bool (*realize)(HostIOMMUDevice *hiod, void *opaque, Error **errp);
+/**
+ * @get_cap: check if a host IOMMU device capability is supported.
+ *
+ * Optional callback, if not implemented, hint not supporting query
+ * of @cap.
+ *
+ * @hiod: pointer to a host IOMMU device instance.
+ *
+ * @cap: capability to check.
+ *
+ * @errp: pass an Error out when fails to query capability.
+ *
+ * Returns: <0 on failure, 0 if a @cap is unsupported, or else
+ * 1 or some positive value for some special @cap,
+ * i.e., HOST_IOMMU_DEVICE_CAP_AW_BITS.
+ */
+int (*get_cap)(HostIOMMUDevice *hiod, int cap, Error **errp);
 };
+
+/*
+ * Host IOMMU device capability list.
+ */
+#define HOST_IOMMU_DEVICE_CAP_IOMMU_TYPE0
+#define HOST_IOMMU_DEVICE_CAP_AW_BITS   1
+
+#define HOST_IOMMU_DEVICE_CAP_AW_BITS_MAX   64
 #endif
-- 
2.45.2




[PULL 03/42] vfio/container: Introduce TYPE_HOST_IOMMU_DEVICE_LEGACY_VFIO device

2024-06-24 Thread Cédric Le Goater
From: Zhenzhong Duan 

TYPE_HOST_IOMMU_DEVICE_LEGACY_VFIO represents a host IOMMU device under
VFIO legacy container backend.

It will have its own realize implementation.

Suggested-by: Eric Auger 
Suggested-by: Cédric Le Goater 
Signed-off-by: Zhenzhong Duan 
Reviewed-by: Eric Auger 
Reviewed-by: Michael S. Tsirkin 
---
 include/hw/vfio/vfio-common.h | 3 +++
 hw/vfio/container.c   | 5 -
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 
4cb1ab8645dcdf604f3c2bb29328668fd5eb7284..75b167979ac221e59b2681b2704c03778823fbb0
 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -31,6 +31,7 @@
 #endif
 #include "sysemu/sysemu.h"
 #include "hw/vfio/vfio-container-base.h"
+#include "sysemu/host_iommu_device.h"
 
 #define VFIO_MSG_PREFIX "vfio %s: "
 
@@ -171,6 +172,8 @@ typedef struct VFIOGroup {
 bool ram_block_discard_allowed;
 } VFIOGroup;
 
+#define TYPE_HOST_IOMMU_DEVICE_LEGACY_VFIO TYPE_HOST_IOMMU_DEVICE 
"-legacy-vfio"
+
 typedef struct VFIODMABuf {
 QemuDmaBuf *buf;
 uint32_t pos_x, pos_y, pos_updates;
diff --git a/hw/vfio/container.c b/hw/vfio/container.c
index 
096cc972586df14a4a68074b7038a9661b1558e2..c4fca2dfcab32781fb301181a1ef67238015a76f
 100644
--- a/hw/vfio/container.c
+++ b/hw/vfio/container.c
@@ -1141,7 +1141,10 @@ static const TypeInfo types[] = {
 .name = TYPE_VFIO_IOMMU_LEGACY,
 .parent = TYPE_VFIO_IOMMU,
 .class_init = vfio_iommu_legacy_class_init,
-},
+}, {
+.name = TYPE_HOST_IOMMU_DEVICE_LEGACY_VFIO,
+.parent = TYPE_HOST_IOMMU_DEVICE,
+}
 };
 
 DEFINE_TYPES(types)
-- 
2.45.2




[PULL 11/42] vfio: Create host IOMMU device instance

2024-06-24 Thread Cédric Le Goater
From: Zhenzhong Duan 

Create host IOMMU device instance in vfio_attach_device() and call
.realize() to initialize it further.

Introduce attribute VFIOIOMMUClass::hiod_typename and initialize
it based on VFIO backend type. It will facilitate HostIOMMUDevice
creation in vfio_attach_device().

Suggested-by: Cédric Le Goater 
Signed-off-by: Zhenzhong Duan 
Reviewed-by: Eric Auger 
Reviewed-by: Michael S. Tsirkin 
---
 include/hw/vfio/vfio-common.h |  1 +
 include/hw/vfio/vfio-container-base.h |  3 +++
 hw/vfio/common.c  | 16 +++-
 hw/vfio/container.c   |  2 ++
 hw/vfio/iommufd.c |  2 ++
 5 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 
105b8b7e804d3de43868d447e21eb9bedc50808f..776de8064f740784f95cab0311c5f15f50d60ffe
 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -127,6 +127,7 @@ typedef struct VFIODevice {
 OnOffAuto pre_copy_dirty_page_tracking;
 bool dirty_pages_supported;
 bool dirty_tracking;
+HostIOMMUDevice *hiod;
 int devid;
 IOMMUFDBackend *iommufd;
 } VFIODevice;
diff --git a/include/hw/vfio/vfio-container-base.h 
b/include/hw/vfio/vfio-container-base.h
index 
2776481fc97ef5720b10a4e3b3e6deaa075ece75..442c0dfc4c1774753c239c2c8360dcd1540d44fa
 100644
--- a/include/hw/vfio/vfio-container-base.h
+++ b/include/hw/vfio/vfio-container-base.h
@@ -109,6 +109,9 @@ DECLARE_CLASS_CHECKERS(VFIOIOMMUClass, VFIO_IOMMU, 
TYPE_VFIO_IOMMU)
 struct VFIOIOMMUClass {
 InterfaceClass parent_class;
 
+/* Properties */
+const char *hiod_typename;
+
 /* basic feature */
 bool (*setup)(VFIOContainerBase *bcontainer, Error **errp);
 int (*dma_map)(const VFIOContainerBase *bcontainer,
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 
f9619a1dfbc689b10d70a60ce21b9b018f32391f..f20a7b5bba6b44ea4b181eab12a7ddd5175e8366
 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -1528,6 +1528,7 @@ bool vfio_attach_device(char *name, VFIODevice *vbasedev,
 {
 const VFIOIOMMUClass *ops =
 VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_LEGACY));
+HostIOMMUDevice *hiod;
 
 if (vbasedev->iommufd) {
 ops = VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD));
@@ -1535,7 +1536,19 @@ bool vfio_attach_device(char *name, VFIODevice *vbasedev,
 
 assert(ops);
 
-return ops->attach_device(name, vbasedev, as, errp);
+if (!ops->attach_device(name, vbasedev, as, errp)) {
+return false;
+}
+
+hiod = HOST_IOMMU_DEVICE(object_new(ops->hiod_typename));
+if (!HOST_IOMMU_DEVICE_GET_CLASS(hiod)->realize(hiod, vbasedev, errp)) {
+object_unref(hiod);
+ops->detach_device(vbasedev);
+return false;
+}
+vbasedev->hiod = hiod;
+
+return true;
 }
 
 void vfio_detach_device(VFIODevice *vbasedev)
@@ -1543,5 +1556,6 @@ void vfio_detach_device(VFIODevice *vbasedev)
 if (!vbasedev->bcontainer) {
 return;
 }
+object_unref(vbasedev->hiod);
 vbasedev->bcontainer->ops->detach_device(vbasedev);
 }
diff --git a/hw/vfio/container.c b/hw/vfio/container.c
index 
99beeba422ebfe49caed4fcd57afe5514dea8b39..26e6f7fb4f748162d881cb22c970428f319df3c3
 100644
--- a/hw/vfio/container.c
+++ b/hw/vfio/container.c
@@ -1126,6 +1126,8 @@ static void vfio_iommu_legacy_class_init(ObjectClass 
*klass, void *data)
 {
 VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass);
 
+vioc->hiod_typename = TYPE_HOST_IOMMU_DEVICE_LEGACY_VFIO;
+
 vioc->setup = vfio_legacy_setup;
 vioc->dma_map = vfio_legacy_dma_map;
 vioc->dma_unmap = vfio_legacy_dma_unmap;
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
index 
1674c61227b69f5de2a32dbb8013f854c199d294..409ed3dcc91cde508ac74fa693798b87e82eb9dd
 100644
--- a/hw/vfio/iommufd.c
+++ b/hw/vfio/iommufd.c
@@ -612,6 +612,8 @@ static void vfio_iommu_iommufd_class_init(ObjectClass 
*klass, void *data)
 {
 VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass);
 
+vioc->hiod_typename = TYPE_HOST_IOMMU_DEVICE_IOMMUFD_VFIO;
+
 vioc->dma_map = iommufd_cdev_map;
 vioc->dma_unmap = iommufd_cdev_unmap;
 vioc->attach_device = iommufd_cdev_attach;
-- 
2.45.2




[PULL 09/42] vfio/container: Implement HostIOMMUDeviceClass::get_cap() handler

2024-06-24 Thread Cédric Le Goater
From: Zhenzhong Duan 

Suggested-by: Cédric Le Goater 
Signed-off-by: Zhenzhong Duan 
Reviewed-by: Eric Auger 
Reviewed-by: Michael S. Tsirkin 
---
 hw/vfio/container.c | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/hw/vfio/container.c b/hw/vfio/container.c
index 
2f62c13214412618b412240b61efcbe1b1c79ed5..99beeba422ebfe49caed4fcd57afe5514dea8b39
 100644
--- a/hw/vfio/container.c
+++ b/hw/vfio/container.c
@@ -1147,11 +1147,26 @@ static bool hiod_legacy_vfio_realize(HostIOMMUDevice 
*hiod, void *opaque,
 return true;
 }
 
+static int hiod_legacy_vfio_get_cap(HostIOMMUDevice *hiod, int cap,
+Error **errp)
+{
+HostIOMMUDeviceCaps *caps = &hiod->caps;
+
+switch (cap) {
+case HOST_IOMMU_DEVICE_CAP_AW_BITS:
+return caps->aw_bits;
+default:
+error_setg(errp, "%s: unsupported capability %x", hiod->name, cap);
+return -EINVAL;
+}
+}
+
 static void hiod_legacy_vfio_class_init(ObjectClass *oc, void *data)
 {
 HostIOMMUDeviceClass *hioc = HOST_IOMMU_DEVICE_CLASS(oc);
 
 hioc->realize = hiod_legacy_vfio_realize;
+hioc->get_cap = hiod_legacy_vfio_get_cap;
 };
 
 static const TypeInfo types[] = {
-- 
2.45.2




[PULL 05/42] range: Introduce range_get_last_bit()

2024-06-24 Thread Cédric Le Goater
From: Zhenzhong Duan 

This helper gets the position of the highest set bit of the upper bound.

If the range is empty or upper bound is zero, -1 is returned.

Suggested-by: Cédric Le Goater 
Signed-off-by: Zhenzhong Duan 
Reviewed-by: Eric Auger 
Reviewed-by: Michael S. Tsirkin 
---
 include/qemu/range.h | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/include/qemu/range.h b/include/qemu/range.h
index 
205e1da76dc5b29327f590b8293a826ced63c25d..4ce694a398311a32e9b5e3ca97aed97c90c0ae09
 100644
--- a/include/qemu/range.h
+++ b/include/qemu/range.h
@@ -20,6 +20,8 @@
 #ifndef QEMU_RANGE_H
 #define QEMU_RANGE_H
 
+#include "qemu/bitops.h"
+
 /*
  * Operations on 64 bit address ranges.
  * Notes:
@@ -217,6 +219,15 @@ static inline int ranges_overlap(uint64_t first1, uint64_t 
len1,
 return !(last2 < first1 || last1 < first2);
 }
 
+/* Get highest non-zero bit position of a range */
+static inline int range_get_last_bit(Range *range)
+{
+if (range_is_empty(range)) {
+return -1;
+}
+return 63 - clz64(range->upb);
+}
+
 /*
  * Return -1 if @a < @b, 1 @a > @b, and 0 if they touch or overlap.
  * Both @a and @b must not be empty.
-- 
2.45.2




Re: [PATCH v4 0/8] VIRTIO-IOMMU/VFIO: Fix host iommu geometry handling for hotplugged devices

2024-06-24 Thread Cédric Le Goater

On 6/14/24 11:52 AM, Eric Auger wrote:

This series is based on Zhenzhong HostIOMMUDevice:

[PATCH v7 00/17] Add a host IOMMU device abstraction to check with vIOMMU
https://lore.kernel.org/all/20240605083043.317831-1-zhenzhong.d...@intel.com/

It allows host IOVA reserved regions to be conveyed to the virtio-iommu and
uses the HostIOMMUDevice infrastructure. This replaces the usage of
IOMMU MR ops which fail to satisfy this need for hotplugged devices.

See below for additional background.

In [1] we attempted to fix a case where a VFIO-PCI device protected
with a virtio-iommu was assigned to an x86 guest. On x86 the physical
IOMMU may have an address width (gaw) of 39 or 48 bits whereas the
virtio-iommu used to expose a 64b address space by default.
Hence the guest was trying to use the full 64b space and we hit
DMA MAP failures. To work around this issue we managed to pass
usable IOVA regions (excluding the out of range space) from VFIO
to the virtio-iommu device. This was made feasible by introducing
a new IOMMU Memory Region callback dubbed iommu_set_iova_regions().
This latter gets called when the IOMMU MR is enabled which
causes the vfio_listener_region_add() to be called.

For coldplugged devices the technique works because we make sure all
the IOMMU MR are enabled once on the machine init done: 94df5b2180
("virtio-iommu: Fix 64kB host page size VFIO device assignment")
for granule freeze. But I would be keen to get rid of this trick.

However with VFIO-PCI hotplug, this technique fails due to the
race between the call to the callback in the add memory listener
and the virtio-iommu probe request. Indeed the probe request gets
called before the attach to the domain. So in that case the usable
regions are communicated after the probe request and fail to be
conveyed to the guest.

Using an IOMMU MR Ops is impractical because this relies on the IOMMU
MR to have been enabled and the corresponding vfio_listener_region_add()
to be executed. Instead this series proposes to replace the usage of this
API by the recently introduced PCIIOMMUOps: ba7d12eb8c  ("hw/pci: modify
pci_setup_iommu() to set PCIIOMMUOps"). That way, the callback can be
called earlier, once the usable IOVA regions have been collected by
VFIO, without the need for the IOMMU MR to be enabled.

This series also removes the spurious message:
qemu-system-aarch64: warning: virtio-iommu-memory-region-7-0: Notified about 
new host reserved regions after probe

In the short term this may also be used for passing the page size
mask, which would make it possible to get rid of the hacky transient
IOMMU MR enablement mentioned above.

[1] [PATCH v4 00/12] VIRTIO-IOMMU/VFIO: Don't assume 64b IOVA space
 https://lore.kernel.org/all/20231019134651.842175-1-eric.au...@redhat.com/

Extra Notes:
With that series, the reserved memory regions are communicated on time
so that the virtio-iommu probe request grabs them. However this is not
sufficient. In some cases (my case), I still see some DMA MAP failures
and the guest keeps on using IOVA ranges outside the geometry of the
physical IOMMU. This is due to the fact the VFIO-PCI device is in the
same iommu group as the pcie root port. Normally the kernel
iova_reserve_iommu_regions (dma-iommu.c) is supposed to call reserve_iova()
for each reserved IOVA, which carves them out of the allocator. When
iommu_dma_init_domain() gets called for the hotplugged vfio-pci device
the iova domain is already allocated and set and we don't call
iova_reserve_iommu_regions() again for the vfio-pci device. So its
corresponding reserved regions are not properly taken into account.

This is not trivial to fix because theoretically the 1st attached
devices could already have allocated IOVAs within the reserved regions
of the second device. Also we are somehow hijacking the reserved
memory regions to model the geometry of the physical IOMMU so not sure
any attempt to fix that upstream will be accepted. At the moment one
solution is to make sure assigned devices end up in singleton group.
Another solution is to work on a different approach where the gaw
can be passed as an option to the virtio-iommu device, similarly to
what is done with intel iommu.

This series can be found at:
https://github.com/eauger/qemu/tree/iommufd_nesting_preq_v7_resv_regions_v4

History:
v3 -> v4:
- add one patch to add aliased pci bus and devfn in the HostIOMMUDevice
- Use those for resv regions computation
- Remove VirtioHostIOMMUDevice and simply use the base object

v2 -> v3:
- moved the series from RFC to patch
- collected Zhenzhong's R-bs and took into account most of his comments
   (see replies on v2)


Eric Auger (8):
   HostIOMMUDevice: Store the VFIO/VDPA agent
   virtio-iommu: Implement [set|unset]_iommu_device() callbacks
   HostIOMMUDevice: Introduce get_iova_ranges callback
   HostIOMMUDevice: Store the aliased bus and devfn
   virtio-iommu: Compute host reserved regions
   virtio-iommu: Remove the implementation of iommu_set_iova_range
   hw/vfio: 

Re: [PATCH v2 00/17] vfio: QOMify VFIOContainer

2024-06-24 Thread Cédric Le Goater

On 6/17/24 8:33 AM, Cédric Le Goater wrote:

Hello,

The series starts with simple changes (patch 1-4). Two of which were
initially sent by Joao in a series adding VFIO migration support with
vIOMMU [1].

The changes following prepare VFIOContainer for QOMification, switch
the container models to QOM when ready and add some final cleanups.

Applies on top of :

  * [v7] Add a host IOMMU device abstraction to check with vIOMMU
https://lore.kernel.org/all/20240605083043.317831-1-zhenzhong.d...@intel.com
  * [v4] VIRTIO-IOMMU/VFIO: Fix host iommu geometry
https://lore.kernel.org/all/20240614095402.904691-1-eric.au...@redhat.com

Thanks,

C.

[1] 
https://lore.kernel.org/qemu-devel/20230622214845.3980-1-joao.m.mart...@oracle.com/


Changes in v2:
  - Used OBJECT_DECLARE_SIMPLE_TYPE
  - Introduced a instance_finalize() handler

Avihai Horon (1):
   vfio/common: Extract vIOMMU code from vfio_sync_dirty_bitmap()

Cédric Le Goater (15):
   vfio: Make vfio_devices_dma_logging_start() return bool
   vfio: Remove unused declarations from vfio-common.h
   vfio/container: Introduce vfio_address_space_insert()
   vfio/container: Simplify vfio_container_init()
   vfio/container: Modify vfio_get_iommu_type() to use a container fd
   vfio/container: Introduce vfio_get_iommu_class_name()
   vfio/container: Introduce vfio_create_container()
   vfio/container: Discover IOMMU type before creating the container
   vfio/container: Change VFIOContainerBase to use QOM
   vfio/container: Switch to QOM
   vfio/container: Introduce an instance_init() handler
   vfio/container: Remove VFIOContainerBase::ops
   vfio/container: Remove vfio_container_init()
   vfio/container: Introduce vfio_iommu_legacy_instance_init()
   vfio/container: Move vfio_container_destroy() to an
 instance_finalize() handler

Joao Martins (1):
   vfio/common: Move dirty tracking ranges update to helper

  include/hw/vfio/vfio-common.h |  10 ++-
  include/hw/vfio/vfio-container-base.h |  19 +---
  hw/vfio/common.c  | 124 --
  hw/vfio/container-base.c  |  70 +--
  hw/vfio/container.c   | 106 --
  hw/vfio/iommufd.c |  13 ++-
  hw/vfio/pci.c |   4 +-
  hw/vfio/spapr.c   |   3 +
  8 files changed, 196 insertions(+), 153 deletions(-)



Applied to vfio-next.

Thanks,

C.






Re: [PATCH v7 00/17] Add a host IOMMU device abstraction to check with vIOMMU

2024-06-24 Thread Cédric Le Goater

On 6/5/24 10:30 AM, Zhenzhong Duan wrote:

Hi,

This series introduces a HostIOMMUDevice abstraction and sub-classes.
Also HostIOMMUDeviceCaps structure in HostIOMMUDevice and a new interface
between vIOMMU and HostIOMMUDevice.

A HostIOMMUDevice is an abstraction for an assigned device that is protected
by a physical IOMMU (aka host IOMMU). The userspace interaction with this
physical IOMMU can be done either through the VFIO IOMMU type 1 legacy
backend or the new iommufd backend. The assigned device can be a VFIO device
or a VDPA device. The HostIOMMUDevice is needed to interact with the host
IOMMU that protects the assigned device. It is especially useful when the
device is also protected by a virtual IOMMU as the latter uses the translation
services of the physical IOMMU and is constrained by it. In that context the
HostIOMMUDevice can be passed to the virtual IOMMU to collect physical IOMMU
capabilities such as the supported address width. In the future, the virtual
IOMMU will use the HostIOMMUDevice to program the guest page tables in the
first translation stage of the physical IOMMU.

HostIOMMUDeviceClass::realize() is introduced to initialize
HostIOMMUDeviceCaps and other fields of HostIOMMUDevice variants.

HostIOMMUDeviceClass::get_cap() is introduced to query host IOMMU
device capabilities.

The class tree is as below:

                                 HostIOMMUDevice
                                        | .caps
                                        | .realize()
                                        | .get_cap()
                                        |
            .-----------------------------------------------------.
            |                           |                         |
HostIOMMUDeviceLegacyVFIO  {HostIOMMUDeviceLegacyVDPA}  HostIOMMUDeviceIOMMUFD
            |                           |                         | [.iommufd]
                                                                  | [.devid]
                                                                  | [.ioas_id]
                                                                  | [.attach_hwpt()]
                                                                  | [.detach_hwpt()]
                                                                  |
                                                   .----------------------------.
                                                   |                            |
                                      HostIOMMUDeviceIOMMUFDVFIO  {HostIOMMUDeviceIOMMUFDVDPA}
                                                   | [.vdev]                    | {.vdev}

* The attributes in [] will be implemented in nesting series.
* The classes in {} will be implemented in future.
* .vdev in different class points to different agent device,
* i.e., VFIODevice or VDPADevice.

PATCH1-4: Introduce HostIOMMUDevice and its sub classes
PATCH5-10: Implement .realize() and .get_cap() handler
PATCH11-14: Create HostIOMMUDevice instance and pass to vIOMMU
PATCH15-17: Implement compatibility check between host IOMMU and 
vIOMMU(intel_iommu)

Test done:
make check
vfio device hotplug/unplug with different backend on linux
reboot, kexec
build test on linux and windows11

Qemu code can be found at:
https://github.com/yiliu1765/qemu/tree/zhenzhong/iommufd_nesting_preq_v7

Besides the compatibility check in this series, in nesting series, this
host IOMMU device is extended for much wider usage. For anyone interested
in the nesting series, here is the link:
https://github.com/yiliu1765/qemu/tree/zhenzhong/iommufd_nesting_rfcv2

Thanks
Zhenzhong

Changelog:
v7:
- drop config CONFIG_HOST_IOMMU_DEVICE (Cédric)
- introduce HOST_IOMMU_DEVICE_CAP_AW_BITS_MAX (Eric)
- use iova_ranges method in iommufd.realize() (Eric)
- introduce HostIOMMUDevice::name to facilitate tracing (Eric)
- implement a custom destroy hash function (Cédric)
- drop VTDHostIOMMUDevice and save HostIOMMUDevice in hash table (Eric)
- move patch5 after patch1 (Eric)
- squash patch3 and 4, squash patch12 and 13 (Eric)
- refine comments (Eric)
- collect Eric's R-B

v6:
- open coded host_iommu_device_get_cap() to avoid #ifdef in intel_iommu.c 
(Cédric)

v5:
- pci_device_set_iommu_device return true (Cédric)
- fix build failure on windows (thanks Cédric found that issue)

v4:
- move properties vdev, iommufd and devid to nesting series where need it 
(Cédric)
- fix 32bit build with clz64 (Cédric)
- change check_cap naming to get_cap (Cédric)
- return bool if error is passed through errp (Cédric)
- drop HostIOMMUDevice[LegacyVFIO|IOMMUFD|IOMMUFDVFIO] declaration (Cédric)
- drop HOST_IOMMU_DEVICE_CAP_IOMMUFD (Cédric)
- replace include directive with forward declaration (Cédric)

v3:
- refine declaration and doc for HostIOMMUDevice (Cédric, Philippe)
- introduce HostIOMMUDeviceCaps, .realize() and .check_cap() (Cédric)
- introduce helper range_get_last_bit() for range operation (Cédric)
- separate pci_device_get_iommu_bus_devfn() in a prereq patch (Cédric)
- replace HIOD_ abbreviation with HOST_IOMMU_DEVICE_ (Cédric)
- add header in 

Re: [PATCH v2] Consider discard option when writing zeros

2024-06-24 Thread Nir Soffer
On Mon, Jun 24, 2024 at 7:08 PM Kevin Wolf  wrote:

> Am 24.06.2024 um 17:23 hat Stefan Hajnoczi geschrieben:
> > On Wed, Jun 19, 2024 at 08:43:25PM +0300, Nir Soffer wrote:
> > > Tested using:
> >
> > Hi Nir,
> > This looks like a good candidate for the qemu-iotests test suite. Adding
> > it to the automated tests will protect against future regressions.
> >
> > Please add the script and the expected output to
> > tests/qemu-iotests/test/write-zeroes-unmap and run it using
> > `(cd build && tests/qemu-iotests/check write-zeroes-unmap)`.
> >
> > See the existing test cases in tests/qemu-iotests/ and
> > tests/qemu-iotests/tests/ for examples. Some are shell scripts and
> > others are Python. I think shell makes sense for this test case. You
> > can copy the test framework boilerplate from an existing test case.
>
> 'du' can't be used like this in qemu-iotests because it makes
> assumptions that depend on the filesystem. A test case replicating what
> Nir did manually would likely fail on XFS with its preallocation.
>

This is why I did not try to add a new qemu-iotest yet.


> Maybe we could operate on a file exposed by the FUSE export that is
> backed by qcow2, and then you can use 'qemu-img map' on that qcow2 image
> to verify the allocation status. Somewhat complicated, but I think it
> could work.
>

Do we have examples of using the FUSE export? It sounds complicated but
being able to test on any file system is awesome. The complexity can be
hidden behind simple test helpers.

Another option is to use a specific file system created for the tests, for
example
on a loop device. We used userstorage[1] in ovirt to test on specific file
systems
with known sector size.

But more important, are you ok with the change?

I'm not sure about not creating sparse images by default - this is not
consistent
with qemu-img convert and qemu-nbd, which do sparsify by default. The old
behavior seems better.

[1] https://github.com/nirs/userstorage

Nir


[PATCH 1/1] include/qemu: Provide a C++ compatible version of typeof_strip_qual

2024-06-24 Thread Felix Wu
From: Roman Kiryanov 

to use the QEMU headers with a C++ compiler.

Signed-off-by: Felix Wu 
Signed-off-by: Roman Kiryanov 
---
 include/qemu/atomic.h   |  8 
 include/qemu/atomic.hpp | 38 ++
 2 files changed, 46 insertions(+)
 create mode 100644 include/qemu/atomic.hpp

diff --git a/include/qemu/atomic.h b/include/qemu/atomic.h
index 99110abefb..aeaecc440a 100644
--- a/include/qemu/atomic.h
+++ b/include/qemu/atomic.h
@@ -20,6 +20,13 @@
 /* Compiler barrier */
 #define barrier()   ({ asm volatile("" ::: "memory"); (void)0; })
 
+#ifdef __cplusplus
+
+#ifndef typeof_strip_qual
+#error Use the typeof_strip_qual(expr) definition from atomic.hpp on C++ 
builds.
+#endif
+
+#else  /* __cplusplus */
 /* The variable that receives the old value of an atomically-accessed
  * variable must be non-qualified, because atomic builtins return values
  * through a pointer-type argument as in __atomic_load(&var, &old, MODEL).
@@ -61,6 +68,7 @@
 __builtin_types_compatible_p(typeof(expr), const volatile unsigned 
short), \
 (unsigned short)1, 
\
   (expr)+0))
+#endif  /* __cplusplus */
 
 #ifndef __ATOMIC_RELAXED
 #error "Expecting C11 atomic ops"
diff --git a/include/qemu/atomic.hpp b/include/qemu/atomic.hpp
new file mode 100644
index 00..5844e3d427
--- /dev/null
+++ b/include/qemu/atomic.hpp
@@ -0,0 +1,38 @@
+/*
+ * The C++ definition for typeof_strip_qual used in atomic.h.
+ *
+ * Copyright (C) 2024 Google, Inc.
+ *
+ * Author: Roman Kiryanov 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ * See docs/devel/atomics.rst for discussion about the guarantees each
+ * atomic primitive is meant to provide.
+ */
+
+#ifndef QEMU_ATOMIC_HPP
+#define QEMU_ATOMIC_HPP
+
+#include <type_traits>
+
+/* Match the integer promotion behavior of typeof_strip_qual, see atomic.h */
+template <typename T> struct typeof_strip_qual_cpp { using result = 
decltype(+T(0)); };
+
+template <> struct typeof_strip_qual_cpp<bool> { using result = bool; };
+template <> struct typeof_strip_qual_cpp<signed char> { using result = signed 
char; };
+template <> struct typeof_strip_qual_cpp<unsigned char> { using result = 
unsigned char; };
+template <> struct typeof_strip_qual_cpp<signed short> { using result = signed 
short; };
+template <> struct typeof_strip_qual_cpp<unsigned short> { using result = 
unsigned short; };
+
+#define typeof_strip_qual(expr) \
+typeof_strip_qual_cpp< \
+std::remove_cv< \
+std::remove_reference< \
+decltype(expr) \
+>::type \
+>::type \
+>::result
+
+#endif /* QEMU_ATOMIC_HPP */
-- 
2.45.2.741.gdbec12cfda-goog




Re: [PULL 00/11] s390x and qtest patches 2024-06-24

2024-06-24 Thread Richard Henderson

On 6/24/24 02:10, Thomas Huth wrote:

The following changes since commit c9ba79baca7c673098361e3a687f72d458e0d18a:

   Merge tag 'pull-target-arm-20240622' 
of https://git.linaro.org/people/pmaydell/qemu-arm  into staging (2024-06-22 
09:56:49 -0700)

are available in the Git repository at:

   https://gitlab.com/thuth/qemu.git  tags/pull-request-2024-06-24

for you to fetch changes up to d6a7c3f44cf3f60c066dbf087ef79d4b12acc642:

   target/s390x: Add a CONFIG switch to disable legacy CPUs (2024-06-24 
08:22:30 +0200)


* s390x error reporting clean ups
* fix memleak in qos_fuzz.c
* use correct byte order for pid field in s390x dumps
* Add a CONFIG switch to disable legacy s390x CPUs

---


Applied, thanks.  Please update https://wiki.qemu.org/ChangeLog/9.1 as 
appropriate.


r~




[PATCH 1/2] qom: Rename Object::class into Object::klass

2024-06-24 Thread Felix Wu
From: Roman Kiryanov 

'class' is a C++ keyword, which prevents the QEMU headers
from being used with a C++ compiler.

Google-Bug-Id: 331190993
Change-Id: I9ab7d2d77edef654a9c7b7cb9cd01795a6ed65a2
Signed-off-by: Felix Wu 
Signed-off-by: Roman Kiryanov 
---
 hw/core/qdev-properties-system.c |  2 +-
 include/exec/memory.h|  2 +-
 include/qom/object.h |  2 +-
 qom/object.c | 90 
 4 files changed, 48 insertions(+), 48 deletions(-)

diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c
index f13350b4fb..a6781841af 100644
--- a/hw/core/qdev-properties-system.c
+++ b/hw/core/qdev-properties-system.c
@@ -431,7 +431,7 @@ static void set_netdev(Object *obj, Visitor *v, const char 
*name,
 }
 
 if (peers[i]->info->check_peer_type) {
-if (!peers[i]->info->check_peer_type(peers[i], obj->class, errp)) {
+if (!peers[i]->info->check_peer_type(peers[i], obj->klass, errp)) {
 goto out;
 }
 }
diff --git a/include/exec/memory.h b/include/exec/memory.h
index 2d7c278b9f..e5bd75956e 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -1808,7 +1808,7 @@ static inline IOMMUMemoryRegion 
*memory_region_get_iommu(MemoryRegion *mr)
 static inline IOMMUMemoryRegionClass *memory_region_get_iommu_class_nocheck(
 IOMMUMemoryRegion *iommu_mr)
 {
-return (IOMMUMemoryRegionClass *) (((Object *)iommu_mr)->class);
+return (IOMMUMemoryRegionClass *) (((Object *)iommu_mr)->klass);
 }
 
 #define memory_region_is_iommu(mr) (memory_region_get_iommu(mr) != NULL)
diff --git a/include/qom/object.h b/include/qom/object.h
index 13d3a655dd..7afdb261a8 100644
--- a/include/qom/object.h
+++ b/include/qom/object.h
@@ -153,7 +153,7 @@ struct ObjectClass
 struct Object
 {
 /* private: */
-ObjectClass *class;
+ObjectClass *klass;
 ObjectFree *free;
 GHashTable *properties;
 uint32_t ref;
diff --git a/qom/object.c b/qom/object.c
index 157a45c5f8..133cd08763 100644
--- a/qom/object.c
+++ b/qom/object.c
@@ -68,7 +68,7 @@ struct TypeImpl
 const char *parent;
 TypeImpl *parent_type;
 
-ObjectClass *class;
+ObjectClass *klass;
 
 int num_interfaces;
 InterfaceImpl interfaces[MAX_INTERFACES];
@@ -304,11 +304,11 @@ static void type_initialize_interface(TypeImpl *ti, 
TypeImpl *interface_type,
 type_initialize(iface_impl);
 g_free((char *)info.name);
 
-new_iface = (InterfaceClass *)iface_impl->class;
-new_iface->concrete_class = ti->class;
+new_iface = (InterfaceClass *)iface_impl->klass;
+new_iface->concrete_class = ti->klass;
 new_iface->interface_type = interface_type;
 
-ti->class->interfaces = g_slist_append(ti->class->interfaces, new_iface);
+ti->klass->interfaces = g_slist_append(ti->klass->interfaces, new_iface);
 }
 
 static void object_property_free(gpointer data)
@@ -329,7 +329,7 @@ static void type_initialize(TypeImpl *ti)
 {
 TypeImpl *parent;
 
-if (ti->class) {
+if (ti->klass) {
 return;
 }
 
@@ -350,7 +350,7 @@ static void type_initialize(TypeImpl *ti)
 assert(!ti->instance_finalize);
 assert(!ti->num_interfaces);
 }
-ti->class = g_malloc0(ti->class_size);
+ti->klass = g_malloc0(ti->class_size);
 
 parent = type_get_parent(ti);
 if (parent) {
@@ -360,10 +360,10 @@ static void type_initialize(TypeImpl *ti)
 
 g_assert(parent->class_size <= ti->class_size);
 g_assert(parent->instance_size <= ti->instance_size);
-memcpy(ti->class, parent->class, parent->class_size);
-ti->class->interfaces = NULL;
+memcpy(ti->klass, parent->klass, parent->class_size);
+ti->klass->interfaces = NULL;
 
-for (e = parent->class->interfaces; e; e = e->next) {
+for (e = parent->klass->interfaces; e; e = e->next) {
 InterfaceClass *iface = e->data;
 ObjectClass *klass = OBJECT_CLASS(iface);
 
@@ -377,7 +377,7 @@ static void type_initialize(TypeImpl *ti)
  ti->interfaces[i].typename, parent->name);
 abort();
 }
-for (e = ti->class->interfaces; e; e = e->next) {
+for (e = ti->klass->interfaces; e; e = e->next) {
 TypeImpl *target_type = OBJECT_CLASS(e->data)->type;
 
 if (type_is_ancestor(target_type, t)) {
@@ -393,20 +393,20 @@ static void type_initialize(TypeImpl *ti)
 }
 }
 
-ti->class->properties = g_hash_table_new_full(g_str_hash, g_str_equal, 
NULL,
+ti->klass->properties = g_hash_table_new_full(g_str_hash, g_str_equal, 
NULL,
   object_property_free);
 
-ti->class->type = ti;
+ti->klass->type = ti;
 
 while (parent) {
 if (parent->class_base_init) {
-parent->class_base_init(ti->class, ti->class_data);
+

[PATCH 2/2] include/qom: Rename typename into type_name

2024-06-24 Thread Felix Wu
From: Roman Kiryanov 

`typename` is a C++ keyword, which prevents the QEMU headers
from being used with a C++ compiler.

Google-Bug-Id: 331190993
Change-Id: Iff313ca5ec157a1a3826b4f5665073534d961a26
Signed-off-by: Felix Wu 
Signed-off-by: Roman Kiryanov 
---
 hw/core/bus.c  |   8 +--
 include/hw/qdev-core.h |   4 +-
 include/qom/object.h   |  78 +--
 qom/object.c   | 120 -
 4 files changed, 105 insertions(+), 105 deletions(-)

diff --git a/hw/core/bus.c b/hw/core/bus.c
index b9d89495cd..07c5a83673 100644
--- a/hw/core/bus.c
+++ b/hw/core/bus.c
@@ -152,18 +152,18 @@ static void bus_unparent(Object *obj)
 bus->parent = NULL;
 }
 
-void qbus_init(void *bus, size_t size, const char *typename,
+void qbus_init(void *bus, size_t size, const char *type_name,
DeviceState *parent, const char *name)
 {
-object_initialize(bus, size, typename);
+object_initialize(bus, size, type_name);
 qbus_init_internal(bus, parent, name);
 }
 
-BusState *qbus_new(const char *typename, DeviceState *parent, const char *name)
+BusState *qbus_new(const char *type_name, DeviceState *parent, const char 
*name)
 {
 BusState *bus;
 
-bus = BUS(object_new(typename));
+bus = BUS(object_new(type_name));
 qbus_init_internal(bus, parent, name);
 
 return bus;
diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h
index 5336728a23..ede4b74bd8 100644
--- a/include/hw/qdev-core.h
+++ b/include/hw/qdev-core.h
@@ -867,9 +867,9 @@ DeviceState *qdev_find_recursive(BusState *bus, const char 
*id);
 typedef int (qbus_walkerfn)(BusState *bus, void *opaque);
 typedef int (qdev_walkerfn)(DeviceState *dev, void *opaque);
 
-void qbus_init(void *bus, size_t size, const char *typename,
+void qbus_init(void *bus, size_t size, const char *type_name,
DeviceState *parent, const char *name);
-BusState *qbus_new(const char *typename, DeviceState *parent, const char 
*name);
+BusState *qbus_new(const char *type_name, DeviceState *parent, const char 
*name);
 bool qbus_realize(BusState *bus, Error **errp);
 void qbus_unrealize(BusState *bus);
 
diff --git a/include/qom/object.h b/include/qom/object.h
index 7afdb261a8..4e69a3506d 100644
--- a/include/qom/object.h
+++ b/include/qom/object.h
@@ -617,7 +617,7 @@ Object *object_new_with_class(ObjectClass *klass);
 
 /**
  * object_new:
- * @typename: The name of the type of the object to instantiate.
+ * @type_name: The name of the type of the object to instantiate.
  *
  * This function will initialize a new object using heap allocated memory.
  * The returned object has a reference count of 1, and will be freed when
@@ -625,11 +625,11 @@ Object *object_new_with_class(ObjectClass *klass);
  *
  * Returns: The newly allocated and instantiated object.
  */
-Object *object_new(const char *typename);
+Object *object_new(const char *type_name);
 
 /**
  * object_new_with_props:
- * @typename:  The name of the type of the object to instantiate.
+ * @type_name:  The name of the type of the object to instantiate.
  * @parent: the parent object
  * @id: The unique ID of the object
  * @errp: pointer to error object
@@ -673,7 +673,7 @@ Object *object_new(const char *typename);
  *
  * Returns: The newly allocated, instantiated & initialized object.
  */
-Object *object_new_with_props(const char *typename,
+Object *object_new_with_props(const char *type_name,
   Object *parent,
   const char *id,
   Error **errp,
@@ -681,7 +681,7 @@ Object *object_new_with_props(const char *typename,
 
 /**
  * object_new_with_propv:
- * @typename:  The name of the type of the object to instantiate.
+ * @type_name:  The name of the type of the object to instantiate.
  * @parent: the parent object
  * @id: The unique ID of the object
  * @errp: pointer to error object
@@ -689,7 +689,7 @@ Object *object_new_with_props(const char *typename,
  *
  * See object_new_with_props() for documentation.
  */
-Object *object_new_with_propv(const char *typename,
+Object *object_new_with_propv(const char *type_name,
   Object *parent,
   const char *id,
   Error **errp,
@@ -755,13 +755,13 @@ bool object_set_propv(Object *obj, Error **errp, va_list 
vargs);
  * object_initialize:
  * @obj: A pointer to the memory to be used for the object.
  * @size: The maximum size available at @obj for the object.
- * @typename: The name of the type of the object to instantiate.
+ * @type_name: The name of the type of the object to instantiate.
  *
  * This function will initialize an object.  The memory for the object should
  * have already been allocated.  The returned object has a reference count of 
1,
  * and will be finalized when the last reference is dropped.
  */
-void object_initialize(void *obj, size_t size, const char *typename);
+void 

[PATCH v4 03/14] hw/riscv: add RISC-V IOMMU base emulation

2024-06-24 Thread Daniel Henrique Barboza
From: Tomasz Jeznach 

The RISC-V IOMMU specification is now ratified as per the RISC-V
international process. The latest frozen specification can be found at:

https://github.com/riscv-non-isa/riscv-iommu/releases/download/v1.0/riscv-iommu.pdf

Add the foundation of the device emulation for RISC-V IOMMU, which
includes an IOMMU that has no capabilities but MSI interrupt support and
fault queue interfaces. We'll add more features incrementally in the
next patches.

Co-developed-by: Sebastien Boeuf 
Signed-off-by: Sebastien Boeuf 
Signed-off-by: Tomasz Jeznach 
Signed-off-by: Daniel Henrique Barboza 
---
 hw/riscv/Kconfig|4 +
 hw/riscv/meson.build|1 +
 hw/riscv/riscv-iommu-bits.h |2 +
 hw/riscv/riscv-iommu.c  | 1641 +++
 hw/riscv/riscv-iommu.h  |  142 +++
 hw/riscv/trace-events   |   11 +
 hw/riscv/trace.h|1 +
 include/hw/riscv/iommu.h|   36 +
 meson.build |1 +
 9 files changed, 1839 insertions(+)
 create mode 100644 hw/riscv/riscv-iommu.c
 create mode 100644 hw/riscv/riscv-iommu.h
 create mode 100644 hw/riscv/trace-events
 create mode 100644 hw/riscv/trace.h
 create mode 100644 include/hw/riscv/iommu.h

diff --git a/hw/riscv/Kconfig b/hw/riscv/Kconfig
index a2030e3a6f..f69d6e3c8e 100644
--- a/hw/riscv/Kconfig
+++ b/hw/riscv/Kconfig
@@ -1,3 +1,6 @@
+config RISCV_IOMMU
+bool
+
 config RISCV_NUMA
 bool
 
@@ -47,6 +50,7 @@ config RISCV_VIRT
 select SERIAL
 select RISCV_ACLINT
 select RISCV_APLIC
+select RISCV_IOMMU
 select RISCV_IMSIC
 select SIFIVE_PLIC
 select SIFIVE_TEST
diff --git a/hw/riscv/meson.build b/hw/riscv/meson.build
index f872674093..cbc99c6e8e 100644
--- a/hw/riscv/meson.build
+++ b/hw/riscv/meson.build
@@ -10,5 +10,6 @@ riscv_ss.add(when: 'CONFIG_SIFIVE_U', if_true: 
files('sifive_u.c'))
 riscv_ss.add(when: 'CONFIG_SPIKE', if_true: files('spike.c'))
 riscv_ss.add(when: 'CONFIG_MICROCHIP_PFSOC', if_true: 
files('microchip_pfsoc.c'))
 riscv_ss.add(when: 'CONFIG_ACPI', if_true: files('virt-acpi-build.c'))
+riscv_ss.add(when: 'CONFIG_RISCV_IOMMU', if_true: files('riscv-iommu.c'))
 
 hw_arch += {'riscv': riscv_ss}
diff --git a/hw/riscv/riscv-iommu-bits.h b/hw/riscv/riscv-iommu-bits.h
index f29b916acb..8a1af73685 100644
--- a/hw/riscv/riscv-iommu-bits.h
+++ b/hw/riscv/riscv-iommu-bits.h
@@ -82,6 +82,7 @@ struct riscv_iommu_pq_record {
 
 /* 5.4 Features control register (32bits) */
 #define RISCV_IOMMU_REG_FCTL0x0008
+#define RISCV_IOMMU_FCTL_BE BIT(0)
 #define RISCV_IOMMU_FCTL_WSIBIT(1)
 
 /* 5.5 Device-directory-table pointer (64bits) */
@@ -311,6 +312,7 @@ enum riscv_iommu_fq_causes {
 
 /* Translation attributes fields */
 #define RISCV_IOMMU_PC_TA_V BIT_ULL(0)
+#define RISCV_IOMMU_PC_TA_RESERVED  GENMASK_ULL(63, 32)
 
 /* First stage context fields */
 #define RISCV_IOMMU_PC_FSC_PPN  GENMASK_ULL(43, 0)
diff --git a/hw/riscv/riscv-iommu.c b/hw/riscv/riscv-iommu.c
new file mode 100644
index 00..37d7d1e657
--- /dev/null
+++ b/hw/riscv/riscv-iommu.c
@@ -0,0 +1,1641 @@
+/*
+ * QEMU emulation of an RISC-V IOMMU
+ *
+ * Copyright (C) 2021-2023, Rivos Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "qom/object.h"
+#include "hw/pci/pci_bus.h"
+#include "hw/pci/pci_device.h"
+#include "hw/qdev-properties.h"
+#include "hw/riscv/riscv_hart.h"
+#include "migration/vmstate.h"
+#include "qapi/error.h"
+#include "qemu/timer.h"
+
+#include "cpu_bits.h"
+#include "riscv-iommu.h"
+#include "riscv-iommu-bits.h"
+#include "trace.h"
+
+#define LIMIT_CACHE_CTX   (1U << 7)
+#define LIMIT_CACHE_IOT   (1U << 20)
+
+/* Physical page number conversions */
+#define PPN_PHYS(ppn) ((ppn) << TARGET_PAGE_BITS)
+#define PPN_DOWN(phy) ((phy) >> TARGET_PAGE_BITS)
+
+typedef struct RISCVIOMMUContext RISCVIOMMUContext;
+typedef struct RISCVIOMMUEntry RISCVIOMMUEntry;
+
+/* Device assigned I/O address space */
+struct RISCVIOMMUSpace {
+IOMMUMemoryRegion iova_mr;  /* IOVA memory region for attached device */
+AddressSpace iova_as;   /* IOVA address space for attached device */
+RISCVIOMMUState *iommu; /* Managing IOMMU device state */
+uint32_t devid; /* Requester identifier, AKA device_id */
+bool notifier; 

[PATCH v4 11/14] hw/riscv/riscv-iommu: add DBG support

2024-06-24 Thread Daniel Henrique Barboza
From: Tomasz Jeznach 

DBG support adds three additional registers: tr_req_iova, tr_req_ctl and
tr_response.

The DBG cap is always enabled. No on/off toggle is provided for it.

Signed-off-by: Tomasz Jeznach 
Signed-off-by: Daniel Henrique Barboza 
Reviewed-by: Frank Chang 
---
 hw/riscv/riscv-iommu-bits.h | 17 +++
 hw/riscv/riscv-iommu.c  | 59 +
 2 files changed, 76 insertions(+)

diff --git a/hw/riscv/riscv-iommu-bits.h b/hw/riscv/riscv-iommu-bits.h
index c61207e618..9be2ab15d9 100644
--- a/hw/riscv/riscv-iommu-bits.h
+++ b/hw/riscv/riscv-iommu-bits.h
@@ -84,6 +84,7 @@ struct riscv_iommu_pq_record {
 #define RISCV_IOMMU_CAP_ATS BIT_ULL(25)
 #define RISCV_IOMMU_CAP_T2GPA   BIT_ULL(26)
 #define RISCV_IOMMU_CAP_IGS GENMASK_ULL(29, 28)
+#define RISCV_IOMMU_CAP_DBG BIT_ULL(31)
 #define RISCV_IOMMU_CAP_PAS GENMASK_ULL(37, 32)
 #define RISCV_IOMMU_CAP_PD8 BIT_ULL(38)
 #define RISCV_IOMMU_CAP_PD17BIT_ULL(39)
@@ -186,6 +187,22 @@ enum {
 RISCV_IOMMU_INTR_COUNT
 };
 
+/* 5.24 Translation request IOVA (64bits) */
+#define RISCV_IOMMU_REG_TR_REQ_IOVA 0x0258
+
+/* 5.25 Translation request control (64bits) */
+#define RISCV_IOMMU_REG_TR_REQ_CTL  0x0260
+#define RISCV_IOMMU_TR_REQ_CTL_GO_BUSY  BIT_ULL(0)
+#define RISCV_IOMMU_TR_REQ_CTL_NW   BIT_ULL(3)
+#define RISCV_IOMMU_TR_REQ_CTL_PID  GENMASK_ULL(31, 12)
+#define RISCV_IOMMU_TR_REQ_CTL_DID  GENMASK_ULL(63, 40)
+
+/* 5.26 Translation request response (64bits) */
+#define RISCV_IOMMU_REG_TR_RESPONSE 0x0268
+#define RISCV_IOMMU_TR_RESPONSE_FAULT   BIT_ULL(0)
+#define RISCV_IOMMU_TR_RESPONSE_S   BIT_ULL(9)
+#define RISCV_IOMMU_TR_RESPONSE_PPN RISCV_IOMMU_PPN_FIELD
+
 /* 5.27 Interrupt cause to vector (64bits) */
 #define RISCV_IOMMU_REG_IVEC0x02F8
 
diff --git a/hw/riscv/riscv-iommu.c b/hw/riscv/riscv-iommu.c
index b20a09df2a..b7a9c4cec7 100644
--- a/hw/riscv/riscv-iommu.c
+++ b/hw/riscv/riscv-iommu.c
@@ -1739,6 +1739,50 @@ static void 
riscv_iommu_process_pq_control(RISCVIOMMUState *s)
 riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_PQCSR, ctrl_set, ctrl_clr);
 }
 
+static void riscv_iommu_process_dbg(RISCVIOMMUState *s)
+{
+uint64_t iova = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_TR_REQ_IOVA);
+uint64_t ctrl = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_TR_REQ_CTL);
+unsigned devid = get_field(ctrl, RISCV_IOMMU_TR_REQ_CTL_DID);
+unsigned pid = get_field(ctrl, RISCV_IOMMU_TR_REQ_CTL_PID);
+RISCVIOMMUContext *ctx;
+void *ref;
+
+if (!(ctrl & RISCV_IOMMU_TR_REQ_CTL_GO_BUSY)) {
+return;
+}
+
+ctx = riscv_iommu_ctx(s, devid, pid, );
+if (ctx == NULL) {
+riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_TR_RESPONSE,
+ RISCV_IOMMU_TR_RESPONSE_FAULT |
+ (RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED << 10));
+} else {
+IOMMUTLBEntry iotlb = {
+.iova = iova,
+.perm = ctrl & RISCV_IOMMU_TR_REQ_CTL_NW ? IOMMU_RO : IOMMU_RW,
+.addr_mask = ~0,
+.target_as = NULL,
+};
+int fault = riscv_iommu_translate(s, ctx, , false);
+if (fault) {
+iova = RISCV_IOMMU_TR_RESPONSE_FAULT | (((uint64_t) fault) << 10);
+} else {
+iova = iotlb.translated_addr & ~iotlb.addr_mask;
+iova >>= TARGET_PAGE_BITS;
+iova &= RISCV_IOMMU_TR_RESPONSE_PPN;
+
+/* We do not support superpages (> 4 KiB) for now */
+iova &= ~RISCV_IOMMU_TR_RESPONSE_S;
+}
+riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_TR_RESPONSE, iova);
+}
+
+riscv_iommu_reg_mod64(s, RISCV_IOMMU_REG_TR_REQ_CTL, 0,
+RISCV_IOMMU_TR_REQ_CTL_GO_BUSY);
+riscv_iommu_ctx_put(s, ref);
+}
+
 typedef void riscv_iommu_process_fn(RISCVIOMMUState *s);
 
 static void riscv_iommu_update_ipsr(RISCVIOMMUState *s, uint64_t data)
@@ -1862,6 +1906,12 @@ static MemTxResult riscv_iommu_mmio_write(void *opaque, 
hwaddr addr,
 
 return MEMTX_OK;
 
+case RISCV_IOMMU_REG_TR_REQ_CTL:
+process_fn = riscv_iommu_process_dbg;
+regb = RISCV_IOMMU_REG_TR_REQ_CTL;
+busy = RISCV_IOMMU_TR_REQ_CTL_GO_BUSY;
+break;
+
 default:
 break;
 }
@@ -2034,6 +2084,9 @@ static void riscv_iommu_realize(DeviceState *dev, Error 
**errp)
 s->cap |= RISCV_IOMMU_CAP_SV32X4 | RISCV_IOMMU_CAP_SV39X4 |
   RISCV_IOMMU_CAP_SV48X4 | RISCV_IOMMU_CAP_SV57X4;
 }
+/* Enable translation debug interface */
+s->cap |= RISCV_IOMMU_CAP_DBG;
+
 /* Report QEMU target physical address space limits */
 s->cap = set_field(s->cap, RISCV_IOMMU_CAP_PAS,
TARGET_PHYS_ADDR_SPACE_BITS);
@@ -2090,6 +2143,12 @@ static void riscv_iommu_realize(DeviceState *dev, Error 
**errp)
 stl_le_p(>regs_wc[RISCV_IOMMU_REG_IPSR], ~0);
 

[PATCH v4 13/14] qtest/riscv-iommu-test: add init queues test

2024-06-24 Thread Daniel Henrique Barboza
Add an additional test to further exercise the IOMMU where we attempt to
initialize the command, fault and page-request queues.

These steps are taken from chapter 6.2 of the RISC-V IOMMU spec,
"Guidelines for initialization". It emulates what we expect from the
software/OS when initializing the IOMMU.

Signed-off-by: Daniel Henrique Barboza 
Reviewed-by: Frank Chang 
---
 tests/qtest/libqos/riscv-iommu.h |  29 +++
 tests/qtest/riscv-iommu-test.c   | 141 +++
 2 files changed, 170 insertions(+)

diff --git a/tests/qtest/libqos/riscv-iommu.h b/tests/qtest/libqos/riscv-iommu.h
index d123efb41f..c62ddedbac 100644
--- a/tests/qtest/libqos/riscv-iommu.h
+++ b/tests/qtest/libqos/riscv-iommu.h
@@ -62,6 +62,35 @@
 
 #define RISCV_IOMMU_REG_IPSR0x0054
 
+#define RISCV_IOMMU_REG_IVEC0x02F8
+#define RISCV_IOMMU_REG_IVEC_CIVGENMASK_ULL(3, 0)
+#define RISCV_IOMMU_REG_IVEC_FIVGENMASK_ULL(7, 4)
+#define RISCV_IOMMU_REG_IVEC_PIVGENMASK_ULL(15, 12)
+
+#define RISCV_IOMMU_REG_CQB 0x0018
+#define RISCV_IOMMU_CQB_PPN_START   10
+#define RISCV_IOMMU_CQB_PPN_LEN 44
+#define RISCV_IOMMU_CQB_LOG2SZ_START0
+#define RISCV_IOMMU_CQB_LOG2SZ_LEN  5
+
+#define RISCV_IOMMU_REG_CQT 0x0024
+
+#define RISCV_IOMMU_REG_FQB 0x0028
+#define RISCV_IOMMU_FQB_PPN_START   10
+#define RISCV_IOMMU_FQB_PPN_LEN 44
+#define RISCV_IOMMU_FQB_LOG2SZ_START0
+#define RISCV_IOMMU_FQB_LOG2SZ_LEN  5
+
+#define RISCV_IOMMU_REG_FQT 0x0034
+
+#define RISCV_IOMMU_REG_PQB 0x0038
+#define RISCV_IOMMU_PQB_PPN_START   10
+#define RISCV_IOMMU_PQB_PPN_LEN 44
+#define RISCV_IOMMU_PQB_LOG2SZ_START0
+#define RISCV_IOMMU_PQB_LOG2SZ_LEN  5
+
+#define RISCV_IOMMU_REG_PQT 0x0044
+
 typedef struct QRISCVIOMMU {
 QOSGraphObject obj;
 QPCIDevice dev;
diff --git a/tests/qtest/riscv-iommu-test.c b/tests/qtest/riscv-iommu-test.c
index 7f0dbd0211..9e2afcb4b9 100644
--- a/tests/qtest/riscv-iommu-test.c
+++ b/tests/qtest/riscv-iommu-test.c
@@ -33,6 +33,20 @@ static uint64_t riscv_iommu_read_reg64(QRISCVIOMMU *r_iommu, 
int reg_offset)
 return reg;
 }
 
+static void riscv_iommu_write_reg32(QRISCVIOMMU *r_iommu, int reg_offset,
+uint32_t val)
+{
+qpci_memwrite(_iommu->dev, r_iommu->reg_bar, reg_offset,
+  , sizeof(val));
+}
+
+static void riscv_iommu_write_reg64(QRISCVIOMMU *r_iommu, int reg_offset,
+uint64_t val)
+{
+qpci_memwrite(_iommu->dev, r_iommu->reg_bar, reg_offset,
+  , sizeof(val));
+}
+
 static void test_pci_config(void *obj, void *data, QGuestAllocator *t_alloc)
 {
 QRISCVIOMMU *r_iommu = obj;
@@ -84,10 +98,137 @@ static void test_reg_reset(void *obj, void *data, 
QGuestAllocator *t_alloc)
 g_assert_cmpuint(reg, ==, 0);
 }
 
+/*
+ * Common timeout-based poll for CQCSR, FQCSR and PQCSR. All
+ * their ON bits are mapped as RISCV_IOMMU_QUEUE_ACTIVE (16).
+ */
+static void qtest_wait_for_queue_active(QRISCVIOMMU *r_iommu,
+uint32_t queue_csr)
+{
+QTestState *qts = global_qtest;
+guint64 timeout_us = 2 * 1000 * 1000;
+gint64 start_time = g_get_monotonic_time();
+uint32_t reg;
+
+for (;;) {
+qtest_clock_step(qts, 100);
+
+reg = riscv_iommu_read_reg32(r_iommu, queue_csr);
+if (reg & RISCV_IOMMU_QUEUE_ACTIVE) {
+break;
+}
+g_assert(g_get_monotonic_time() - start_time <= timeout_us);
+}
+}
+
+/*
+ * Goes through the queue activation procedures of chapter 6.2,
+ * "Guidelines for initialization", of the RISCV-IOMMU spec.
+ */
+static void test_iommu_init_queues(void *obj, void *data,
+   QGuestAllocator *t_alloc)
+{
+QRISCVIOMMU *r_iommu = obj;
+uint64_t reg64, q_addr;
+uint32_t reg;
+int k;
+
+reg64 = riscv_iommu_read_reg64(r_iommu, RISCV_IOMMU_REG_CAP);
+g_assert_cmpuint(reg64 & RISCV_IOMMU_CAP_VERSION, ==, 0x10);
+
+/*
+ * Program the command queue. Write 0xF to civ, assert that
+ * we have 4 writable bits (k = 4). The number of entries N in the
+ * command queue is 2^4 = 16. We need to allocate an N*16-byte
+ * buffer and use it to set cqb.
+ */
+riscv_iommu_write_reg32(r_iommu, RISCV_IOMMU_REG_IVEC,
+0x & RISCV_IOMMU_REG_IVEC_CIV);
+reg = riscv_iommu_read_reg32(r_iommu, RISCV_IOMMU_REG_IVEC);
+g_assert_cmpuint(reg & RISCV_IOMMU_REG_IVEC_CIV, ==, 0xF);
+
+q_addr = guest_alloc(t_alloc, 16 * 16);
+reg64 = 0;
+k = 4;
+deposit64(reg64, RISCV_IOMMU_CQB_PPN_START,
+  RISCV_IOMMU_CQB_PPN_LEN, q_addr);
+deposit64(reg64, RISCV_IOMMU_CQB_LOG2SZ_START,
+  RISCV_IOMMU_CQB_LOG2SZ_LEN, k - 1);
+riscv_iommu_write_reg64(r_iommu, RISCV_IOMMU_REG_CQB, reg64);
+
+/* cqt = 0, 

  1   2   3   >