[PATCH] memory: Remove unnecessary variable in memory_region_escape_name()

2023-07-12 Thread Gavin Shan
The variable 'c' isn't needed because it can be replaced by '*p'
completely. Remove the unnecessary variable 'c' to simplify the
function a bit.

No functional change intended.

Signed-off-by: Gavin Shan 
---
 softmmu/memory.c | 11 +--
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/softmmu/memory.c b/softmmu/memory.c
index 7d9494ce70..1ae285bab8 100644
--- a/softmmu/memory.c
+++ b/softmmu/memory.c
@@ -1151,7 +1151,6 @@ static char *memory_region_escape_name(const char *name)
 {
 const char *p;
 char *escaped, *q;
-uint8_t c;
 size_t bytes = 0;
 
 for (p = name; *p; p++) {
@@ -1163,14 +1162,14 @@ static char *memory_region_escape_name(const char *name)
 
 escaped = g_malloc(bytes + 1);
 for (p = name, q = escaped; *p; p++) {
-c = *p;
-if (unlikely(memory_region_need_escape(c))) {
+if (likely(!memory_region_need_escape(*p))) {
+*q++ = *p;
+} else {
 *q++ = '\\';
 *q++ = 'x';
-*q++ = "0123456789abcdef"[c >> 4];
-c = "0123456789abcdef"[c & 15];
+*q++ = "0123456789abcdef"[*p >> 4];
+*q++ = "0123456789abcdef"[*p & 15];
 }
-*q++ = c;
 }
 *q = 0;
 return escaped;
-- 
2.41.0




Re: [PATCH] docs/system/target-riscv.rst: tidy CPU firmware section

2023-07-12 Thread Michael Tokarev

12.07.2023 17:37, Daniel Henrique Barboza wrote:

This is how the content of the "RISC-V CPU firmware" section is
displayed after the html is generated:

"When using the sifive_u or virt machine there are three different
firmware boot options: 1. -bios default - This is the default behaviour
if no -bios option is included. (...) 3. -bios  - Tells QEMU to
load the specified file as the firmware."

It's all in the same paragraph, in a numbered list, and no special
formatting for the options.

Tidy it up a bit by adding line breaks between items and their
descriptions. Remove the numbered list, and apply formatting to the
options cited in the middle of the text.

Cc: qemu-triv...@nongnu.org
Signed-off-by: Daniel Henrique Barboza 


I'll pick this up for trivial-patches, but since it's the only patch there
now, it's IMHO better to apply it together with other riscv changes if
there will be any for 8.1.  So let's pick it to both trees and the first
to apply wins.

Thanks,

/mjt




[PATCH v3 3/6] throttle: support read-only and write-only

2023-07-12 Thread zhenwei pi
Only one direction is necessary in several scenarios:
- a read-only disk
- operations on a device that are considered *write* only. For example,
  encrypt/decrypt/sign/verify operations on a cryptodev use a single
  *write* timer (a read timer callback is defined, but never invoked).

Allow a single direction in throttle; this reduces memory usage, and
the upper layer no longer needs a dummy callback.

Reviewed-by: Alberto Garcia 
Signed-off-by: zhenwei pi 
---
 util/throttle.c | 32 ++--
 1 file changed, 22 insertions(+), 10 deletions(-)

diff --git a/util/throttle.c b/util/throttle.c
index 5642e61763..c0bd0c26c3 100644
--- a/util/throttle.c
+++ b/util/throttle.c
@@ -199,12 +199,17 @@ static bool throttle_compute_timer(ThrottleState *ts,
 void throttle_timers_attach_aio_context(ThrottleTimers *tt,
 AioContext *new_context)
 {
-tt->timers[THROTTLE_READ] =
-aio_timer_new(new_context, tt->clock_type, SCALE_NS,
-  tt->timer_cb[THROTTLE_READ], tt->timer_opaque);
-tt->timers[THROTTLE_WRITE] =
-aio_timer_new(new_context, tt->clock_type, SCALE_NS,
-  tt->timer_cb[THROTTLE_WRITE], tt->timer_opaque);
+if (tt->timer_cb[THROTTLE_READ]) {
+tt->timers[THROTTLE_READ] =
+aio_timer_new(new_context, tt->clock_type, SCALE_NS,
+  tt->timer_cb[THROTTLE_READ], tt->timer_opaque);
+}
+
+if (tt->timer_cb[THROTTLE_WRITE]) {
+tt->timers[THROTTLE_WRITE] =
+aio_timer_new(new_context, tt->clock_type, SCALE_NS,
+  tt->timer_cb[THROTTLE_WRITE], tt->timer_opaque);
+}
 }
 
 /*
@@ -235,6 +240,7 @@ void throttle_timers_init(ThrottleTimers *tt,
   QEMUTimerCB *write_timer_cb,
   void *timer_opaque)
 {
+assert(read_timer_cb || write_timer_cb);
 memset(tt, 0, sizeof(ThrottleTimers));
 
 tt->clock_type = clock_type;
@@ -247,7 +253,9 @@ void throttle_timers_init(ThrottleTimers *tt,
 /* destroy a timer */
 static void throttle_timer_destroy(QEMUTimer **timer)
 {
-assert(*timer != NULL);
+if (*timer == NULL) {
+return;
+}
 
 timer_free(*timer);
 *timer = NULL;
@@ -272,7 +280,7 @@ void throttle_timers_destroy(ThrottleTimers *tt)
 /* is any throttling timer configured */
 bool throttle_timers_are_initialized(ThrottleTimers *tt)
 {
-if (tt->timers[0]) {
+if (tt->timers[THROTTLE_READ] || tt->timers[THROTTLE_WRITE]) {
 return true;
 }
 
@@ -424,8 +432,12 @@ bool throttle_schedule_timer(ThrottleState *ts,
 {
 int64_t now = qemu_clock_get_ns(tt->clock_type);
 int64_t next_timestamp;
+QEMUTimer *timer;
 bool must_wait;
 
+timer = is_write ? tt->timers[THROTTLE_WRITE] : tt->timers[THROTTLE_READ];
+assert(timer);
+
 must_wait = throttle_compute_timer(ts,
is_write,
now,
@@ -437,12 +449,12 @@ bool throttle_schedule_timer(ThrottleState *ts,
 }
 
 /* request throttled and timer pending -> do nothing */
-if (timer_pending(tt->timers[is_write])) {
+if (timer_pending(timer)) {
 return true;
 }
 
 /* request throttled and timer not pending -> arm timer */
-timer_mod(tt->timers[is_write], next_timestamp);
+timer_mod(timer, next_timestamp);
 return true;
 }
 
-- 
2.34.1




[PATCH v3 4/6] test-throttle: test read only and write only

2023-07-12 Thread zhenwei pi
Reviewed-by: Alberto Garcia 
Signed-off-by: zhenwei pi 
---
 tests/unit/test-throttle.c | 66 ++
 1 file changed, 66 insertions(+)

diff --git a/tests/unit/test-throttle.c b/tests/unit/test-throttle.c
index a60b5fe22e..5547837a58 100644
--- a/tests/unit/test-throttle.c
+++ b/tests/unit/test-throttle.c
@@ -184,6 +184,70 @@ static void test_init(void)
 throttle_timers_destroy(tt);
 }
 
+static void test_init_readonly(void)
+{
+int i;
+
+tt = &tgm.throttle_timers;
+
+/* fill the structures with crap */
+memset(&ts, 1, sizeof(ts));
+memset(tt, 1, sizeof(*tt));
+
+/* init structures */
+throttle_init(&ts);
+throttle_timers_init(tt, ctx, QEMU_CLOCK_VIRTUAL,
+ read_timer_cb, NULL, &ts);
+
+/* check initialized fields */
+g_assert(tt->clock_type == QEMU_CLOCK_VIRTUAL);
+g_assert(tt->timers[THROTTLE_READ]);
+g_assert(!tt->timers[THROTTLE_WRITE]);
+
+/* check other fields were cleared */
+g_assert(!ts.previous_leak);
+g_assert(!ts.cfg.op_size);
+for (i = 0; i < BUCKETS_COUNT; i++) {
+g_assert(!ts.cfg.buckets[i].avg);
+g_assert(!ts.cfg.buckets[i].max);
+g_assert(!ts.cfg.buckets[i].level);
+}
+
+throttle_timers_destroy(tt);
+}
+
+static void test_init_writeonly(void)
+{
+int i;
+
+tt = &tgm.throttle_timers;
+
+/* fill the structures with crap */
+memset(&ts, 1, sizeof(ts));
+memset(tt, 1, sizeof(*tt));
+
+/* init structures */
+throttle_init(&ts);
+throttle_timers_init(tt, ctx, QEMU_CLOCK_VIRTUAL,
+ NULL, write_timer_cb, &ts);
+
+/* check initialized fields */
+g_assert(tt->clock_type == QEMU_CLOCK_VIRTUAL);
+g_assert(!tt->timers[THROTTLE_READ]);
+g_assert(tt->timers[THROTTLE_WRITE]);
+
+/* check other fields were cleared */
+g_assert(!ts.previous_leak);
+g_assert(!ts.cfg.op_size);
+for (i = 0; i < BUCKETS_COUNT; i++) {
+g_assert(!ts.cfg.buckets[i].avg);
+g_assert(!ts.cfg.buckets[i].max);
+g_assert(!ts.cfg.buckets[i].level);
+}
+
+throttle_timers_destroy(tt);
+}
+
 static void test_destroy(void)
 {
 int i;
@@ -752,6 +816,8 @@ int main(int argc, char **argv)
 g_test_add_func("/throttle/leak_bucket",test_leak_bucket);
 g_test_add_func("/throttle/compute_wait",   test_compute_wait);
 g_test_add_func("/throttle/init",   test_init);
+g_test_add_func("/throttle/init_readonly",  test_init_readonly);
+g_test_add_func("/throttle/init_writeonly", test_init_writeonly);
 g_test_add_func("/throttle/destroy",test_destroy);
 g_test_add_func("/throttle/have_timer", test_have_timer);
 g_test_add_func("/throttle/detach_attach",  test_detach_attach);
-- 
2.34.1




[PATCH v3 2/6] test-throttle: use enum ThrottleType

2023-07-12 Thread zhenwei pi
Use enum ThrottleType instead of raw indexes in the throttle test code.

Reviewed-by: Alberto Garcia 
Signed-off-by: zhenwei pi 
---
 tests/unit/test-throttle.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/unit/test-throttle.c b/tests/unit/test-throttle.c
index 7adb5e6652..a60b5fe22e 100644
--- a/tests/unit/test-throttle.c
+++ b/tests/unit/test-throttle.c
@@ -169,8 +169,8 @@ static void test_init(void)
 
 /* check initialized fields */
 g_assert(tt->clock_type == QEMU_CLOCK_VIRTUAL);
-g_assert(tt->timers[0]);
-g_assert(tt->timers[1]);
+g_assert(tt->timers[THROTTLE_READ]);
+g_assert(tt->timers[THROTTLE_WRITE]);
 
 /* check other fields where cleared */
 g_assert(!ts.previous_leak);
@@ -191,7 +191,7 @@ static void test_destroy(void)
 throttle_timers_init(tt, ctx, QEMU_CLOCK_VIRTUAL,
  read_timer_cb, write_timer_cb, &ts);
 throttle_timers_destroy(tt);
-for (i = 0; i < 2; i++) {
+for (i = 0; i < THROTTLE_MAX; i++) {
 g_assert(!tt->timers[i]);
 }
 }
-- 
2.34.1




[PATCH v3 1/6] throttle: introduce enum ThrottleType

2023-07-12 Thread zhenwei pi
Use enum ThrottleType instead of a numeric index.

Reviewed-by: Alberto Garcia 
Signed-off-by: zhenwei pi 
---
 include/qemu/throttle.h | 11 ---
 util/throttle.c | 16 +---
 2 files changed, 17 insertions(+), 10 deletions(-)

diff --git a/include/qemu/throttle.h b/include/qemu/throttle.h
index 05f6346137..ba6293eeef 100644
--- a/include/qemu/throttle.h
+++ b/include/qemu/throttle.h
@@ -99,13 +99,18 @@ typedef struct ThrottleState {
 int64_t previous_leak;/* timestamp of the last leak done */
 } ThrottleState;
 
+typedef enum {
+THROTTLE_READ = 0,
+THROTTLE_WRITE,
+THROTTLE_MAX
+} ThrottleType;
+
 typedef struct ThrottleTimers {
-QEMUTimer *timers[2]; /* timers used to do the throttling */
+QEMUTimer *timers[THROTTLE_MAX];/* timers used to do the throttling */
 QEMUClockType clock_type; /* the clock used */
 
 /* Callbacks */
-QEMUTimerCB *read_timer_cb;
-QEMUTimerCB *write_timer_cb;
+QEMUTimerCB *timer_cb[THROTTLE_MAX];
 void *timer_opaque;
 } ThrottleTimers;
 
diff --git a/util/throttle.c b/util/throttle.c
index 81f247a8d1..5642e61763 100644
--- a/util/throttle.c
+++ b/util/throttle.c
@@ -199,10 +199,12 @@ static bool throttle_compute_timer(ThrottleState *ts,
 void throttle_timers_attach_aio_context(ThrottleTimers *tt,
 AioContext *new_context)
 {
-tt->timers[0] = aio_timer_new(new_context, tt->clock_type, SCALE_NS,
-  tt->read_timer_cb, tt->timer_opaque);
-tt->timers[1] = aio_timer_new(new_context, tt->clock_type, SCALE_NS,
-  tt->write_timer_cb, tt->timer_opaque);
+tt->timers[THROTTLE_READ] =
+aio_timer_new(new_context, tt->clock_type, SCALE_NS,
+  tt->timer_cb[THROTTLE_READ], tt->timer_opaque);
+tt->timers[THROTTLE_WRITE] =
+aio_timer_new(new_context, tt->clock_type, SCALE_NS,
+  tt->timer_cb[THROTTLE_WRITE], tt->timer_opaque);
 }
 
 /*
@@ -236,8 +238,8 @@ void throttle_timers_init(ThrottleTimers *tt,
 memset(tt, 0, sizeof(ThrottleTimers));
 
 tt->clock_type = clock_type;
-tt->read_timer_cb = read_timer_cb;
-tt->write_timer_cb = write_timer_cb;
+tt->timer_cb[THROTTLE_READ] = read_timer_cb;
+tt->timer_cb[THROTTLE_WRITE] = write_timer_cb;
 tt->timer_opaque = timer_opaque;
 throttle_timers_attach_aio_context(tt, aio_context);
 }
@@ -256,7 +258,7 @@ void throttle_timers_detach_aio_context(ThrottleTimers *tt)
 {
 int i;
 
-for (i = 0; i < 2; i++) {
+for (i = 0; i < THROTTLE_MAX; i++) {
 throttle_timer_destroy(&tt->timers[i]);
 }
 }
-- 
2.34.1




[PATCH v3 0/6] Misc fixes for throttle

2023-07-12 Thread zhenwei pi
v2 -> v3:
- patch 1 -> patch 5 are already reviewed by Alberto
- append patch 6: throttle: use enum ThrottleType instead of bool is_write

v1 -> v2:
- rename 'ThrottleTimerType' to 'ThrottleType'
- add assertion to throttle_schedule_timer

v1:
- introduce enum ThrottleTimerType instead of timers[0], timer[1]...
- support read-only and write-only for throttle
- adapt related test codes
- cryptodev uses a write-only throttle timer

Zhenwei Pi (6):
  throttle: introduce enum ThrottleType
  test-throttle: use enum ThrottleType
  throttle: support read-only and write-only
  test-throttle: test read only and write only
  cryptodev: use NULL throttle timer cb for read direction
  throttle: use enum ThrottleType instead of bool is_write

 backends/cryptodev.c| 12 +++---
 block/throttle-groups.c |  6 ++-
 fsdev/qemu-fsdev-throttle.c |  8 ++--
 include/qemu/throttle.h | 15 +---
 tests/unit/test-throttle.c  | 76 ++---
 util/throttle.c | 64 +++
 6 files changed, 136 insertions(+), 45 deletions(-)

-- 
2.34.1




[PATCH v3 6/6] throttle: use enum ThrottleType instead of bool is_write

2023-07-12 Thread zhenwei pi
enum ThrottleType is already there; use ThrottleType instead of
'bool is_write' in the throttle API, and adapt the related code in
block, fsdev, cryptodev and the tests.

Signed-off-by: zhenwei pi 
---
 backends/cryptodev.c|  9 +
 block/throttle-groups.c |  6 --
 fsdev/qemu-fsdev-throttle.c |  8 +---
 include/qemu/throttle.h |  4 ++--
 tests/unit/test-throttle.c  |  4 ++--
 util/throttle.c | 30 --
 6 files changed, 34 insertions(+), 27 deletions(-)

diff --git a/backends/cryptodev.c b/backends/cryptodev.c
index 5cfa25c61c..06142eae57 100644
--- a/backends/cryptodev.c
+++ b/backends/cryptodev.c
@@ -242,10 +242,11 @@ static void cryptodev_backend_throttle_timer_cb(void *opaque)
 continue;
 }
 
-throttle_account(&backend->ts, true, ret);
+throttle_account(&backend->ts, THROTTLE_WRITE, ret);
 cryptodev_backend_operation(backend, op_info);
 if (throttle_enabled(&backend->tc) &&
-throttle_schedule_timer(&backend->ts, &backend->tt, true)) {
+throttle_schedule_timer(&backend->ts, &backend->tt,
+THROTTLE_WRITE)) {
 break;
 }
 }
@@ -261,7 +262,7 @@ int cryptodev_backend_crypto_operation(
 goto do_account;
 }
 
-if (throttle_schedule_timer(&backend->ts, &backend->tt, true) ||
+if (throttle_schedule_timer(&backend->ts, &backend->tt, THROTTLE_WRITE) ||
 !QTAILQ_EMPTY(&backend->opinfos)) {
 QTAILQ_INSERT_TAIL(&backend->opinfos, op_info, next);
 return 0;
@@ -273,7 +274,7 @@ do_account:
 return ret;
 }
 
-throttle_account(&backend->ts, true, ret);
+throttle_account(&backend->ts, THROTTLE_WRITE, ret);
 
 return cryptodev_backend_operation(backend, op_info);
 }
diff --git a/block/throttle-groups.c b/block/throttle-groups.c
index fb203c3ced..429b9d1dae 100644
--- a/block/throttle-groups.c
+++ b/block/throttle-groups.c
@@ -270,6 +270,7 @@ static bool throttle_group_schedule_timer(ThrottleGroupMember *tgm,
 ThrottleState *ts = tgm->throttle_state;
 ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
 ThrottleTimers *tt = &tgm->throttle_timers;
+ThrottleType throttle = is_write ? THROTTLE_WRITE : THROTTLE_READ;
 bool must_wait;
 
 if (qatomic_read(&tgm->io_limits_disabled)) {
@@ -281,7 +282,7 @@ static bool throttle_group_schedule_timer(ThrottleGroupMember *tgm,
 return true;
 }
 
-must_wait = throttle_schedule_timer(ts, tt, is_write);
+must_wait = throttle_schedule_timer(ts, tt, throttle);
 
 /* If a timer just got armed, set tgm as the current token */
 if (must_wait) {
@@ -364,6 +365,7 @@ void coroutine_fn throttle_group_co_io_limits_intercept(ThrottleGroupMember *tgm
 bool must_wait;
 ThrottleGroupMember *token;
 ThrottleGroup *tg = container_of(tgm->throttle_state, ThrottleGroup, ts);
+ThrottleType throttle = is_write ? THROTTLE_WRITE : THROTTLE_READ;
 
 assert(bytes >= 0);
 
@@ -386,7 +388,7 @@ void coroutine_fn throttle_group_co_io_limits_intercept(ThrottleGroupMember *tgm
 }
 
 /* The I/O will be executed, so do the accounting */
-throttle_account(tgm->throttle_state, is_write, bytes);
+throttle_account(tgm->throttle_state, throttle, bytes);
 
 /* Schedule the next request */
 schedule_next_request(tgm, is_write);
diff --git a/fsdev/qemu-fsdev-throttle.c b/fsdev/qemu-fsdev-throttle.c
index 5c83a1cc09..4aa5bc0196 100644
--- a/fsdev/qemu-fsdev-throttle.c
+++ b/fsdev/qemu-fsdev-throttle.c
@@ -97,16 +97,18 @@ void fsdev_throttle_init(FsThrottle *fst)
 void coroutine_fn fsdev_co_throttle_request(FsThrottle *fst, bool is_write,
 struct iovec *iov, int iovcnt)
 {
+ThrottleType throttle = is_write ? THROTTLE_WRITE : THROTTLE_READ;
+
 if (throttle_enabled(&fst->cfg)) {
-if (throttle_schedule_timer(&fst->ts, &fst->tt, is_write) ||
+if (throttle_schedule_timer(&fst->ts, &fst->tt, throttle) ||
 !qemu_co_queue_empty(&fst->throttled_reqs[is_write])) {
 qemu_co_queue_wait(&fst->throttled_reqs[is_write], NULL);
 }
 
-throttle_account(&fst->ts, is_write, iov_size(iov, iovcnt));
+throttle_account(&fst->ts, throttle, iov_size(iov, iovcnt));
 
 if (!qemu_co_queue_empty(&fst->throttled_reqs[is_write]) &&
-!throttle_schedule_timer(&fst->ts, &fst->tt, is_write)) {
+!throttle_schedule_timer(&fst->ts, &fst->tt, throttle)) {
 qemu_co_queue_next(&fst->throttled_reqs[is_write]);
 }
 }
diff --git a/include/qemu/throttle.h b/include/qemu/throttle.h
index ba6293eeef..1cd6b0c397 100644
--- a/include/qemu/throttle.h
+++ b/include/qemu/throttle.h
@@ -154,9 +154,9 @@ void throttle_config_init(ThrottleConfig *cfg);
 /* usage */
 bool throttle_schedule_timer(ThrottleState *ts,
  ThrottleTim

[PATCH v3 5/6] cryptodev: use NULL throttle timer cb for read direction

2023-07-12 Thread zhenwei pi
Operations on a cryptodev are considered *write* only; the callback
for the read direction is never invoked. Use NULL instead of an
unreachable path (cryptodev_backend_throttle_timer_cb on the read
direction).

Reviewed-by: Alberto Garcia 
Signed-off-by: zhenwei pi 
---
 backends/cryptodev.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/backends/cryptodev.c b/backends/cryptodev.c
index 7d29517843..5cfa25c61c 100644
--- a/backends/cryptodev.c
+++ b/backends/cryptodev.c
@@ -331,8 +331,7 @@ static void cryptodev_backend_set_throttle(CryptoDevBackend *backend, int field,
 if (!enabled) {
 throttle_init(&backend->ts);
 throttle_timers_init(&backend->tt, qemu_get_aio_context(),
- QEMU_CLOCK_REALTIME,
- cryptodev_backend_throttle_timer_cb, /* FIXME */
+ QEMU_CLOCK_REALTIME, NULL,
  cryptodev_backend_throttle_timer_cb, backend);
 }
 
-- 
2.34.1




[PATCH 1/1] dump: kdump-zlib data pages not dumped with pvtime/aarch64

2023-07-12 Thread Dongli Zhang
The kdump-zlib data pages are not dumped from an aarch64 host when
'pvtime' is involved, that is, when block->target_end is not aligned to
the page_size. In the example below, two blocks are expected to be dumped.

(qemu) info mtree -f
... ...
  090a-090a0fff (prio 0, ram): pvtime KVM
... ...
  4000-0001bfff (prio 0, ram): mach-virt.ram KVM
... ...

However, there is an issue with get_next_page() so that the pages for
"mach-virt.ram" will not be dumped.

At line 1296, although we have reached the end of the 'pvtime' block,
since that end is not aligned to the page_size (e.g., 0x1), the loop will
not break at line 1298.

1255 static bool get_next_page(GuestPhysBlock **blockptr, uint64_t *pfnptr,
1256   uint8_t **bufptr, DumpState *s)
... ...
1294 memcpy(buf + addr % page_size, hbuf, n);
1295 addr += n;
1296 if (addr % page_size == 0) {
1297 /* we filled up the page */
1298 break;
1299 }

As a result, get_next_page() will continue to the next
block ("mach-virt.ram"). Finally, when get_next_page() returns to the
caller:

- 'pfnptr' is referring to the 'pvtime'
- but 'blockptr' is referring to the "mach-virt.ram"

When get_next_page() is called the next time, "*pfnptr += 1" still refers
to the prior 'pvtime' block. The function exits immediately because that
pfn is out of the range of the current "mach-virt.ram" block.

The fix is to break when it is time to move on to the next block, so that
both 'pfnptr' and 'blockptr' refer to the same block.

Fixes: 94d788408d2d ("dump: fix kdump to work over non-aligned blocks")
Cc: Joe Jin 
Signed-off-by: Dongli Zhang 
---
 dump/dump.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/dump/dump.c b/dump/dump.c
index 1f1a6edcab..c93e4c572f 100644
--- a/dump/dump.c
+++ b/dump/dump.c
@@ -1293,8 +1293,8 @@ static bool get_next_page(GuestPhysBlock **blockptr, uint64_t *pfnptr,
 
 memcpy(buf + addr % page_size, hbuf, n);
 addr += n;
-if (addr % page_size == 0) {
-/* we filled up the page */
+if (addr % page_size == 0 || addr >= block->target_end) {
+/* we filled up the page or the current block is finished */
 break;
 }
 } else {
-- 
2.34.1




[PATCH 0/3] hw/arm/virt: Use generic CPU type validation

2023-07-12 Thread Gavin Shan
There is a generic CPU type validation in machine_run_board_init(),
so we don't need a duplicate, private check for hw/arm/virt machines.
This series switches the hw/arm/virt machines to the generic CPU type
validation.

PATCH[1] factors the CPU type validation logic in machine_run_board_init()
         out into a helper validate_cpu_type().
PATCH[2] uses the generic CPU type validation for hw/arm/virt machines.
PATCH[3] supports the "host-arm-cpu" CPU type only when KVM or HVF is
         configured.

Testing
===

With the following command lines, the output messages are varied before
and after the series is applied.

  /home/gshan/sandbox/src/qemu/main/build/qemu-system-aarch64 \
  -accel tcg -machine virt,gic-version=3,nvdimm=on\
  -cpu cortex-a8 -smp maxcpus=2,cpus=1\
:

Before the series is applied:

  qemu-system-aarch64: mach-virt: CPU type cortex-a8-arm-cpu not supported

After the series is applied:

  qemu-system-aarch64: Invalid CPU type: cortex-a8-arm-cpu
  The valid types are: cortex-a7-arm-cpu, cortex-a15-arm-cpu, \
  cortex-a35-arm-cpu, cortex-a55-arm-cpu, cortex-a72-arm-cpu, \
  cortex-a76-arm-cpu, a64fx-arm-cpu, neoverse-n1-arm-cpu, \
  neoverse-v1-arm-cpu, cortex-a53-arm-cpu, cortex-a57-arm-cpu, \
  max-arm-cpu

Gavin Shan (3):
  machine: Factor CPU type validation out into helper
  hw/arm/virt: Use generic CPU type validation
  hw/arm/virt: Support host CPU type only when KVM or HVF is configured

 hw/arm/virt.c | 23 +++---
 hw/core/machine.c | 81 +--
 2 files changed, 48 insertions(+), 56 deletions(-)

-- 
2.41.0




[PATCH 2/3] hw/arm/virt: Use generic CPU type validation

2023-07-12 Thread Gavin Shan
There is a generic CPU type validation in machine_run_board_init(),
so we don't need a duplicate, private check. Set mc->valid_cpu_types
to use the generic CPU type validation.

No functional change intended.

Signed-off-by: Gavin Shan 
---
 hw/arm/virt.c | 21 +++--
 1 file changed, 3 insertions(+), 18 deletions(-)

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 7d9dbc2663..43d7772ffd 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -203,7 +203,7 @@ static const int a15irqmap[] = {
 [VIRT_PLATFORM_BUS] = 112, /* ...to 112 + PLATFORM_BUS_NUM_IRQS -1 */
 };
 
-static const char *valid_cpus[] = {
+static const char *valid_cpu_types[] = {
 #ifdef CONFIG_TCG
 ARM_CPU_TYPE_NAME("cortex-a7"),
 ARM_CPU_TYPE_NAME("cortex-a15"),
@@ -219,20 +219,9 @@ static const char *valid_cpus[] = {
 ARM_CPU_TYPE_NAME("cortex-a57"),
 ARM_CPU_TYPE_NAME("host"),
 ARM_CPU_TYPE_NAME("max"),
+NULL
 };
 
-static bool cpu_type_valid(const char *cpu)
-{
-int i;
-
-for (i = 0; i < ARRAY_SIZE(valid_cpus); i++) {
-if (strcmp(cpu, valid_cpus[i]) == 0) {
-return true;
-}
-}
-return false;
-}
-
 static void create_randomness(MachineState *ms, const char *node)
 {
 struct {
@@ -2030,11 +2019,6 @@ static void machvirt_init(MachineState *machine)
 unsigned int smp_cpus = machine->smp.cpus;
 unsigned int max_cpus = machine->smp.max_cpus;
 
-if (!cpu_type_valid(machine->cpu_type)) {
-error_report("mach-virt: CPU type %s not supported", 
machine->cpu_type);
-exit(1);
-}
-
 possible_cpus = mc->possible_cpu_arch_ids(machine);
 
 /*
@@ -2953,6 +2937,7 @@ static void virt_machine_class_init(ObjectClass *oc, void *data)
 #else
 mc->default_cpu_type = ARM_CPU_TYPE_NAME("max");
 #endif
+mc->valid_cpu_types = valid_cpu_types;
 mc->get_default_cpu_node_id = virt_get_default_cpu_node_id;
 mc->kvm_type = virt_kvm_type;
 assert(!mc->get_hotplug_handler);
-- 
2.41.0




[PATCH 1/3] machine: Factor CPU type validation out into helper

2023-07-12 Thread Gavin Shan
The CPU type validation logic in machine_run_board_init() is
independent enough. Let's factor it out into a helper,
validate_cpu_type(). While at it, the relevant comments are improved
a bit.

No functional change intended.

Signed-off-by: Gavin Shan 
---
 hw/core/machine.c | 81 +--
 1 file changed, 43 insertions(+), 38 deletions(-)

diff --git a/hw/core/machine.c b/hw/core/machine.c
index f0d35c6401..68b866c762 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -1349,12 +1349,52 @@ out:
 return r;
 }
 
+static void validate_cpu_type(MachineState *machine)
+{
+MachineClass *machine_class = MACHINE_GET_CLASS(machine);
+ObjectClass *oc = object_class_by_name(machine->cpu_type);
+CPUClass *cc = CPU_CLASS(oc);
+int i;
+
+/*
+ * Check if the user-specified CPU type is supported when the valid
+ * CPU types have been determined. Note that the user-specified CPU
+ * type is given by '-cpu' option.
+ */
+if (!machine->cpu_type || !machine_class->valid_cpu_types) {
+goto out_no_check;
+}
+
+for (i = 0; machine_class->valid_cpu_types[i]; i++) {
+if (object_class_dynamic_cast(oc, machine_class->valid_cpu_types[i])) {
+break;
+}
+}
+
+if (!machine_class->valid_cpu_types[i]) {
+/* The user-specified CPU type is invalid */
+error_report("Invalid CPU type: %s", machine->cpu_type);
+error_printf("The valid types are: %s",
+ machine_class->valid_cpu_types[0]);
+for (i = 1; machine_class->valid_cpu_types[i]; i++) {
+error_printf(", %s", machine_class->valid_cpu_types[i]);
+}
+error_printf("\n");
+
+exit(1);
+}
+
+/* Check if CPU type is deprecated and warn if so */
+out_no_check:
+if (cc && cc->deprecation_note) {
+warn_report("CPU model %s is deprecated -- %s",
+machine->cpu_type, cc->deprecation_note);
+}
+}
 
 void machine_run_board_init(MachineState *machine, const char *mem_path, Error **errp)
 {
 MachineClass *machine_class = MACHINE_GET_CLASS(machine);
-ObjectClass *oc = object_class_by_name(machine->cpu_type);
-CPUClass *cc;
 
 /* This checkpoint is required by replay to separate prior clock
reading from the other reads, because timer polling functions query
@@ -1405,42 +1445,7 @@ void machine_run_board_init(MachineState *machine, const char *mem_path, Error *
 machine->ram = machine_consume_memdev(machine, machine->memdev);
 }
 
-/* If the machine supports the valid_cpu_types check and the user
- * specified a CPU with -cpu check here that the user CPU is supported.
- */
-if (machine_class->valid_cpu_types && machine->cpu_type) {
-int i;
-
-for (i = 0; machine_class->valid_cpu_types[i]; i++) {
-if (object_class_dynamic_cast(oc,
-  machine_class->valid_cpu_types[i])) {
-/* The user specificed CPU is in the valid field, we are
- * good to go.
- */
-break;
-}
-}
-
-if (!machine_class->valid_cpu_types[i]) {
-/* The user specified CPU is not valid */
-error_report("Invalid CPU type: %s", machine->cpu_type);
-error_printf("The valid types are: %s",
- machine_class->valid_cpu_types[0]);
-for (i = 1; machine_class->valid_cpu_types[i]; i++) {
-error_printf(", %s", machine_class->valid_cpu_types[i]);
-}
-error_printf("\n");
-
-exit(1);
-}
-}
-
-/* Check if CPU type is deprecated and warn if so */
-cc = CPU_CLASS(oc);
-if (cc && cc->deprecation_note) {
-warn_report("CPU model %s is deprecated -- %s", machine->cpu_type,
-cc->deprecation_note);
-}
+validate_cpu_type(machine);
 
 if (machine->cgs) {
 /*
-- 
2.41.0




[PATCH 3/3] hw/arm/virt: Support host CPU type only when KVM or HVF is configured

2023-07-12 Thread Gavin Shan
The CPU type class 'host-arm-cpu' isn't registered unless KVM or HVF
is configured in target/arm/cpu64.c. Support the corresponding CPU
type only when KVM or HVF is configured.

Signed-off-by: Gavin Shan 
---
 hw/arm/virt.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 43d7772ffd..ad28634445 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -217,7 +217,9 @@ static const char *valid_cpu_types[] = {
 #endif
 ARM_CPU_TYPE_NAME("cortex-a53"),
 ARM_CPU_TYPE_NAME("cortex-a57"),
+#if defined(CONFIG_KVM) || defined(CONFIG_HVF)
 ARM_CPU_TYPE_NAME("host"),
+#endif
 ARM_CPU_TYPE_NAME("max"),
 NULL
 };
-- 
2.41.0




Re: [RFC PATCH 6/9] ui/gtk: Add a new parameter to assign connectors/monitors to GFX VCs

2023-07-12 Thread Kim, Dongwon



On 7/11/2023 10:52 PM, Markus Armbruster wrote:

"Kim, Dongwon"  writes:


On 7/10/2023 11:36 PM, Markus Armbruster wrote:

"Kim, Dongwon"  writes:


On 7/9/2023 11:05 PM, Markus Armbruster wrote:

"Kim, Dongwon"  writes:


On 7/7/2023 7:07 AM, Markus Armbruster wrote:

[...]


Old question not yet answered: Using a list for the mapping means the
mapping must be dense, e.g. I can't map #0 and #2 but not #1.  Is this
what we want?

No, it doesn't have to be dense. In your example, you can just leave the slot
for VC1 blank. For example, you could do connectors.0=DP-1,connectors.2=HDMI-1.
But in this case, VC1 won't be activated and will stay disconnected from the
guest's perspective. I think this info is also needed in v2.

Have you tried this?  I believe it'll fail with something like
"Parameter 'connectors.1' missing".

Just tested it. Yeah, you are correct. I think I had made a bad assumption. Let
me take a look to see if I can make it work as I assumed.

If sparse mappings make sense, we should provide for them, I think.

An array like '*connectors': ['str'] maps from integers 0, 1, ...  It
can't do sparse (you can't omit integers in the middle).

Yeah, I understand this now. Although my initial intention was different, I
am wondering whether we should simply not allow sparse mapping in this
implementation. Any thoughts on that?

Repeating myself: if sparse mappings make sense, we should provide for
them, I think.
So, do they make sense?  Or asked differently, could a user conceivably
want to *not* place a VC?


It should be very rare. I can't think of any valid use case other than 
test cases for validating this feature. If a VC is not mapped to anything 
from the beginning, there is no way to get it displayed, so there is no 
value in doing so. So I assume requiring a full list would make sense 
here.



The v2 patch includes that change in the description. Another thought is to 
change the QAPI design a little bit so that an element is filled with null (0) 
if it is not given. Would this be a feasible option?

A 'str' cannot be NULL.  In fact, no QAPI type can be null, except for
'null' (which is always NULL), alternates with a 'null' branch, and
pointer-valued optionals (which are null when absent).


Instead of omitting them, we could map them to null: '*connectors':
['StrOrNull'].  JSON input looks like [null, "HDMI-A-0"].  Since dotted
key syntax does not support null at this time, you'd have to use JSON.

Only an object can do sparse.  However, the QAPI schema language can't
express "object where the keys are integers and the values are strings".
We'd have to use 'any', and check everything manually.

Hmm.  Thoughts?


[...]




[PATCH] virtio-gpu-udmabuf: correct naming of QemuDmaBuf size properties

2023-07-12 Thread Dongwon Kim
Replace 'width' and 'height' in QemuDmaBuf with 'backing_width'
and 'backing_height', as these commonly indicate the size of the
whole surface (e.g. the guest's Xorg extended display). Then use
'width' and 'height' for the sub-region within it (e.g. the guest's
scanouts).

Cc: Gerd Hoffmann 
Cc: Marc-André Lureau 
Cc: Vivek Kasireddy 
Signed-off-by: Dongwon Kim 
---
 hw/display/virtio-gpu-udmabuf.c | 12 ++--
 include/ui/console.h|  4 ++--
 ui/dbus-listener.c  |  8 
 ui/egl-helpers.c|  8 
 ui/gtk-egl.c|  8 
 ui/gtk-gl-area.c|  6 +++---
 6 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/hw/display/virtio-gpu-udmabuf.c b/hw/display/virtio-gpu-udmabuf.c
index ef1a740de5..047758582c 100644
--- a/hw/display/virtio-gpu-udmabuf.c
+++ b/hw/display/virtio-gpu-udmabuf.c
@@ -181,13 +181,13 @@ static VGPUDMABuf
 }
 
 dmabuf = g_new0(VGPUDMABuf, 1);
-dmabuf->buf.width = fb->width;
-dmabuf->buf.height = fb->height;
+dmabuf->buf.width = r->width;
+dmabuf->buf.height = r->height;
 dmabuf->buf.stride = fb->stride;
 dmabuf->buf.x = r->x;
 dmabuf->buf.y = r->y;
-dmabuf->buf.scanout_width = r->width;
-dmabuf->buf.scanout_height = r->height;
+dmabuf->buf.backing_width = fb->width;
+dmabuf->buf.backing_height = fb->height;
 dmabuf->buf.fourcc = qemu_pixman_to_drm_format(fb->format);
 dmabuf->buf.fd = res->dmabuf_fd;
 dmabuf->buf.allow_fences = true;
@@ -218,8 +218,8 @@ int virtio_gpu_update_dmabuf(VirtIOGPU *g,
 
 g->dmabuf.primary[scanout_id] = new_primary;
 qemu_console_resize(scanout->con,
-new_primary->buf.scanout_width,
-new_primary->buf.scanout_height);
+new_primary->buf.width,
+new_primary->buf.height);
 dpy_gl_scanout_dmabuf(scanout->con, &new_primary->buf);
 
 if (old_primary) {
diff --git a/include/ui/console.h b/include/ui/console.h
index f27b2aad4f..3e8b22d6c6 100644
--- a/include/ui/console.h
+++ b/include/ui/console.h
@@ -201,8 +201,8 @@ typedef struct QemuDmaBuf {
 uint32_t  texture;
 uint32_t  x;
 uint32_t  y;
-uint32_t  scanout_width;
-uint32_t  scanout_height;
+uint32_t  backing_width;
+uint32_t  backing_height;
 bool  y0_top;
 void  *sync;
 int   fence_fd;
diff --git a/ui/dbus-listener.c b/ui/dbus-listener.c
index 0240c39510..68ff343799 100644
--- a/ui/dbus-listener.c
+++ b/ui/dbus-listener.c
@@ -415,13 +415,13 @@ static void dbus_scanout_texture(DisplayChangeListener *dcl,
backing_width, backing_height, x, y, w, h);
 #ifdef CONFIG_GBM
 QemuDmaBuf dmabuf = {
-.width = backing_width,
-.height = backing_height,
+.width = w,
+.height = h,
 .y0_top = backing_y_0_top,
 .x = x,
 .y = y,
-.scanout_width = w,
-.scanout_height = h,
+.backing_width = backing_width,
+.backing_height = backing_height,
 };
 
 assert(tex_id);
diff --git a/ui/egl-helpers.c b/ui/egl-helpers.c
index 8f9fbf583e..bc0960a9ec 100644
--- a/ui/egl-helpers.c
+++ b/ui/egl-helpers.c
@@ -148,8 +148,8 @@ void egl_fb_blit(egl_fb *dst, egl_fb *src, bool flip)
 if (src->dmabuf) {
 x1 = src->dmabuf->x;
 y1 = src->dmabuf->y;
-w = src->dmabuf->scanout_width;
-h = src->dmabuf->scanout_height;
+w = src->dmabuf->width;
+h = src->dmabuf->height;
 }
 
 w = (x1 + w) > src->width ? src->width - x1 : w;
@@ -314,9 +314,9 @@ void egl_dmabuf_import_texture(QemuDmaBuf *dmabuf)
 }
 
 attrs[i++] = EGL_WIDTH;
-attrs[i++] = dmabuf->width;
+attrs[i++] = dmabuf->backing_width;
 attrs[i++] = EGL_HEIGHT;
-attrs[i++] = dmabuf->height;
+attrs[i++] = dmabuf->backing_height;
 attrs[i++] = EGL_LINUX_DRM_FOURCC_EXT;
 attrs[i++] = dmabuf->fourcc;
 
diff --git a/ui/gtk-egl.c b/ui/gtk-egl.c
index d59b8cd7d7..a37ad6c9db 100644
--- a/ui/gtk-egl.c
+++ b/ui/gtk-egl.c
@@ -258,9 +258,9 @@ void gd_egl_scanout_dmabuf(DisplayChangeListener *dcl,
 }
 
 gd_egl_scanout_texture(dcl, dmabuf->texture,
-   dmabuf->y0_top, dmabuf->width, dmabuf->height,
-   dmabuf->x, dmabuf->y, dmabuf->scanout_width,
-   dmabuf->scanout_height, NULL);
+   dmabuf->y0_top, dmabuf->backing_width, dmabuf->backing_height,
+   dmabuf->x, dmabuf->y, dmabuf->width,
+   dmabuf->height, NULL);
 
 if (dmabuf->allow_fences) {
 vc->gfx.guest_fb.dmabuf = dmabuf;
@@ -280,7 +280,7 @@ void gd_egl_cursor_dmabuf(DisplayChangeListener *dcl,
 if (!dmabuf->texture) {
 return;
 }
-egl_fb_setup_for_tex(&vc->gfx.cursor_fb, dmabuf->width, dmabuf->height,
+egl_fb_setup_for_tex(&vc->gfx

[PATCH 01/11] tpm_crb: refactor common code

2023-07-12 Thread Joelle van Dyne
In preparation for the SysBus variant, we move the common code out,
following the structure of the TPM TIS devices.

To maintain compatibility, we do not rename the existing tpm-crb
device.

Signed-off-by: Joelle van Dyne 
---
 docs/specs/tpm.rst  |   1 +
 hw/tpm/tpm_crb.h|  76 +++
 hw/tpm/tpm_crb.c| 270 ++--
 hw/tpm/tpm_crb_common.c | 218 
 hw/tpm/meson.build  |   1 +
 hw/tpm/trace-events |   2 +-
 6 files changed, 333 insertions(+), 235 deletions(-)
 create mode 100644 hw/tpm/tpm_crb.h
 create mode 100644 hw/tpm/tpm_crb_common.c

diff --git a/docs/specs/tpm.rst b/docs/specs/tpm.rst
index efe124a148..2bc29c9804 100644
--- a/docs/specs/tpm.rst
+++ b/docs/specs/tpm.rst
@@ -45,6 +45,7 @@ operating system.
 
 QEMU files related to TPM CRB interface:
  - ``hw/tpm/tpm_crb.c``
+ - ``hw/tpm/tpm_crb_common.c``
 
 SPAPR interface
 ---
diff --git a/hw/tpm/tpm_crb.h b/hw/tpm/tpm_crb.h
new file mode 100644
index 00..da3a0cf256
--- /dev/null
+++ b/hw/tpm/tpm_crb.h
@@ -0,0 +1,76 @@
+/*
+ * tpm_crb.h - QEMU's TPM CRB interface emulator
+ *
+ * Copyright (c) 2018 Red Hat, Inc.
+ *
+ * Authors:
+ *   Marc-André Lureau 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ * tpm_crb is a device for TPM 2.0 Command Response Buffer (CRB) Interface
+ * as defined in TCG PC Client Platform TPM Profile (PTP) Specification
+ * Family “2.0” Level 00 Revision 01.03 v22
+ */
+#ifndef TPM_TPM_CRB_H
+#define TPM_TPM_CRB_H
+
+#include "exec/memory.h"
+#include "hw/acpi/tpm.h"
+#include "sysemu/tpm_backend.h"
+#include "tpm_ppi.h"
+
+#define CRB_CTRL_CMD_SIZE (TPM_CRB_ADDR_SIZE - A_CRB_DATA_BUFFER)
+
+typedef struct TPMCRBState {
+TPMBackend *tpmbe;
+TPMBackendCmd cmd;
+uint32_t regs[TPM_CRB_R_MAX];
+MemoryRegion mmio;
+MemoryRegion cmdmem;
+
+size_t be_buffer_size;
+
+bool ppi_enabled;
+TPMPPI ppi;
+} TPMCRBState;
+
+#define CRB_INTF_TYPE_CRB_ACTIVE 0b1
+#define CRB_INTF_VERSION_CRB 0b1
+#define CRB_INTF_CAP_LOCALITY_0_ONLY 0b0
+#define CRB_INTF_CAP_IDLE_FAST 0b0
+#define CRB_INTF_CAP_XFER_SIZE_64 0b11
+#define CRB_INTF_CAP_FIFO_NOT_SUPPORTED 0b0
+#define CRB_INTF_CAP_CRB_SUPPORTED 0b1
+#define CRB_INTF_IF_SELECTOR_CRB 0b1
+
+enum crb_loc_ctrl {
+CRB_LOC_CTRL_REQUEST_ACCESS = BIT(0),
+CRB_LOC_CTRL_RELINQUISH = BIT(1),
+CRB_LOC_CTRL_SEIZE = BIT(2),
+CRB_LOC_CTRL_RESET_ESTABLISHMENT_BIT = BIT(3),
+};
+
+enum crb_ctrl_req {
+CRB_CTRL_REQ_CMD_READY = BIT(0),
+CRB_CTRL_REQ_GO_IDLE = BIT(1),
+};
+
+enum crb_start {
+CRB_START_INVOKE = BIT(0),
+};
+
+enum crb_cancel {
+CRB_CANCEL_INVOKE = BIT(0),
+};
+
+#define TPM_CRB_NO_LOCALITY 0xff
+
+void tpm_crb_request_completed(TPMCRBState *s, int ret);
+enum TPMVersion tpm_crb_get_version(TPMCRBState *s);
+int tpm_crb_pre_save(TPMCRBState *s);
+void tpm_crb_reset(TPMCRBState *s, uint64_t baseaddr);
+void tpm_crb_init_memory(Object *obj, TPMCRBState *s, Error **errp);
+
+#endif /* TPM_TPM_CRB_H */
diff --git a/hw/tpm/tpm_crb.c b/hw/tpm/tpm_crb.c
index ea930da545..3ef4977fb5 100644
--- a/hw/tpm/tpm_crb.c
+++ b/hw/tpm/tpm_crb.c
@@ -31,257 +31,62 @@
 #include "tpm_ppi.h"
 #include "trace.h"
 #include "qom/object.h"
+#include "tpm_crb.h"
 
 struct CRBState {
 DeviceState parent_obj;
 
-TPMBackend *tpmbe;
-TPMBackendCmd cmd;
-uint32_t regs[TPM_CRB_R_MAX];
-MemoryRegion mmio;
-MemoryRegion cmdmem;
-
-size_t be_buffer_size;
-
-bool ppi_enabled;
-TPMPPI ppi;
+TPMCRBState state;
 };
 typedef struct CRBState CRBState;
 
 DECLARE_INSTANCE_CHECKER(CRBState, CRB,
  TYPE_TPM_CRB)
 
-#define CRB_INTF_TYPE_CRB_ACTIVE 0b1
-#define CRB_INTF_VERSION_CRB 0b1
-#define CRB_INTF_CAP_LOCALITY_0_ONLY 0b0
-#define CRB_INTF_CAP_IDLE_FAST 0b0
-#define CRB_INTF_CAP_XFER_SIZE_64 0b11
-#define CRB_INTF_CAP_FIFO_NOT_SUPPORTED 0b0
-#define CRB_INTF_CAP_CRB_SUPPORTED 0b1
-#define CRB_INTF_IF_SELECTOR_CRB 0b1
-
-#define CRB_CTRL_CMD_SIZE (TPM_CRB_ADDR_SIZE - A_CRB_DATA_BUFFER)
-
-enum crb_loc_ctrl {
-CRB_LOC_CTRL_REQUEST_ACCESS = BIT(0),
-CRB_LOC_CTRL_RELINQUISH = BIT(1),
-CRB_LOC_CTRL_SEIZE = BIT(2),
-CRB_LOC_CTRL_RESET_ESTABLISHMENT_BIT = BIT(3),
-};
-
-enum crb_ctrl_req {
-CRB_CTRL_REQ_CMD_READY = BIT(0),
-CRB_CTRL_REQ_GO_IDLE = BIT(1),
-};
-
-enum crb_start {
-CRB_START_INVOKE = BIT(0),
-};
-
-enum crb_cancel {
-CRB_CANCEL_INVOKE = BIT(0),
-};
-
-#define TPM_CRB_NO_LOCALITY 0xff
-
-static uint64_t tpm_crb_mmio_read(void *opaque, hwaddr addr,
-  unsigned size)
-{
-CRBState *s = CRB(opaque);
-void *regs = (void *)&s->regs + (addr & ~3);
-unsigned offset = addr & 3;
-uint32_t val = *(uint32_t *)regs >> (8 * offset);
-
-switch (addr) {
-case A_CRB_LOC_STATE:
-val |= !tpm_backend_get_tpm_established_flag(s->tpmbe);
-break;
-

[PATCH 11/11] tpm_crb_sysbus: introduce TPM CRB SysBus device

2023-07-12 Thread Joelle van Dyne
This SysBus variant of the CRB interface supports dynamically locating
the MMIO interface so that Virt machines can use it. This interface
is currently the only one supported by QEMU that works on Windows 11
ARM64. We largely follow the TPM TIS SysBus device as a template.

Signed-off-by: Joelle van Dyne 
---
 docs/specs/tpm.rst  |   1 +
 include/hw/acpi/aml-build.h |   1 +
 include/sysemu/tpm.h|   3 +
 hw/acpi/aml-build.c |   7 +-
 hw/arm/virt.c   |   1 +
 hw/core/sysbus-fdt.c|   1 +
 hw/loongarch/virt.c |   1 +
 hw/riscv/virt.c |   1 +
 hw/tpm/tpm_crb_sysbus.c | 178 
 hw/arm/Kconfig  |   1 +
 hw/riscv/Kconfig|   1 +
 hw/tpm/Kconfig  |   5 +
 hw/tpm/meson.build  |   2 +
 13 files changed, 202 insertions(+), 1 deletion(-)
 create mode 100644 hw/tpm/tpm_crb_sysbus.c

diff --git a/docs/specs/tpm.rst b/docs/specs/tpm.rst
index 2bc29c9804..95aeb49220 100644
--- a/docs/specs/tpm.rst
+++ b/docs/specs/tpm.rst
@@ -46,6 +46,7 @@ operating system.
 QEMU files related to TPM CRB interface:
  - ``hw/tpm/tpm_crb.c``
  - ``hw/tpm/tpm_crb_common.c``
+ - ``hw/tpm/tpm_crb_sysbus.c``
 
 SPAPR interface
 ---
diff --git a/include/hw/acpi/aml-build.h b/include/hw/acpi/aml-build.h
index d1fb08514b..9660e16148 100644
--- a/include/hw/acpi/aml-build.h
+++ b/include/hw/acpi/aml-build.h
@@ -3,6 +3,7 @@
 
 #include "hw/acpi/acpi-defs.h"
 #include "hw/acpi/bios-linker-loader.h"
+#include "exec/hwaddr.h"
 
 #define ACPI_BUILD_APPNAME6 "BOCHS "
 #define ACPI_BUILD_APPNAME8 "BXPC"
diff --git a/include/sysemu/tpm.h b/include/sysemu/tpm.h
index 66e3b45f30..f79c8f3575 100644
--- a/include/sysemu/tpm.h
+++ b/include/sysemu/tpm.h
@@ -47,6 +47,7 @@ struct TPMIfClass {
 #define TYPE_TPM_TIS_ISA"tpm-tis"
 #define TYPE_TPM_TIS_SYSBUS "tpm-tis-device"
 #define TYPE_TPM_CRB"tpm-crb"
+#define TYPE_TPM_CRB_SYSBUS "tpm-crb-device"
 #define TYPE_TPM_SPAPR  "tpm-spapr"
 #define TYPE_TPM_TIS_I2C"tpm-tis-i2c"
 
@@ -56,6 +57,8 @@ struct TPMIfClass {
 object_dynamic_cast(OBJECT(chr), TYPE_TPM_TIS_SYSBUS)
 #define TPM_IS_CRB(chr) \
 object_dynamic_cast(OBJECT(chr), TYPE_TPM_CRB)
+#define TPM_IS_CRB_SYSBUS(chr)  \
+object_dynamic_cast(OBJECT(chr), TYPE_TPM_CRB_SYSBUS)
 #define TPM_IS_SPAPR(chr)   \
 object_dynamic_cast(OBJECT(chr), TYPE_TPM_SPAPR)
 #define TPM_IS_TIS_I2C(chr)  \
diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
index ea331a20d1..f809137fc9 100644
--- a/hw/acpi/aml-build.c
+++ b/hw/acpi/aml-build.c
@@ -31,6 +31,7 @@
 #include "hw/pci/pci_bus.h"
 #include "hw/pci/pci_bridge.h"
 #include "qemu/cutils.h"
+#include "qom/object.h"
 
 static GArray *build_alloc_array(void)
 {
@@ -2218,7 +2219,7 @@ void build_tpm2(GArray *table_data, BIOSLinker *linker, GArray *tcpalog,
 {
 uint8_t start_method_params[12] = {};
 unsigned log_addr_offset;
-uint64_t control_area_start_address;
+uint64_t baseaddr, control_area_start_address;
 TPMIf *tpmif = tpm_find();
 uint32_t start_method;
 AcpiTable table = { .sig = "TPM2", .rev = 4,
@@ -2236,6 +2237,10 @@ void build_tpm2(GArray *table_data, BIOSLinker *linker, GArray *tcpalog,
 } else if (TPM_IS_CRB(tpmif)) {
 control_area_start_address = TPM_CRB_ADDR_CTRL;
 start_method = TPM2_START_METHOD_CRB;
+} else if (TPM_IS_CRB_SYSBUS(tpmif)) {
+baseaddr = object_property_get_uint(OBJECT(tpmif), "baseaddr", NULL);
+control_area_start_address = baseaddr + A_CRB_CTRL_REQ;
+start_method = TPM2_START_METHOD_CRB;
 } else {
 g_assert_not_reached();
 }
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 432148ef47..88e8b16103 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -2977,6 +2977,7 @@ static void virt_machine_class_init(ObjectClass *oc, void *data)
 machine_class_allow_dynamic_sysbus_dev(mc, TYPE_VFIO_PLATFORM);
 #ifdef CONFIG_TPM
 machine_class_allow_dynamic_sysbus_dev(mc, TYPE_TPM_TIS_SYSBUS);
+machine_class_allow_dynamic_sysbus_dev(mc, TYPE_TPM_CRB_SYSBUS);
 #endif
 mc->block_default_type = IF_VIRTIO;
 mc->no_cdrom = 1;
diff --git a/hw/core/sysbus-fdt.c b/hw/core/sysbus-fdt.c
index eebcd28f9a..9c783f88eb 100644
--- a/hw/core/sysbus-fdt.c
+++ b/hw/core/sysbus-fdt.c
@@ -493,6 +493,7 @@ static const BindingEntry bindings[] = {
 #endif
 #ifdef CONFIG_TPM
 TYPE_BINDING(TYPE_TPM_TIS_SYSBUS, add_tpm_tis_fdt_node),
+TYPE_BINDING(TYPE_TPM_CRB_SYSBUS, no_fdt_node),
 #endif
 TYPE_BINDING(TYPE_RAMFB_DEVICE, no_fdt_node),
 TYPE_BINDING("", NULL), /* last element */
diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c
index 9c536c52bc..eb59fb04ee 100644
--- a/hw/loongarch/virt.c
+++ b/hw/loongarch/virt.c
@@ -1194,6 +1194,7 @@ static void loongarch_class_init(ObjectCl

[PATCH 07/11] hw/arm/virt: add plug handler for TPM on SysBus

2023-07-12 Thread Joelle van Dyne
TPM needs to know its own base address in order to generate its DSDT
device entry.

Signed-off-by: Joelle van Dyne 
---
 hw/arm/virt.c | 37 +
 1 file changed, 37 insertions(+)

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 7d9dbc2663..432148ef47 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -2732,6 +2732,37 @@ static void virt_memory_plug(HotplugHandler *hotplug_dev,
  dev, &error_abort);
 }
 
+#ifdef CONFIG_TPM
+static void virt_tpm_plug(VirtMachineState *vms, TPMIf *tpmif)
+{
+PlatformBusDevice *pbus = PLATFORM_BUS_DEVICE(vms->platform_bus_dev);
+hwaddr pbus_base = vms->memmap[VIRT_PLATFORM_BUS].base;
+SysBusDevice *sbdev = SYS_BUS_DEVICE(tpmif);
+MemoryRegion *sbdev_mr;
+hwaddr tpm_base;
+uint64_t tpm_size;
+
+if (!sbdev || !object_dynamic_cast(OBJECT(sbdev), TYPE_SYS_BUS_DEVICE)) {
+return;
+}
+
+tpm_base = platform_bus_get_mmio_addr(pbus, sbdev, 0);
+assert(tpm_base != -1);
+
+tpm_base += pbus_base;
+
+sbdev_mr = sysbus_mmio_get_region(sbdev, 0);
+tpm_size = memory_region_size(sbdev_mr);
+
+if (object_property_find(OBJECT(sbdev), "baseaddr")) {
+object_property_set_uint(OBJECT(sbdev), "baseaddr", tpm_base, NULL);
+}
+if (object_property_find(OBJECT(sbdev), "size")) {
+object_property_set_uint(OBJECT(sbdev), "size", tpm_size, NULL);
+}
+}
+#endif
+
 static void virt_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev,
 DeviceState *dev, Error **errp)
 {
@@ -2803,6 +2834,12 @@ static void virt_machine_device_plug_cb(HotplugHandler *hotplug_dev,
 vms->virtio_iommu_bdf = pci_get_bdf(pdev);
 create_virtio_iommu_dt_bindings(vms);
 }
+
+#ifdef CONFIG_TPM
+if (object_dynamic_cast(OBJECT(dev), TYPE_TPM_IF)) {
+virt_tpm_plug(vms, TPM_IF(dev));
+}
+#endif
 }
 
 static void virt_dimm_unplug_request(HotplugHandler *hotplug_dev,
-- 
2.39.2 (Apple Git-143)




[PATCH 10/11] tpm_tis_sysbus: move DSDT AML generation to device

2023-07-12 Thread Joelle van Dyne
This reduces redundant ACPI table generation code across the
different machine types. Additionally, this will allow us to support
multiple TPM interfaces. Finally, this matches up with the TPM TIS
ISA implementation.

Ideally, we would be able to call `qbus_build_aml` and avoid any TPM
specific code in the ACPI table generation. However, currently we
still have to call `build_tpm2` anyways and it does not look like
most other ACPI devices support the `ACPI_DEV_AML_IF` interface.

Signed-off-by: Joelle van Dyne 
---
 hw/arm/virt-acpi-build.c  | 38 ++
 hw/loongarch/acpi-build.c | 38 ++
 hw/tpm/tpm_tis_sysbus.c   | 39 +++
 3 files changed, 43 insertions(+), 72 deletions(-)

diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
index 6b674231c2..49b2f19440 100644
--- a/hw/arm/virt-acpi-build.c
+++ b/hw/arm/virt-acpi-build.c
@@ -35,6 +35,7 @@
 #include "target/arm/cpu.h"
 #include "hw/acpi/acpi-defs.h"
 #include "hw/acpi/acpi.h"
+#include "hw/acpi/acpi_aml_interface.h"
 #include "hw/nvram/fw_cfg.h"
 #include "hw/acpi/bios-linker-loader.h"
 #include "hw/acpi/aml-build.h"
@@ -208,41 +209,6 @@ static void acpi_dsdt_add_gpio(Aml *scope, const MemMapEntry *gpio_memmap,
 aml_append(scope, dev);
 }
 
-#ifdef CONFIG_TPM
-static void acpi_dsdt_add_tpm(Aml *scope, VirtMachineState *vms)
-{
-PlatformBusDevice *pbus = PLATFORM_BUS_DEVICE(vms->platform_bus_dev);
-hwaddr pbus_base = vms->memmap[VIRT_PLATFORM_BUS].base;
-SysBusDevice *sbdev = SYS_BUS_DEVICE(tpm_find());
-MemoryRegion *sbdev_mr;
-hwaddr tpm_base;
-
-if (!sbdev) {
-return;
-}
-
-tpm_base = platform_bus_get_mmio_addr(pbus, sbdev, 0);
-assert(tpm_base != -1);
-
-tpm_base += pbus_base;
-
-sbdev_mr = sysbus_mmio_get_region(sbdev, 0);
-
-Aml *dev = aml_device("TPM0");
-aml_append(dev, aml_name_decl("_HID", aml_string("MSFT0101")));
-aml_append(dev, aml_name_decl("_STR", aml_string("TPM 2.0 Device")));
-aml_append(dev, aml_name_decl("_UID", aml_int(0)));
-
-Aml *crs = aml_resource_template();
-aml_append(crs,
-   aml_memory32_fixed(tpm_base,
-  (uint32_t)memory_region_size(sbdev_mr),
-  AML_READ_WRITE));
-aml_append(dev, aml_name_decl("_CRS", crs));
-aml_append(scope, dev);
-}
-#endif
-
 #define ID_MAPPING_ENTRY_SIZE 20
 #define SMMU_V3_ENTRY_SIZE 68
 #define ROOT_COMPLEX_ENTRY_SIZE 36
@@ -891,7 +857,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
 
 acpi_dsdt_add_power_button(scope);
 #ifdef CONFIG_TPM
-acpi_dsdt_add_tpm(scope, vms);
+call_dev_aml_func(DEVICE(tpm_find()), scope);
 #endif
 
 aml_append(dsdt, scope);
diff --git a/hw/loongarch/acpi-build.c b/hw/loongarch/acpi-build.c
index 0b62c3a2f7..4291e670c8 100644
--- a/hw/loongarch/acpi-build.c
+++ b/hw/loongarch/acpi-build.c
@@ -14,6 +14,7 @@
 #include "target/loongarch/cpu.h"
 #include "hw/acpi/acpi-defs.h"
 #include "hw/acpi/acpi.h"
+#include "hw/acpi/acpi_aml_interface.h"
 #include "hw/nvram/fw_cfg.h"
 #include "hw/acpi/bios-linker-loader.h"
 #include "migration/vmstate.h"
@@ -328,41 +329,6 @@ static void build_flash_aml(Aml *scope, LoongArchMachineState *lams)
 aml_append(scope, dev);
 }
 
-#ifdef CONFIG_TPM
-static void acpi_dsdt_add_tpm(Aml *scope, LoongArchMachineState *vms)
-{
-PlatformBusDevice *pbus = PLATFORM_BUS_DEVICE(vms->platform_bus_dev);
-hwaddr pbus_base = VIRT_PLATFORM_BUS_BASEADDRESS;
-SysBusDevice *sbdev = SYS_BUS_DEVICE(tpm_find());
-MemoryRegion *sbdev_mr;
-hwaddr tpm_base;
-
-if (!sbdev) {
-return;
-}
-
-tpm_base = platform_bus_get_mmio_addr(pbus, sbdev, 0);
-assert(tpm_base != -1);
-
-tpm_base += pbus_base;
-
-sbdev_mr = sysbus_mmio_get_region(sbdev, 0);
-
-Aml *dev = aml_device("TPM0");
-aml_append(dev, aml_name_decl("_HID", aml_string("MSFT0101")));
-aml_append(dev, aml_name_decl("_STR", aml_string("TPM 2.0 Device")));
-aml_append(dev, aml_name_decl("_UID", aml_int(0)));
-
-Aml *crs = aml_resource_template();
-aml_append(crs,
-   aml_memory32_fixed(tpm_base,
-  (uint32_t)memory_region_size(sbdev_mr),
-  AML_READ_WRITE));
-aml_append(dev, aml_name_decl("_CRS", crs));
-aml_append(scope, dev);
-}
-#endif
-
 /* build DSDT */
 static void
 build_dsdt(GArray *table_data, BIOSLinker *linker, MachineState *machine)
@@ -379,7 +345,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, MachineState *machine)
 build_la_ged_aml(dsdt, machine);
 build_flash_aml(dsdt, lams);
 #ifdef CONFIG_TPM
-acpi_dsdt_add_tpm(dsdt, lams);
+call_dev_aml_func(DEVICE(tpm_find()), dsdt);
 #endif
 /* System State Package */
 scope = aml_scope("\\");
diff --git a/hw/tpm/tpm_tis_sysbus.c b/hw/tpm/tpm_tis_sysbu

[PATCH 04/11] tpm_crb: use a single read-as-mem/write-as-mmio mapping

2023-07-12 Thread Joelle van Dyne
On Apple Silicon, when Windows performs a LDP on the CRB MMIO space,
the exception is not decoded by hardware and we cannot trap the MMIO
read. This led to the idea from @agraf to use the same mapping type as
ROM devices: namely that reads should be seen as memory type and
writes should trap as MMIO.

Once that was done, the second memory mapping of the command buffer
region became redundant and was removed.

A note about the removal of the read trap for `CRB_LOC_STATE`:
The only usage was to return the most up-to-date value for
`tpmEstablished`. However, `tpmEstablished` is only set when a
TPM2_HashStart operation is called which only exists for locality 4.
Indeed, the comment for the write handler of `CRB_LOC_CTRL` makes the
same argument for why it is not calling the backend to reset the
`tpmEstablished` bit. As this bit is unused, we do not need to worry
about updating it for reads.

Signed-off-by: Joelle van Dyne 
---
 hw/tpm/tpm_crb.h|   2 -
 hw/tpm/tpm_crb.c|   3 -
 hw/tpm/tpm_crb_common.c | 124 
 3 files changed, 63 insertions(+), 66 deletions(-)

diff --git a/hw/tpm/tpm_crb.h b/hw/tpm/tpm_crb.h
index da3a0cf256..7cdd37335f 100644
--- a/hw/tpm/tpm_crb.h
+++ b/hw/tpm/tpm_crb.h
@@ -26,9 +26,7 @@
 typedef struct TPMCRBState {
 TPMBackend *tpmbe;
 TPMBackendCmd cmd;
-uint32_t regs[TPM_CRB_R_MAX];
 MemoryRegion mmio;
-MemoryRegion cmdmem;
 
 size_t be_buffer_size;
 
diff --git a/hw/tpm/tpm_crb.c b/hw/tpm/tpm_crb.c
index 598c3e0161..07c6868d8d 100644
--- a/hw/tpm/tpm_crb.c
+++ b/hw/tpm/tpm_crb.c
@@ -68,7 +68,6 @@ static const VMStateDescription vmstate_tpm_crb_none = {
 .name = "tpm-crb",
 .pre_save = tpm_crb_none_pre_save,
 .fields = (VMStateField[]) {
-VMSTATE_UINT32_ARRAY(state.regs, CRBState, TPM_CRB_R_MAX),
 VMSTATE_END_OF_LIST(),
 }
 };
@@ -103,8 +102,6 @@ static void tpm_crb_none_realize(DeviceState *dev, Error **errp)
 
 memory_region_add_subregion(get_system_memory(),
 TPM_CRB_ADDR_BASE, &s->state.mmio);
-memory_region_add_subregion(get_system_memory(),
-TPM_CRB_ADDR_BASE + sizeof(s->state.regs), &s->state.cmdmem);
 
 if (s->state.ppi_enabled) {
 memory_region_add_subregion(get_system_memory(),
diff --git a/hw/tpm/tpm_crb_common.c b/hw/tpm/tpm_crb_common.c
index e56e910670..f3e40095e3 100644
--- a/hw/tpm/tpm_crb_common.c
+++ b/hw/tpm/tpm_crb_common.c
@@ -33,31 +33,12 @@
 #include "qom/object.h"
 #include "tpm_crb.h"
 
-static uint64_t tpm_crb_mmio_read(void *opaque, hwaddr addr,
-  unsigned size)
+static uint8_t tpm_crb_get_active_locty(TPMCRBState *s, uint32_t *regs)
 {
-TPMCRBState *s = opaque;
-void *regs = (void *)&s->regs + (addr & ~3);
-unsigned offset = addr & 3;
-uint32_t val = *(uint32_t *)regs >> (8 * offset);
-
-switch (addr) {
-case A_CRB_LOC_STATE:
-val |= !tpm_backend_get_tpm_established_flag(s->tpmbe);
-break;
-}
-
-trace_tpm_crb_mmio_read(addr, size, val);
-
-return val;
-}
-
-static uint8_t tpm_crb_get_active_locty(TPMCRBState *s)
-{
-if (!ARRAY_FIELD_EX32(s->regs, CRB_LOC_STATE, locAssigned)) {
+if (!ARRAY_FIELD_EX32(regs, CRB_LOC_STATE, locAssigned)) {
 return TPM_CRB_NO_LOCALITY;
 }
-return ARRAY_FIELD_EX32(s->regs, CRB_LOC_STATE, activeLocality);
+return ARRAY_FIELD_EX32(regs, CRB_LOC_STATE, activeLocality);
 }
 
 static void tpm_crb_mmio_write(void *opaque, hwaddr addr,
@@ -65,35 +46,47 @@ static void tpm_crb_mmio_write(void *opaque, hwaddr addr,
 {
 TPMCRBState *s = opaque;
 uint8_t locty =  addr >> 12;
+uint32_t *regs;
+void *mem;
 
 trace_tpm_crb_mmio_write(addr, size, val);
+regs = memory_region_get_ram_ptr(&s->mmio);
+mem = &regs[R_CRB_DATA_BUFFER];
+assert(regs);
+
+if (addr >= A_CRB_DATA_BUFFER) {
+assert(addr + size <= TPM_CRB_ADDR_SIZE);
+assert(size <= sizeof(val));
+memcpy(mem + addr - A_CRB_DATA_BUFFER, &val, size);
+memory_region_set_dirty(&s->mmio, addr, size);
+return;
+}
 
 switch (addr) {
 case A_CRB_CTRL_REQ:
 switch (val) {
 case CRB_CTRL_REQ_CMD_READY:
-ARRAY_FIELD_DP32(s->regs, CRB_CTRL_STS,
+ARRAY_FIELD_DP32(regs, CRB_CTRL_STS,
  tpmIdle, 0);
 break;
 case CRB_CTRL_REQ_GO_IDLE:
-ARRAY_FIELD_DP32(s->regs, CRB_CTRL_STS,
+ARRAY_FIELD_DP32(regs, CRB_CTRL_STS,
  tpmIdle, 1);
 break;
 }
 break;
 case A_CRB_CTRL_CANCEL:
 if (val == CRB_CANCEL_INVOKE &&
-s->regs[R_CRB_CTRL_START] & CRB_START_INVOKE) {
+regs[R_CRB_CTRL_START] & CRB_START_INVOKE) {
 tpm_backend_cancel_cmd(s->tpmbe);
 }
 break;
 case A_CRB_CTRL_START:
 if (val == CRB_START_INVOKE &&
-!(s->regs[R_CRB_CTRL_STAR

[PATCH 03/11] tpm_ppi: refactor memory space initialization

2023-07-12 Thread Joelle van Dyne
Instead of calling `memory_region_add_subregion` directly, we defer to
the caller to do it. This allows us to re-use the code for a SysBus
device.

Signed-off-by: Joelle van Dyne 
---
 hw/tpm/tpm_ppi.h| 10 +++---
 hw/tpm/tpm_crb.c|  4 ++--
 hw/tpm/tpm_crb_common.c |  3 +++
 hw/tpm/tpm_ppi.c|  5 +
 hw/tpm/tpm_tis_isa.c|  5 +++--
 5 files changed, 12 insertions(+), 15 deletions(-)

diff --git a/hw/tpm/tpm_ppi.h b/hw/tpm/tpm_ppi.h
index bf5d4a300f..30863c6438 100644
--- a/hw/tpm/tpm_ppi.h
+++ b/hw/tpm/tpm_ppi.h
@@ -20,17 +20,13 @@ typedef struct TPMPPI {
 } TPMPPI;
 
 /**
- * tpm_ppi_init:
+ * tpm_ppi_init_memory:
  * @tpmppi: a TPMPPI
- * @m: the address-space / MemoryRegion to use
- * @addr: the address of the PPI region
  * @obj: the owner object
  *
- * Register the TPM PPI memory region at @addr on the given address
- * space for the object @obj.
+ * Creates the TPM PPI memory region.
  **/
-void tpm_ppi_init(TPMPPI *tpmppi, MemoryRegion *m,
-  hwaddr addr, Object *obj);
+void tpm_ppi_init_memory(TPMPPI *tpmppi, Object *obj);
 
 /**
  * tpm_ppi_reset:
diff --git a/hw/tpm/tpm_crb.c b/hw/tpm/tpm_crb.c
index 3ef4977fb5..598c3e0161 100644
--- a/hw/tpm/tpm_crb.c
+++ b/hw/tpm/tpm_crb.c
@@ -107,8 +107,8 @@ static void tpm_crb_none_realize(DeviceState *dev, Error **errp)
 TPM_CRB_ADDR_BASE + sizeof(s->state.regs), &s->state.cmdmem);
 
 if (s->state.ppi_enabled) {
-tpm_ppi_init(&s->state.ppi, get_system_memory(),
- TPM_PPI_ADDR_BASE, OBJECT(s));
+memory_region_add_subregion(get_system_memory(),
+TPM_PPI_ADDR_BASE, &s->state.ppi.ram);
 }
 
 if (xen_enabled()) {
diff --git a/hw/tpm/tpm_crb_common.c b/hw/tpm/tpm_crb_common.c
index 228e2d0faf..e56e910670 100644
--- a/hw/tpm/tpm_crb_common.c
+++ b/hw/tpm/tpm_crb_common.c
@@ -216,4 +216,7 @@ void tpm_crb_init_memory(Object *obj, TPMCRBState *s, Error **errp)
 "tpm-crb-mmio", sizeof(s->regs));
 memory_region_init_ram(&s->cmdmem, obj,
 "tpm-crb-cmd", CRB_CTRL_CMD_SIZE, errp);
+if (s->ppi_enabled) {
+tpm_ppi_init_memory(&s->ppi, obj);
+}
 }
diff --git a/hw/tpm/tpm_ppi.c b/hw/tpm/tpm_ppi.c
index 7f74e26ec6..40cab59afa 100644
--- a/hw/tpm/tpm_ppi.c
+++ b/hw/tpm/tpm_ppi.c
@@ -44,14 +44,11 @@ void tpm_ppi_reset(TPMPPI *tpmppi)
 }
 }
 
-void tpm_ppi_init(TPMPPI *tpmppi, MemoryRegion *m,
-  hwaddr addr, Object *obj)
+void tpm_ppi_init_memory(TPMPPI *tpmppi, Object *obj)
 {
 tpmppi->buf = qemu_memalign(qemu_real_host_page_size(),
 HOST_PAGE_ALIGN(TPM_PPI_ADDR_SIZE));
 memory_region_init_ram_device_ptr(&tpmppi->ram, obj, "tpm-ppi",
   TPM_PPI_ADDR_SIZE, tpmppi->buf);
 vmstate_register_ram(&tpmppi->ram, DEVICE(obj));
-
-memory_region_add_subregion(m, addr, &tpmppi->ram);
 }
diff --git a/hw/tpm/tpm_tis_isa.c b/hw/tpm/tpm_tis_isa.c
index 91e3792248..7cd7415f30 100644
--- a/hw/tpm/tpm_tis_isa.c
+++ b/hw/tpm/tpm_tis_isa.c
@@ -134,8 +134,9 @@ static void tpm_tis_isa_realizefn(DeviceState *dev, Error **errp)
 TPM_TIS_ADDR_BASE, &s->mmio);
 
 if (s->ppi_enabled) {
-tpm_ppi_init(&s->ppi, isa_address_space(ISA_DEVICE(dev)),
- TPM_PPI_ADDR_BASE, OBJECT(dev));
+tpm_ppi_init_memory(&s->ppi, OBJECT(dev));
+memory_region_add_subregion(isa_address_space(ISA_DEVICE(dev)),
+TPM_PPI_ADDR_BASE, &s->ppi.ram);
 }
 }
 
-- 
2.39.2 (Apple Git-143)




[PATCH 08/11] hw/loongarch/virt: add plug handler for TPM on SysBus

2023-07-12 Thread Joelle van Dyne
TPM needs to know its own base address in order to generate its DSDT
device entry.

Signed-off-by: Joelle van Dyne 
---
 hw/loongarch/virt.c | 37 +
 1 file changed, 37 insertions(+)

diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c
index e19b042ce8..9c536c52bc 100644
--- a/hw/loongarch/virt.c
+++ b/hw/loongarch/virt.c
@@ -1040,6 +1040,37 @@ static void virt_mem_plug(HotplugHandler *hotplug_dev,
  dev, &error_abort);
 }
 
+#ifdef CONFIG_TPM
+static void virt_tpm_plug(LoongArchMachineState *lams, TPMIf *tpmif)
+{
+PlatformBusDevice *pbus = PLATFORM_BUS_DEVICE(lams->platform_bus_dev);
+hwaddr pbus_base = VIRT_PLATFORM_BUS_BASEADDRESS;
+SysBusDevice *sbdev = SYS_BUS_DEVICE(tpmif);
+MemoryRegion *sbdev_mr;
+hwaddr tpm_base;
+uint64_t tpm_size;
+
+if (!sbdev || !object_dynamic_cast(OBJECT(sbdev), TYPE_SYS_BUS_DEVICE)) {
+return;
+}
+
+tpm_base = platform_bus_get_mmio_addr(pbus, sbdev, 0);
+assert(tpm_base != -1);
+
+tpm_base += pbus_base;
+
+sbdev_mr = sysbus_mmio_get_region(sbdev, 0);
+tpm_size = memory_region_size(sbdev_mr);
+
+if (object_property_find(OBJECT(sbdev), "baseaddr")) {
+object_property_set_uint(OBJECT(sbdev), "baseaddr", tpm_base, NULL);
+}
+if (object_property_find(OBJECT(sbdev), "size")) {
+object_property_set_uint(OBJECT(sbdev), "size", tpm_size, NULL);
+}
+}
+#endif
+
 static void loongarch_machine_device_plug_cb(HotplugHandler *hotplug_dev,
 DeviceState *dev, Error **errp)
 {
@@ -1054,6 +1085,12 @@ static void loongarch_machine_device_plug_cb(HotplugHandler *hotplug_dev,
 } else if (memhp_type_supported(dev)) {
 virt_mem_plug(hotplug_dev, dev, errp);
 }
+
+#ifdef CONFIG_TPM
+if (object_dynamic_cast(OBJECT(dev), TYPE_TPM_IF)) {
+virt_tpm_plug(lams, TPM_IF(dev));
+}
+#endif
 }
 
 static HotplugHandler *virt_machine_get_hotplug_handler(MachineState *machine,
-- 
2.39.2 (Apple Git-143)




[PATCH 06/11] tpm_crb: move ACPI table building to device interface

2023-07-12 Thread Joelle van Dyne
This logic is similar to TPM TIS ISA device.

Signed-off-by: Joelle van Dyne 
---
 hw/i386/acpi-build.c | 23 ---
 hw/tpm/tpm_crb.c | 28 
 2 files changed, 28 insertions(+), 23 deletions(-)

diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 9c74fa17ad..b767df39df 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -1441,9 +1441,6 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
 uint32_t nr_mem = machine->ram_slots;
 int root_bus_limit = 0xFF;
 PCIBus *bus = NULL;
-#ifdef CONFIG_TPM
-TPMIf *tpm = tpm_find();
-#endif
 bool cxl_present = false;
 int i;
 VMBusBridge *vmbus_bridge = vmbus_bridge_find();
@@ -1793,26 +1790,6 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
 }
 }
 
-#ifdef CONFIG_TPM
-if (TPM_IS_CRB(tpm)) {
-dev = aml_device("TPM");
-aml_append(dev, aml_name_decl("_HID", aml_string("MSFT0101")));
-aml_append(dev, aml_name_decl("_STR",
-  aml_string("TPM 2.0 Device")));
-crs = aml_resource_template();
-aml_append(crs, aml_memory32_fixed(TPM_CRB_ADDR_BASE,
-   TPM_CRB_ADDR_SIZE, AML_READ_WRITE));
-aml_append(dev, aml_name_decl("_CRS", crs));
-
-aml_append(dev, aml_name_decl("_STA", aml_int(0xf)));
-aml_append(dev, aml_name_decl("_UID", aml_int(1)));
-
-tpm_build_ppi_acpi(tpm, dev);
-
-aml_append(sb_scope, dev);
-}
-#endif
-
 if (pcms->sgx_epc.size != 0) {
 uint64_t epc_base = pcms->sgx_epc.base;
 uint64_t epc_size = pcms->sgx_epc.size;
diff --git a/hw/tpm/tpm_crb.c b/hw/tpm/tpm_crb.c
index 6144081d30..14feb9857f 100644
--- a/hw/tpm/tpm_crb.c
+++ b/hw/tpm/tpm_crb.c
@@ -19,6 +19,8 @@
 #include "qemu/module.h"
 #include "qapi/error.h"
 #include "exec/address-spaces.h"
+#include "hw/acpi/acpi_aml_interface.h"
+#include "hw/acpi/tpm.h"
 #include "hw/qdev-properties.h"
 #include "hw/pci/pci_ids.h"
 #include "hw/acpi/tpm.h"
@@ -116,10 +118,34 @@ static void tpm_crb_isa_realize(DeviceState *dev, Error **errp)
 }
 }
 
+static void build_tpm_crb_isa_aml(AcpiDevAmlIf *adev, Aml *scope)
+{
+Aml *dev, *crs;
+CRBState *s = CRB(adev);
+TPMIf *ti = TPM_IF(s);
+
+dev = aml_device("TPM");
+if (tpm_crb_isa_get_version(ti) == TPM_VERSION_2_0) {
+aml_append(dev, aml_name_decl("_HID", aml_string("MSFT0101")));
+aml_append(dev, aml_name_decl("_STR", aml_string("TPM 2.0 Device")));
+} else {
+aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0C31")));
+}
+aml_append(dev, aml_name_decl("_UID", aml_int(1)));
+aml_append(dev, aml_name_decl("_STA", aml_int(0xF)));
+crs = aml_resource_template();
+aml_append(crs, aml_memory32_fixed(TPM_CRB_ADDR_BASE, TPM_CRB_ADDR_SIZE,
+  AML_READ_WRITE));
+aml_append(dev, aml_name_decl("_CRS", crs));
+tpm_build_ppi_acpi(ti, dev);
+aml_append(scope, dev);
+}
+
 static void tpm_crb_isa_class_init(ObjectClass *klass, void *data)
 {
 DeviceClass *dc = DEVICE_CLASS(klass);
 TPMIfClass *tc = TPM_IF_CLASS(klass);
+AcpiDevAmlIfClass *adevc = ACPI_DEV_AML_IF_CLASS(klass);
 
 dc->realize = tpm_crb_isa_realize;
 device_class_set_props(dc, tpm_crb_isa_properties);
@@ -128,6 +154,7 @@ static void tpm_crb_isa_class_init(ObjectClass *klass, void *data)
 tc->model = TPM_MODEL_TPM_CRB;
 tc->get_version = tpm_crb_isa_get_version;
 tc->request_completed = tpm_crb_isa_request_completed;
+adevc->build_dev_aml = build_tpm_crb_isa_aml;
 
 set_bit(DEVICE_CATEGORY_MISC, dc->categories);
 }
@@ -139,6 +166,7 @@ static const TypeInfo tpm_crb_isa_info = {
 .class_init  = tpm_crb_isa_class_init,
 .interfaces = (InterfaceInfo[]) {
 { TYPE_TPM_IF },
+{ TYPE_ACPI_DEV_AML_IF },
 { }
 }
 };
-- 
2.39.2 (Apple Git-143)




[PATCH 02/11] tpm_crb: CTRL_RSP_ADDR is 64-bits wide

2023-07-12 Thread Joelle van Dyne
The register is actually 64 bits wide. To make this clearer than the
specification does, we define two 32-bit registers, CTRL_RSP_LADDR and
CTRL_RSP_HADDR, matching the CTRL_CMD_* naming. This deviates from the
spec but is much clearer.

Previously, the only CRB device uses a fixed system address so this
was not an issue. However, once we support SysBus CRB device, the
address can be anywhere in 64-bit space.

Signed-off-by: Joelle van Dyne 
---
 include/hw/acpi/tpm.h  | 3 ++-
 hw/tpm/tpm_crb_common.c| 3 ++-
 tests/qtest/tpm-crb-test.c | 2 +-
 tests/qtest/tpm-util.c | 2 +-
 4 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/include/hw/acpi/tpm.h b/include/hw/acpi/tpm.h
index 579c45f5ba..f60bfe2789 100644
--- a/include/hw/acpi/tpm.h
+++ b/include/hw/acpi/tpm.h
@@ -174,7 +174,8 @@ REG32(CRB_CTRL_CMD_SIZE, 0x58)
 REG32(CRB_CTRL_CMD_LADDR, 0x5C)
 REG32(CRB_CTRL_CMD_HADDR, 0x60)
 REG32(CRB_CTRL_RSP_SIZE, 0x64)
-REG32(CRB_CTRL_RSP_ADDR, 0x68)
+REG32(CRB_CTRL_RSP_LADDR, 0x68)
+REG32(CRB_CTRL_RSP_HADDR, 0x6C)
 REG32(CRB_DATA_BUFFER, 0x80)
 
 #define TPM_CRB_ADDR_BASE   0xFED4
diff --git a/hw/tpm/tpm_crb_common.c b/hw/tpm/tpm_crb_common.c
index 4c173affb6..228e2d0faf 100644
--- a/hw/tpm/tpm_crb_common.c
+++ b/hw/tpm/tpm_crb_common.c
@@ -199,7 +199,8 @@ void tpm_crb_reset(TPMCRBState *s, uint64_t baseaddr)
 s->regs[R_CRB_CTRL_CMD_LADDR] = (uint32_t)baseaddr;
 s->regs[R_CRB_CTRL_CMD_HADDR] = (uint32_t)(baseaddr >> 32);
 s->regs[R_CRB_CTRL_RSP_SIZE] = CRB_CTRL_CMD_SIZE;
-s->regs[R_CRB_CTRL_RSP_ADDR] = (uint32_t)baseaddr;
+s->regs[R_CRB_CTRL_RSP_LADDR] = (uint32_t)baseaddr;
+s->regs[R_CRB_CTRL_RSP_HADDR] = (uint32_t)(baseaddr >> 32);
 
 s->be_buffer_size = MIN(tpm_backend_get_buffer_size(s->tpmbe),
 CRB_CTRL_CMD_SIZE);
diff --git a/tests/qtest/tpm-crb-test.c b/tests/qtest/tpm-crb-test.c
index 396ae3f91c..9d30fe8293 100644
--- a/tests/qtest/tpm-crb-test.c
+++ b/tests/qtest/tpm-crb-test.c
@@ -28,7 +28,7 @@ static void tpm_crb_test(const void *data)
 uint32_t csize = readl(TPM_CRB_ADDR_BASE + A_CRB_CTRL_CMD_SIZE);
 uint64_t caddr = readq(TPM_CRB_ADDR_BASE + A_CRB_CTRL_CMD_LADDR);
 uint32_t rsize = readl(TPM_CRB_ADDR_BASE + A_CRB_CTRL_RSP_SIZE);
-uint64_t raddr = readq(TPM_CRB_ADDR_BASE + A_CRB_CTRL_RSP_ADDR);
+uint64_t raddr = readq(TPM_CRB_ADDR_BASE + A_CRB_CTRL_RSP_LADDR);
 uint8_t locstate = readb(TPM_CRB_ADDR_BASE + A_CRB_LOC_STATE);
 uint32_t locctrl = readl(TPM_CRB_ADDR_BASE + A_CRB_LOC_CTRL);
 uint32_t locsts = readl(TPM_CRB_ADDR_BASE + A_CRB_LOC_STS);
diff --git a/tests/qtest/tpm-util.c b/tests/qtest/tpm-util.c
index 1c0319e6e7..dd02057fc0 100644
--- a/tests/qtest/tpm-util.c
+++ b/tests/qtest/tpm-util.c
@@ -25,7 +25,7 @@ void tpm_util_crb_transfer(QTestState *s,
unsigned char *rsp, size_t rsp_size)
 {
 uint64_t caddr = qtest_readq(s, TPM_CRB_ADDR_BASE + A_CRB_CTRL_CMD_LADDR);
-uint64_t raddr = qtest_readq(s, TPM_CRB_ADDR_BASE + A_CRB_CTRL_RSP_ADDR);
+uint64_t raddr = qtest_readq(s, TPM_CRB_ADDR_BASE + A_CRB_CTRL_RSP_LADDR);
 
 qtest_writeb(s, TPM_CRB_ADDR_BASE + A_CRB_LOC_CTRL, 1);
 
-- 
2.39.2 (Apple Git-143)




[PATCH 05/11] tpm_crb: use the ISA bus

2023-07-12 Thread Joelle van Dyne
Since this device is gated to only build for targets with the PC
configuration, we should use the ISA bus like with TPM TIS.

Signed-off-by: Joelle van Dyne 
---
 hw/tpm/tpm_crb.c | 52 
 hw/tpm/Kconfig   |  2 +-
 2 files changed, 27 insertions(+), 27 deletions(-)

diff --git a/hw/tpm/tpm_crb.c b/hw/tpm/tpm_crb.c
index 07c6868d8d..6144081d30 100644
--- a/hw/tpm/tpm_crb.c
+++ b/hw/tpm/tpm_crb.c
@@ -22,6 +22,7 @@
 #include "hw/qdev-properties.h"
 #include "hw/pci/pci_ids.h"
 #include "hw/acpi/tpm.h"
+#include "hw/isa/isa.h"
 #include "migration/vmstate.h"
 #include "sysemu/tpm_backend.h"
 #include "sysemu/tpm_util.h"
@@ -34,7 +35,7 @@
 #include "tpm_crb.h"
 
 struct CRBState {
-DeviceState parent_obj;
+ISADevice parent_obj;
 
 TPMCRBState state;
 };
@@ -43,49 +44,49 @@ typedef struct CRBState CRBState;
 DECLARE_INSTANCE_CHECKER(CRBState, CRB,
  TYPE_TPM_CRB)
 
-static void tpm_crb_none_request_completed(TPMIf *ti, int ret)
+static void tpm_crb_isa_request_completed(TPMIf *ti, int ret)
 {
 CRBState *s = CRB(ti);
 
 tpm_crb_request_completed(&s->state, ret);
 }
 
-static enum TPMVersion tpm_crb_none_get_version(TPMIf *ti)
+static enum TPMVersion tpm_crb_isa_get_version(TPMIf *ti)
 {
 CRBState *s = CRB(ti);
 
 return tpm_crb_get_version(&s->state);
 }
 
-static int tpm_crb_none_pre_save(void *opaque)
+static int tpm_crb_isa_pre_save(void *opaque)
 {
 CRBState *s = opaque;
 
 return tpm_crb_pre_save(&s->state);
 }
 
-static const VMStateDescription vmstate_tpm_crb_none = {
+static const VMStateDescription vmstate_tpm_crb_isa = {
 .name = "tpm-crb",
-.pre_save = tpm_crb_none_pre_save,
+.pre_save = tpm_crb_isa_pre_save,
 .fields = (VMStateField[]) {
 VMSTATE_END_OF_LIST(),
 }
 };
 
-static Property tpm_crb_none_properties[] = {
+static Property tpm_crb_isa_properties[] = {
 DEFINE_PROP_TPMBE("tpmdev", CRBState, state.tpmbe),
 DEFINE_PROP_BOOL("ppi", CRBState, state.ppi_enabled, true),
 DEFINE_PROP_END_OF_LIST(),
 };
 
-static void tpm_crb_none_reset(void *dev)
+static void tpm_crb_isa_reset(void *dev)
 {
 CRBState *s = CRB(dev);
 
 return tpm_crb_reset(&s->state, TPM_CRB_ADDR_BASE);
 }
 
-static void tpm_crb_none_realize(DeviceState *dev, Error **errp)
+static void tpm_crb_isa_realize(DeviceState *dev, Error **errp)
 {
 CRBState *s = CRB(dev);
 
@@ -100,52 +101,51 @@ static void tpm_crb_none_realize(DeviceState *dev, Error **errp)
 
 tpm_crb_init_memory(OBJECT(s), &s->state, errp);
 
-memory_region_add_subregion(get_system_memory(),
+memory_region_add_subregion(isa_address_space(ISA_DEVICE(dev)),
 TPM_CRB_ADDR_BASE, &s->state.mmio);
 
 if (s->state.ppi_enabled) {
-memory_region_add_subregion(get_system_memory(),
+memory_region_add_subregion(isa_address_space(ISA_DEVICE(dev)),
 TPM_PPI_ADDR_BASE, &s->state.ppi.ram);
 }
 
 if (xen_enabled()) {
-tpm_crb_none_reset(dev);
+tpm_crb_isa_reset(dev);
 } else {
-qemu_register_reset(tpm_crb_none_reset, dev);
+qemu_register_reset(tpm_crb_isa_reset, dev);
 }
 }
 
-static void tpm_crb_none_class_init(ObjectClass *klass, void *data)
+static void tpm_crb_isa_class_init(ObjectClass *klass, void *data)
 {
 DeviceClass *dc = DEVICE_CLASS(klass);
 TPMIfClass *tc = TPM_IF_CLASS(klass);
 
-dc->realize = tpm_crb_none_realize;
-device_class_set_props(dc, tpm_crb_none_properties);
-dc->vmsd  = &vmstate_tpm_crb_none;
+dc->realize = tpm_crb_isa_realize;
+device_class_set_props(dc, tpm_crb_isa_properties);
+dc->vmsd  = &vmstate_tpm_crb_isa;
 dc->user_creatable = true;
 tc->model = TPM_MODEL_TPM_CRB;
-tc->get_version = tpm_crb_none_get_version;
-tc->request_completed = tpm_crb_none_request_completed;
+tc->get_version = tpm_crb_isa_get_version;
+tc->request_completed = tpm_crb_isa_request_completed;
 
 set_bit(DEVICE_CATEGORY_MISC, dc->categories);
 }
 
-static const TypeInfo tpm_crb_none_info = {
+static const TypeInfo tpm_crb_isa_info = {
 .name = TYPE_TPM_CRB,
-/* could be TYPE_SYS_BUS_DEVICE (or LPC etc) */
-.parent = TYPE_DEVICE,
+.parent = TYPE_ISA_DEVICE,
 .instance_size = sizeof(CRBState),
-.class_init  = tpm_crb_none_class_init,
+.class_init  = tpm_crb_isa_class_init,
 .interfaces = (InterfaceInfo[]) {
 { TYPE_TPM_IF },
 { }
 }
 };
 
-static void tpm_crb_none_register(void)
+static void tpm_crb_isa_register(void)
 {
-type_register_static(&tpm_crb_none_info);
+type_register_static(&tpm_crb_isa_info);
 }
 
-type_init(tpm_crb_none_register)
+type_init(tpm_crb_isa_register)
diff --git a/hw/tpm/Kconfig b/hw/tpm/Kconfig
index a46663288c..1fd73fe617 100644
--- a/hw/tpm/Kconfig
+++ b/hw/tpm/Kconfig
@@ -22,7 +22,7 @@ config TPM_TIS
 
 config TPM_CRB
 bool
-depends on TPM && PC
+depends on TPM && ISA_BUS
 

[PATCH 09/11] tpm_tis_sysbus: fix crash when PPI is enabled

2023-07-12 Thread Joelle van Dyne
If 'ppi' property is set, then `tpm_ppi_reset` is called on reset
which SEGFAULTs because `tpmppi->buf` is not allocated.

Signed-off-by: Joelle van Dyne 
---
 hw/tpm/tpm_tis_sysbus.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/hw/tpm/tpm_tis_sysbus.c b/hw/tpm/tpm_tis_sysbus.c
index 45e63efd63..1014d5d993 100644
--- a/hw/tpm/tpm_tis_sysbus.c
+++ b/hw/tpm/tpm_tis_sysbus.c
@@ -124,6 +124,10 @@ static void tpm_tis_sysbus_realizefn(DeviceState *dev, Error **errp)
 error_setg(errp, "'tpmdev' property is required");
 return;
 }
+
+if (s->ppi_enabled) {
+sysbus_init_mmio(SYS_BUS_DEVICE(dev), &s->ppi.ram);
+}
 }
 
 static void tpm_tis_sysbus_class_init(ObjectClass *klass, void *data)
-- 
2.39.2 (Apple Git-143)




[PATCH 00/11] tpm: introduce TPM CRB SysBus device

2023-07-12 Thread Joelle van Dyne
The impetus for this patch set is to get TPM 2.0 working on Windows 11 ARM64.
Windows' tpm.sys does not seem to work on a TPM TIS device (as verified with
VMWare's implementation). However, the current TPM CRB device uses a fixed
system bus address that is reserved for RAM in ARM64 Virt machines.

In the process of adding the TPM CRB SysBus device, we also went ahead and
cleaned up some of the existing TPM hardware code and fixed some bugs. We used
the TPM TIS devices as a template for the TPM CRB devices and refactored out
common code. We moved the ACPI DSDT generation to the device in order to handle
dynamic base address requirements as well as reduce redundant code in different
machine ACPI generation. We also changed the tpm_crb device to use the ISA bus
instead of depending on the default system bus as the device only was built for
the PC configuration.

Another change is that the TPM CRB registers are now mapped in the same way that
the pflash ROM devices are mapped. It is a memory region whose writes are
trapped as MMIO accesses. This was needed because Apple Silicon does not decode
the LDP instructions that cause page faults. @agraf suggested that we do this to
avoid having to do AArch64 instruction decoding in the HVF fault handler.

Unfortunately, it seems like the LDP fault still happens on HVF but the issue
seems to be in the HVF backend which needs to be fixed in a separate patch.

One last thing that's needed to get Windows 11 to recognize the TPM 2.0 device
is for the OVMF firmware to setup the TPM device. Currently, OVMF for ARM64 Virt
only recognizes the TPM TIS device through a FDT entry. A workaround is to
falsely identify the TPM CRB device as a TPM TIS device in the FDT node but this
causes issues for Linux. A proper fix would involve adding an ACPI device driver
in OVMF.

Joelle van Dyne (11):
  tpm_crb: refactor common code
  tpm_crb: CTRL_RSP_ADDR is 64-bits wide
  tpm_ppi: refactor memory space initialization
  tpm_crb: use a single read-as-mem/write-as-mmio mapping
  tpm_crb: use the ISA bus
  tpm_crb: move ACPI table building to device interface
  hw/arm/virt: add plug handler for TPM on SysBus
  hw/loongarch/virt: add plug handler for TPM on SysBus
  tpm_tis_sysbus: fix crash when PPI is enabled
  tpm_tis_sysbus: move DSDT AML generation to device
  tpm_crb_sysbus: introduce TPM CRB SysBus device

 docs/specs/tpm.rst  |   2 +
 hw/tpm/tpm_crb.h|  74 +
 hw/tpm/tpm_ppi.h|  10 +-
 include/hw/acpi/aml-build.h |   1 +
 include/hw/acpi/tpm.h   |   3 +-
 include/sysemu/tpm.h|   3 +
 hw/acpi/aml-build.c |   7 +-
 hw/arm/virt-acpi-build.c|  38 +
 hw/arm/virt.c   |  38 +
 hw/core/sysbus-fdt.c|   1 +
 hw/i386/acpi-build.c|  23 ---
 hw/loongarch/acpi-build.c   |  38 +
 hw/loongarch/virt.c |  38 +
 hw/riscv/virt.c |   1 +
 hw/tpm/tpm_crb.c| 307 
 hw/tpm/tpm_crb_common.c | 224 ++
 hw/tpm/tpm_crb_sysbus.c | 178 +
 hw/tpm/tpm_ppi.c|   5 +-
 hw/tpm/tpm_tis_isa.c|   5 +-
 hw/tpm/tpm_tis_sysbus.c |  43 +
 tests/qtest/tpm-crb-test.c  |   2 +-
 tests/qtest/tpm-util.c  |   2 +-
 hw/arm/Kconfig  |   1 +
 hw/riscv/Kconfig|   1 +
 hw/tpm/Kconfig  |   7 +-
 hw/tpm/meson.build  |   3 +
 hw/tpm/trace-events |   2 +-
 27 files changed, 703 insertions(+), 354 deletions(-)
 create mode 100644 hw/tpm/tpm_crb.h
 create mode 100644 hw/tpm/tpm_crb_common.c
 create mode 100644 hw/tpm/tpm_crb_sysbus.c

-- 
2.39.2 (Apple Git-143)




Re: [PATCH] virtio-gpu-udmabuf: replacing scanout_width/height with backing_width/height

2023-07-12 Thread Kim, Dongwon



On 7/10/2023 4:57 AM, Marc-André Lureau wrote:

Hi

On Thu, Jul 6, 2023 at 3:10 AM Dongwon Kim  wrote:

'backing_width' and 'backing_height' are commonly used to indicate
the size
of the whole backing region so it makes sense to use those terms for
VGPUDMABuf as well in place of 'scanout_width' and 'scanout_height'.

Cc: Gerd Hoffmann 
Cc: Marc-André Lureau 
Cc: Vivek Kasireddy 
Signed-off-by: Dongwon Kim 
---
 hw/display/virtio-gpu-udmabuf.c | 8 
 include/ui/console.h            | 4 ++--
 ui/dbus-listener.c              | 4 ++--
 ui/egl-helpers.c                | 4 ++--
 ui/gtk-egl.c                    | 4 ++--
 ui/gtk-gl-area.c                | 4 ++--
 6 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/hw/display/virtio-gpu-udmabuf.c b/hw/display/virtio-gpu-udmabuf.c
index ef1a740de5..920d457d4a 100644
--- a/hw/display/virtio-gpu-udmabuf.c
+++ b/hw/display/virtio-gpu-udmabuf.c
@@ -186,8 +186,8 @@ static VGPUDMABuf
     dmabuf->buf.stride = fb->stride;
     dmabuf->buf.x = r->x;
     dmabuf->buf.y = r->y;
-    dmabuf->buf.scanout_width = r->width;
-    dmabuf->buf.scanout_height = r->height;
+    dmabuf->buf.backing_width = r->width;
+    dmabuf->buf.backing_height = r->height;
     dmabuf->buf.fourcc = qemu_pixman_to_drm_format(fb->format);
     dmabuf->buf.fd = res->dmabuf_fd;
     dmabuf->buf.allow_fences = true;
@@ -218,8 +218,8 @@ int virtio_gpu_update_dmabuf(VirtIOGPU *g,

     g->dmabuf.primary[scanout_id] = new_primary;
     qemu_console_resize(scanout->con,
-    dmabuf->buf.scanout_width = r->width;
-    dmabuf->buf.scanout_height = r->height;
+    dmabuf->buf.backing_width = r->width;
+    dmabuf->buf.backing_height = r->height;
     dpy_gl_scanout_dmabuf(scanout->con, &new_primary->buf);

     if (old_primary) {
diff --git a/include/ui/console.h b/include/ui/console.h
index f27b2aad4f..3e8b22d6c6 100644
--- a/include/ui/console.h
+++ b/include/ui/console.h
@@ -201,8 +201,8 @@ typedef struct QemuDmaBuf {
     uint32_t  texture;
     uint32_t  x;
     uint32_t  y;
-    uint32_t  scanout_width;
-    uint32_t  scanout_height;
+    uint32_t  backing_width;
+    uint32_t  backing_height;
     bool      y0_top;
     void      *sync;
     int       fence_fd;
diff --git a/ui/dbus-listener.c b/ui/dbus-listener.c
index 0240c39510..7d73681cbc 100644
--- a/ui/dbus-listener.c
+++ b/ui/dbus-listener.c
@@ -420,8 +420,8 @@ static void dbus_scanout_texture(DisplayChangeListener *dcl,
         .y0_top = backing_y_0_top,
         .x = x,
         .y = y,
-        .scanout_width = w,
-        .scanout_height = h,
+        .backing_width = w,
+        .backing_height = h,


This is not consistent with the function arguments. I think it should 
be after:


.width = w, .height = h, .backing_width = backing_width, .backing_height = backing_height


Hopefully this inconsistency is not repeated elsewhere.

Yes, you are right. backing_* is for the whole surface, while the normal
width/height (w/h) specifies the sub-region, as you mentioned earlier, in
all other places. The inconsistency was introduced in QemuDmaBuf, where
width/height was used as backing_width/height. We should have corrected
that first. I will send another version of the patch to correct this.



thanks

     };

     assert(tex_id);
diff --git a/ui/egl-helpers.c b/ui/egl-helpers.c
index 8f9fbf583e..6b7be5753d 100644
--- a/ui/egl-helpers.c
+++ b/ui/egl-helpers.c
@@ -148,8 +148,8 @@ void egl_fb_blit(egl_fb *dst, egl_fb *src, bool flip)
     if (src->dmabuf) {
         x1 = src->dmabuf->x;
         y1 = src->dmabuf->y;
-        w = src->dmabuf->scanout_width;
-        h = src->dmabuf->scanout_height;
+        w = src->dmabuf->backing_width;
+        h = src->dmabuf->backing_height;
     }

     w = (x1 + w) > src->width ? src->width - x1 : w;
diff --git a/ui/gtk-egl.c b/ui/gtk-egl.c
index d59b8cd7d7..7604696d4a 100644
--- a/ui/gtk-egl.c
+++ b/ui/gtk-egl.c
@@ -259,8 +259,8 @@ void gd_egl_scanout_dmabuf(DisplayChangeListener *dcl,

     gd_egl_scanout_texture(dcl, dmabuf->texture,
                            dmabuf->y0_top, dmabuf->width,
dmabuf->height,
-                           dmabuf->x, dmabuf->y,
dmabuf->scanout_width,
-                           dmabuf->scanout_height, NULL);
+                           dmabuf->x, dmabuf->y,
dmabuf->backing_width,
+                           dmabuf->backing_height, NULL);

     if (dmabuf->allow_fences) {
         vc->gfx.guest_fb.dmabuf = dmabuf;
diff --git a/ui/gtk-gl-area.c b/ui/gtk-gl-area.c
index 7367dfd793..3337a4baa3 100644
--- a/ui/gtk-gl-area.c
+++ b/ui/gtk-gl-area.c

Re: [PATCH v1 6/9] gfxstream + rutabaga: add initial support for gfxstream

2023-07-12 Thread Gurchetan Singh
On Wed, Jul 12, 2023 at 12:15 PM Marc-André Lureau
 wrote:
>
> Hi
>
> On Tue, Jul 11, 2023 at 6:57 AM Gurchetan Singh  
> wrote:
>>
>> This adds initial support for gfxstream and cross-domain.  Both
>> features rely on virtio-gpu blob resources and context types, which
>> are also implemented in this patch.
>>
>> gfxstream has a long and illustrious history in Android graphics
>> paravirtualization.  It has been powering graphics in the Android
>> Studio Emulator for more than a decade, which is the main developer
>> platform.
>>
>> Originally conceived by Jesse Hall, it was first known as "EmuGL" [a].
>> The key design characteristic was a 1:1 threading model and
>> auto-generation, which fit nicely with the OpenGLES spec.  It also
>> allowed easy layering with ANGLE on the host, which provides the GLES
>> implementations on Windows or MacOS enviroments.
>>
>> gfxstream has traditionally been maintained by a single engineer, and
>> between 2015 to 2021, the goldfish throne passed to Frank Yang.
>> Historians often remark this glorious reign ("pax gfxstreama" is the
>> academic term) was comparable to that of Augustus and the both Queen
>> Elizabeths.  Just to name a few accomplishments in a resplendent
>> panoply: higher versions of GLES, address space graphics, snapshot
>> support and CTS compliant Vulkan [b].
>>
>> One major drawback was the use of out-of-tree goldfish drivers.
>> Android engineers didn't know much about DRM/KMS and especially TTM so
>> a simple guest to host pipe was conceived.
>>
>> Luckily, virtio-gpu 3D started to emerge in 2016 due to the work of
>> the Mesa/virglrenderer communities.  In 2018, the initial virtio-gpu
>> port of gfxstream was done by Cuttlefish enthusiast Alistair Delva.
>> It was a symbol compatible replacement of virglrenderer [c] and named
>> "AVDVirglrenderer".  This implementation forms the basis of the
>> current gfxstream host implementation still in use today.
>>
>> cross-domain support follows a similar arc.  Originally conceived by
>> Wayland aficionado David Reveman and crosvm enjoyer Zach Reizner in
>> 2018, it initially relied on the downstream "virtio-wl" device.
>>
>> In 2020 and 2021, virtio-gpu was extended to include blob resources
>> and multiple timelines by yours truly, features gfxstream/cross-domain
>> both require to function correctly.
>>
>> Right now, we stand at the precipice of a truly fantastic possibility:
>> the Android Emulator powered by upstream QEMU and upstream Linux
>> kernel.  gfxstream will then be packaged properfully, and app
>> developers can even fix gfxstream bugs on their own if they encounter
>> them.
>>
>> It's been quite the ride, my friends.  Where will gfxstream head next,
>> nobody really knows.  I wouldn't be surprised if it's around for
>> another decade, maintained by a new generation of Android graphics
>> enthusiasts.
>>
>> Technical details:
>>   - Very simple initial display integration: just used Pixman
>>   - Largely, 1:1 mapping of virtio-gpu hypercalls to rutabaga function
>> calls
>>
>
> Wow, this is not for the faint reader.. there is a lot to grasp in this gfx 
> space...
>
> Could you perhaps extend on what this current code can do for an average 
> Linux VM? or for some Android VM (which one?!), and then what are the next 
> steps and status?

- For Linux VMs + Linux hosts, this provides more modern display
virtualization via Wayland passthrough.   It also is a performance
benefit since you can avoid a guest compositor pass.  For widespread
distribution, someone needs to package Sommelier or the
wayland-proxy-virtwl [a] Linux distro style.   In addition newer
versions [b] of the Linux kernel come with DRM_VIRTIO_GPU_KMS, which
allow disabling KMS hypercalls.  I suppose someone can come up with a
Linux VM variant that automatically starts the Sommelier or
wayland-proxy-virtwl and some terminal app.

- For Android VMs, you can boot with gfxstream GLES/Vulkan now with
upstream QEMU with a simple UI.  The next step would be improving
display integration and UI interfaces with the goal of the QEMU
upstream graphics being in an emulator release [c].

Will add these details to the commit message in v2.

[a] https://github.com/talex5/wayland-proxy-virtwl
[b] https://lore.kernel.org/lkml/20230302233506.3146290-1-robdcl...@gmail.com/
[c] https://developer.android.com/studio/releases/emulator

>
> My limited understanding (from this series and from 
> https://gitlab.com/qemu-project/qemu/-/issues/1611) is that it allows 
> passing-through some vulkan APIs for off-screen usage. Is that accurate?

For Linux VMs, it's currently offscreen accelerated rendering only.
For Android VMs, on-screen does work, but for simplicity a memcpy does
occur when flushing to the scanout.

>
> How far are we from getting upstream QEMU to be used by Android Emulator? (in 
> the gfx domain at least) What would it take to get the average Linux VM to 
> use virtio-vga-rutabaga instead of virtio-vga-gl to get accelerated rendering?


Re: [PATCH v1 9/9] docs/system: add basic virtio-gpu documentation

2023-07-12 Thread Gurchetan Singh
On Wed, Jul 12, 2023 at 2:40 PM Akihiko Odaki  wrote:
>
> On 2023/07/11 11:56, Gurchetan Singh wrote:
> > This adds basic documentation for virtio-gpu.
>
> Thank you for adding documentation for other backends too. I have been
> asked how virtio-gpu works so many times and always had to explain it
> myself, though Gerd does have a nice article.* This documentation will help.
>
> * https://www.kraxel.org/blog/2021/05/virtio-gpu-qemu-graphics-update/
>
> >
> > Suggested-by: Akihiko Odaki 
> > Signed-off-by: Gurchetan Singh 
> > ---
> >   docs/system/device-emulation.rst   |  1 +
> >   docs/system/devices/virtio-gpu.rst | 80 ++
> >   2 files changed, 81 insertions(+)
> >   create mode 100644 docs/system/devices/virtio-gpu.rst
> >
> > diff --git a/docs/system/device-emulation.rst b/docs/system/device-emulation.rst
> > index 4491c4cbf7..1167f3a9f2 100644
> > --- a/docs/system/device-emulation.rst
> > +++ b/docs/system/device-emulation.rst
> > @@ -91,6 +91,7 @@ Emulated Devices
> >  devices/nvme.rst
> >  devices/usb.rst
> >  devices/vhost-user.rst
> > +   devices/virtio-gpu.rst
> >  devices/virtio-pmem.rst
> >  devices/vhost-user-rng.rst
> >  devices/canokey.rst
> > diff --git a/docs/system/devices/virtio-gpu.rst b/docs/system/devices/virtio-gpu.rst
> > new file mode 100644
> > index 00..2426039540
> > --- /dev/null
> > +++ b/docs/system/devices/virtio-gpu.rst
> > @@ -0,0 +1,80 @@
> > +..
> > +   SPDX-License-Identifier: GPL-2.0
> > +
> > +virtio-gpu
> > +==
> > +
> > +This document explains the setup and usage of the virtio-gpu device.
> > +The virtio-gpu device paravirtualizes the GPU and display controller.
> > +
> > +Linux kernel support
> > +
> > +
> > +virtio-gpu requires a guest Linux kernel built with the
> > +``CONFIG_DRM_VIRTIO_GPU`` option.
> > +
> > +QEMU virtio-gpu variants
> > +
> > +
> > +There are many virtio-gpu device variants, listed below:
> > +
> > + * ``virtio-vga``
> > + * ``virtio-gpu-pci``
> > + * ``virtio-vga-gl``
> > + * ``virtio-gpu-gl-pci``
> > + * ``virtio-vga-rutabaga``
> > + * ``virtio-gpu-rutabaga-pci``
> > + * ``vhost-user-vga``
> > + * ``vhost-user-gl-pci``
>
> > +
> > +QEMU provides a 2D virtio-gpu backend, and two accelerated backends:
> > +virglrenderer ('gl' device label) and rutabaga_gfx ('rutabaga' device
> > +label).  There is also a vhost-user backend that runs the 2D device > +in 
> > a separate process.  Each device type as VGA or PCI variant.  This
> > +document uses the PCI variant in examples.
>
> I suggest replacing "2D device" with "graphics stack"; vhost-user works
> with 3D too. It's also slightly awkward to say a device runs in a
> separate process, as some portion of device emulation is always stuck in
> QEMU. In my opinion, the point of the vhost-user backend is to isolate the
> gigantic graphics stack, so let's phrase it that way.
>
> I also have a bit different understanding regarding virtio-gpu variants.
> First, the variants can be classified into VGA and non-VGA ones. The VGA
> ones are prefixed with virtio-vga or vhost-user-vga while the non-VGA
> ones are prefixed with virtio-gpu or vhost-user-gpu.
>
> The VGA ones always use PCI interface, but for the non-VGA ones, you can
> further pick simple MMIO or PCI. For MMIO, you can suffix the device
> name with -device though vhost-user-gpu apparently does not support
> MMIO. For PCI, you can suffix it with -pci. Without these suffixes, the
> platform default will be chosen.
>
> Since enumerating all variants will result in a long list, you may
> provide abstract syntaxes like the following for this explanation:
>
> * virtio-vga[-BACKEND]
> * virtio-gpu[-BACKEND][-INTERFACE]
> * vhost-user-vga
> * vhost-user-pci
>
> > +
> > +virtio-gpu 2d
> > +-
> > +
> > +The default 2D mode uses a guest software renderer (llvmpipe, lavapipe,
> > +Swiftshader) to provide the OpenGL/Vulkan implementations.
>
> It's certainly possible to use virtio-gpu without software
> OpenGL/Vulkan. A major example is Windows; its software renderer is
> somewhat limited in my understanding.
>
> My suggestion:
> The default 2D backend only performs 2D operations. The guest needs to
> employ a software renderer for 3D graphics.
>
> It's also better to provide links for the renderers. Apparently lavapipe
> does not have a dedicated documentation, so you may add a link for Mesa
> and mention them like:
> LLVMpipe and Lavapipe included in `Mesa`_, or `SwiftShader`_
>
> And I think it will be helpful to say LLVMpipe and Lavapipe work out of
> the box on typical modern Linux distributions, as that should be what
> people care about.
>
> > +
> > +.. parsed-literal::
> > +-device virtio-gpu-pci
> > +
> > +virtio-gpu virglrenderer
> > +
> > +
> > +When using virgl accelerated graphics mode, OpenGL API calls are translated
> > +into an intermediate representation (see `Gallium3D`_). The intermediate
> > +represen

Re: [PATCH v1 0/5] target/arm: Handle psci calls in userspace

2023-07-12 Thread Gavin Shan

Hi Salil,

On 7/4/23 19:58, Salil Mehta wrote:



Latest Qemu Prototype (Pre RFC V2) (Not in the final shape of the patches)
https://github.com/salil-mehta/qemu.git   
virt-cpuhp-armv8/rfc-v1-port11052023.dev-1


should work against below kernel changes as confirmed by James,

Latest Kernel Prototype (Pre RFC V2 = RFC V1 + Fixes)
https://git.gitlab.arm.com/linux-arm/linux-jm.git   virtual_cpu_hotplug/rfc/v2



I think it'd be better to have the discussions on the mailing list. The threads
and all follow-up replies can then be archived somewhere so they aren't lost.
Besides, other people may be interested in the same points and can join the
discussion directly.

I got a chance to give the RFC patchsets some tests. Not all cases are working
as expected. I know the patchset is still being polished; I'll summarize the
issues below:

(1) A coredump is triggered when the topology is out of range. It's the issue we
discussed in private. Here I'm just recapping in case other people are also
blocked by the issue.

(a) start VM with the following command lines
 /home/gavin/sandbox/qemu.main/build/qemu-system-aarch64   \
 -accel kvm -machine virt,gic-version=host,nvdimm=on -cpu host \
 -smp cpus=1,maxcpus=2,sockets=1,clusters=1,cores=1,threads=2  \
 -m 512M,slots=16,maxmem=64G   \
 -object memory-backend-ram,id=mem0,size=512M  \
 -numa node,nodeid=0,cpus=0-1,memdev=mem0  \

(b) hot add CPU whose topology is out of range
(qemu) device_add driver=host-arm-cpu,id=cpu1,core-id=1


It's actually caused by a typo in hw/arm/virt.c::virt_cpu_pre_plug(), where
'ms->possible_cpus->len' needs to be replaced with 'ms->smp.cores'. With this
fixed, the out-of-range hot-added CPU object will be rejected.

(2) I don't think TCG has been tested, since it doesn't seem to work at all.

(a) start VM with the following command lines
/home/gshan/sandbox/src/qemu/main/build/qemu-system-aarch64 \
-machine virt,gic-version=3 -cpu max -m 1024\
-smp maxcpus=2,cpus=1,sockets=1,clusters=1,cores=1,threads=2\

(b) failure while hot-adding CPU
(qemu) device_add driver=max-arm-cpu,id=cpu1,thread-id=1
Error: cpu(id1=0:0:0:1) with arch-id 1 exists

The error message is printed by hw/arm/virt.c::virt_cpu_pre_plug(), where the
specific CPU is already present. In the KVM case, the disabled CPUs are detached
from 'ms->possible_cpus->cpus[1].cpu' and destroyed. I think we need to do a
similar thing for the TCG case in hw/arm/virt.c::virt_cpu_post_init(). I'm able
to add a CPU with the following hunk of changes.

--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -2122,6 +2122,18 @@ static void virt_cpu_post_init(VirtMachineState *vms, MemoryRegion *sysmem)
 exit(1);
 }
 }
+
+#if 1
+for (n = 0; n < possible_cpus->len; n++) {
+cpu = qemu_get_possible_cpu(n);
+if (!qemu_enabled_cpu(cpu)) {
+CPUArchId *cpu_slot;
+cpu_slot = virt_find_cpu_slot(ms, cpu->cpu_index);
+cpu_slot->cpu = NULL;
+object_unref(OBJECT(cpu));
+}
+}
+#endif
 }
 }

(3) An assertion fails on the sequence of hot-add, hot-remove and hot-add when
TCG mode is enabled.

(a) Include the hack from (2) and start VM with the following command lines
/home/gshan/sandbox/src/qemu/main/build/qemu-system-aarch64 \
-machine virt,gic-version=3 -cpu max -m 1024\
-smp maxcpus=2,cpus=1,sockets=1,clusters=1,cores=1,threads=2\

(b) assertion on the sequence of hot-add, hot-remove and hot-add
(qemu) device_add driver=max-arm-cpu,id=cpu1,thread-id=1
(qemu) device_del cpu1
(qemu) device_add driver=max-arm-cpu,id=cpu1,thread-id=1
**
ERROR:../tcg/tcg.c:669:tcg_register_thread: assertion failed: (n < 
tcg_max_ctxs)
Bail out! ERROR:../tcg/tcg.c:669:tcg_register_thread: assertion failed: (n 
< tcg_max_ctxs)
Aborted (core dumped)

I'm not sure if x86 has a similar issue. It seems the management of TCG
contexts, corresponding to the variables @tcg_max_ctxs and @tcg_ctxs, needs
some improvements so that TCG context registration and unregistration
accommodate CPU hotplug.


Apart from what has been found in the tests, I've started to look into the
code changes. I may reply with more specific comments, though it would be
ideal to comment on the specific changes after the patchset is posted for
review. Salil, you may have mentioned the plan somewhere: as I understood it,
the QEMU patchset will be posted after James's RFCv2 kernel series is posted.
Please let me know if my understanding is correct. Again, thanks for your
efforts to get vCPU hotplug supported :)

Thanks,
Gavin




Re: [PATCH v4] kconfig: Add PCIe devices to s390x machines

2023-07-12 Thread Akihiko Odaki

On 2023/07/12 19:48, Philippe Mathieu-Daudé wrote:

Hi Cédric,

On 12/7/23 10:01, Cédric Le Goater wrote:

It is useful to extend the number of available PCIe devices to KVM guests
for passthrough scenarios and also to expose these models to a different
(big endian) architecture. Introduce a new config PCIE_DEVICES to select
models, Intel Ethernet adapters and one USB controller. These devices all
support MSI-X which is a requirement on s390x as legacy INTx are not
supported.

Cc: Matthew Rosato 
Cc: Paolo Bonzini 
Cc: Thomas Huth 
Signed-off-by: Cédric Le Goater 
---

  There could be a more general use of PCIE_DEVICES

  v4: Introduce PCIE_DEVICES
  v3: PCI -> PCI_EXPRESS
  v2: select -> imply
  configs/devices/s390x-softmmu/default.mak | 1 +
  hw/net/Kconfig    | 4 ++--
  hw/pci/Kconfig    | 3 +++
  hw/s390x/Kconfig  | 3 ++-
  hw/usb/Kconfig    | 2 +-
  5 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/configs/devices/s390x-softmmu/default.mak 
b/configs/devices/s390x-softmmu/default.mak

index f2287a133f36..2d5ff476e32a 100644
--- a/configs/devices/s390x-softmmu/default.mak
+++ b/configs/devices/s390x-softmmu/default.mak
@@ -7,6 +7,7 @@
  #CONFIG_VFIO_CCW=n
  #CONFIG_VIRTIO_PCI=n
  #CONFIG_WDT_DIAG288=n
+#CONFIG_PCIE_DEVICE=n
  # Boards:
  #
diff --git a/hw/net/Kconfig b/hw/net/Kconfig
index 98e00be4f937..7fcc0d7faa29 100644
--- a/hw/net/Kconfig
+++ b/hw/net/Kconfig
@@ -41,12 +41,12 @@ config E1000_PCI
  config E1000E_PCI_EXPRESS
  bool
-    default y if PCI_DEVICES
+    default y if PCI_DEVICES || PCIE_DEVICES


There seems to be a pre-existing bug, shouldn't this be

    default y if PCIE_DEVICES

?


I think you should leave this as is and instead add a config that is
selected only when legacy PCI is available, and make all legacy PCI
devices depend on that config. This will prevent legacy PCI devices from
being selected for s390x machines, whether selected due to PCI_DEVICES or
manually by the user (by mistake).
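A sketch of that suggestion might look like the following. The config name LEGACY_PCI_DEVICES and its use in E1000_PCI are hypothetical illustrations, not part of the posted patch:

```kconfig
# Hypothetical: selected only by machines that expose a
# conventional (legacy) PCI bus.
config LEGACY_PCI_DEVICES
    bool

# A legacy PCI-only device would then key off it, so it cannot be
# pulled in on machines such as s390x that only offer PCI Express:
config E1000_PCI
    bool
    default y if PCI_DEVICES
    depends on PCI && LEGACY_PCI_DEVICES
```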




(Cc'ing maintainers)


  depends on PCI_EXPRESS && MSI_NONBROKEN
  config IGB_PCI_EXPRESS
  bool
-    default y if PCI_DEVICES
+    default y if PCI_DEVICES || PCIE_DEVICES


Similarly:

    default y if PCIE_DEVICES


  depends on PCI_EXPRESS && MSI_NONBROKEN
  config RTL8139_PCI
diff --git a/hw/pci/Kconfig b/hw/pci/Kconfig
index 77f8b005ffb1..fe70902cd821 100644
--- a/hw/pci/Kconfig
+++ b/hw/pci/Kconfig
@@ -8,6 +8,9 @@ config PCI_EXPRESS
  config PCI_DEVICES
  bool
+config PCIE_DEVICES
+    bool
+
  config MSI_NONBROKEN
  # selected by interrupt controllers that do not support MSI,
  # or support it and have a good implementation. See commit
diff --git a/hw/s390x/Kconfig b/hw/s390x/Kconfig
index 454e0ff4b613..4c068d7960b9 100644
--- a/hw/s390x/Kconfig
+++ b/hw/s390x/Kconfig
@@ -5,7 +5,8 @@ config S390_CCW_VIRTIO
  imply VFIO_AP
  imply VFIO_CCW
  imply WDT_DIAG288
-    select PCI
+    imply PCIE_DEVICES
+    select PCI_EXPRESS


I'm confused, TYPE_S390_PCI_HOST_BRIDGE exposes a PCI bus...
At a minimum you'd need:

-- >8 --
  static const TypeInfo s390_pcihost_info = {
  .name  = TYPE_S390_PCI_HOST_BRIDGE,
-    .parent    = TYPE_PCI_HOST_BRIDGE,
+    .parent    = TYPE_PCIE_HOST_BRIDGE,
  .instance_size = sizeof(S390pciState),
  .class_init    = s390_pcihost_class_init,
  .interfaces = (InterfaceInfo[]) {
---

Actually I can see:

     if (s390_pci_msix_init(pbdev) && !pbdev->interp) {
     error_setg(errp, "MSI-X support is mandatory "
    "in the S390 architecture");
     return;
     }

So this must be PCIe, not legacy PCI, right?


diff --git a/hw/usb/Kconfig b/hw/usb/Kconfig
index 0ec6def4b8b8..0f486764ed69 100644
--- a/hw/usb/Kconfig
+++ b/hw/usb/Kconfig
@@ -36,7 +36,7 @@ config USB_XHCI
  config USB_XHCI_PCI
  bool
-    default y if PCI_DEVICES
+    default y if PCI_DEVICES || PCIE_DEVICES


TYPE_XHCI_PCI inherits TYPE_PCI_DEVICE and implements
INTERFACE_PCIE_DEVICE, so this is OK.


  depends on PCI
  select USB_XHCI






Re: [PATCH for-8.2 6/7] target/riscv: add 'max' CPU type

2023-07-12 Thread Conor Dooley
On Wed, Jul 12, 2023 at 06:39:28PM -0300, Daniel Henrique Barboza wrote:
> On 7/12/23 18:35, Conor Dooley wrote:
> > On Wed, Jul 12, 2023 at 06:09:10PM -0300, Daniel Henrique Barboza wrote:
> > 
> > > It is intentional. Those default marchid/mimpid vals were derived from 
> > > the current
> > > QEMU version ID/build and didn't mean much.
> > > 
> > > It is still possible to set them via "-cpu rv64,marchid=N,mimpid=N" if 
> > > needed when
> > > using the generic (rv64,rv32) CPUs. Vendor CPUs can't have their machine 
> > > IDs changed
> > > via command line.
> > 
> > Sounds good, thanks. I did just now go and check icicle to see what it
> > would report & it does not boot. I'll go bisect...
> 
> BTW how are you booting the icicle board nowadays? I remember you mentioning 
> about
> some changes in the FDT being required to boot and whatnot.

I do direct kernel boots, as the HSS doesn't work anymore, and just lie
a bit to QEMU about how much DDR we have.
.PHONY: qemu-icicle
qemu-icicle:
$(qemu) -M microchip-icicle-kit \
-m 3G -smp 5 \
-kernel $(vmlinux_bin) \
-dtb $(icicle_dtb) \
-initrd $(initramfs) \
-display none -serial null \
-serial stdio \
-D qemu.log -d unimp

The platform only supports 2 GiB of DDR, not 3, but if I pass 2 to QEMU
it thinks there's 1 GiB at 0x8000_ and 1 GiB at 0x10__. The
upstream devicetree (and current FPGA reference design) expects there to
be 1 GiB at 0x8000_ and 1 GiB at 0x10_4000_. If I lie to QEMU,
it thinks there is 1 GiB at 0x8000_ and 2 GiB at 0x10__, and
things just work. I prefer doing it this way than having to modify the
DT, it is a lot easier to explain to people this way.

I've been meaning to work on the support for the icicle & mpfs in QEMU, but
it just gets shunted down the priority list. I'd really like it if a proper
boot flow would run in QEMU, which means fixing whatever broke the HSS,
but I've recently picked up maintainership of dt-binding stuff in Linux,
so I've unfortunately got even less time to try and work on it. Maybe
we'll get some new graduate in and I can make them suffer in my stead...

> If it's not too hard I'll add it in my test scripts to keep it under check. 
> Perhaps
> we can even add it to QEMU testsuite.

I don't think it really should be that bad, at least for the direct
kernel boot, which is what I mainly care about, since I use it fairly
often for debugging boot stuff in Linux.

Anyways, aa903cf31391dd505b399627158f1292a6d19896 is the first bad commit:
commit aa903cf31391dd505b399627158f1292a6d19896
Author: Bin Meng 
Date:   Fri Jun 30 23:36:04 2023 +0800

roms/opensbi: Upgrade from v1.2 to v1.3

Upgrade OpenSBI from v1.2 to v1.3 and the pre-built bios images.

And I see something like:
qemu//build/qemu-system-riscv64 -M microchip-icicle-kit \
-m 3G -smp 5 \
-kernel vmlinux.bin \
-dtb icicle.dtb \
-initrd initramfs.cpio.gz \
-display none -serial null \
-serial stdio \
-D qemu.log -d unimp
qemu-system-riscv64: warning: disabling zca extension for hart 
0x because privilege spec version does not match
qemu-system-riscv64: warning: disabling zca extension for hart 
0x0001 because privilege spec version does not match
qemu-system-riscv64: warning: disabling zcd extension for hart 
0x0001 because privilege spec version does not match
qemu-system-riscv64: warning: disabling zca extension for hart 
0x0002 because privilege spec version does not match
qemu-system-riscv64: warning: disabling zcd extension for hart 
0x0002 because privilege spec version does not match
qemu-system-riscv64: warning: disabling zca extension for hart 
0x0003 because privilege spec version does not match
qemu-system-riscv64: warning: disabling zcd extension for hart 
0x0003 because privilege spec version does not match
qemu-system-riscv64: warning: disabling zca extension for hart 
0x0004 because privilege spec version does not match
qemu-system-riscv64: warning: disabling zcd extension for hart 
0x0004 because privilege spec version does not match

OpenSBI v1.3
   _  _
  / __ \  / |  _ \_   _|
 | |  | |_ __   ___ _ __ | (___ | |_) || |
 | |  | | '_ \ / _ \ '_ \ \___ \|  _ < | |
 | |__| | |_) |  __/ | | |) | |_) || |_
  \/| .__/ \___|_| |_|_/|___/_|
| |
|_|

init_coldboot: ipi init failed (error -1009)

Just to note, because we use our own firmware that vendors in OpenSBI
and compiles only a significantly cut down number of files from it, we
do not use the fw_dynamic etc flow on our hardware. As a result, we have
not tested v1.3, nor do we have any immediate plans to change our
platform firmware to vendor v1.3 either.

I unless there's something obvious to yo

Re: [PATCH v1 9/9] docs/system: add basic virtio-gpu documentation

2023-07-12 Thread Akihiko Odaki

On 2023/07/11 11:56, Gurchetan Singh wrote:

This adds basic documentation for virtio-gpu.


Thank you for adding documentation for other backends too. I have been
asked how virtio-gpu works so many times and always had to explain it
myself, though Gerd does have a nice article.* This documentation will help.


* https://www.kraxel.org/blog/2021/05/virtio-gpu-qemu-graphics-update/



Suggested-by: Akihiko Odaki 
Signed-off-by: Gurchetan Singh 
---
  docs/system/device-emulation.rst   |  1 +
  docs/system/devices/virtio-gpu.rst | 80 ++
  2 files changed, 81 insertions(+)
  create mode 100644 docs/system/devices/virtio-gpu.rst

diff --git a/docs/system/device-emulation.rst b/docs/system/device-emulation.rst
index 4491c4cbf7..1167f3a9f2 100644
--- a/docs/system/device-emulation.rst
+++ b/docs/system/device-emulation.rst
@@ -91,6 +91,7 @@ Emulated Devices
 devices/nvme.rst
 devices/usb.rst
 devices/vhost-user.rst
+   devices/virtio-gpu.rst
 devices/virtio-pmem.rst
 devices/vhost-user-rng.rst
 devices/canokey.rst
diff --git a/docs/system/devices/virtio-gpu.rst 
b/docs/system/devices/virtio-gpu.rst
new file mode 100644
index 00..2426039540
--- /dev/null
+++ b/docs/system/devices/virtio-gpu.rst
@@ -0,0 +1,80 @@
+..
+   SPDX-License-Identifier: GPL-2.0
+
+virtio-gpu
+==
+
+This document explains the setup and usage of the virtio-gpu device.
+The virtio-gpu device paravirtualizes the GPU and display controller.
+
+Linux kernel support
+
+
+virtio-gpu requires a guest Linux kernel built with the
+``CONFIG_DRM_VIRTIO_GPU`` option.
+
+QEMU virtio-gpu variants
+
+
+There are many virtio-gpu device variants, listed below:
+
+ * ``virtio-vga``
+ * ``virtio-gpu-pci``
+ * ``virtio-vga-gl``
+ * ``virtio-gpu-gl-pci``
+ * ``virtio-vga-rutabaga``
+ * ``virtio-gpu-rutabaga-pci``
+ * ``vhost-user-vga``
+ * ``vhost-user-gl-pci``



+
+QEMU provides a 2D virtio-gpu backend, and two accelerated backends:
+virglrenderer ('gl' device label) and rutabaga_gfx ('rutabaga' device
+label).  There is also a vhost-user backend that runs the 2D device
+in a separate process.  Each device type has a VGA or PCI variant.  This
+document uses the PCI variant in examples.


I suggest to replace "2D device" with "graphics stack"; vhost-user works 
with 3D too. It's also slightly awkward to say a device runs in a 
separate process as some portion of device emulation always stuck in 
QEMU. In my opinion, the point of vhost-user backend is to isolate the 
gigantic graphics stack so let's put this phrase.


I also have a bit different understanding regarding virtio-gpu variants.
First, the variants can be classified into VGA and non-VGA ones. The VGA 
ones are prefixed with virtio-vga or vhost-user-vga while the non-VGA 
ones are prefixed with virtio-gpu or vhost-user-gpu.


The VGA ones always use PCI interface, but for the non-VGA ones, you can 
further pick simple MMIO or PCI. For MMIO, you can suffix the device 
name with -device though vhost-user-gpu apparently does not support 
MMIO. For PCI, you can suffix it with -pci. Without these suffixes, the 
platform default will be chosen.


Since enumerating all variants will result in a long list, you may 
provide abstract syntaxes like the following for this explanation:


* virtio-vga[-BACKEND]
* virtio-gpu[-BACKEND][-INTERFACE]
* vhost-user-vga
* vhost-user-pci
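The abstract syntaxes above can be illustrated with a tiny helper. This is an illustrative sketch covering only the virtio- variants, not a QEMU API:

```python
def virtio_gpu_device_name(vga=False, backend=None, interface=None):
    """Compose a virtio-gpu device name: VGA variants are always PCI,
    non-VGA ones take an optional -pci / -device (MMIO) suffix."""
    name = "virtio-vga" if vga else "virtio-gpu"
    if backend:                      # e.g. "gl" or "rutabaga"
        name += "-" + backend
    if not vga and interface:        # "pci" or "device"
        name += "-" + interface
    return name

# Reproduces names from the variant list quoted earlier:
assert virtio_gpu_device_name(vga=True) == "virtio-vga"
assert virtio_gpu_device_name(interface="pci") == "virtio-gpu-pci"
assert virtio_gpu_device_name(vga=True, backend="gl") == "virtio-vga-gl"
assert virtio_gpu_device_name(backend="rutabaga",
                              interface="pci") == "virtio-gpu-rutabaga-pci"
```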


+
+virtio-gpu 2d
+-
+
+The default 2D mode uses a guest software renderer (llvmpipe, lavapipe,
+Swiftshader) to provide the OpenGL/Vulkan implementations.


It's certainly possible to use virtio-gpu without software 
OpenGL/Vulkan. A major example is Windows; its software renderer is 
somewhat limited in my understanding.


My suggestion:
The default 2D backend only performs 2D operations. The guest needs to 
employ a software renderer for 3D graphics.


It's also better to provide links for the renderers. Apparently lavapipe 
does not have a dedicated documentation, so you may add a link for Mesa 
and mention them like:

LLVMpipe and Lavapipe included in `Mesa`_, or `SwiftShader`_

And I think it will be helpful to say LLVMpipe and Lavapipe work out of 
box on typical modern Linux distributions as that should be what people 
care.



+
+.. parsed-literal::
+-device virtio-gpu-pci
+
+virtio-gpu virglrenderer
+
+
+When using virgl accelerated graphics mode, OpenGL API calls are translated
+into an intermediate representation (see `Gallium3D`_). The intermediate
+representation is communicated to the host and the `virglrenderer`_ library
+on the host translates the intermediate representation back to OpenGL API
+calls.
It should be mentioned that the translation occurs on the guest side,
and the guest-side component is included in Linux distributions just as
LLVMpipe and Lavapipe are.



+
+.. parsed-literal::
+-device virtio-gpu-gl-pci
+
+.. _Gallium3D: https://www.freedesktop.org/wi

Re: [PATCH for-8.2 6/7] target/riscv: add 'max' CPU type

2023-07-12 Thread Daniel Henrique Barboza




On 7/12/23 18:35, Conor Dooley wrote:

On Wed, Jul 12, 2023 at 06:09:10PM -0300, Daniel Henrique Barboza wrote:


It is intentional. Those default marchid/mimpid vals were derived from the 
current
QEMU version ID/build and didn't mean much.

It is still possible to set them via "-cpu rv64,marchid=N,mimpid=N" if needed 
when
using the generic (rv64,rv32) CPUs. Vendor CPUs can't have their machine IDs 
changed
via command line.


Sounds good, thanks. I did just now go and check icicle to see what it
would report & it does not boot. I'll go bisect...


BTW how are you booting the icicle board nowadays? I remember you mentioning 
about
some changes in the FDT being required to boot and whatnot.

If it's not too hard I'll add it in my test scripts to keep it under check. 
Perhaps
we can even add it to QEMU testsuite.


Daniel



Re: [PATCH for-8.2 6/7] target/riscv: add 'max' CPU type

2023-07-12 Thread Conor Dooley
On Wed, Jul 12, 2023 at 06:09:10PM -0300, Daniel Henrique Barboza wrote:

> It is intentional. Those default marchid/mimpid vals were derived from the 
> current
> QEMU version ID/build and didn't mean much.
> 
> It is still possible to set them via "-cpu rv64,marchid=N,mimpid=N" if needed 
> when
> using the generic (rv64,rv32) CPUs. Vendor CPUs can't have their machine IDs 
> changed
> via command line.

Sounds good, thanks. I did just now go and check icicle to see what it
would report & it does not boot. I'll go bisect...




Re: [PATCH for-8.2 6/7] target/riscv: add 'max' CPU type

2023-07-12 Thread Daniel Henrique Barboza




On 7/12/23 18:00, Conor Dooley wrote:

On Wed, Jul 12, 2023 at 05:30:41PM -0300, Daniel Henrique Barboza wrote:

On 7/12/23 16:22, Conor Dooley wrote:

On Wed, Jul 12, 2023 at 04:01:48PM -0300, Daniel Henrique Barboza wrote:

The 'max' CPU type is used by tooling to determine what's the most
capable CPU a current QEMU version implements. Other archs such as ARM
implements this type. Let's add it to RISC-V.

What we consider "most capable CPU" in this context are related to
ratified, non-vendor extensions. This means that we want the 'max' CPU
to enable all (possible) ratified extensions by default. The reasoning
behind this design is (1) vendor extensions can conflict with each other
and we won't play favorites deciding which one is default or not and
(2) non-ratified extensions are always prone to changes, not being
stable enough to be enabled by default.

All this said, we're still not able to enable all ratified extensions
due to conflicts between them. Zfinx and all its dependencies aren't
enabled because of a conflict with RVF. zce, zcmp and zcmt are also
disabled due to RVD conflicts. When running with 64 bits we're also
disabling zcf.

Signed-off-by: Daniel Henrique Barboza 


This seems like it will be super helpful for CI stuff etc, thanks for
doing it.


And Linux actually boots on it, which was remarkable to see. I was expecting 
something
to blow up I guess.

This is the riscv,isa DT generated:

# cat /proc/device-tree/cpus/cpu@0/riscv,isa
rv64imafdch_zicbom_zicboz_zicsr_zifencei_zihintpause_zawrs_zfa_zfh_zfhmin_zca_zcb_zcd_
zba_zbb_zbc_zbkb_zbkc_zbkx_zbs_zk_zkn_zknd_zkne_zknh_zkr_zks_zksed_zksh_zkt_
zve32f_zve64f_zve64d_smstateen_sscofpmf_sstc_svadu_svinval_svnapot_svpbmt#


Of which an upstream Linux kernel, building using something close to
defconfig, accepts only
rv64imafdch_zicbom_zicboz_zicntr_zicsr_zifencei_zihintpause_zihpm_zba_zbb_zbs_sscofpmf_sstc_svinval_svnapot_svpbmt
so the set of possible things that could break it has been reduced
somewhat.

btw, I noticed that the default marchid/mimpid have changed. Previously I
used to see something like:
processor   : 15
hart: 15
isa : 
rv64imafdcvh_zicbom_zicboz_zicntr_zicsr_zifencei_zihintpause_zihpm_zba_zbb_zbs_sscofpmf_sstc
mmu : sv57
mvendorid   : 0x0
marchid : 0x80032
mimpid  : 0x80032
in /proc/cpuinfo, but "now" I see 0x0 for marchid & mimpid. Is this
change to the default behaviour intentional? I put "now" in quotes because
I applied your patches on top of Alistair's next branch, which contains
the changes to m*id stuff.


It is intentional. Those default marchid/mimpid vals were derived from the 
current
QEMU version ID/build and didn't mean much.

It is still possible to set them via "-cpu rv64,marchid=N,mimpid=N" if needed 
when
using the generic (rv64,rv32) CPUs. Vendor CPUs can't have their machine IDs 
changed
via command line.


Thanks,

Daniel



Cheers,
Conor.




Re: [PATCH for-8.2 6/7] target/riscv: add 'max' CPU type

2023-07-12 Thread Conor Dooley
On Wed, Jul 12, 2023 at 05:30:41PM -0300, Daniel Henrique Barboza wrote:
> On 7/12/23 16:22, Conor Dooley wrote:
> > On Wed, Jul 12, 2023 at 04:01:48PM -0300, Daniel Henrique Barboza wrote:
> > > The 'max' CPU type is used by tooling to determine what's the most
> > > capable CPU a current QEMU version implements. Other archs such as ARM
> > > implements this type. Let's add it to RISC-V.
> > > 
> > > What we consider "most capable CPU" in this context are related to
> > > ratified, non-vendor extensions. This means that we want the 'max' CPU
> > > to enable all (possible) ratified extensions by default. The reasoning
> > > behind this design is (1) vendor extensions can conflict with each other
> > > and we won't play favorites deciding which one is default or not and
> > > (2) non-ratified extensions are always prone to changes, not being
> > > stable enough to be enabled by default.
> > > 
> > > All this said, we're still not able to enable all ratified extensions
> > > due to conflicts between them. Zfinx and all its dependencies aren't
> > > enabled because of a conflict with RVF. zce, zcmp and zcmt are also
> > > disabled due to RVD conflicts. When running with 64 bits we're also
> > > disabling zcf.
> > > 
> > > Signed-off-by: Daniel Henrique Barboza 
> > 
> > This seems like it will be super helpful for CI stuff etc, thanks for
> > doing it.
> 
> And Linux actually boots on it, which was remarkable to see. I was expecting 
> something
> to blow up I guess.
> 
> This is the riscv,isa DT generated:
> 
> # cat /proc/device-tree/cpus/cpu@0/riscv,isa
> rv64imafdch_zicbom_zicboz_zicsr_zifencei_zihintpause_zawrs_zfa_zfh_zfhmin_zca_zcb_zcd_
> zba_zbb_zbc_zbkb_zbkc_zbkx_zbs_zk_zkn_zknd_zkne_zknh_zkr_zks_zksed_zksh_zkt_
> zve32f_zve64f_zve64d_smstateen_sscofpmf_sstc_svadu_svinval_svnapot_svpbmt#

Of which an upstream Linux kernel, building using something close to
defconfig, accepts only
rv64imafdch_zicbom_zicboz_zicntr_zicsr_zifencei_zihintpause_zihpm_zba_zbb_zbs_sscofpmf_sstc_svinval_svnapot_svpbmt
so the set of possible things that could break it has been reduced
somewhat.
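As a side note, the riscv,isa strings quoted above follow a simple shape: a base "rvXX", a run of single-letter extensions, then underscore-separated multi-letter ones. A rough parser of that shape (illustrative only, not a full ISA-string implementation):

```python
def parse_riscv_isa(isa):
    """Split a riscv,isa devicetree string into (xlen, single-letter
    extensions, multi-letter extensions)."""
    # the /proc dump above has the shell prompt '#' glued on the end
    isa = isa.strip().rstrip("#")
    head, *multi = isa.split("_")
    if not head.startswith(("rv32", "rv64")):
        raise ValueError("not a riscv,isa string: " + isa)
    xlen = int(head[2:4])
    return xlen, list(head[4:]), multi

xlen, singles, multi = parse_riscv_isa(
    "rv64imafdch_zicbom_zicboz_zicntr_zicsr_zifencei_zihintpause_zihpm"
    "_zba_zbb_zbs_sscofpmf_sstc_svinval_svnapot_svpbmt")
assert xlen == 64
assert singles == list("imafdch")
assert "sstc" in multi and "svpbmt" in multi
```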

btw, I noticed that the default marchid/mimpid have changed. Previously I
used to see something like:
processor   : 15
hart: 15
isa : 
rv64imafdcvh_zicbom_zicboz_zicntr_zicsr_zifencei_zihintpause_zihpm_zba_zbb_zbs_sscofpmf_sstc
mmu : sv57
mvendorid   : 0x0
marchid : 0x80032
mimpid  : 0x80032
in /proc/cpuinfo, but "now" I see 0x0 for marchid & mimpid. Is this
change to the default behaviour intentional? I saw "now" in "s because
I applied your patches on top of Alistair's next branch, which contains
the changes to m*id stuff.

Cheers,
Conor.




[PATCH for-8.2 v2 4/7] target/riscv/cpu.c: split non-ratified exts from riscv_cpu_extensions[]

2023-07-12 Thread Daniel Henrique Barboza
Create a new riscv_cpu_experimental_exts[] to store the non-ratified
extensions properties. Once they are ratified we'll move them back to
riscv_cpu_extensions[].

Change riscv_cpu_add_user_properties to keep adding them to users.

Signed-off-by: Daniel Henrique Barboza 
---
 target/riscv/cpu.c | 38 +++---
 1 file changed, 23 insertions(+), 15 deletions(-)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index 9bbdc46126..c0826b449d 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -1808,21 +1808,6 @@ static Property riscv_cpu_extensions[] = {
 DEFINE_PROP_BOOL("zcmp", RISCVCPU, cfg.ext_zcmp, false),
 DEFINE_PROP_BOOL("zcmt", RISCVCPU, cfg.ext_zcmt, false),
 
-/* These are experimental so mark with 'x-' */
-DEFINE_PROP_BOOL("x-zicond", RISCVCPU, cfg.ext_zicond, false),
-
-/* ePMP 0.9.3 */
-DEFINE_PROP_BOOL("x-epmp", RISCVCPU, cfg.epmp, false),
-DEFINE_PROP_BOOL("x-smaia", RISCVCPU, cfg.ext_smaia, false),
-DEFINE_PROP_BOOL("x-ssaia", RISCVCPU, cfg.ext_ssaia, false),
-
-DEFINE_PROP_BOOL("x-zvfh", RISCVCPU, cfg.ext_zvfh, false),
-DEFINE_PROP_BOOL("x-zvfhmin", RISCVCPU, cfg.ext_zvfhmin, false),
-
-DEFINE_PROP_BOOL("x-zfbfmin", RISCVCPU, cfg.ext_zfbfmin, false),
-DEFINE_PROP_BOOL("x-zvfbfmin", RISCVCPU, cfg.ext_zvfbfmin, false),
-DEFINE_PROP_BOOL("x-zvfbfwma", RISCVCPU, cfg.ext_zvfbfwma, false),
-
 DEFINE_PROP_END_OF_LIST(),
 };
 
@@ -1843,6 +1828,25 @@ static Property riscv_cpu_vendor_exts[] = {
 DEFINE_PROP_END_OF_LIST(),
 };
 
+/* These are experimental so mark with 'x-' */
+static Property riscv_cpu_experimental_exts[] = {
+DEFINE_PROP_BOOL("x-zicond", RISCVCPU, cfg.ext_zicond, false),
+
+/* ePMP 0.9.3 */
+DEFINE_PROP_BOOL("x-epmp", RISCVCPU, cfg.epmp, false),
+DEFINE_PROP_BOOL("x-smaia", RISCVCPU, cfg.ext_smaia, false),
+DEFINE_PROP_BOOL("x-ssaia", RISCVCPU, cfg.ext_ssaia, false),
+
+DEFINE_PROP_BOOL("x-zvfh", RISCVCPU, cfg.ext_zvfh, false),
+DEFINE_PROP_BOOL("x-zvfhmin", RISCVCPU, cfg.ext_zvfhmin, false),
+
+DEFINE_PROP_BOOL("x-zfbfmin", RISCVCPU, cfg.ext_zfbfmin, false),
+DEFINE_PROP_BOOL("x-zvfbfmin", RISCVCPU, cfg.ext_zvfbfmin, false),
+DEFINE_PROP_BOOL("x-zvfbfwma", RISCVCPU, cfg.ext_zvfbfwma, false),
+
+DEFINE_PROP_END_OF_LIST(),
+};
+
 static Property riscv_cpu_options[] = {
 DEFINE_PROP_UINT8("pmu-num", RISCVCPU, cfg.pmu_num, 16),
 
@@ -1927,6 +1931,10 @@ static void riscv_cpu_add_user_properties(Object *obj)
 for (prop = riscv_cpu_vendor_exts; prop && prop->name; prop++) {
 qdev_property_add_static(dev, prop);
 }
+
+for (prop = riscv_cpu_experimental_exts; prop && prop->name; prop++) {
+qdev_property_add_static(dev, prop);
+}
 }
 
 static Property riscv_cpu_properties[] = {
-- 
2.41.0




[PATCH for-8.2 v2 7/7] avocado, risc-v: add opensbi tests for 'max' CPU

2023-07-12 Thread Daniel Henrique Barboza
Add smoke tests to ensure that we'll not break the 'max' CPU type when
adding new ratified extensions to be enabled.

Signed-off-by: Daniel Henrique Barboza 
---
 tests/avocado/riscv_opensbi.py | 16 
 1 file changed, 16 insertions(+)

diff --git a/tests/avocado/riscv_opensbi.py b/tests/avocado/riscv_opensbi.py
index bfff9cc3c3..15fd57fe51 100644
--- a/tests/avocado/riscv_opensbi.py
+++ b/tests/avocado/riscv_opensbi.py
@@ -61,3 +61,19 @@ def test_riscv64_virt(self):
 :avocado: tags=machine:virt
 """
 self.boot_opensbi()
+
+def test_riscv32_virt_maxcpu(self):
+"""
+:avocado: tags=arch:riscv32
+:avocado: tags=machine:virt
+:avocado: tags=cpu:max
+"""
+self.boot_opensbi()
+
+def test_riscv64_virt_maxcpu(self):
+"""
+:avocado: tags=arch:riscv64
+:avocado: tags=machine:virt
+:avocado: tags=cpu:max
+"""
+self.boot_opensbi()
-- 
2.41.0




[PATCH for-8.2 v2 0/7] target/riscv: add 'max' CPU type

2023-07-12 Thread Daniel Henrique Barboza
Hi,

This second version has smalls tweak in patch 6 that I found out
missing while chatting with Conor in the v1 review.

Changes from v1:
- patch 6:
  - enable RVG, RVJ and RVV in riscv_init_max_cpu_extensions()
  - Added the resulting 'riscv,isa' DT in the commit message
- v1 link: 
https://lore.kernel.org/qemu-riscv/20230712190149.424675-1-dbarb...@ventanamicro.com/T/#t


Daniel Henrique Barboza (7):
  target/riscv/cpu.c: split CPU options from riscv_cpu_extensions[]
  target/riscv/cpu.c: skip 'bool' check when filtering KVM props
  target/riscv/cpu.c: split vendor exts from riscv_cpu_extensions[]
  target/riscv/cpu.c: split non-ratified exts from
riscv_cpu_extensions[]
  target/riscv/cpu.c: add a ADD_CPU_PROPERTIES_ARRAY() macro
  target/riscv: add 'max' CPU type
  avocado, risc-v: add opensbi tests for 'max' CPU

 target/riscv/cpu-qom.h |   1 +
 target/riscv/cpu.c | 109 +++--
 tests/avocado/riscv_opensbi.py |  16 +
 3 files changed, 106 insertions(+), 20 deletions(-)

-- 
2.41.0




[PATCH for-8.2 v2 5/7] target/riscv/cpu.c: add a ADD_CPU_PROPERTIES_ARRAY() macro

2023-07-12 Thread Daniel Henrique Barboza
The code inside riscv_cpu_add_user_properties() became quite repetitive
after recent changes. Add a macro to hide the repetition away.

Signed-off-by: Daniel Henrique Barboza 
---
 target/riscv/cpu.c | 19 ---
 1 file changed, 8 insertions(+), 11 deletions(-)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index c0826b449d..b61465c8c4 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -1881,6 +1881,11 @@ static void cpu_set_cfg_unavailable(Object *obj, Visitor 
*v,
 }
 #endif
 
+#define ADD_CPU_PROPERTIES_ARRAY(_dev, _array) \
+for (prop = _array; prop && prop->name; prop++) { \
+qdev_property_add_static(_dev, prop); \
+} \
+
 /*
  * Add CPU properties with user-facing flags.
  *
@@ -1924,17 +1929,9 @@ static void riscv_cpu_add_user_properties(Object *obj)
 qdev_property_add_static(dev, prop);
 }
 
-for (prop = riscv_cpu_options; prop && prop->name; prop++) {
-qdev_property_add_static(dev, prop);
-}
-
-for (prop = riscv_cpu_vendor_exts; prop && prop->name; prop++) {
-qdev_property_add_static(dev, prop);
-}
-
-for (prop = riscv_cpu_experimental_exts; prop && prop->name; prop++) {
-qdev_property_add_static(dev, prop);
-}
+ADD_CPU_PROPERTIES_ARRAY(dev, riscv_cpu_options);
+ADD_CPU_PROPERTIES_ARRAY(dev, riscv_cpu_vendor_exts);
+ADD_CPU_PROPERTIES_ARRAY(dev, riscv_cpu_experimental_exts);
 }
 
 static Property riscv_cpu_properties[] = {
-- 
2.41.0




[PATCH for-8.2 v2 6/7] target/riscv: add 'max' CPU type

2023-07-12 Thread Daniel Henrique Barboza
The 'max' CPU type is used by tooling to determine what's the most
capable CPU a current QEMU version implements. Other archs such as ARM
implement this type. Let's add it to RISC-V.

What we consider the "most capable CPU" in this context relates to
ratified, non-vendor extensions. This means that we want the 'max' CPU
to enable all (possible) ratified extensions by default. The reasoning
behind this design is (1) vendor extensions can conflict with each other
and we won't play favorites deciding which one is default or not and
(2) non-ratified extensions are always prone to changes, not being
stable enough to be enabled by default.

All this said, we're still not able to enable all ratified extensions
due to conflicts between them. Zfinx and all its dependencies aren't
enabled because of a conflict with RVF. zce, zcmp and zcmt are also
disabled due to RVD conflicts. When running with 64 bits we're also
disabling zcf.

MISA bits RVG, RVJ and RVV are also being set manually since they're
default disabled.

This is the resulting 'riscv,isa' DT for this new CPU:

rv64imafdcvh_zicbom_zicboz_zicsr_zifencei_zihintpause_zawrs_zfa_
zfh_zfhmin_zca_zcb_zcd_zba_zbb_zbc_zbkb_zbkc_zbkx_zbs_zk_zkn_zknd_
zkne_zknh_zkr_zks_zksed_zksh_zkt_zve32f_zve64f_zve64d_
smstateen_sscofpmf_sstc_svadu_svinval_svnapot_svpbmt

Signed-off-by: Daniel Henrique Barboza 
---
 target/riscv/cpu-qom.h |  1 +
 target/riscv/cpu.c | 53 ++
 2 files changed, 54 insertions(+)

diff --git a/target/riscv/cpu-qom.h b/target/riscv/cpu-qom.h
index 04af50983e..f3fbe37a2c 100644
--- a/target/riscv/cpu-qom.h
+++ b/target/riscv/cpu-qom.h
@@ -30,6 +30,7 @@
 #define CPU_RESOLVING_TYPE TYPE_RISCV_CPU
 
 #define TYPE_RISCV_CPU_ANY  RISCV_CPU_TYPE_NAME("any")
+#define TYPE_RISCV_CPU_MAX  RISCV_CPU_TYPE_NAME("max")
 #define TYPE_RISCV_CPU_BASE32   RISCV_CPU_TYPE_NAME("rv32")
 #define TYPE_RISCV_CPU_BASE64   RISCV_CPU_TYPE_NAME("rv64")
 #define TYPE_RISCV_CPU_BASE128  RISCV_CPU_TYPE_NAME("x-rv128")
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index b61465c8c4..5172566cda 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -248,6 +248,7 @@ static const char * const riscv_intr_names[] = {
 };
 
 static void riscv_cpu_add_user_properties(Object *obj);
+static void riscv_init_max_cpu_extensions(Object *obj);
 
 const char *riscv_cpu_get_trap_name(target_ulong cause, bool async)
 {
@@ -374,6 +375,25 @@ static void riscv_any_cpu_init(Object *obj)
 cpu->cfg.pmp = true;
 }
 
+static void riscv_max_cpu_init(Object *obj)
+{
+RISCVCPU *cpu = RISCV_CPU(obj);
+CPURISCVState *env = &cpu->env;
+RISCVMXL mlx = MXL_RV64;
+
+#ifdef TARGET_RISCV32
+mlx = MXL_RV32;
+#endif
+set_misa(env, mlx, 0);
+riscv_cpu_add_user_properties(obj);
+riscv_init_max_cpu_extensions(obj);
+env->priv_ver = PRIV_VERSION_LATEST;
+#ifndef CONFIG_USER_ONLY
+set_satp_mode_max_supported(RISCV_CPU(obj), mlx == MXL_RV32 ?
+VM_1_10_SV32 : VM_1_10_SV57);
+#endif
+}
+
 #if defined(TARGET_RISCV64)
 static void rv64_base_cpu_init(Object *obj)
 {
@@ -1934,6 +1954,38 @@ static void riscv_cpu_add_user_properties(Object *obj)
 ADD_CPU_PROPERTIES_ARRAY(dev, riscv_cpu_experimental_exts);
 }
 
+/*
+ * The 'max' type CPU will have all possible ratified
+ * non-vendor extensions enabled.
+ */
+static void riscv_init_max_cpu_extensions(Object *obj)
+{
+RISCVCPU *cpu = RISCV_CPU(obj);
+CPURISCVState *env = &cpu->env;
+Property *prop;
+
+/* Enable RVG, RVJ and RVV that are disabled by default */
+set_misa(env, env->misa_mxl, env->misa_ext | RVG | RVJ | RVV);
+
+for (prop = riscv_cpu_extensions; prop && prop->name; prop++) {
+object_property_set_bool(obj, prop->name, true, NULL);
+}
+
+/* Zfinx is not compatible with F. Disable it */
+object_property_set_bool(obj, "zfinx", false, NULL);
+object_property_set_bool(obj, "zdinx", false, NULL);
+object_property_set_bool(obj, "zhinx", false, NULL);
+object_property_set_bool(obj, "zhinxmin", false, NULL);
+
+object_property_set_bool(obj, "zce", false, NULL);
+object_property_set_bool(obj, "zcmp", false, NULL);
+object_property_set_bool(obj, "zcmt", false, NULL);
+
+if (env->misa_mxl != MXL_RV32) {
+object_property_set_bool(obj, "zcf", false, NULL);
+}
+}
+
 static Property riscv_cpu_properties[] = {
 DEFINE_PROP_BOOL("debug", RISCVCPU, cfg.debug, true),
 
@@ -2272,6 +2324,7 @@ static const TypeInfo riscv_cpu_type_infos[] = {
 .abstract = true,
 },
 DEFINE_DYNAMIC_CPU(TYPE_RISCV_CPU_ANY,  riscv_any_cpu_init),
+DEFINE_DYNAMIC_CPU(TYPE_RISCV_CPU_MAX,  riscv_max_cpu_init),
 #if defined(CONFIG_KVM)
 DEFINE_CPU(TYPE_RISCV_CPU_HOST, riscv_host_cpu_init),
 #endif
-- 
2.41.0




[PATCH for-8.2 v2 1/7] target/riscv/cpu.c: split CPU options from riscv_cpu_extensions[]

2023-07-12 Thread Daniel Henrique Barboza
We'll add a new CPU type that will enable a considerable number of
extensions. To make it easier for us we'll do a few cleanups in our
existing riscv_cpu_extensions[] array.

Start by splitting all CPU non-boolean options from it. Create a new
riscv_cpu_options[] array for them. Add all these properties in
riscv_cpu_add_user_properties() as it is already being done today.

No functional changes made.

Signed-off-by: Daniel Henrique Barboza 
---
 target/riscv/cpu.c | 27 +++
 1 file changed, 19 insertions(+), 8 deletions(-)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index 9339c0241d..cdf9eeeb6b 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -1751,7 +1751,6 @@ static void riscv_cpu_add_misa_properties(Object *cpu_obj)
 
 static Property riscv_cpu_extensions[] = {
 /* Defaults for standard extensions */
-DEFINE_PROP_UINT8("pmu-num", RISCVCPU, cfg.pmu_num, 16),
 DEFINE_PROP_BOOL("sscofpmf", RISCVCPU, cfg.ext_sscofpmf, false),
 DEFINE_PROP_BOOL("Zifencei", RISCVCPU, cfg.ext_ifencei, true),
 DEFINE_PROP_BOOL("Zicsr", RISCVCPU, cfg.ext_icsr, true),
@@ -1767,11 +1766,6 @@ static Property riscv_cpu_extensions[] = {
 DEFINE_PROP_BOOL("pmp", RISCVCPU, cfg.pmp, true),
 DEFINE_PROP_BOOL("sstc", RISCVCPU, cfg.ext_sstc, true),
 
-DEFINE_PROP_STRING("priv_spec", RISCVCPU, cfg.priv_spec),
-DEFINE_PROP_STRING("vext_spec", RISCVCPU, cfg.vext_spec),
-DEFINE_PROP_UINT16("vlen", RISCVCPU, cfg.vlen, 128),
-DEFINE_PROP_UINT16("elen", RISCVCPU, cfg.elen, 64),
-
 DEFINE_PROP_BOOL("smstateen", RISCVCPU, cfg.ext_smstateen, false),
 DEFINE_PROP_BOOL("svadu", RISCVCPU, cfg.ext_svadu, true),
 DEFINE_PROP_BOOL("svinval", RISCVCPU, cfg.ext_svinval, false),
@@ -1802,9 +1796,7 @@ static Property riscv_cpu_extensions[] = {
 DEFINE_PROP_BOOL("zhinxmin", RISCVCPU, cfg.ext_zhinxmin, false),
 
 DEFINE_PROP_BOOL("zicbom", RISCVCPU, cfg.ext_icbom, true),
-DEFINE_PROP_UINT16("cbom_blocksize", RISCVCPU, cfg.cbom_blocksize, 64),
 DEFINE_PROP_BOOL("zicboz", RISCVCPU, cfg.ext_icboz, true),
-DEFINE_PROP_UINT16("cboz_blocksize", RISCVCPU, cfg.cboz_blocksize, 64),
 
 DEFINE_PROP_BOOL("zmmul", RISCVCPU, cfg.ext_zmmul, false),
 
@@ -1848,6 +1840,20 @@ static Property riscv_cpu_extensions[] = {
 DEFINE_PROP_END_OF_LIST(),
 };
 
+static Property riscv_cpu_options[] = {
+DEFINE_PROP_UINT8("pmu-num", RISCVCPU, cfg.pmu_num, 16),
+
+DEFINE_PROP_STRING("priv_spec", RISCVCPU, cfg.priv_spec),
+DEFINE_PROP_STRING("vext_spec", RISCVCPU, cfg.vext_spec),
+
+DEFINE_PROP_UINT16("vlen", RISCVCPU, cfg.vlen, 128),
+DEFINE_PROP_UINT16("elen", RISCVCPU, cfg.elen, 64),
+
+DEFINE_PROP_UINT16("cbom_blocksize", RISCVCPU, cfg.cbom_blocksize, 64),
+DEFINE_PROP_UINT16("cboz_blocksize", RISCVCPU, cfg.cboz_blocksize, 64),
+
+DEFINE_PROP_END_OF_LIST(),
+};
 
 #ifndef CONFIG_USER_ONLY
 static void cpu_set_cfg_unavailable(Object *obj, Visitor *v,
@@ -1916,6 +1922,11 @@ static void riscv_cpu_add_user_properties(Object *obj)
 #endif
 qdev_property_add_static(dev, prop);
 }
+
+for (prop = riscv_cpu_options; prop && prop->name; prop++) {
+qdev_property_add_static(dev, prop);
+}
+
 }
 
 static Property riscv_cpu_properties[] = {
-- 
2.41.0




[PATCH for-8.2 v2 3/7] target/riscv/cpu.c: split vendor exts from riscv_cpu_extensions[]

2023-07-12 Thread Daniel Henrique Barboza
Our goal is to make riscv_cpu_extensions[] hold only ratified,
non-vendor extensions.

Create a new riscv_cpu_vendor_exts[] array for them, changing
riscv_cpu_add_user_properties() accordingly.

Signed-off-by: Daniel Henrique Barboza 
---
 target/riscv/cpu.c | 34 --
 1 file changed, 20 insertions(+), 14 deletions(-)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index 735e0ed793..9bbdc46126 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -1808,20 +1808,6 @@ static Property riscv_cpu_extensions[] = {
 DEFINE_PROP_BOOL("zcmp", RISCVCPU, cfg.ext_zcmp, false),
 DEFINE_PROP_BOOL("zcmt", RISCVCPU, cfg.ext_zcmt, false),
 
-/* Vendor-specific custom extensions */
-DEFINE_PROP_BOOL("xtheadba", RISCVCPU, cfg.ext_xtheadba, false),
-DEFINE_PROP_BOOL("xtheadbb", RISCVCPU, cfg.ext_xtheadbb, false),
-DEFINE_PROP_BOOL("xtheadbs", RISCVCPU, cfg.ext_xtheadbs, false),
-DEFINE_PROP_BOOL("xtheadcmo", RISCVCPU, cfg.ext_xtheadcmo, false),
-DEFINE_PROP_BOOL("xtheadcondmov", RISCVCPU, cfg.ext_xtheadcondmov, false),
-DEFINE_PROP_BOOL("xtheadfmemidx", RISCVCPU, cfg.ext_xtheadfmemidx, false),
-DEFINE_PROP_BOOL("xtheadfmv", RISCVCPU, cfg.ext_xtheadfmv, false),
-DEFINE_PROP_BOOL("xtheadmac", RISCVCPU, cfg.ext_xtheadmac, false),
-DEFINE_PROP_BOOL("xtheadmemidx", RISCVCPU, cfg.ext_xtheadmemidx, false),
-DEFINE_PROP_BOOL("xtheadmempair", RISCVCPU, cfg.ext_xtheadmempair, false),
-DEFINE_PROP_BOOL("xtheadsync", RISCVCPU, cfg.ext_xtheadsync, false),
-DEFINE_PROP_BOOL("xventanacondops", RISCVCPU, cfg.ext_XVentanaCondOps, false),
-
 /* These are experimental so mark with 'x-' */
 DEFINE_PROP_BOOL("x-zicond", RISCVCPU, cfg.ext_zicond, false),
 
@@ -1840,6 +1826,23 @@ static Property riscv_cpu_extensions[] = {
 DEFINE_PROP_END_OF_LIST(),
 };
 
+static Property riscv_cpu_vendor_exts[] = {
+DEFINE_PROP_BOOL("xtheadba", RISCVCPU, cfg.ext_xtheadba, false),
+DEFINE_PROP_BOOL("xtheadbb", RISCVCPU, cfg.ext_xtheadbb, false),
+DEFINE_PROP_BOOL("xtheadbs", RISCVCPU, cfg.ext_xtheadbs, false),
+DEFINE_PROP_BOOL("xtheadcmo", RISCVCPU, cfg.ext_xtheadcmo, false),
+DEFINE_PROP_BOOL("xtheadcondmov", RISCVCPU, cfg.ext_xtheadcondmov, false),
+DEFINE_PROP_BOOL("xtheadfmemidx", RISCVCPU, cfg.ext_xtheadfmemidx, false),
+DEFINE_PROP_BOOL("xtheadfmv", RISCVCPU, cfg.ext_xtheadfmv, false),
+DEFINE_PROP_BOOL("xtheadmac", RISCVCPU, cfg.ext_xtheadmac, false),
+DEFINE_PROP_BOOL("xtheadmemidx", RISCVCPU, cfg.ext_xtheadmemidx, false),
+DEFINE_PROP_BOOL("xtheadmempair", RISCVCPU, cfg.ext_xtheadmempair, false),
+DEFINE_PROP_BOOL("xtheadsync", RISCVCPU, cfg.ext_xtheadsync, false),
+DEFINE_PROP_BOOL("xventanacondops", RISCVCPU, cfg.ext_XVentanaCondOps, false),
+
+DEFINE_PROP_END_OF_LIST(),
+};
+
 static Property riscv_cpu_options[] = {
 DEFINE_PROP_UINT8("pmu-num", RISCVCPU, cfg.pmu_num, 16),
 
@@ -1921,6 +1924,9 @@ static void riscv_cpu_add_user_properties(Object *obj)
 qdev_property_add_static(dev, prop);
 }
 
+for (prop = riscv_cpu_vendor_exts; prop && prop->name; prop++) {
+qdev_property_add_static(dev, prop);
+}
 }
 
 static Property riscv_cpu_properties[] = {
-- 
2.41.0




[PATCH for-8.2 v2 2/7] target/riscv/cpu.c: skip 'bool' check when filtering KVM props

2023-07-12 Thread Daniel Henrique Barboza
After the introduction of riscv_cpu_options[] all properties in
riscv_cpu_extensions[] are booleans. This check is now obsolete.

Signed-off-by: Daniel Henrique Barboza 
---
 target/riscv/cpu.c | 14 --
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index cdf9eeeb6b..735e0ed793 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -1907,17 +1907,11 @@ static void riscv_cpu_add_user_properties(Object *obj)
  * Set the default to disabled for every extension
  * unknown to KVM and error out if the user attempts
  * to enable any of them.
- *
- * We're giving a pass for non-bool properties since they're
- * not related to the availability of extensions and can be
- * safely ignored as is.
  */
-if (prop->info == &qdev_prop_bool) {
-object_property_add(obj, prop->name, "bool",
-NULL, cpu_set_cfg_unavailable,
-NULL, (void *)prop->name);
-continue;
-}
+object_property_add(obj, prop->name, "bool",
+NULL, cpu_set_cfg_unavailable,
+NULL, (void *)prop->name);
+continue;
 }
 #endif
 qdev_property_add_static(dev, prop);
-- 
2.41.0




Re: [PATCH] linux-user: make sure brk(0) returns a page-aligned value

2023-07-12 Thread Helge Deller

On 7/8/23 23:36, Helge Deller wrote:

On 7/8/23 19:26, Richard Henderson wrote:

On 7/6/23 12:34, Andreas Schwab wrote:

Fixes: 86f04735ac ("linux-user: Fix brk() to release pages")
Signed-off-by: Andreas Schwab 
---
  linux-user/syscall.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 08162cc966..e8a17377f5 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -805,7 +805,7 @@ static abi_ulong brk_page;
  void target_set_brk(abi_ulong new_brk)
  {
-    target_brk = new_brk;
+    target_brk = TARGET_PAGE_ALIGN(new_brk);
  brk_page = HOST_PAGE_ALIGN(target_brk);
  }


It makes sense, since that's how do_brk aligns things.


Yes, patch looks good.
I haven't tested, but it seems it adjusts the initial brk(0) value
only to make sure that it's target page aligned.
Maybe the title should be: ?
linux-user: make sure the initial brk(0) is page-aligned


Another bug report regarding the non-aligned brk()...

See:
https://github.com/upx/upx/issues/683
https://gitlab.com/qemu-project/qemu/-/issues/1756

Helge



Re: [PATCH for-8.2 6/7] target/riscv: add 'max' CPU type

2023-07-12 Thread Daniel Henrique Barboza




On 7/12/23 16:22, Conor Dooley wrote:

On Wed, Jul 12, 2023 at 04:01:48PM -0300, Daniel Henrique Barboza wrote:

The 'max' CPU type is used by tooling to determine what's the most
capable CPU a current QEMU version implements. Other archs such as ARM
implements this type. Let's add it to RISC-V.

What we consider the "most capable CPU" in this context relates to
ratified, non-vendor extensions. This means that we want the 'max' CPU
to enable all (possible) ratified extensions by default. The reasoning
behind this design is that (1) vendor extensions can conflict with each
other and we won't play favorites deciding which one is default or not,
and (2) non-ratified extensions are always prone to changes, not being
stable enough to be enabled by default.

All this said, we're still not able to enable all ratified extensions
due to conflicts between them. Zfinx and all its dependencies aren't
enabled because of a conflict with RVF. zce, zcmp and zcmt are also
disabled due to RVD conflicts. When running with 64 bits we're also
disabling zcf.

Signed-off-by: Daniel Henrique Barboza 


This seems like it will be super helpful for CI stuff etc, thanks for
doing it.


And Linux actually boots on it, which was remarkable to see. I was expecting
something to blow up I guess.

This is the riscv,isa DT generated:

# cat /proc/device-tree/cpus/cpu@0/riscv,isa
rv64imafdch_zicbom_zicboz_zicsr_zifencei_zihintpause_zawrs_zfa_zfh_zfhmin_zca_zcb_zcd_
zba_zbb_zbc_zbkb_zbkc_zbkx_zbs_zk_zkn_zknd_zkne_zknh_zkr_zks_zksed_zksh_zkt_
zve32f_zve64f_zve64d_smstateen_sscofpmf_sstc_svadu_svinval_svnapot_svpbmt#


I'll put this in the commit message for the next version.

Oh, and I just realized that I forgot to light up all the MISA bits (we're
missing RVV). Guess I'll have to send the v2 right away.


Thanks,


Daniel



Re: [PATCH 03/10] hw/riscv: virt: Make few IMSIC macros and functions public

2023-07-12 Thread Daniel Henrique Barboza




On 7/12/23 13:39, Sunil V L wrote:

Some macros and static function related to IMSIC are defined
in virt.c. They are required in virt-acpi-build.c. So, make them
public.

Signed-off-by: Sunil V L 
---


Reviewed-by: Daniel Henrique Barboza 


  hw/riscv/virt.c | 25 +
  include/hw/riscv/virt.h | 25 +
  2 files changed, 26 insertions(+), 24 deletions(-)

diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
index 46d3341113..f6067db8ec 100644
--- a/hw/riscv/virt.c
+++ b/hw/riscv/virt.c
@@ -37,7 +37,6 @@
  #include "hw/riscv/numa.h"
  #include "hw/intc/riscv_aclint.h"
  #include "hw/intc/riscv_aplic.h"
-#include "hw/intc/riscv_imsic.h"
  #include "hw/intc/sifive_plic.h"
  #include "hw/misc/sifive_test.h"
  #include "hw/platform-bus.h"
@@ -53,28 +52,6 @@
  #include "hw/acpi/aml-build.h"
  #include "qapi/qapi-visit-common.h"
  
-/*
- * The virt machine physical address space used by some of the devices
- * namely ACLINT, PLIC, APLIC, and IMSIC depend on number of Sockets,
- * number of CPUs, and number of IMSIC guest files.
- *
- * Various limits defined by VIRT_SOCKETS_MAX_BITS, VIRT_CPUS_MAX_BITS,
- * and VIRT_IRQCHIP_MAX_GUESTS_BITS are tuned for maximum utilization
- * of virt machine physical address space.
- */
-
-#define VIRT_IMSIC_GROUP_MAX_SIZE  (1U << IMSIC_MMIO_GROUP_MIN_SHIFT)
-#if VIRT_IMSIC_GROUP_MAX_SIZE < \
-IMSIC_GROUP_SIZE(VIRT_CPUS_MAX_BITS, VIRT_IRQCHIP_MAX_GUESTS_BITS)
-#error "Can't accomodate single IMSIC group in address space"
-#endif
-
-#define VIRT_IMSIC_MAX_SIZE(VIRT_SOCKETS_MAX * \
-VIRT_IMSIC_GROUP_MAX_SIZE)
-#if 0x400 < VIRT_IMSIC_MAX_SIZE
-#error "Can't accomodate all IMSIC groups in address space"
-#endif
-
  static const MemMapEntry virt_memmap[] = {
  [VIRT_DEBUG] ={0x0, 0x100 },
  [VIRT_MROM] = { 0x1000,0xf000 },
@@ -505,7 +482,7 @@ static void create_fdt_socket_plic(RISCVVirtState *s,
  g_free(plic_cells);
  }
  
-static uint32_t imsic_num_bits(uint32_t count)
+uint32_t imsic_num_bits(uint32_t count)
  {
  uint32_t ret = 0;
  
diff --git a/include/hw/riscv/virt.h b/include/hw/riscv/virt.h
index 4ef1f660ab..00c22492a7 100644
--- a/include/hw/riscv/virt.h
+++ b/include/hw/riscv/virt.h
@@ -23,6 +23,7 @@
  #include "hw/riscv/riscv_hart.h"
  #include "hw/sysbus.h"
  #include "hw/block/flash.h"
+#include "hw/intc/riscv_imsic.h"
  
  #define VIRT_CPUS_MAX_BITS 9

  #define VIRT_CPUS_MAX  (1 << VIRT_CPUS_MAX_BITS)
@@ -128,4 +129,28 @@ enum {
  
  bool virt_is_acpi_enabled(RISCVVirtState *s);

  void virt_acpi_setup(RISCVVirtState *vms);
+uint32_t imsic_num_bits(uint32_t count);
+
+/*
+ * The virt machine physical address space used by some of the devices
+ * namely ACLINT, PLIC, APLIC, and IMSIC depend on number of Sockets,
+ * number of CPUs, and number of IMSIC guest files.
+ *
+ * Various limits defined by VIRT_SOCKETS_MAX_BITS, VIRT_CPUS_MAX_BITS,
+ * and VIRT_IRQCHIP_MAX_GUESTS_BITS are tuned for maximum utilization
+ * of virt machine physical address space.
+ */
+
+#define VIRT_IMSIC_GROUP_MAX_SIZE  (1U << IMSIC_MMIO_GROUP_MIN_SHIFT)
+#if VIRT_IMSIC_GROUP_MAX_SIZE < \
+IMSIC_GROUP_SIZE(VIRT_CPUS_MAX_BITS, VIRT_IRQCHIP_MAX_GUESTS_BITS)
+#error "Can't accomodate single IMSIC group in address space"
+#endif
+
+#define VIRT_IMSIC_MAX_SIZE(VIRT_SOCKETS_MAX * \
+VIRT_IMSIC_GROUP_MAX_SIZE)
+#if 0x400 < VIRT_IMSIC_MAX_SIZE
+#error "Can't accomodate all IMSIC groups in address space"
+#endif
+
  #endif




Re: [PATCH 02/10] hw/riscv: virt: Add PCI bus reference in RISCVVirtState

2023-07-12 Thread Daniel Henrique Barboza




On 7/12/23 13:39, Sunil V L wrote:

The PCI bus information is needed in RISCVVirtState so that other
files like virt-acpi-build.c can make use of it. Add new field in
RISCVVirtState so that ACPI code can use it.

Signed-off-by: Sunil V L 
---
  hw/riscv/virt.c | 6 --
  include/hw/riscv/virt.h | 1 +
  2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
index d90286dc46..46d3341113 100644
--- a/hw/riscv/virt.c
+++ b/hw/riscv/virt.c
@@ -1073,7 +1073,8 @@ static inline DeviceState *gpex_pcie_init(MemoryRegion 
*sys_mem,
hwaddr high_mmio_base,
hwaddr high_mmio_size,
hwaddr pio_base,
-  DeviceState *irqchip)
+  DeviceState *irqchip,
+  RISCVVirtState *s)
  {
  DeviceState *dev;
  MemoryRegion *ecam_alias, *ecam_reg;
@@ -1113,6 +1114,7 @@ static inline DeviceState *gpex_pcie_init(MemoryRegion 
*sys_mem,
  gpex_set_irq_num(GPEX_HOST(dev), i, PCIE_IRQ + i);
  }
  
+s->bus = PCI_HOST_BRIDGE(dev)->bus;

  return dev;
  }
  
@@ -1502,7 +1504,7 @@ static void virt_machine_init(MachineState *machine)

 virt_high_pcie_memmap.base,
 virt_high_pcie_memmap.size,
 memmap[VIRT_PCIE_PIO].base,
-   pcie_irqchip);
+   pcie_irqchip, s);


I wonder whether we could use 's' inside gpex_pcie_init() to avoid passing all
this memmap stuff to the function. It seems that most, if not all these values,
can be derived from s->memmap[]. A work for another day perhaps.


Reviewed-by: Daniel Henrique Barboza 


  
  create_platform_bus(s, mmio_irqchip);
  
diff --git a/include/hw/riscv/virt.h b/include/hw/riscv/virt.h

index e5c474b26e..4ef1f660ab 100644
--- a/include/hw/riscv/virt.h
+++ b/include/hw/riscv/virt.h
@@ -60,6 +60,7 @@ struct RISCVVirtState {
  char *oem_table_id;
  OnOffAuto acpi;
  const MemMapEntry *memmap;
+PCIBus *bus;
  };
  
  enum {




Re: [PATCH v21 03/20] target/s390x/cpu topology: handle STSI(15) and build the SYSIB

2023-07-12 Thread Thomas Huth

On 12/07/2023 16.24, Pierre Morel wrote:


On 7/4/23 13:40, Thomas Huth wrote:

On 30/06/2023 11.17, Pierre Morel wrote:

On interception of STSI(15.1.x) the System Information Block
(SYSIB) is built from the list of pre-ordered topology entries.

Signed-off-by: Pierre Morel 
---

...

diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h
index 7ebd5e05b6..6e7d041b01 100644
--- a/target/s390x/cpu.h
+++ b/target/s390x/cpu.h
@@ -569,6 +569,29 @@ typedef struct SysIB_322 {
  } SysIB_322;
  QEMU_BUILD_BUG_ON(sizeof(SysIB_322) != 4096);
  +/*
+ * Topology Magnitude fields (MAG) indicates the maximum number of
+ * topology list entries (TLE) at the corresponding nesting level.
+ */
+#define S390_TOPOLOGY_MAG  6
+#define S390_TOPOLOGY_MAG6 0
+#define S390_TOPOLOGY_MAG5 1
+#define S390_TOPOLOGY_MAG4 2
+#define S390_TOPOLOGY_MAG3 3
+#define S390_TOPOLOGY_MAG2 4
+#define S390_TOPOLOGY_MAG1 5
+/* Configuration topology */
+typedef struct SysIB_151x {
+    uint8_t  reserved0[2];
+    uint16_t length;
+    uint8_t  mag[S390_TOPOLOGY_MAG];
+    uint8_t  reserved1;
+    uint8_t  mnest;
+    uint32_t reserved2;
+    char tle[];
+} SysIB_151x;
+QEMU_BUILD_BUG_ON(sizeof(SysIB_151x) != 16);
+
  typedef union SysIB {
  SysIB_111 sysib_111;
  SysIB_121 sysib_121;
@@ -576,9 +599,62 @@ typedef union SysIB {
  SysIB_221 sysib_221;
  SysIB_222 sysib_222;
  SysIB_322 sysib_322;
+    SysIB_151x sysib_151x;
  } SysIB;
  QEMU_BUILD_BUG_ON(sizeof(SysIB) != 4096);
  +/*
+ * CPU Topology List provided by STSI with fc=15 provides a list
+ * of two different Topology List Entries (TLE) types to specify
+ * the topology hierarchy.
+ *
+ * - Container Topology List Entry
+ *   Defines a container to contain other Topology List Entries
+ *   of any type, nested containers or CPU.
+ * - CPU Topology List Entry
+ *   Specifies the CPUs position, type, entitlement and polarization
+ *   of the CPUs contained in the last Container TLE.
+ *
+ * There can be theoretically up to five levels of containers, QEMU
+ * uses only three levels, the drawer's, book's and socket's level.
+ *
+ * A container with a nesting level (NL) greater than 1 can only
+ * contain another container of nesting level NL-1.
+ *
+ * A container of nesting level 1 (socket), contains as many CPU TLE
+ * as needed to describe the position and qualities of all CPUs inside
+ * the container.
+ * The qualities of a CPU are polarization, entitlement and type.
+ *
+ * The CPU TLE defines the position of the CPUs of identical qualities
+ * using a 64bits mask which first bit has its offset defined by
+ * the CPU address orgin field of the CPU TLE like in:
+ * CPU address = origin * 64 + bit position within the mask
+ *
+ */
+/* Container type Topology List Entry */
+typedef struct SysIBTl_container {
+    uint8_t nl;
+    uint8_t reserved[6];
+    uint8_t id;
+} SysIBTl_container;


Why mixing CamelCase with underscore-style here? SysIBTlContainer would 
look more natural, I think?



OK, what about SYSIBContainerListEntry ?


Sounds fine!






+QEMU_BUILD_BUG_ON(sizeof(SysIBTl_container) != 8);
+
+/* CPU type Topology List Entry */
+typedef struct SysIBTl_cpu {
+    uint8_t nl;
+    uint8_t reserved0[3];
+#define SYSIB_TLE_POLARITY_MASK 0x03
+#define SYSIB_TLE_DEDICATED 0x04
+    uint8_t flags;
+    uint8_t type;
+    uint16_t origin;
+    uint64_t mask;
+} SysIBTl_cpu;


dito, maybe better SysIBTlCpu ?



What about SysIBCPUListEntry ?


Ack.

 Thomas





Re: [PATCH v21 16/20] tests/avocado: s390x cpu topology entitlement tests

2023-07-12 Thread Thomas Huth

On 12/07/2023 21.37, Nina Schoetterl-Glausch wrote:

On Wed, 2023-07-05 at 12:22 +0200, Thomas Huth wrote:

On 30/06/2023 11.17, Pierre Morel wrote:

This test takes care to check the changes on different entitlements
when the guest requests a polarization change.

Signed-off-by: Pierre Morel 
---
   tests/avocado/s390_topology.py | 47 ++
   1 file changed, 47 insertions(+)

diff --git a/tests/avocado/s390_topology.py
b/tests/avocado/s390_topology.py
index 2cf731cb1d..4855e5d7e4 100644
--- a/tests/avocado/s390_topology.py
+++ b/tests/avocado/s390_topology.py
@@ -240,3 +240,50 @@ def test_polarisation(self):
   res = self.vm.qmp('query-cpu-polarization')
   self.assertEqual(res['return']['polarization'],
'horizontal')
   self.check_topology(0, 0, 0, 0, 'medium', False)
+
+    def test_entitlement(self):
+    """
+    This test verifies that QEMU modifies the polarization
+    after a guest request.

...

+    self.check_topology(0, 0, 0, 0, 'low', False)
+    self.check_topology(1, 0, 0, 0, 'medium', False)
+    self.check_topology(2, 1, 0, 0, 'high', False)
+    self.check_topology(3, 1, 0, 0, 'high', False)
+
+    self.guest_set_dispatching('1');
+
+    self.check_topology(0, 0, 0, 0, 'low', False)
+    self.check_topology(1, 0, 0, 0, 'medium', False)
+    self.check_topology(2, 1, 0, 0, 'high', False)
+    self.check_topology(3, 1, 0, 0, 'high', False)
+
+    self.guest_set_dispatching('0');
+
+    self.check_topology(0, 0, 0, 0, 'low', False)
+    self.check_topology(1, 0, 0, 0, 'medium', False)
+    self.check_topology(2, 1, 0, 0, 'high', False)
+    self.check_topology(3, 1, 0, 0, 'high', False)


Sorry, I think I'm too blind to see it, but what has changed after the guest
changed the polarization?


Nothing, the values are retained, they're just not active.
The guest will see a horizontal polarization until it changes back to
vertical.


But then the comment in front of it ("This test verifies that QEMU 
*modifies* the polarization...") does not quite match, does it?


 Thomas





Re: [PATCH v21 14/20] tests/avocado: s390x cpu topology core

2023-07-12 Thread Nina Schoetterl-Glausch
On Fri, 2023-06-30 at 11:17 +0200, Pierre Morel wrote:
> Introduction of the s390x cpu topology core functions and
> basic tests.
> 
> We test the corelation between the command line and

corRelation

> the QMP results in query-cpus-fast for various CPU topology.
> 
> Signed-off-by: Pierre Morel 

Reviewed-by: Nina Schoetterl-Glausch 
> ---

[...]

> diff --git a/tests/avocado/s390_topology.py
> b/tests/avocado/s390_topology.py
> new file mode 100644
> index 00..1758ec1f13
> --- /dev/null
> +++ b/tests/avocado/s390_topology.py
> @@ -0,0 +1,196 @@

[...]

> +class S390CPUTopology(QemuSystemTest):
> +    """
> +    S390x CPU topology consist of 4 topology layers, from bottom to

consistS

> top,
> +    the cores, sockets, books and drawers and 2 modifiers
> attributes,
> +    the entitlement and the dedication.
> +    See: docs/system/s390x/cpu-topology.rst.
> +
> +    S390x CPU topology is setup in different ways:
> +    - implicitely from the '-smp' argument by completing each
> topology
> +  level one after the other begining with drawer 0, book 0 and
> socket 0.
> +    - explicitely from the '-device' argument on the QEMU command
> line
> +    - explicitely by hotplug of a new CPU using QMP or HMP
> +    - it is modified by using QMP 'set-cpu-topology'
> +
> +    The S390x modifier attribute entitlement depends on the machine
> +    polarization, which can be horizontal or vertical.
> +    The polarization is changed on a request from the guest.
> +    """

[...]



Re: [PATCH v21 16/20] tests/avocado: s390x cpu topology entitlement tests

2023-07-12 Thread Nina Schoetterl-Glausch
On Wed, 2023-07-05 at 12:22 +0200, Thomas Huth wrote:
> On 30/06/2023 11.17, Pierre Morel wrote:
> > This test takes care to check the changes on different entitlements
> > when the guest requests a polarization change.
> > 
> > Signed-off-by: Pierre Morel 
> > ---
> >   tests/avocado/s390_topology.py | 47 ++
> >   1 file changed, 47 insertions(+)
> > 
> > diff --git a/tests/avocado/s390_topology.py
> > b/tests/avocado/s390_topology.py
> > index 2cf731cb1d..4855e5d7e4 100644
> > --- a/tests/avocado/s390_topology.py
> > +++ b/tests/avocado/s390_topology.py
> > @@ -240,3 +240,50 @@ def test_polarisation(self):
> >   res = self.vm.qmp('query-cpu-polarization')
> >   self.assertEqual(res['return']['polarization'],
> > 'horizontal')
> >   self.check_topology(0, 0, 0, 0, 'medium', False)
> > +
> > +    def test_entitlement(self):
> > +    """
> > +    This test verifies that QEMU modifies the polarization
> > +    after a guest request.
> ...
> > +    self.check_topology(0, 0, 0, 0, 'low', False)
> > +    self.check_topology(1, 0, 0, 0, 'medium', False)
> > +    self.check_topology(2, 1, 0, 0, 'high', False)
> > +    self.check_topology(3, 1, 0, 0, 'high', False)
> > +
> > +    self.guest_set_dispatching('1');
> > +
> > +    self.check_topology(0, 0, 0, 0, 'low', False)
> > +    self.check_topology(1, 0, 0, 0, 'medium', False)
> > +    self.check_topology(2, 1, 0, 0, 'high', False)
> > +    self.check_topology(3, 1, 0, 0, 'high', False)
> > +
> > +    self.guest_set_dispatching('0');
> > +
> > +    self.check_topology(0, 0, 0, 0, 'low', False)
> > +    self.check_topology(1, 0, 0, 0, 'medium', False)
> > +    self.check_topology(2, 1, 0, 0, 'high', False)
> > +    self.check_topology(3, 1, 0, 0, 'high', False)
> 
> Sorry, I think I'm too blind to see it, but what has changed after
> the guest 
> changed the polarization?

Nothing, the values are retained, they're just not active.
The guest will see a horizontal polarization until it changes back to
vertical.

> 
>   Thomas
> 




[PULL 1/1] virtio-blk: fix host notifier issues during dataplane start/stop

2023-07-12 Thread Stefan Hajnoczi
The main loop thread can consume 100% CPU when using --device
virtio-blk-pci,iothread=. ppoll() constantly returns but
reading virtqueue host notifiers fails with EAGAIN. The file descriptors
are stale and remain registered with the AioContext because of bugs in
the virtio-blk dataplane start/stop code.

The problem is that the dataplane start/stop code involves drain
operations, which call virtio_blk_drained_begin() and
virtio_blk_drained_end() at points where the host notifier is not
operational:
- In virtio_blk_data_plane_start(), blk_set_aio_context() drains after
  vblk->dataplane_started has been set to true but the host notifier has
  not been attached yet.
- In virtio_blk_data_plane_stop(), blk_drain() and blk_set_aio_context()
  drain after the host notifier has already been detached but with
  vblk->dataplane_started still set to true.

I would like to simplify ->ioeventfd_start/stop() to avoid interactions
with drain entirely, but couldn't find a way to do that. Instead, this
patch accepts the fragile nature of the code and reorders it so that
vblk->dataplane_started is false during drain operations. This way the
virtio_blk_drained_begin() and virtio_blk_drained_end() calls don't
touch the host notifier. The result is that
virtio_blk_data_plane_start() and virtio_blk_data_plane_stop() have
complete control over the host notifier and stale file descriptors are
no longer left in the AioContext.

This patch fixes the 100% CPU consumption in the main loop thread and
correctly moves host notifier processing to the IOThread.

Fixes: 1665d9326fd2 ("virtio-blk: implement BlockDevOps->drained_begin()")
Reported-by: Lukáš Doktor 
Signed-off-by: Stefan Hajnoczi 
Tested-by: Lukas Doktor 
Message-id: 20230704151527.193586-1-stefa...@redhat.com
Signed-off-by: Stefan Hajnoczi 
---
 hw/block/dataplane/virtio-blk.c | 67 +++--
 1 file changed, 38 insertions(+), 29 deletions(-)

diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
index c227b39408..da36fcfd0b 100644
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@@ -219,13 +219,6 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev)
 
 memory_region_transaction_commit();
 
-/*
- * These fields are visible to the IOThread so we rely on implicit barriers
- * in aio_context_acquire() on the write side and aio_notify_accept() on
- * the read side.
- */
-s->starting = false;
-vblk->dataplane_started = true;
 trace_virtio_blk_data_plane_start(s);
 
 old_context = blk_get_aio_context(s->conf->conf.blk);
@@ -244,6 +237,18 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev)
 event_notifier_set(virtio_queue_get_host_notifier(vq));
 }
 
+/*
+ * These fields must be visible to the IOThread when it processes the
+ * virtqueue, otherwise it will think dataplane has not started yet.
+ *
+ * Make sure ->dataplane_started is false when blk_set_aio_context() is
+ * called above so that draining does not cause the host notifier to be
+ * detached/attached prematurely.
+ */
+s->starting = false;
+vblk->dataplane_started = true;
+smp_wmb(); /* paired with aio_notify_accept() on the read side */
+
 /* Get this show started by hooking up our callbacks */
 if (!blk_in_drain(s->conf->conf.blk)) {
 aio_context_acquire(s->ctx);
@@ -273,7 +278,6 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev)
   fail_guest_notifiers:
 vblk->dataplane_disabled = true;
 s->starting = false;
-vblk->dataplane_started = true;
 return -ENOSYS;
 }
 
@@ -327,6 +331,32 @@ void virtio_blk_data_plane_stop(VirtIODevice *vdev)
 aio_wait_bh_oneshot(s->ctx, virtio_blk_data_plane_stop_bh, s);
 }
 
+/*
+ * Batch all the host notifiers in a single transaction to avoid
+ * quadratic time complexity in address_space_update_ioeventfds().
+ */
+memory_region_transaction_begin();
+
+for (i = 0; i < nvqs; i++) {
+virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false);
+}
+
+/*
+ * The transaction expects the ioeventfds to be open when it
+ * commits. Do it now, before the cleanup loop.
+ */
+memory_region_transaction_commit();
+
+for (i = 0; i < nvqs; i++) {
+virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i);
+}
+
+/*
+ * Set ->dataplane_started to false before draining so that host notifiers
+ * are not detached/attached anymore.
+ */
+vblk->dataplane_started = false;
+
 aio_context_acquire(s->ctx);
 
 /* Wait for virtio_blk_dma_restart_bh() and in flight I/O to complete */
@@ -340,32 +370,11 @@ void virtio_blk_data_plane_stop(VirtIODevice *vdev)
 
 aio_context_release(s->ctx);
 
-/*
- * Batch all the host notifiers in a single transaction to avoid
- * quadratic time complexity in address_space_update_ioeventfds().
- */
-memory_region_transaction_begin();
-
-for (i = 0;

[PULL 0/1] Block patches

2023-07-12 Thread Stefan Hajnoczi
The following changes since commit 887cba855bb6ff4775256f7968409281350b568c:

  configure: Fix cross-building for RISCV host (v5) (2023-07-11 17:56:09 +0100)

are available in the Git repository at:

  https://gitlab.com/stefanha/qemu.git tags/block-pull-request

for you to fetch changes up to 75dcb4d790bbe5327169fd72b185960ca58e2fa6:

  virtio-blk: fix host notifier issues during dataplane start/stop (2023-07-12 
15:20:32 -0400)


Pull request



Stefan Hajnoczi (1):
  virtio-blk: fix host notifier issues during dataplane start/stop

 hw/block/dataplane/virtio-blk.c | 67 +++--
 1 file changed, 38 insertions(+), 29 deletions(-)

-- 
2.40.1




Re: [PATCH] block: Fix pad_request's request restriction

2023-07-12 Thread Stefan Hajnoczi
On Wed, 12 Jul 2023 at 10:51, Hanna Czenczek  wrote:
>
> On 12.07.23 16:15, Stefan Hajnoczi wrote:
> > On Wed, Jul 12, 2023 at 09:41:05AM +0200, Hanna Czenczek wrote:
> >> On 11.07.23 22:23, Stefan Hajnoczi wrote:
> >>> On Fri, Jun 09, 2023 at 10:33:16AM +0200, Hanna Czenczek wrote:
>  bdrv_pad_request() relies on requests' lengths not to exceed SIZE_MAX,
>  which bdrv_check_qiov_request() does not guarantee.
> 
>  bdrv_check_request32() however will guarantee this, and both of
>  bdrv_pad_request()'s callers (bdrv_co_preadv_part() and
>  bdrv_co_pwritev_part()) already run it before calling
>  bdrv_pad_request().  Therefore, bdrv_pad_request() can safely call
>  bdrv_check_request32() without expecting error, too.
> 
>  There is one difference between bdrv_check_qiov_request() and
>  bdrv_check_request32(): The former takes an errp, the latter does not,
>  so we can no longer just pass &error_abort.  Instead, we need to check
>  the returned value.  While we do expect success (because the callers
>  have already run this function), an assert(ret == 0) is not much simpler
>  than just to return an error if it occurs, so let us handle errors by
>  returning them up the stack now.
> >>> Is this patch intended to silence a Coverity warning or can this be
> >>> triggered by a guest?
> >> Neither.  There was a Coverity warning about the `assert(*bytes <=
> >> SIZE_MAX)`, which is always true on 32-bit architectures. Regardless of
> >> Coverity, Peter inquired how bdrv_check_qiov_request() would guarantee this
> >> condition (as the comments I’ve put above the assertions say).  It doesn’t,
> >> only bdrv_check_request32() does, which I was thinking of, and just 
> >> confused
> >> the two.
> > It's unclear to me whether this patch silences a Coverity warning or
> > not? You said "neither", but then you acknowledged there was a Coverity
> > warning. Maybe "was" (past-tense) means something else already fixed it
> > but I don't see any relevant commits in the git log.
>
> There was and is no fix for the Coverity warning.  I have mentioned that
> warning because the question as to why the code uses
> bdrv_check_qiov_request() came in the context of discussing it
> (https://lists.nongnu.org/archive/html/qemu-devel/2023-06/msg01809.html).
>
> I’m not planning on fixing the Coverity warning in the code. `assert(x
> <= SIZE_MAX)` to me is an absolutely reasonable piece of code, even if
> always true (on some platforms), in fact, I find it a good thing if
> asserted conditions are always true, not least because then the compiler
> can optimize them out.  I don’t think we should make it more complicated
> to make Coverity happier.
>
> >> As the commit message says, all callers already run bdrv_check_request32(),
> >> so I expect this change to functionally be a no-op.  (That is why the
> >> pre-patch code runs bdrv_check_qiov_request() with `&error_abort`.)
> > Okay, this means a guest cannot trigger the assertion failure.
> >
> > Please mention the intent in the commit description: a code cleanup
> > requested by Peter and/or a Coverity warning fix, but definitely not
> > guest triggerable assertion failure.
>
> Sure!
>
> >>> I find this commit description and patch confusing. Instead of checking
> >>> the actual SIZE_MAX value that bdrv_pad_request() relies on, we use a
> >>> 32-bit offsets/lengths helper because it checks INT_MAX or SIZE_MAX (but
> >>> really INT_MAX, because that's always smaller on host architectures that
> >>> QEMU supports).
> >> I preferred to use a bounds-checking function that we already use for
> >> requests, and that happens to be used to limit all I/O that ends up here in
> >> bdrv_pad_request() anyway, instead of adding a new specific limit.
> >>
> >> It doesn’t matter to me, though.  The callers already ensure that 
> >> everything
> >> is in bounds, so I’d be happy with anything, ranging from keeping the bare
> >> assertions with no checks beforehand, over specifically checking SIZE_MAX
> >> and returning an error then, to bdrv_check_request32().
> >>
> >> (I thought repeating the simple bounds check that all callers already did
> >> for verbosity would be the most robust and obvious way to do it, but now 
> >> I’m
> >> biting myself for not just using bare assertions annotated with “Caller 
> >> must
> >> guarantee this” from the start...)
> > Okay. I looked at the code more and don't see a cleanup for the overall
> > problem of duplicated checks and type mismatches (size_t vs int64_t)
> > that is appropriate for this patch.
> >
> > I'm okay with this fix, but please clarify the intent as mentioned above.
>
> I can’t quite fit these two paragraphs together.  It sounds like you
> would rather not duplicate the call to bdrv_check_request32() in
> bdrv_pad_request() and just defer to the callers on that one, and also
> address the Coverity warning in the code (instead of just ignoring it).
> So would you rather have me remove t

Re: [PATCH for-8.2 6/7] target/riscv: add 'max' CPU type

2023-07-12 Thread Conor Dooley
On Wed, Jul 12, 2023 at 04:01:48PM -0300, Daniel Henrique Barboza wrote:
> The 'max' CPU type is used by tooling to determine what's the most
> capable CPU a current QEMU version implements. Other archs such as ARM
> implement this type. Let's add it to RISC-V.
> 
> What we consider "most capable CPU" in this context are related to
> ratified, non-vendor extensions. This means that we want the 'max' CPU
> to enable all (possible) ratified extensions by default. The reasoning
> behind this design is (1) vendor extensions can conflict with each other
> and we won't play favorites deciding which one is default or not and
> (2) non-ratified extensions are always prone to changes, not being
> stable enough to be enabled by default.
> 
> All this said, we're still not able to enable all ratified extensions
> due to conflicts between them. Zfinx and all its dependencies aren't
> enabled because of a conflict with RVF. zce, zcmp and zcmt are also
> disabled due to RVD conflicts. When running with 64 bits we're also
> disabling zcf.
> 
> Signed-off-by: Daniel Henrique Barboza 

This seems like it will be super helpful for CI stuff etc, thanks for
doing it.


signature.asc
Description: PGP signature


Re: [PATCH] virtio-blk: fix host notifier issues during dataplane start/stop

2023-07-12 Thread Stefan Hajnoczi
On Tue, Jul 04, 2023 at 05:15:27PM +0200, Stefan Hajnoczi wrote:
> The main loop thread can consume 100% CPU when using --device
> virtio-blk-pci,iothread=. ppoll() constantly returns but
> reading virtqueue host notifiers fails with EAGAIN. The file descriptors
> are stale and remain registered with the AioContext because of bugs in
> the virtio-blk dataplane start/stop code.
> 
> The problem is that the dataplane start/stop code involves drain
> operations, which call virtio_blk_drained_begin() and
> virtio_blk_drained_end() at points where the host notifier is not
> operational:
> - In virtio_blk_data_plane_start(), blk_set_aio_context() drains after
>   vblk->dataplane_started has been set to true but the host notifier has
>   not been attached yet.
> - In virtio_blk_data_plane_stop(), blk_drain() and blk_set_aio_context()
>   drain after the host notifier has already been detached but with
>   vblk->dataplane_started still set to true.
> 
> I would like to simplify ->ioeventfd_start/stop() to avoid interactions
> with drain entirely, but couldn't find a way to do that. Instead, this
> patch accepts the fragile nature of the code and reorders it so that
> vblk->dataplane_started is false during drain operations. This way the
> virtio_blk_drained_begin() and virtio_blk_drained_end() calls don't
> touch the host notifier. The result is that
> virtio_blk_data_plane_start() and virtio_blk_data_plane_stop() have
> complete control over the host notifier and stale file descriptors are
> no longer left in the AioContext.
> 
> This patch fixes the 100% CPU consumption in the main loop thread and
> correctly moves host notifier processing to the IOThread.
> 
> Fixes: 1665d9326fd2 ("virtio-blk: implement BlockDevOps->drained_begin()")
> Reported-by: Lukáš Doktor 
> Signed-off-by: Stefan Hajnoczi 
> ---
>  hw/block/dataplane/virtio-blk.c | 67 +++--
>  1 file changed, 38 insertions(+), 29 deletions(-)

Thanks, applied to my block tree:
https://gitlab.com/stefanha/qemu/commits/block

Stefan


signature.asc
Description: PGP signature


[PATCH] block/nvme: invoke blk_io_plug_call() outside q->lock

2023-07-12 Thread Stefan Hajnoczi
blk_io_plug_call() is invoked outside a blk_io_plug()/blk_io_unplug()
section while opening the NVMe drive from:

  nvme_file_open() ->
  nvme_init() ->
  nvme_identify() ->
  nvme_admin_cmd_sync() ->
  nvme_submit_command() ->
  blk_io_plug_call()

blk_io_plug_call() immediately invokes the given callback when the
current thread is not plugged, as is the case during nvme_file_open().

Unfortunately, nvme_submit_command() calls blk_io_plug_call() with
q->lock still held:

...
q->sq.tail = (q->sq.tail + 1) % NVME_QUEUE_SIZE;
q->need_kick++;
blk_io_plug_call(nvme_unplug_fn, q);
qemu_mutex_unlock(&q->lock);
^^^

nvme_unplug_fn() deadlocks trying to acquire q->lock because the lock is
already acquired by the same thread. The symptom is that QEMU hangs
during startup while opening the NVMe drive.

Fix this by moving the blk_io_plug_call() outside q->lock. This is safe
because no other thread runs code related to this queue and
blk_io_plug_call()'s internal state is immune to thread safety issues
since it is thread-local.

Reported-by: Lukáš Doktor 
Fixes: f2e590002bd6 ("block/nvme: convert to blk_io_plug_call() API")
Signed-off-by: Stefan Hajnoczi 
---
 block/nvme.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/block/nvme.c b/block/nvme.c
index 7ca85bc44a..b6e95f0b7e 100644
--- a/block/nvme.c
+++ b/block/nvme.c
@@ -501,8 +501,9 @@ static void nvme_submit_command(NVMeQueuePair *q, 
NVMeRequest *req,
q->sq.tail * NVME_SQ_ENTRY_BYTES, cmd, sizeof(*cmd));
 q->sq.tail = (q->sq.tail + 1) % NVME_QUEUE_SIZE;
 q->need_kick++;
+qemu_mutex_unlock(&q->lock);
+
 blk_io_plug_call(nvme_unplug_fn, q);
-qemu_mutex_unlock(&q->lock);
 }
 
 static void nvme_admin_cmd_sync_cb(void *opaque, int ret)
-- 
2.40.1




Re: [PATCH 1/2] i386: Add support for SUCCOR feature

2023-07-12 Thread John Allen
On Fri, Jul 07, 2023 at 04:25:22PM +0200, Paolo Bonzini wrote:
> On 7/6/23 21:40, John Allen wrote:
> >   case 0x8007:
> >   *eax = 0;
> > -*ebx = 0;
> > +*ebx = env->features[FEAT_8000_0007_EBX] | 
> > CPUID_8000_0007_EBX_SUCCOR;
> >   *ecx = 0;
> >   *edx = env->features[FEAT_8000_0007_EDX];
> >   break;
> 
> I agree that it needs no hypervisor support, but Babu is right that you
> cannot add it unconditionally (especially not on Intel processors).
> 
> You can special case CPUID_8000_0007_EBX_SUCCOR in
> kvm_arch_get_supported_cpuid() so that it is added even on old kernels.
> There are already several such cases.  Adding it to KVM is nice to have
> anyway, so please send a patch for that.

By adding it to KVM do you mean adding a patch to the kernel to expose
the cpuid bit? Or do you mean just adding the special case to
kvm_arch_get_supported_cpuid?

For the kvm_arch_get_supported_cpuid case, I don't understand how this
would be different from unconditionally exposing the bit as done above.
Can you help me understand what you have in mind for this?

I might add a case like below:
...
} else if (function == 0x8007 && reg == R_EBX) {
ret |= CPUID_8000_0007_EBX_SUCCOR;
...

If we wanted to only expose the bit for AMD cpus, we would then need to
call IS_AMD_CPU with the CPUX86State as a parameter, which would mean that
kvm_arch_get_supported_cpuid and all of its callers would need to take
the CPUX86State as a parameter. Is there another way to differentiate
between AMD and Intel cpus in this case?

> 
> Also, the patch does not compile (probably you missed a prerequisite) as it
> lacks all the rigamarole that is needed to add FEAT_8000_0007_EBX.

I'm not encountering any compilation issues. What are the errors that
you are seeing?

Thanks,
John



Re: [PATCH v1 6/9] gfxstream + rutabaga: add initial support for gfxstream

2023-07-12 Thread Marc-André Lureau
Hi

On Tue, Jul 11, 2023 at 6:57 AM Gurchetan Singh 
wrote:

> This adds initial support for gfxstream and cross-domain.  Both
> features rely on virtio-gpu blob resources and context types, which
> are also implemented in this patch.
>
> gfxstream has a long and illustrious history in Android graphics
> paravirtualization.  It has been powering graphics in the Android
> Studio Emulator for more than a decade, which is the main developer
> platform.
>
> Originally conceived by Jesse Hall, it was first known as "EmuGL" [a].
> The key design characteristic was a 1:1 threading model and
> auto-generation, which fit nicely with the OpenGLES spec.  It also
> allowed easy layering with ANGLE on the host, which provides the GLES
> implementations on Windows or macOS environments.
>
> gfxstream has traditionally been maintained by a single engineer, and
> between 2015 to 2021, the goldfish throne passed to Frank Yang.
> Historians often remark this glorious reign ("pax gfxstreama" is the
> academic term) was comparable to that of Augustus and the both Queen
> Elizabeths.  Just to name a few accomplishments in a resplendent
> panoply: higher versions of GLES, address space graphics, snapshot
> support and CTS compliant Vulkan [b].
>
> One major drawback was the use of out-of-tree goldfish drivers.
> Android engineers didn't know much about DRM/KMS and especially TTM so
> a simple guest to host pipe was conceived.
>
> Luckily, virtio-gpu 3D started to emerge in 2016 due to the work of
> the Mesa/virglrenderer communities.  In 2018, the initial virtio-gpu
> port of gfxstream was done by Cuttlefish enthusiast Alistair Delva.
> It was a symbol compatible replacement of virglrenderer [c] and named
> "AVDVirglrenderer".  This implementation forms the basis of the
> current gfxstream host implementation still in use today.
>
> cross-domain support follows a similar arc.  Originally conceived by
> Wayland aficionado David Reveman and crosvm enjoyer Zach Reizner in
> 2018, it initially relied on the downstream "virtio-wl" device.
>
> In 2020 and 2021, virtio-gpu was extended to include blob resources
> and multiple timelines by yours truly, features gfxstream/cross-domain
> both require to function correctly.
>
> Right now, we stand at the precipice of a truly fantastic possibility:
> the Android Emulator powered by upstream QEMU and upstream Linux
> kernel.  gfxstream will then be packaged properly, and app
> developers can even fix gfxstream bugs on their own if they encounter
> them.
>
> It's been quite the ride, my friends.  Where will gfxstream head next,
> nobody really knows.  I wouldn't be surprised if it's around for
> another decade, maintained by a new generation of Android graphics
> enthusiasts.
>
> Technical details:
>   - Very simple initial display integration: just used Pixman
>   - Largely, 1:1 mapping of virtio-gpu hypercalls to rutabaga function
> calls
>
>
Wow, this is not for the faint reader.. there is a lot to grasp in this gfx
space...

Could you perhaps extend on what this current code can do for an average
Linux VM? or for some Android VM (which one?!), and then what are the next
steps and status?

My limited understanding (from this series and from
https://gitlab.com/qemu-project/qemu/-/issues/1611) is that it allows
passing-through some vulkan APIs for off-screen usage. Is that accurate?

How far are we from getting upstream QEMU to be used by Android Emulator?
(in the gfx domain at least) What would it take to get the average Linux VM
to use virtio-vga-rutabaga instead of virtio-vga-gl to get accelerated
rendering?

[a] https://android-review.googlesource.com/c/platform/development/+/34470
> [b]
> https://android-review.googlesource.com/q/topic:%22vulkan-hostconnection-start%22
> [c]
> https://android-review.googlesource.com/c/device/generic/goldfish-opengl/+/761927
>
> Signed-off-by: Gurchetan Singh 
> ---
> v2: Incorporated various suggestions by Akihiko Odaki and Bernard Berschow
> - Removed GET_VIRTIO_GPU_GL / GET_RUTABAGA macros
> - Used error_report(..)
> - Used g_autofree to fix leaks on error paths
> - Removed unnecessary casts
> - added virtio-gpu-pci-rutabaga.c + virtio-vga-rutabaga.c files
>
>  hw/display/virtio-gpu-pci-rutabaga.c |   48 ++
>  hw/display/virtio-gpu-rutabaga.c | 1088 ++
>  hw/display/virtio-vga-rutabaga.c |   52 ++
>  3 files changed, 1188 insertions(+)
>  create mode 100644 hw/display/virtio-gpu-pci-rutabaga.c
>  create mode 100644 hw/display/virtio-gpu-rutabaga.c
>  create mode 100644 hw/display/virtio-vga-rutabaga.c
>
> diff --git a/hw/display/virtio-gpu-pci-rutabaga.c
> b/hw/display/virtio-gpu-pci-rutabaga.c
> new file mode 100644
> index 00..5765bef266
> --- /dev/null
> +++ b/hw/display/virtio-gpu-pci-rutabaga.c
> @@ -0,0 +1,48 @@
> +// SPDX-License-Identifier: GPL-2.0
> +
> +#include "qemu/osdep.h"
> +#include "qapi/error.h"
> +#include "qemu/module.h"
> +#include "hw/pci/pci.h"
> +#include "hw/qde

[PATCH v5 1/6] tests/qtest: migration: Expose migrate_set_capability

2023-07-12 Thread Fabiano Rosas
The following patch will make use of this function from within
migration-helpers.c, so move it there.

Reviewed-by: Juan Quintela 
Reviewed-by: Thomas Huth 
Reviewed-by: Peter Xu 
Signed-off-by: Fabiano Rosas 
---
 tests/qtest/migration-helpers.c | 11 +++
 tests/qtest/migration-helpers.h |  3 +++
 tests/qtest/migration-test.c| 11 ---
 3 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/tests/qtest/migration-helpers.c b/tests/qtest/migration-helpers.c
index be00c52d00..2df198c99e 100644
--- a/tests/qtest/migration-helpers.c
+++ b/tests/qtest/migration-helpers.c
@@ -70,6 +70,17 @@ void migrate_qmp(QTestState *who, const char *uri, const 
char *fmt, ...)
  "{ 'execute': 'migrate', 'arguments': %p}", args);
 }
 
+void migrate_set_capability(QTestState *who, const char *capability,
+bool value)
+{
+qtest_qmp_assert_success(who,
+ "{ 'execute': 'migrate-set-capabilities',"
+ "'arguments': { "
+ "'capabilities': [ { "
+ "'capability': %s, 'state': %i } ] } }",
+ capability, value);
+}
+
 /*
  * Note: caller is responsible to free the returned object via
  * qobject_unref() after use
diff --git a/tests/qtest/migration-helpers.h b/tests/qtest/migration-helpers.h
index 009e250e90..484d7c960f 100644
--- a/tests/qtest/migration-helpers.h
+++ b/tests/qtest/migration-helpers.h
@@ -23,6 +23,9 @@ bool migrate_watch_for_resume(QTestState *who, const char 
*name,
 G_GNUC_PRINTF(3, 4)
 void migrate_qmp(QTestState *who, const char *uri, const char *fmt, ...);
 
+void migrate_set_capability(QTestState *who, const char *capability,
+bool value);
+
 QDict *migrate_query(QTestState *who);
 QDict *migrate_query_not_failed(QTestState *who);
 
diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
index c61a3162b4..9f147ac542 100644
--- a/tests/qtest/migration-test.c
+++ b/tests/qtest/migration-test.c
@@ -571,17 +571,6 @@ static void migrate_cancel(QTestState *who)
 qtest_qmp_assert_success(who, "{ 'execute': 'migrate_cancel' }");
 }
 
-static void migrate_set_capability(QTestState *who, const char *capability,
-   bool value)
-{
-qtest_qmp_assert_success(who,
- "{ 'execute': 'migrate-set-capabilities',"
- "'arguments': { "
- "'capabilities': [ { "
- "'capability': %s, 'state': %i } ] } }",
- capability, value);
-}
-
 static void migrate_postcopy_start(QTestState *from, QTestState *to)
 {
 qtest_qmp_assert_success(from, "{ 'execute': 'migrate-start-postcopy' }");
-- 
2.35.3




[PATCH v5 0/6] migration: Test the new "file:" migration

2023-07-12 Thread Fabiano Rosas
Based-on:
[PATCH V4 0/2] migration file URI
https://lore.kernel.org/r/1688135108-316997-1-git-send-email-steven.sist...@oracle.com

Since v4:

- Implemented a separate version of test_precopy_common to be used
  with the file transport (patch 6).

v4:
https://lore.kernel.org/r/20230706201927.15442-1-faro...@suse.de

v3:
https://lore.kernel.org/r/20230630212902.19925-1-faro...@suse.de

v2:
https://lore.kernel.org/r/20230628165542.17214-1-faro...@suse.de

v1:
https://lore.kernel.org/r/20230626182210.8792-1-faro...@suse.de

Fabiano Rosas (6):
  tests/qtest: migration: Expose migrate_set_capability
  tests/qtest: migration: Add migrate_incoming_qmp helper
  tests/qtest: migration: Use migrate_incoming_qmp where appropriate
  migration: Set migration status early in incoming side
  tests/qtest: migration: Add support for negative testing of
qmp_migrate
  tests/qtest: migration-test: Add tests for file-based migration

 migration/migration.c |   7 +-
 tests/qtest/libqtest.c|  33 ++
 tests/qtest/libqtest.h|  28 +
 tests/qtest/meson.build   |   1 +
 tests/qtest/migration-helpers.c   |  60 ++
 tests/qtest/migration-helpers.h   |  10 ++
 tests/qtest/migration-test.c  | 185 ++
 tests/qtest/virtio-net-failover.c |  77 ++---
 8 files changed, 308 insertions(+), 93 deletions(-)

-- 
2.35.3




[PATCH v5 6/6] tests/qtest: migration-test: Add tests for file-based migration

2023-07-12 Thread Fabiano Rosas
Add basic tests for file-based migration.

Note that we cannot use test_precopy_common because that routine
expects it to be possible to run the migration live. With the file
transport there is no live migration because we must wait for the
source to finish writing the migration data to the file before the
destination can start reading. Add a new migration function
specifically to handle the file migration.

Signed-off-by: Fabiano Rosas 
---
 tests/qtest/migration-test.c | 146 +++
 1 file changed, 146 insertions(+)

diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
index 01a2a2ceb7..f4658814b1 100644
--- a/tests/qtest/migration-test.c
+++ b/tests/qtest/migration-test.c
@@ -66,6 +66,10 @@ static bool got_dst_resume;
  */
 #define DIRTYLIMIT_TOLERANCE_RANGE  25  /* MB/s */
 
+#define QEMU_VM_FILE_MAGIC 0x5145564d
+#define FILE_TEST_FILENAME "migfile"
+#define FILE_TEST_OFFSET 0x1000
+
 #if defined(__linux__)
 #include 
 #include 
@@ -864,6 +868,7 @@ static void test_migrate_end(QTestState *from, QTestState 
*to, bool test_dest)
 cleanup("migsocket");
 cleanup("src_serial");
 cleanup("dest_serial");
+cleanup(FILE_TEST_FILENAME);
 }
 
 #ifdef CONFIG_GNUTLS
@@ -1590,6 +1595,70 @@ finish:
 test_migrate_end(from, to, args->result == MIG_TEST_SUCCEED);
 }
 
+static void test_file_common(MigrateCommon *args, bool stop_src)
+{
+QTestState *from, *to;
+void *data_hook = NULL;
+g_autofree char *connect_uri = g_strdup(args->connect_uri);
+
+if (test_migrate_start(&from, &to, args->listen_uri, &args->start)) {
+return;
+}
+
+/*
+ * File migration is never live. We can keep the source VM running
+ * during migration, but the destination will not be running
+ * concurrently.
+ */
+g_assert_false(args->live);
+
+if (args->start_hook) {
+data_hook = args->start_hook(from, to);
+}
+
+migrate_ensure_converge(from);
+wait_for_serial("src_serial");
+
+if (stop_src) {
+qtest_qmp_assert_success(from, "{ 'execute' : 'stop'}");
+if (!got_src_stop) {
+qtest_qmp_eventwait(from, "STOP");
+}
+}
+
+if (args->result == MIG_TEST_QMP_ERROR) {
+migrate_qmp_fail(from, connect_uri, "{}");
+goto finish;
+}
+
+migrate_qmp(from, connect_uri, "{}");
+wait_for_migration_complete(from);
+
+/*
+ * We need to wait for the source to finish before starting the
+ * destination.
+ */
+migrate_incoming_qmp(to, connect_uri, "{}");
+wait_for_migration_complete(to);
+
+if (stop_src) {
+qtest_qmp_assert_success(to, "{ 'execute' : 'cont'}");
+}
+
+if (!got_dst_resume) {
+qtest_qmp_eventwait(to, "RESUME");
+}
+
+wait_for_serial("dest_serial");
+
+finish:
+if (args->finish_hook) {
+args->finish_hook(from, to, data_hook);
+}
+
+test_migrate_end(from, to, args->result == MIG_TEST_SUCCEED);
+}
+
 static void test_precopy_unix_plain(void)
 {
 g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs);
@@ -1785,6 +1854,75 @@ static void test_precopy_unix_compress_nowait(void)
 test_precopy_common(&args);
 }
 
+static void test_precopy_file(void)
+{
+g_autofree char *uri = g_strdup_printf("file:%s/%s", tmpfs,
+   FILE_TEST_FILENAME);
+MigrateCommon args = {
+.connect_uri = uri,
+.listen_uri = "defer",
+};
+
+test_file_common(&args, true);
+}
+
+static void file_offset_finish_hook(QTestState *from, QTestState *to, void 
*opaque)
+{
+#if defined(__linux__)
+g_autofree char *path = g_strdup_printf("%s/%s", tmpfs, 
FILE_TEST_FILENAME);
+size_t size = FILE_TEST_OFFSET + sizeof(QEMU_VM_FILE_MAGIC);
+uintptr_t *addr, *p;
+int fd;
+
+fd = open(path, O_RDONLY);
+g_assert(fd != -1);
+addr = mmap(NULL, size, PROT_READ, MAP_SHARED, fd, 0);
+g_assert(addr != MAP_FAILED);
+
+/*
+ * Ensure the skipped offset contains zeros and the migration
+ * stream starts at the right place.
+ */
+p = addr;
+while (p < addr + FILE_TEST_OFFSET / sizeof(uintptr_t)) {
+g_assert(*p == 0);
+p++;
+}
+g_assert_cmpint(cpu_to_be32(*p), ==, QEMU_VM_FILE_MAGIC);
+
+munmap(addr, size);
+close(fd);
+#endif
+}
+
+static void test_precopy_file_offset(void)
+{
+g_autofree char *uri = g_strdup_printf("file:%s/%s,offset=%d", tmpfs,
+   FILE_TEST_FILENAME,
+   FILE_TEST_OFFSET);
+MigrateCommon args = {
+.connect_uri = uri,
+.listen_uri = "defer",
+.finish_hook = file_offset_finish_hook,
+};
+
+test_file_common(&args, false);
+}
+
+static void test_precopy_file_offset_bad(void)
+{
+/* using a value not supported by qemu_strtosz() */
+g_autofree char *uri = g_strdup_printf("file:%s/%s,offset=0x20M",
+ 

[PATCH v5 4/6] migration: Set migration status early in incoming side

2023-07-12 Thread Fabiano Rosas
We are sending a migration event of MIGRATION_STATUS_SETUP at
qemu_start_incoming_migration but never actually setting the state.

This creates a window between qmp_migrate_incoming and
process_incoming_migration_co where the migration status is still
MIGRATION_STATUS_NONE. Calling query-migrate during this time will
return an empty response even though the incoming migration command
has already been issued.

Commit 7cf1fe6d68 ("migration: Add migration events on target side")
has added support to the 'events' capability to the incoming part of
migration, but chose to send the SETUP event without setting the
state. I'm assuming this was a mistake.

This introduces a change in behavior, any QMP client waiting for the
SETUP event will hang, unless it has previously enabled the 'events'
capability. Having the capability enabled is sufficient to continue to
receive the event.

Reviewed-by: Peter Xu 
Signed-off-by: Fabiano Rosas 
---
 migration/migration.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index ea020c8335..5c8e82f82b 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -425,13 +425,16 @@ void migrate_add_address(SocketAddress *address)
 static void qemu_start_incoming_migration(const char *uri, Error **errp)
 {
 const char *p = NULL;
+MigrationIncomingState *mis = migration_incoming_get_current();
 
 /* URI is not suitable for migration? */
 if (!migration_channels_and_uri_compatible(uri, errp)) {
 return;
 }
 
-qapi_event_send_migration(MIGRATION_STATUS_SETUP);
+migrate_set_state(&mis->state, MIGRATION_STATUS_NONE,
+  MIGRATION_STATUS_SETUP);
+
 if (strstart(uri, "tcp:", &p) ||
 strstart(uri, "unix:", NULL) ||
 strstart(uri, "vsock:", NULL)) {
@@ -525,7 +528,7 @@ process_incoming_migration_co(void *opaque)
 
 mis->largest_page_size = qemu_ram_pagesize_largest();
 postcopy_state_set(POSTCOPY_INCOMING_NONE);
-migrate_set_state(&mis->state, MIGRATION_STATUS_NONE,
+migrate_set_state(&mis->state, MIGRATION_STATUS_SETUP,
   MIGRATION_STATUS_ACTIVE);
 
 mis->loadvm_co = qemu_coroutine_self();
-- 
2.35.3




[PATCH v5 2/6] tests/qtest: migration: Add migrate_incoming_qmp helper

2023-07-12 Thread Fabiano Rosas
file-based migration requires the target to initiate its migration after
the source has finished writing out the data in the file. Currently
there's no easy way to initiate 'migrate-incoming', allow this by
introducing migrate_incoming_qmp helper, similarly to migrate_qmp.

Also make sure migration events are enabled and wait for the incoming
migration to start before returning. This avoids a race when querying
the migration status too soon after issuing the command.

Reviewed-by: Peter Xu 
Signed-off-by: Fabiano Rosas 
---
 tests/qtest/migration-helpers.c | 29 +
 tests/qtest/migration-helpers.h |  4 
 2 files changed, 33 insertions(+)

diff --git a/tests/qtest/migration-helpers.c b/tests/qtest/migration-helpers.c
index 2df198c99e..08f5ee1179 100644
--- a/tests/qtest/migration-helpers.c
+++ b/tests/qtest/migration-helpers.c
@@ -81,6 +81,35 @@ void migrate_set_capability(QTestState *who, const char 
*capability,
  capability, value);
 }
 
+void migrate_incoming_qmp(QTestState *to, const char *uri, const char *fmt, 
...)
+{
+va_list ap;
+QDict *args, *rsp, *data;
+
+va_start(ap, fmt);
+args = qdict_from_vjsonf_nofail(fmt, ap);
+va_end(ap);
+
+g_assert(!qdict_haskey(args, "uri"));
+qdict_put_str(args, "uri", uri);
+
+migrate_set_capability(to, "events", true);
+
+rsp = qtest_qmp(to, "{ 'execute': 'migrate-incoming', 'arguments': %p}",
+args);
+g_assert(qdict_haskey(rsp, "return"));
+qobject_unref(rsp);
+
+rsp = qtest_qmp_eventwait_ref(to, "MIGRATION");
+g_assert(qdict_haskey(rsp, "data"));
+
+data = qdict_get_qdict(rsp, "data");
+g_assert(qdict_haskey(data, "status"));
+g_assert_cmpstr(qdict_get_str(data, "status"), ==, "setup");
+
+qobject_unref(rsp);
+}
+
 /*
  * Note: caller is responsible to free the returned object via
  * qobject_unref() after use
diff --git a/tests/qtest/migration-helpers.h b/tests/qtest/migration-helpers.h
index 484d7c960f..57d295a4fe 100644
--- a/tests/qtest/migration-helpers.h
+++ b/tests/qtest/migration-helpers.h
@@ -23,6 +23,10 @@ bool migrate_watch_for_resume(QTestState *who, const char 
*name,
 G_GNUC_PRINTF(3, 4)
 void migrate_qmp(QTestState *who, const char *uri, const char *fmt, ...);
 
+G_GNUC_PRINTF(3, 4)
+void migrate_incoming_qmp(QTestState *who, const char *uri,
+  const char *fmt, ...);
+
 void migrate_set_capability(QTestState *who, const char *capability,
 bool value);
 
-- 
2.35.3




[PATCH v5 5/6] tests/qtest: migration: Add support for negative testing of qmp_migrate

2023-07-12 Thread Fabiano Rosas
There is currently no way to write a test for errors that happen in
qmp_migrate before the migration has started.

Add a version of qmp_migrate that ensures an error happens. To make
use of it a test needs to set MigrateCommon.result as
MIG_TEST_QMP_ERROR.

Reviewed-by: Peter Xu 
Signed-off-by: Fabiano Rosas 
---
 tests/qtest/libqtest.c  | 33 +
 tests/qtest/libqtest.h  | 28 
 tests/qtest/migration-helpers.c | 20 
 tests/qtest/migration-helpers.h |  3 +++
 tests/qtest/migration-test.c| 16 
 5 files changed, 96 insertions(+), 4 deletions(-)

diff --git a/tests/qtest/libqtest.c b/tests/qtest/libqtest.c
index c22dfc30d3..e8512c1fde 100644
--- a/tests/qtest/libqtest.c
+++ b/tests/qtest/libqtest.c
@@ -1248,6 +1248,28 @@ void qtest_memset(QTestState *s, uint64_t addr, uint8_t 
pattern, size_t size)
 qtest_rsp(s);
 }
 
+QDict *qtest_vqmp_assert_failure_ref(QTestState *qts,
+ const char *fmt, va_list args)
+{
+QDict *response;
+QDict *ret;
+
+response = qtest_vqmp(qts, fmt, args);
+
+g_assert(response);
+if (!qdict_haskey(response, "error")) {
+g_autoptr(GString) s = qobject_to_json_pretty(QOBJECT(response), true);
+g_test_message("%s", s->str);
+}
+g_assert(qdict_haskey(response, "error"));
+g_assert(!qdict_haskey(response, "return"));
+ret = qdict_get_qdict(response, "error");
+qobject_ref(ret);
+qobject_unref(response);
+
+return ret;
+}
+
 QDict *qtest_vqmp_assert_success_ref(QTestState *qts,
  const char *fmt, va_list args)
 {
@@ -1310,6 +1332,17 @@ void qtest_vqmp_fds_assert_success(QTestState *qts, int 
*fds, size_t nfds,
 }
 #endif /* !_WIN32 */
 
+QDict *qtest_qmp_assert_failure_ref(QTestState *qts, const char *fmt, ...)
+{
+QDict *response;
+va_list ap;
+
+va_start(ap, fmt);
+response = qtest_vqmp_assert_failure_ref(qts, fmt, ap);
+va_end(ap);
+return response;
+}
+
 QDict *qtest_qmp_assert_success_ref(QTestState *qts, const char *fmt, ...)
 {
 QDict *response;
diff --git a/tests/qtest/libqtest.h b/tests/qtest/libqtest.h
index 3a71bc45fc..a781104861 100644
--- a/tests/qtest/libqtest.h
+++ b/tests/qtest/libqtest.h
@@ -799,6 +799,34 @@ void qtest_vqmp_fds_assert_success(QTestState *qts, int 
*fds, size_t nfds,
 G_GNUC_PRINTF(4, 0);
 #endif /* !_WIN32 */
 
+/**
+ * qtest_qmp_assert_failure_ref:
+ * @qts: QTestState instance to operate on
+ * @fmt: QMP message to send to qemu, formatted like
+ * qobject_from_jsonf_nofail().  See parse_interpolation() for what's
+ * supported after '%'.
+ *
+ * Sends a QMP message to QEMU, asserts that an 'error' key is present in
+ * the response, and returns the response.
+ */
+QDict *qtest_qmp_assert_failure_ref(QTestState *qts, const char *fmt, ...)
+G_GNUC_PRINTF(2, 3);
+
+/**
+ * qtest_vqmp_assert_failure_ref:
+ * @qts: QTestState instance to operate on
+ * @fmt: QMP message to send to qemu, formatted like
+ * qobject_from_jsonf_nofail().  See parse_interpolation() for what's
+ * supported after '%'.
+ * @args: variable arguments for @fmt
+ *
+ * Sends a QMP message to QEMU, asserts that an 'error' key is present in
+ * the response, and returns the response.
+ */
+QDict *qtest_vqmp_assert_failure_ref(QTestState *qts,
+ const char *fmt, va_list args)
+G_GNUC_PRINTF(2, 0);
+
 /**
  * qtest_qmp_assert_success_ref:
  * @qts: QTestState instance to operate on
diff --git a/tests/qtest/migration-helpers.c b/tests/qtest/migration-helpers.c
index 08f5ee1179..0c185db450 100644
--- a/tests/qtest/migration-helpers.c
+++ b/tests/qtest/migration-helpers.c
@@ -49,6 +49,26 @@ bool migrate_watch_for_resume(QTestState *who, const char *name,
 return false;
 }
 
+void migrate_qmp_fail(QTestState *who, const char *uri, const char *fmt, ...)
+{
+va_list ap;
+QDict *args, *err;
+
+va_start(ap, fmt);
+args = qdict_from_vjsonf_nofail(fmt, ap);
+va_end(ap);
+
+g_assert(!qdict_haskey(args, "uri"));
+qdict_put_str(args, "uri", uri);
+
+err = qtest_qmp_assert_failure_ref(
+who, "{ 'execute': 'migrate', 'arguments': %p}", args);
+
+g_assert(qdict_haskey(err, "desc"));
+
+qobject_unref(err);
+}
+
 /*
  * Send QMP command "migrate".
  * Arguments are built from @fmt... (formatted like
diff --git a/tests/qtest/migration-helpers.h b/tests/qtest/migration-helpers.h
index 57d295a4fe..4f51d0f8bc 100644
--- a/tests/qtest/migration-helpers.h
+++ b/tests/qtest/migration-helpers.h
@@ -27,6 +27,9 @@ G_GNUC_PRINTF(3, 4)
 void migrate_incoming_qmp(QTestState *who, const char *uri,
   const char *fmt, ...);
 
+G_GNUC_PRINTF(3, 4)
+void migrate_qmp_fail(QTestState *who, const char *uri, const char *fmt, ...);
+
 void migrate_set_capability(QTestState *who, const char *capability,
 bo

Re: [PATCH 01/10] hw/arm/virt-acpi-build.c: Move fw_cfg and virtio to common location

2023-07-12 Thread Daniel Henrique Barboza




On 7/12/23 13:39, Sunil V L wrote:

The functions which add fw_cfg and virtio to DSDT are same for ARM
and RISC-V. So, instead of duplicating in RISC-V, move them from
hw/arm/virt-acpi-build.c to common aml-build.c.


Nice.



Signed-off-by: Sunil V L 
---


Reviewed-by: Daniel Henrique Barboza 


  hw/acpi/aml-build.c | 41 
  hw/arm/virt-acpi-build.c| 42 -
  hw/riscv/virt-acpi-build.c  | 16 --
  include/hw/acpi/aml-build.h |  6 ++
  4 files changed, 47 insertions(+), 58 deletions(-)

diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
index ea331a20d1..eeb1263c8c 100644
--- a/hw/acpi/aml-build.c
+++ b/hw/acpi/aml-build.c
@@ -2467,3 +2467,44 @@ Aml *aml_i2c_serial_bus_device(uint16_t address, const char *resource_source)
  
  return var;

  }
+
+void acpi_dsdt_add_fw_cfg(Aml *scope, const MemMapEntry *fw_cfg_memmap)
+{
+Aml *dev = aml_device("FWCF");
+aml_append(dev, aml_name_decl("_HID", aml_string("QEMU0002")));
+/* device present, functioning, decoding, not shown in UI */
+aml_append(dev, aml_name_decl("_STA", aml_int(0xB)));
+aml_append(dev, aml_name_decl("_CCA", aml_int(1)));
+
+Aml *crs = aml_resource_template();
+aml_append(crs, aml_memory32_fixed(fw_cfg_memmap->base,
+   fw_cfg_memmap->size, AML_READ_WRITE));
+aml_append(dev, aml_name_decl("_CRS", crs));
+aml_append(scope, dev);
+}
+
+void acpi_dsdt_add_virtio(Aml *scope,
+  const MemMapEntry *virtio_mmio_memmap,
+  uint32_t mmio_irq, int num)
+{
+hwaddr base = virtio_mmio_memmap->base;
+hwaddr size = virtio_mmio_memmap->size;
+int i;
+
+for (i = 0; i < num; i++) {
+uint32_t irq = mmio_irq + i;
+Aml *dev = aml_device("VR%02u", i);
+aml_append(dev, aml_name_decl("_HID", aml_string("LNRO0005")));
+aml_append(dev, aml_name_decl("_UID", aml_int(i)));
+aml_append(dev, aml_name_decl("_CCA", aml_int(1)));
+
+Aml *crs = aml_resource_template();
+aml_append(crs, aml_memory32_fixed(base, size, AML_READ_WRITE));
+aml_append(crs,
+   aml_interrupt(AML_CONSUMER, AML_LEVEL, AML_ACTIVE_HIGH,
+ AML_EXCLUSIVE, &irq, 1));
+aml_append(dev, aml_name_decl("_CRS", crs));
+aml_append(scope, dev);
+base += size;
+}
+}
diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
index 6b674231c2..fdedb68e2b 100644
--- a/hw/arm/virt-acpi-build.c
+++ b/hw/arm/virt-acpi-build.c
@@ -35,7 +35,6 @@
  #include "target/arm/cpu.h"
  #include "hw/acpi/acpi-defs.h"
  #include "hw/acpi/acpi.h"
-#include "hw/nvram/fw_cfg.h"
  #include "hw/acpi/bios-linker-loader.h"
  #include "hw/acpi/aml-build.h"
  #include "hw/acpi/utils.h"
@@ -94,21 +93,6 @@ static void acpi_dsdt_add_uart(Aml *scope, const MemMapEntry *uart_memmap,
  aml_append(scope, dev);
  }
  
-static void acpi_dsdt_add_fw_cfg(Aml *scope, const MemMapEntry *fw_cfg_memmap)

-{
-Aml *dev = aml_device("FWCF");
-aml_append(dev, aml_name_decl("_HID", aml_string("QEMU0002")));
-/* device present, functioning, decoding, not shown in UI */
-aml_append(dev, aml_name_decl("_STA", aml_int(0xB)));
-aml_append(dev, aml_name_decl("_CCA", aml_int(1)));
-
-Aml *crs = aml_resource_template();
-aml_append(crs, aml_memory32_fixed(fw_cfg_memmap->base,
-   fw_cfg_memmap->size, AML_READ_WRITE));
-aml_append(dev, aml_name_decl("_CRS", crs));
-aml_append(scope, dev);
-}
-
  static void acpi_dsdt_add_flash(Aml *scope, const MemMapEntry *flash_memmap)
  {
  Aml *dev, *crs;
@@ -133,32 +117,6 @@ static void acpi_dsdt_add_flash(Aml *scope, const MemMapEntry *flash_memmap)
  aml_append(scope, dev);
  }
  
-static void acpi_dsdt_add_virtio(Aml *scope,

- const MemMapEntry *virtio_mmio_memmap,
- uint32_t mmio_irq, int num)
-{
-hwaddr base = virtio_mmio_memmap->base;
-hwaddr size = virtio_mmio_memmap->size;
-int i;
-
-for (i = 0; i < num; i++) {
-uint32_t irq = mmio_irq + i;
-Aml *dev = aml_device("VR%02u", i);
-aml_append(dev, aml_name_decl("_HID", aml_string("LNRO0005")));
-aml_append(dev, aml_name_decl("_UID", aml_int(i)));
-aml_append(dev, aml_name_decl("_CCA", aml_int(1)));
-
-Aml *crs = aml_resource_template();
-aml_append(crs, aml_memory32_fixed(base, size, AML_READ_WRITE));
-aml_append(crs,
-   aml_interrupt(AML_CONSUMER, AML_LEVEL, AML_ACTIVE_HIGH,
- AML_EXCLUSIVE, &irq, 1));
-aml_append(dev, aml_name_decl("_CRS", crs));
-aml_append(scope, dev);
-base += size;
-}
-}
-
  static void acpi_dsdt_add_pci(Aml *scope, const MemMapEntry *memmap,
  

[PATCH v5 3/6] tests/qtest: migration: Use migrate_incoming_qmp where appropriate

2023-07-12 Thread Fabiano Rosas
Use the new migrate_incoming_qmp helper in the places that currently
open-code calling migrate-incoming.

Reviewed-by: Juan Quintela 
Reviewed-by: Peter Xu 
Signed-off-by: Fabiano Rosas 
---
 tests/qtest/meson.build   |  1 +
 tests/qtest/migration-test.c  | 12 ++---
 tests/qtest/virtio-net-failover.c | 77 ---
 3 files changed, 14 insertions(+), 76 deletions(-)

diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build
index b071d400b3..cab7ae81cd 100644
--- a/tests/qtest/meson.build
+++ b/tests/qtest/meson.build
@@ -316,6 +316,7 @@ qtests = {
   'tpm-tis-i2c-test': [io, tpmemu_files, 'qtest_aspeed.c'],
   'tpm-tis-device-swtpm-test': [io, tpmemu_files, 'tpm-tis-util.c'],
   'tpm-tis-device-test': [io, tpmemu_files, 'tpm-tis-util.c'],
+  'virtio-net-failover': files('migration-helpers.c'),
   'vmgenid-test': files('boot-sector.c', 'acpi-utils.c'),
   'netdev-socket': files('netdev-socket.c', '../unit/socket-helpers.c'),
 }
diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
index 9f147ac542..0c60391f51 100644
--- a/tests/qtest/migration-test.c
+++ b/tests/qtest/migration-test.c
@@ -1950,8 +1950,7 @@ static void *test_migrate_fd_start_hook(QTestState *from,
 close(pair[0]);
 
 /* Start incoming migration from the 1st socket */
-qtest_qmp_assert_success(to, "{ 'execute': 'migrate-incoming',"
- "  'arguments': { 'uri': 'fd:fd-mig' }}");
+migrate_incoming_qmp(to, "fd:fd-mig", "{}");
 
 /* Send the 2nd socket to the target */
 qtest_qmp_fds_assert_success(from, &pair[1], 1,
@@ -2173,8 +2172,7 @@ test_migrate_precopy_tcp_multifd_start_common(QTestState *from,
 migrate_set_capability(to, "multifd", true);
 
 /* Start incoming migration from the 1st socket */
-qtest_qmp_assert_success(to, "{ 'execute': 'migrate-incoming',"
- "  'arguments': { 'uri': 'tcp:127.0.0.1:0' }}");
+migrate_incoming_qmp(to, "tcp:127.0.0.1:0", "{}");
 
 return NULL;
 }
@@ -2427,8 +2425,7 @@ static void test_multifd_tcp_cancel(void)
 migrate_set_capability(to, "multifd", true);
 
 /* Start incoming migration from the 1st socket */
-qtest_qmp_assert_success(to, "{ 'execute': 'migrate-incoming',"
- "  'arguments': { 'uri': 'tcp:127.0.0.1:0' }}");
+migrate_incoming_qmp(to, "tcp:127.0.0.1:0", "{}");
 
 /* Wait for the first serial output from the source */
 wait_for_serial("src_serial");
@@ -2458,8 +2455,7 @@ static void test_multifd_tcp_cancel(void)
 migrate_set_capability(to2, "multifd", true);
 
 /* Start incoming migration from the 1st socket */
-qtest_qmp_assert_success(to2, "{ 'execute': 'migrate-incoming',"
- "  'arguments': { 'uri': 'tcp:127.0.0.1:0' }}");
+migrate_incoming_qmp(to2, "tcp:127.0.0.1:0", "{}");
 
 g_free(uri);
 uri = migrate_get_socket_address(to2, "socket-address");
diff --git a/tests/qtest/virtio-net-failover.c b/tests/qtest/virtio-net-failover.c
index 4a809590bf..0d40bc1f2d 100644
--- a/tests/qtest/virtio-net-failover.c
+++ b/tests/qtest/virtio-net-failover.c
@@ -11,6 +11,7 @@
 #include "libqtest.h"
 #include "libqos/pci.h"
 #include "libqos/pci-pc.h"
+#include "migration-helpers.h"
 #include "qapi/qmp/qdict.h"
 #include "qapi/qmp/qlist.h"
 #include "qapi/qmp/qjson.h"
@@ -736,26 +737,10 @@ static void test_migrate_out(gconstpointer opaque)
 machine_stop(qts);
 }
 
-static QDict *get_migration_event(QTestState *qts)
-{
-QDict *resp;
-QDict *data;
-
-resp = qtest_qmp_eventwait_ref(qts, "MIGRATION");
-g_assert(qdict_haskey(resp, "data"));
-
-data = qdict_get_qdict(resp, "data");
-g_assert(qdict_haskey(data, "status"));
-qobject_ref(data);
-qobject_unref(resp);
-
-return data;
-}
-
 static void test_migrate_in(gconstpointer opaque)
 {
 QTestState *qts;
-QDict *resp, *args, *ret;
+QDict *resp, *ret;
 g_autofree gchar *uri = g_strdup_printf("exec: cat %s", (gchar *)opaque);
 
 qts = machine_start(BASE_MACHINE
@@ -787,18 +772,7 @@ static void test_migrate_in(gconstpointer opaque)
 check_one_card(qts, true, "standby0", MAC_STANDBY0);
 check_one_card(qts, false, "primary0", MAC_PRIMARY0);
 
-args = qdict_from_jsonf_nofail("{}");
-g_assert_nonnull(args);
-qdict_put_str(args, "uri", uri);
-
-resp = qtest_qmp(qts, "{ 'execute': 'migrate-incoming', 'arguments': %p}",
- args);
-g_assert(qdict_haskey(resp, "return"));
-qobject_unref(resp);
-
-resp = get_migration_event(qts);
-g_assert_cmpstr(qdict_get_str(resp, "status"), ==, "setup");
-qobject_unref(resp);
+migrate_incoming_qmp(qts, uri, "{}");
 
 resp = get_failover_negociated_event(qts);
 g_assert_cmpstr(qdict_get_str(resp, "device-id"), ==, "standby0");
@@ -888,7 +862,7 @@ static void test_off_migrate_out(gconstpointer opaque)
 static void test_off_migrate_in(gconstpointer opaque)
 

[PATCH for-8.2 6/7] target/riscv: add 'max' CPU type

2023-07-12 Thread Daniel Henrique Barboza
The 'max' CPU type is used by tooling to determine what's the most
capable CPU a current QEMU version implements. Other archs such as ARM
implements this type. Let's add it to RISC-V.

What we consider the "most capable CPU" in this context is related to
ratified, non-vendor extensions. This means that we want the 'max' CPU
to enable all (possible) ratified extensions by default. The reasoning
behind this design is (1) vendor extensions can conflict with each other
and we won't play favorites deciding which one is default or not and
(2) non-ratified extensions are always prone to changes, not being
stable enough to be enabled by default.

All this said, we're still not able to enable all ratified extensions
due to conflicts between them. Zfinx and all its dependencies aren't
enabled because of a conflict with RVF. zce, zcmp and zcmt are also
disabled due to RVD conflicts. When running with 64 bits we're also
disabling zcf.

Signed-off-by: Daniel Henrique Barboza 
---
 target/riscv/cpu-qom.h |  1 +
 target/riscv/cpu.c | 50 ++
 2 files changed, 51 insertions(+)

diff --git a/target/riscv/cpu-qom.h b/target/riscv/cpu-qom.h
index 04af50983e..f3fbe37a2c 100644
--- a/target/riscv/cpu-qom.h
+++ b/target/riscv/cpu-qom.h
@@ -30,6 +30,7 @@
 #define CPU_RESOLVING_TYPE TYPE_RISCV_CPU
 
 #define TYPE_RISCV_CPU_ANY  RISCV_CPU_TYPE_NAME("any")
+#define TYPE_RISCV_CPU_MAX  RISCV_CPU_TYPE_NAME("max")
 #define TYPE_RISCV_CPU_BASE32   RISCV_CPU_TYPE_NAME("rv32")
 #define TYPE_RISCV_CPU_BASE64   RISCV_CPU_TYPE_NAME("rv64")
 #define TYPE_RISCV_CPU_BASE128  RISCV_CPU_TYPE_NAME("x-rv128")
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index b61465c8c4..125cf096c4 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -248,6 +248,7 @@ static const char * const riscv_intr_names[] = {
 };
 
 static void riscv_cpu_add_user_properties(Object *obj);
+static void riscv_init_max_cpu_extensions(Object *obj);
 
 const char *riscv_cpu_get_trap_name(target_ulong cause, bool async)
 {
@@ -374,6 +375,25 @@ static void riscv_any_cpu_init(Object *obj)
 cpu->cfg.pmp = true;
 }
 
+static void riscv_max_cpu_init(Object *obj)
+{
+RISCVCPU *cpu = RISCV_CPU(obj);
+CPURISCVState *env = &cpu->env;
+RISCVMXL mlx = MXL_RV64;
+
+#ifdef TARGET_RISCV32
+mlx = MXL_RV32;
+#endif
+set_misa(env, mlx, 0);
+riscv_cpu_add_user_properties(obj);
+riscv_init_max_cpu_extensions(obj);
+env->priv_ver = PRIV_VERSION_LATEST;
+#ifndef CONFIG_USER_ONLY
+set_satp_mode_max_supported(RISCV_CPU(obj), mlx == MXL_RV32 ?
+VM_1_10_SV32 : VM_1_10_SV57);
+#endif
+}
+
 #if defined(TARGET_RISCV64)
 static void rv64_base_cpu_init(Object *obj)
 {
@@ -1934,6 +1954,35 @@ static void riscv_cpu_add_user_properties(Object *obj)
 ADD_CPU_PROPERTIES_ARRAY(dev, riscv_cpu_experimental_exts);
 }
 
+/*
+ * The 'max' type CPU will have all possible ratified
+ * non-vendor extensions enabled.
+ */
+static void riscv_init_max_cpu_extensions(Object *obj)
+{
+RISCVCPU *cpu = RISCV_CPU(obj);
+CPURISCVState *env = &cpu->env;
+Property *prop;
+
+for (prop = riscv_cpu_extensions; prop && prop->name; prop++) {
+object_property_set_bool(obj, prop->name, true, NULL);
+}
+
+/* Zfinx is not compatible with F. Disable it */
+object_property_set_bool(obj, "zfinx", false, NULL);
+object_property_set_bool(obj, "zdinx", false, NULL);
+object_property_set_bool(obj, "zhinx", false, NULL);
+object_property_set_bool(obj, "zhinxmin", false, NULL);
+
+object_property_set_bool(obj, "zce", false, NULL);
+object_property_set_bool(obj, "zcmp", false, NULL);
+object_property_set_bool(obj, "zcmt", false, NULL);
+
+if (env->misa_mxl != MXL_RV32) {
+object_property_set_bool(obj, "zcf", false, NULL);
+}
+}
+
 static Property riscv_cpu_properties[] = {
 DEFINE_PROP_BOOL("debug", RISCVCPU, cfg.debug, true),
 
@@ -2272,6 +2321,7 @@ static const TypeInfo riscv_cpu_type_infos[] = {
 .abstract = true,
 },
 DEFINE_DYNAMIC_CPU(TYPE_RISCV_CPU_ANY,  riscv_any_cpu_init),
+DEFINE_DYNAMIC_CPU(TYPE_RISCV_CPU_MAX,  riscv_max_cpu_init),
 #if defined(CONFIG_KVM)
 DEFINE_CPU(TYPE_RISCV_CPU_HOST, riscv_host_cpu_init),
 #endif
-- 
2.41.0




[PATCH for-8.2 7/7] avocado, risc-v: add opensbi tests for 'max' CPU

2023-07-12 Thread Daniel Henrique Barboza
Add smoke tests to ensure that we'll not break the 'max' CPU type when
adding new ratified extensions to be enabled.

Signed-off-by: Daniel Henrique Barboza 
---
 tests/avocado/riscv_opensbi.py | 16 
 1 file changed, 16 insertions(+)

diff --git a/tests/avocado/riscv_opensbi.py b/tests/avocado/riscv_opensbi.py
index bfff9cc3c3..15fd57fe51 100644
--- a/tests/avocado/riscv_opensbi.py
+++ b/tests/avocado/riscv_opensbi.py
@@ -61,3 +61,19 @@ def test_riscv64_virt(self):
 :avocado: tags=machine:virt
 """
 self.boot_opensbi()
+
+def test_riscv32_virt_maxcpu(self):
+"""
+:avocado: tags=arch:riscv32
+:avocado: tags=machine:virt
+:avocado: tags=cpu:max
+"""
+self.boot_opensbi()
+
+def test_riscv64_virt_maxcpu(self):
+"""
+:avocado: tags=arch:riscv64
+:avocado: tags=machine:virt
+:avocado: tags=cpu:max
+"""
+self.boot_opensbi()
-- 
2.41.0




[PATCH for-8.2 2/7] target/riscv/cpu.c: skip 'bool' check when filtering KVM props

2023-07-12 Thread Daniel Henrique Barboza
After the introduction of riscv_cpu_options[] all properties in
riscv_cpu_extensions[] are booleans. This check is now obsolete.

Signed-off-by: Daniel Henrique Barboza 
---
 target/riscv/cpu.c | 14 --
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index cdf9eeeb6b..735e0ed793 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -1907,17 +1907,11 @@ static void riscv_cpu_add_user_properties(Object *obj)
  * Set the default to disabled for every extension
  * unknown to KVM and error out if the user attempts
  * to enable any of them.
- *
- * We're giving a pass for non-bool properties since they're
- * not related to the availability of extensions and can be
- * safely ignored as is.
  */
-if (prop->info == &qdev_prop_bool) {
-object_property_add(obj, prop->name, "bool",
-NULL, cpu_set_cfg_unavailable,
-NULL, (void *)prop->name);
-continue;
-}
+object_property_add(obj, prop->name, "bool",
+NULL, cpu_set_cfg_unavailable,
+NULL, (void *)prop->name);
+continue;
 }
 #endif
 qdev_property_add_static(dev, prop);
-- 
2.41.0




[PATCH for-8.2 3/7] target/riscv/cpu.c: split vendor exts from riscv_cpu_extensions[]

2023-07-12 Thread Daniel Henrique Barboza
Our goal is to make riscv_cpu_extensions[] hold only ratified,
non-vendor extensions.

Create a new riscv_cpu_vendor_exts[] array for them, changing
riscv_cpu_add_user_properties() accordingly.

Signed-off-by: Daniel Henrique Barboza 
---
 target/riscv/cpu.c | 34 --
 1 file changed, 20 insertions(+), 14 deletions(-)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index 735e0ed793..9bbdc46126 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -1808,20 +1808,6 @@ static Property riscv_cpu_extensions[] = {
 DEFINE_PROP_BOOL("zcmp", RISCVCPU, cfg.ext_zcmp, false),
 DEFINE_PROP_BOOL("zcmt", RISCVCPU, cfg.ext_zcmt, false),
 
-/* Vendor-specific custom extensions */
-DEFINE_PROP_BOOL("xtheadba", RISCVCPU, cfg.ext_xtheadba, false),
-DEFINE_PROP_BOOL("xtheadbb", RISCVCPU, cfg.ext_xtheadbb, false),
-DEFINE_PROP_BOOL("xtheadbs", RISCVCPU, cfg.ext_xtheadbs, false),
-DEFINE_PROP_BOOL("xtheadcmo", RISCVCPU, cfg.ext_xtheadcmo, false),
-DEFINE_PROP_BOOL("xtheadcondmov", RISCVCPU, cfg.ext_xtheadcondmov, false),
-DEFINE_PROP_BOOL("xtheadfmemidx", RISCVCPU, cfg.ext_xtheadfmemidx, false),
-DEFINE_PROP_BOOL("xtheadfmv", RISCVCPU, cfg.ext_xtheadfmv, false),
-DEFINE_PROP_BOOL("xtheadmac", RISCVCPU, cfg.ext_xtheadmac, false),
-DEFINE_PROP_BOOL("xtheadmemidx", RISCVCPU, cfg.ext_xtheadmemidx, false),
-DEFINE_PROP_BOOL("xtheadmempair", RISCVCPU, cfg.ext_xtheadmempair, false),
-DEFINE_PROP_BOOL("xtheadsync", RISCVCPU, cfg.ext_xtheadsync, false),
-DEFINE_PROP_BOOL("xventanacondops", RISCVCPU, cfg.ext_XVentanaCondOps, false),
-
 /* These are experimental so mark with 'x-' */
 DEFINE_PROP_BOOL("x-zicond", RISCVCPU, cfg.ext_zicond, false),
 
@@ -1840,6 +1826,23 @@ static Property riscv_cpu_extensions[] = {
 DEFINE_PROP_END_OF_LIST(),
 };
 
+static Property riscv_cpu_vendor_exts[] = {
+DEFINE_PROP_BOOL("xtheadba", RISCVCPU, cfg.ext_xtheadba, false),
+DEFINE_PROP_BOOL("xtheadbb", RISCVCPU, cfg.ext_xtheadbb, false),
+DEFINE_PROP_BOOL("xtheadbs", RISCVCPU, cfg.ext_xtheadbs, false),
+DEFINE_PROP_BOOL("xtheadcmo", RISCVCPU, cfg.ext_xtheadcmo, false),
+DEFINE_PROP_BOOL("xtheadcondmov", RISCVCPU, cfg.ext_xtheadcondmov, false),
+DEFINE_PROP_BOOL("xtheadfmemidx", RISCVCPU, cfg.ext_xtheadfmemidx, false),
+DEFINE_PROP_BOOL("xtheadfmv", RISCVCPU, cfg.ext_xtheadfmv, false),
+DEFINE_PROP_BOOL("xtheadmac", RISCVCPU, cfg.ext_xtheadmac, false),
+DEFINE_PROP_BOOL("xtheadmemidx", RISCVCPU, cfg.ext_xtheadmemidx, false),
+DEFINE_PROP_BOOL("xtheadmempair", RISCVCPU, cfg.ext_xtheadmempair, false),
+DEFINE_PROP_BOOL("xtheadsync", RISCVCPU, cfg.ext_xtheadsync, false),
+DEFINE_PROP_BOOL("xventanacondops", RISCVCPU, cfg.ext_XVentanaCondOps, false),
+
+DEFINE_PROP_END_OF_LIST(),
+};
+
 static Property riscv_cpu_options[] = {
 DEFINE_PROP_UINT8("pmu-num", RISCVCPU, cfg.pmu_num, 16),
 
@@ -1921,6 +1924,9 @@ static void riscv_cpu_add_user_properties(Object *obj)
 qdev_property_add_static(dev, prop);
 }
 
+for (prop = riscv_cpu_vendor_exts; prop && prop->name; prop++) {
+qdev_property_add_static(dev, prop);
+}
 }
 
 static Property riscv_cpu_properties[] = {
-- 
2.41.0




[PATCH for-8.2 4/7] target/riscv/cpu.c: split non-ratified exts from riscv_cpu_extensions[]

2023-07-12 Thread Daniel Henrique Barboza
Create a new riscv_cpu_experimental_exts[] to store the non-ratified
extensions properties. Once they are ratified we'll move them back to
riscv_cpu_extensions[].

Change riscv_cpu_add_user_properties to keep adding them to users.

Signed-off-by: Daniel Henrique Barboza 
---
 target/riscv/cpu.c | 38 +++---
 1 file changed, 23 insertions(+), 15 deletions(-)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index 9bbdc46126..c0826b449d 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -1808,21 +1808,6 @@ static Property riscv_cpu_extensions[] = {
 DEFINE_PROP_BOOL("zcmp", RISCVCPU, cfg.ext_zcmp, false),
 DEFINE_PROP_BOOL("zcmt", RISCVCPU, cfg.ext_zcmt, false),
 
-/* These are experimental so mark with 'x-' */
-DEFINE_PROP_BOOL("x-zicond", RISCVCPU, cfg.ext_zicond, false),
-
-/* ePMP 0.9.3 */
-DEFINE_PROP_BOOL("x-epmp", RISCVCPU, cfg.epmp, false),
-DEFINE_PROP_BOOL("x-smaia", RISCVCPU, cfg.ext_smaia, false),
-DEFINE_PROP_BOOL("x-ssaia", RISCVCPU, cfg.ext_ssaia, false),
-
-DEFINE_PROP_BOOL("x-zvfh", RISCVCPU, cfg.ext_zvfh, false),
-DEFINE_PROP_BOOL("x-zvfhmin", RISCVCPU, cfg.ext_zvfhmin, false),
-
-DEFINE_PROP_BOOL("x-zfbfmin", RISCVCPU, cfg.ext_zfbfmin, false),
-DEFINE_PROP_BOOL("x-zvfbfmin", RISCVCPU, cfg.ext_zvfbfmin, false),
-DEFINE_PROP_BOOL("x-zvfbfwma", RISCVCPU, cfg.ext_zvfbfwma, false),
-
 DEFINE_PROP_END_OF_LIST(),
 };
 
@@ -1843,6 +1828,25 @@ static Property riscv_cpu_vendor_exts[] = {
 DEFINE_PROP_END_OF_LIST(),
 };
 
+/* These are experimental so mark with 'x-' */
+static Property riscv_cpu_experimental_exts[] = {
+DEFINE_PROP_BOOL("x-zicond", RISCVCPU, cfg.ext_zicond, false),
+
+/* ePMP 0.9.3 */
+DEFINE_PROP_BOOL("x-epmp", RISCVCPU, cfg.epmp, false),
+DEFINE_PROP_BOOL("x-smaia", RISCVCPU, cfg.ext_smaia, false),
+DEFINE_PROP_BOOL("x-ssaia", RISCVCPU, cfg.ext_ssaia, false),
+
+DEFINE_PROP_BOOL("x-zvfh", RISCVCPU, cfg.ext_zvfh, false),
+DEFINE_PROP_BOOL("x-zvfhmin", RISCVCPU, cfg.ext_zvfhmin, false),
+
+DEFINE_PROP_BOOL("x-zfbfmin", RISCVCPU, cfg.ext_zfbfmin, false),
+DEFINE_PROP_BOOL("x-zvfbfmin", RISCVCPU, cfg.ext_zvfbfmin, false),
+DEFINE_PROP_BOOL("x-zvfbfwma", RISCVCPU, cfg.ext_zvfbfwma, false),
+
+DEFINE_PROP_END_OF_LIST(),
+};
+
 static Property riscv_cpu_options[] = {
 DEFINE_PROP_UINT8("pmu-num", RISCVCPU, cfg.pmu_num, 16),
 
@@ -1927,6 +1931,10 @@ static void riscv_cpu_add_user_properties(Object *obj)
 for (prop = riscv_cpu_vendor_exts; prop && prop->name; prop++) {
 qdev_property_add_static(dev, prop);
 }
+
+for (prop = riscv_cpu_experimental_exts; prop && prop->name; prop++) {
+qdev_property_add_static(dev, prop);
+}
 }
 
 static Property riscv_cpu_properties[] = {
-- 
2.41.0




[PATCH for-8.2 0/7] target/riscv: add 'max' CPU type

2023-07-12 Thread Daniel Henrique Barboza
Hi,

Following the discussions made in [1] I decided to go ahead and implement
the 'max' CPU type.

It's a CPU that has (almost) all ratified non-vendor extensions enabled
by default. Tooling such as libvirt uses this kind of CPU to do capabilities
discovery. It's also used for testing purposes.

To implement this CPU I did some cleanups in the riscv_cpu_extensions[]
array. After this series this array contains only ratified extensions.
This is a preliminary step for future changes we're planning to do in
the CPU modelling in QEMU, including 'profile' support.

Daniel Henrique Barboza (7):
  target/riscv/cpu.c: split CPU options from riscv_cpu_extensions[]
  target/riscv/cpu.c: skip 'bool' check when filtering KVM props
  target/riscv/cpu.c: split vendor exts from riscv_cpu_extensions[]
  target/riscv/cpu.c: split non-ratified exts from
riscv_cpu_extensions[]
  target/riscv/cpu.c: add a ADD_CPU_PROPERTIES_ARRAY() macro
  target/riscv: add 'max' CPU type
  avocado, risc-v: add opensbi tests for 'max' CPU

 target/riscv/cpu-qom.h |   1 +
 target/riscv/cpu.c | 106 ++---
 tests/avocado/riscv_opensbi.py |  16 +
 3 files changed, 103 insertions(+), 20 deletions(-)

-- 
2.41.0




[PATCH for-8.2 5/7] target/riscv/cpu.c: add a ADD_CPU_PROPERTIES_ARRAY() macro

2023-07-12 Thread Daniel Henrique Barboza
The code inside riscv_cpu_add_user_properties() became quite repetitive
after recent changes. Add a macro to hide the repetition away.

Signed-off-by: Daniel Henrique Barboza 
---
 target/riscv/cpu.c | 19 ---
 1 file changed, 8 insertions(+), 11 deletions(-)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index c0826b449d..b61465c8c4 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -1881,6 +1881,11 @@ static void cpu_set_cfg_unavailable(Object *obj, Visitor *v,
 }
 #endif
 
+#define ADD_CPU_PROPERTIES_ARRAY(_dev, _array) \
+for (prop = _array; prop && prop->name; prop++) { \
+qdev_property_add_static(_dev, prop); \
+} \
+
 /*
  * Add CPU properties with user-facing flags.
  *
@@ -1924,17 +1929,9 @@ static void riscv_cpu_add_user_properties(Object *obj)
 qdev_property_add_static(dev, prop);
 }
 
-for (prop = riscv_cpu_options; prop && prop->name; prop++) {
-qdev_property_add_static(dev, prop);
-}
-
-for (prop = riscv_cpu_vendor_exts; prop && prop->name; prop++) {
-qdev_property_add_static(dev, prop);
-}
-
-for (prop = riscv_cpu_experimental_exts; prop && prop->name; prop++) {
-qdev_property_add_static(dev, prop);
-}
+ADD_CPU_PROPERTIES_ARRAY(dev, riscv_cpu_options);
+ADD_CPU_PROPERTIES_ARRAY(dev, riscv_cpu_vendor_exts);
+ADD_CPU_PROPERTIES_ARRAY(dev, riscv_cpu_experimental_exts);
 }
 
 static Property riscv_cpu_properties[] = {
-- 
2.41.0




[PATCH for-8.2 1/7] target/riscv/cpu.c: split CPU options from riscv_cpu_extensions[]

2023-07-12 Thread Daniel Henrique Barboza
We'll add a new CPU type that will enable a considerable amount of
extensions. To make it easier for us we'll do a few cleanups in our
existing riscv_cpu_extensions[] array.

Start by splitting all CPU non-boolean options from it. Create a new
riscv_cpu_options[] array for them. Add all these properties in
riscv_cpu_add_user_properties() as it is already being done today.

No functional changes made.

Signed-off-by: Daniel Henrique Barboza 
---
 target/riscv/cpu.c | 27 +++
 1 file changed, 19 insertions(+), 8 deletions(-)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index 9339c0241d..cdf9eeeb6b 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -1751,7 +1751,6 @@ static void riscv_cpu_add_misa_properties(Object *cpu_obj)
 
 static Property riscv_cpu_extensions[] = {
 /* Defaults for standard extensions */
-DEFINE_PROP_UINT8("pmu-num", RISCVCPU, cfg.pmu_num, 16),
 DEFINE_PROP_BOOL("sscofpmf", RISCVCPU, cfg.ext_sscofpmf, false),
 DEFINE_PROP_BOOL("Zifencei", RISCVCPU, cfg.ext_ifencei, true),
 DEFINE_PROP_BOOL("Zicsr", RISCVCPU, cfg.ext_icsr, true),
@@ -1767,11 +1766,6 @@ static Property riscv_cpu_extensions[] = {
 DEFINE_PROP_BOOL("pmp", RISCVCPU, cfg.pmp, true),
 DEFINE_PROP_BOOL("sstc", RISCVCPU, cfg.ext_sstc, true),
 
-DEFINE_PROP_STRING("priv_spec", RISCVCPU, cfg.priv_spec),
-DEFINE_PROP_STRING("vext_spec", RISCVCPU, cfg.vext_spec),
-DEFINE_PROP_UINT16("vlen", RISCVCPU, cfg.vlen, 128),
-DEFINE_PROP_UINT16("elen", RISCVCPU, cfg.elen, 64),
-
 DEFINE_PROP_BOOL("smstateen", RISCVCPU, cfg.ext_smstateen, false),
 DEFINE_PROP_BOOL("svadu", RISCVCPU, cfg.ext_svadu, true),
 DEFINE_PROP_BOOL("svinval", RISCVCPU, cfg.ext_svinval, false),
@@ -1802,9 +1796,7 @@ static Property riscv_cpu_extensions[] = {
 DEFINE_PROP_BOOL("zhinxmin", RISCVCPU, cfg.ext_zhinxmin, false),
 
 DEFINE_PROP_BOOL("zicbom", RISCVCPU, cfg.ext_icbom, true),
-DEFINE_PROP_UINT16("cbom_blocksize", RISCVCPU, cfg.cbom_blocksize, 64),
 DEFINE_PROP_BOOL("zicboz", RISCVCPU, cfg.ext_icboz, true),
-DEFINE_PROP_UINT16("cboz_blocksize", RISCVCPU, cfg.cboz_blocksize, 64),
 
 DEFINE_PROP_BOOL("zmmul", RISCVCPU, cfg.ext_zmmul, false),
 
@@ -1848,6 +1840,20 @@ static Property riscv_cpu_extensions[] = {
 DEFINE_PROP_END_OF_LIST(),
 };
 
+static Property riscv_cpu_options[] = {
+DEFINE_PROP_UINT8("pmu-num", RISCVCPU, cfg.pmu_num, 16),
+
+DEFINE_PROP_STRING("priv_spec", RISCVCPU, cfg.priv_spec),
+DEFINE_PROP_STRING("vext_spec", RISCVCPU, cfg.vext_spec),
+
+DEFINE_PROP_UINT16("vlen", RISCVCPU, cfg.vlen, 128),
+DEFINE_PROP_UINT16("elen", RISCVCPU, cfg.elen, 64),
+
+DEFINE_PROP_UINT16("cbom_blocksize", RISCVCPU, cfg.cbom_blocksize, 64),
+DEFINE_PROP_UINT16("cboz_blocksize", RISCVCPU, cfg.cboz_blocksize, 64),
+
+DEFINE_PROP_END_OF_LIST(),
+};
 
 #ifndef CONFIG_USER_ONLY
 static void cpu_set_cfg_unavailable(Object *obj, Visitor *v,
@@ -1916,6 +1922,11 @@ static void riscv_cpu_add_user_properties(Object *obj)
 #endif
 qdev_property_add_static(dev, prop);
 }
+
+for (prop = riscv_cpu_options; prop && prop->name; prop++) {
+qdev_property_add_static(dev, prop);
+}
+
 }
 
 static Property riscv_cpu_properties[] = {
-- 
2.41.0




Re: [PATCH v3 1/4] tests/lcitool: Generate distribution packages list in JSON format

2023-07-12 Thread Warner Losh
On Wed, Jul 12, 2023 at 5:07 AM Philippe Mathieu-Daudé wrote:

> On 11/7/23 21:39, Warner Losh wrote:
> > On Tue, Jul 11, 2023 at 8:49 AM Philippe Mathieu-Daudé
> > <phi...@linaro.org> wrote:
> >
> > Add the generate_pkglist() helper to generate a list of packages
> > required by a distribution to build QEMU.
> >
> > Since we can not add a "THIS FILE WAS AUTO-GENERATED" comment in
> > JSON, create the files under tests/vm/generated/ sub-directory;
> > add a README mentioning the files are generated.
> >
> > Suggested-by: Erik Skultety
> > Signed-off-by: Philippe Mathieu-Daudé
> >
> >
> > Reviewed-by: Warner Losh <i...@bsdimp.com>
> >
> > also, FreeBSD 14 branches next month... do I just grep for FreeBSD-13 to
> > find all the places to update for 14.0?
>
> Per docs/about/build-platforms.rst:
>
>Linux OS, macOS, FreeBSD, NetBSD, OpenBSD
>-
>
>The project aims to support the most recent major version at all
>times for up to five years after its initial release. Support
>for the previous major version will be dropped 2 years after the
>new major version is released or when the vendor itself drops
>support, whichever comes first. [...]
>
> We want to be able to test the oldest/newest releases.
>
> Maybe we can add a pair of definitions, so we'd have to only
> change 2 lines in a single place when releases occur?
>

I'd love that. There's two bits of information per release: name and
checksum of release artifacts.

Warner


>
> > @@ -191,6 +197,11 @@ try:
> >   generate_cirrus("freebsd-13")
> >   generate_cirrus("macos-12")
> >
> > +#
> > +# VM packages lists
> > +#
> > +generate_pkglist("freebsd", "freebsd-13")
>
>


Re: [PATCH v2] target/ppc: Generate storage interrupts for radix RC changes

2023-07-12 Thread Shawn Anastasio
On 7/12/23 11:56 AM, Cédric Le Goater wrote:
> Hello Shawn,
> 
> On 7/12/23 18:13, Shawn Anastasio wrote:
>> Change radix model to always generate a storage interrupt when the R/C
>> bits are not set appropriately in a PTE instead of setting the bits
>> itself.  According to the ISA both behaviors are valid, but in practice
>> this change more closely matches behavior observed on the POWER9 CPU.
> 
> How did you spot this dark corner case in emulation? Do you have
> MMU unit tests?

I'm currently porting Xen to Power and have been using QEMU's powernv
model extensively for early bring up. I noticed the issue when my radix
implementation worked in QEMU but failed on actual hardware since I
didn't have a proper storage interrupt handler implemented.

>> Signed-off-by: Shawn Anastasio 
> Reviewed-by: Cédric Le Goater 

Much appreciated.

> Thanks,
> 
> C.

Thanks,
Shawn



Re: [PATCH v4] kconfig: Add PCIe devices to s390x machines

2023-07-12 Thread Cédric Le Goater

diff --git a/hw/s390x/Kconfig b/hw/s390x/Kconfig
index 454e0ff4b613..4c068d7960b9 100644
--- a/hw/s390x/Kconfig
+++ b/hw/s390x/Kconfig
@@ -5,7 +5,8 @@ config S390_CCW_VIRTIO
  imply VFIO_AP
  imply VFIO_CCW
  imply WDT_DIAG288
-    select PCI
+    imply PCIE_DEVICES
+    select PCI_EXPRESS


I'm confused, TYPE_S390_PCI_HOST_BRIDGE exposes a PCI bus...
At a minimum you'd need:

-- >8 --
  static const TypeInfo s390_pcihost_info = {
  .name  = TYPE_S390_PCI_HOST_BRIDGE,
-    .parent    = TYPE_PCI_HOST_BRIDGE,
+    .parent    = TYPE_PCIE_HOST_BRIDGE,
  .instance_size = sizeof(S390pciState),
  .class_init    = s390_pcihost_class_init,
  .interfaces = (InterfaceInfo[]) {
---

Actually I can see:

     if (s390_pci_msix_init(pbdev) && !pbdev->interp) {
     error_setg(errp, "MSI-X support is mandatory "
    "in the S390 architecture");
     return;
     }

So this must be PCIe, not legacy PCI, right?


It seems difficult to change now without breaking migration compatibility.

C.




Re: [PATCH v2] target/ppc: Generate storage interrupts for radix RC changes

2023-07-12 Thread Cédric Le Goater

Hello Shawn,

On 7/12/23 18:13, Shawn Anastasio wrote:

Change radix model to always generate a storage interrupt when the R/C
bits are not set appropriately in a PTE instead of setting the bits
itself.  According to the ISA both behaviors are valid, but in practice
this change more closely matches behavior observed on the POWER9 CPU.


How did you spot this dark corner case in emulation? Do you have
MMU unit tests?
 

 From the POWER9 Processor User's Manual, Section 4.10.13.1: "When
performing Radix translation, the POWER9 hardware triggers the
appropriate interrupt ... for the mode and type of access whenever
Reference (R) and Change (C) bits require setting in either the guest or
host page-table entry (PTE)."


Nick, could you please take a look also ? You know better that part
in Linux than I do.
 

Signed-off-by: Shawn Anastasio 

Reviewed-by: Cédric Le Goater 

Thanks,

C.


---
Changes in v2:
   - Raise interrupt in ppc_radix64_process_scoped_xlate and
 ppc_radix64_partition_scoped_xlate instead of ppc_radix64_check_rc

  target/ppc/mmu-radix64.c | 74 ++--
  1 file changed, 49 insertions(+), 25 deletions(-)

diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c
index 920084bd8f..5823e039e6 100644
--- a/target/ppc/mmu-radix64.c
+++ b/target/ppc/mmu-radix64.c
@@ -219,27 +219,25 @@ static bool ppc_radix64_check_prot(PowerPCCPU *cpu, MMUAccessType access_type,
  return false;
  }

-static void ppc_radix64_set_rc(PowerPCCPU *cpu, MMUAccessType access_type,
-   uint64_t pte, hwaddr pte_addr, int *prot)
+static int ppc_radix64_check_rc(MMUAccessType access_type, uint64_t pte)
  {
-CPUState *cs = CPU(cpu);
-uint64_t npte;
-
-npte = pte | R_PTE_R; /* Always set reference bit */
+switch (access_type) {
+case MMU_DATA_STORE:
+if (!(pte & R_PTE_C)) {
+break;
+}
+/* fall through */
+case MMU_INST_FETCH:
+case MMU_DATA_LOAD:
+if (!(pte & R_PTE_R)) {
+break;
+}

-if (access_type == MMU_DATA_STORE) { /* Store/Write */
-npte |= R_PTE_C; /* Set change bit */
-} else {
-/*
- * Treat the page as read-only for now, so that a later write
- * will pass through this function again to set the C bit.
- */
-*prot &= ~PAGE_WRITE;
+/* R/C bits are already set appropriately for this access */
+return 0;
  }

-if (pte ^ npte) { /* If pte has changed then write it back */
-stq_phys(cs->as, pte_addr, npte);
-}
+return 1;
  }

  static bool ppc_radix64_is_valid_level(int level, int psize, uint64_t nls)
@@ -380,7 +378,8 @@ static int ppc_radix64_partition_scoped_xlate(PowerPCCPU *cpu,
ppc_v3_pate_t pate,
hwaddr *h_raddr, int *h_prot,
int *h_page_size, bool pde_addr,
-  int mmu_idx, bool guest_visible)
+  int mmu_idx, uint64_t lpid,
+  bool guest_visible)
  {
  MMUAccessType access_type = orig_access_type;
  int fault_cause = 0;
@@ -418,7 +417,24 @@ static int ppc_radix64_partition_scoped_xlate(PowerPCCPU *cpu,
  }

  if (guest_visible) {
-ppc_radix64_set_rc(cpu, access_type, pte, pte_addr, h_prot);
+if (ppc_radix64_check_rc(access_type, pte)) {
+/*
+ * Per ISA 3.1 Book III, 7.5.3 and 7.5.5, failure to set R/C during
+ * partition-scoped translation when effLPID = 0 results in normal
+ * (non-Hypervisor) Data and Instruction Storage Interrupts
+ * respectively.
+ *
+ * ISA 3.0 is ambiguous about this, but tests on POWER9 hardware
+ * seem to exhibit the same behavior.
+ */
+if (lpid > 0) {
+ppc_radix64_raise_hsi(cpu, access_type, eaddr, g_raddr,
+  DSISR_ATOMIC_RC);
+} else {
+ppc_radix64_raise_si(cpu, access_type, eaddr, DSISR_ATOMIC_RC);
+}
+return 1;
+}
  }

  return 0;
@@ -447,7 +463,8 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu,
  vaddr eaddr, uint64_t pid,
  ppc_v3_pate_t pate, hwaddr *g_raddr,
  int *g_prot, int *g_page_size,
-int mmu_idx, bool guest_visible)
+int mmu_idx, uint64_t lpid,
+bool guest_visible)
  {
  CPUState *cs = CPU(cpu);
  CPUPPCState *env = &cpu->env;
@@ -497,7 +514,7 @@ static int ppc_radix64_process_scoped_x

[PATCH v2] io: remove io watch if TLS channel is closed during handshake

2023-07-12 Thread Daniel P . Berrangé
The TLS handshake may take some time to complete, during which time an
I/O watch might be registered with the main loop. If the owner of the
I/O channel invokes qio_channel_close() while the handshake is waiting
to continue the I/O watch must be removed. Failing to remove it will
later trigger the completion callback which the owner is not expecting
to receive. In the case of the VNC server, this results in a SEGV as
vnc_disconnect_start() tries to shutdown a client connection that is
already gone / NULL.

CVE-2023-3354
Reported-by: jiangyegen 
Signed-off-by: Daniel P. Berrangé 
---

In v2:

 - Use g_clear_handle_id to set source ID to zero

 include/io/channel-tls.h |  1 +
 io/channel-tls.c | 18 --
 2 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/include/io/channel-tls.h b/include/io/channel-tls.h
index 5672479e9e..26c67f17e2 100644
--- a/include/io/channel-tls.h
+++ b/include/io/channel-tls.h
@@ -48,6 +48,7 @@ struct QIOChannelTLS {
 QIOChannel *master;
 QCryptoTLSSession *session;
 QIOChannelShutdown shutdown;
+guint hs_ioc_tag;
 };
 
 /**
diff --git a/io/channel-tls.c b/io/channel-tls.c
index 9805dd0a3f..847d5297c3 100644
--- a/io/channel-tls.c
+++ b/io/channel-tls.c
@@ -198,12 +198,13 @@ static void qio_channel_tls_handshake_task(QIOChannelTLS *ioc,
 }
 
 trace_qio_channel_tls_handshake_pending(ioc, status);
-qio_channel_add_watch_full(ioc->master,
-   condition,
-   qio_channel_tls_handshake_io,
-   data,
-   NULL,
-   context);
+ioc->hs_ioc_tag =
+qio_channel_add_watch_full(ioc->master,
+   condition,
+   qio_channel_tls_handshake_io,
+   data,
+   NULL,
+   context);
 }
 }
 
@@ -218,6 +219,7 @@ static gboolean qio_channel_tls_handshake_io(QIOChannel *ioc,
 QIOChannelTLS *tioc = QIO_CHANNEL_TLS(
 qio_task_get_source(task));
 
+tioc->hs_ioc_tag = 0;
 g_free(data);
 qio_channel_tls_handshake_task(tioc, task, context);
 
@@ -378,6 +380,10 @@ static int qio_channel_tls_close(QIOChannel *ioc,
 {
 QIOChannelTLS *tioc = QIO_CHANNEL_TLS(ioc);
 
+if (tioc->hs_ioc_tag) {
+g_clear_handle_id(&tioc->hs_ioc_tag, g_source_remove);
+}
+
 return qio_channel_close(tioc->master, errp);
 }
 
-- 
2.41.0




[PATCH 02/10] hw/riscv: virt: Add PCI bus reference in RISCVVirtState

2023-07-12 Thread Sunil V L
The PCI bus information is needed in RISCVVirtState so that other
files like virt-acpi-build.c can make use of it. Add a new field to
RISCVVirtState for the ACPI code to use.

Signed-off-by: Sunil V L 
---
 hw/riscv/virt.c | 6 --
 include/hw/riscv/virt.h | 1 +
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
index d90286dc46..46d3341113 100644
--- a/hw/riscv/virt.c
+++ b/hw/riscv/virt.c
@@ -1073,7 +1073,8 @@ static inline DeviceState *gpex_pcie_init(MemoryRegion *sys_mem,
   hwaddr high_mmio_base,
   hwaddr high_mmio_size,
   hwaddr pio_base,
-  DeviceState *irqchip)
+  DeviceState *irqchip,
+  RISCVVirtState *s)
 {
 DeviceState *dev;
 MemoryRegion *ecam_alias, *ecam_reg;
@@ -1113,6 +1114,7 @@ static inline DeviceState *gpex_pcie_init(MemoryRegion *sys_mem,
 gpex_set_irq_num(GPEX_HOST(dev), i, PCIE_IRQ + i);
 }
 
+s->bus = PCI_HOST_BRIDGE(dev)->bus;
 return dev;
 }
 
@@ -1502,7 +1504,7 @@ static void virt_machine_init(MachineState *machine)
virt_high_pcie_memmap.base,
virt_high_pcie_memmap.size,
memmap[VIRT_PCIE_PIO].base,
-   pcie_irqchip);
+   pcie_irqchip, s);
 
 create_platform_bus(s, mmio_irqchip);
 
diff --git a/include/hw/riscv/virt.h b/include/hw/riscv/virt.h
index e5c474b26e..4ef1f660ab 100644
--- a/include/hw/riscv/virt.h
+++ b/include/hw/riscv/virt.h
@@ -60,6 +60,7 @@ struct RISCVVirtState {
 char *oem_table_id;
 OnOffAuto acpi;
 const MemMapEntry *memmap;
+PCIBus *bus;
 };
 
 enum {
-- 
2.39.2




[PATCH 03/10] hw/riscv: virt: Make few IMSIC macros and functions public

2023-07-12 Thread Sunil V L
Some macros and a static function related to IMSIC are defined in
virt.c. They are required in virt-acpi-build.c, so make them public.

Signed-off-by: Sunil V L 
---
 hw/riscv/virt.c | 25 +
 include/hw/riscv/virt.h | 25 +
 2 files changed, 26 insertions(+), 24 deletions(-)

diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
index 46d3341113..f6067db8ec 100644
--- a/hw/riscv/virt.c
+++ b/hw/riscv/virt.c
@@ -37,7 +37,6 @@
 #include "hw/riscv/numa.h"
 #include "hw/intc/riscv_aclint.h"
 #include "hw/intc/riscv_aplic.h"
-#include "hw/intc/riscv_imsic.h"
 #include "hw/intc/sifive_plic.h"
 #include "hw/misc/sifive_test.h"
 #include "hw/platform-bus.h"
@@ -53,28 +52,6 @@
 #include "hw/acpi/aml-build.h"
 #include "qapi/qapi-visit-common.h"
 
-/*
- * The virt machine physical address space used by some of the devices
- * namely ACLINT, PLIC, APLIC, and IMSIC depend on number of Sockets,
- * number of CPUs, and number of IMSIC guest files.
- *
- * Various limits defined by VIRT_SOCKETS_MAX_BITS, VIRT_CPUS_MAX_BITS,
- * and VIRT_IRQCHIP_MAX_GUESTS_BITS are tuned for maximum utilization
- * of virt machine physical address space.
- */
-
-#define VIRT_IMSIC_GROUP_MAX_SIZE  (1U << IMSIC_MMIO_GROUP_MIN_SHIFT)
-#if VIRT_IMSIC_GROUP_MAX_SIZE < \
-IMSIC_GROUP_SIZE(VIRT_CPUS_MAX_BITS, VIRT_IRQCHIP_MAX_GUESTS_BITS)
-#error "Can't accomodate single IMSIC group in address space"
-#endif
-
-#define VIRT_IMSIC_MAX_SIZE(VIRT_SOCKETS_MAX * \
-VIRT_IMSIC_GROUP_MAX_SIZE)
-#if 0x400 < VIRT_IMSIC_MAX_SIZE
-#error "Can't accomodate all IMSIC groups in address space"
-#endif
-
 static const MemMapEntry virt_memmap[] = {
 [VIRT_DEBUG] ={0x0, 0x100 },
 [VIRT_MROM] = { 0x1000,0xf000 },
@@ -505,7 +482,7 @@ static void create_fdt_socket_plic(RISCVVirtState *s,
 g_free(plic_cells);
 }
 
-static uint32_t imsic_num_bits(uint32_t count)
+uint32_t imsic_num_bits(uint32_t count)
 {
 uint32_t ret = 0;
 
diff --git a/include/hw/riscv/virt.h b/include/hw/riscv/virt.h
index 4ef1f660ab..00c22492a7 100644
--- a/include/hw/riscv/virt.h
+++ b/include/hw/riscv/virt.h
@@ -23,6 +23,7 @@
 #include "hw/riscv/riscv_hart.h"
 #include "hw/sysbus.h"
 #include "hw/block/flash.h"
+#include "hw/intc/riscv_imsic.h"
 
 #define VIRT_CPUS_MAX_BITS 9
 #define VIRT_CPUS_MAX  (1 << VIRT_CPUS_MAX_BITS)
@@ -128,4 +129,28 @@ enum {
 
 bool virt_is_acpi_enabled(RISCVVirtState *s);
 void virt_acpi_setup(RISCVVirtState *vms);
+uint32_t imsic_num_bits(uint32_t count);
+
+/*
+ * The virt machine physical address space used by some of the devices
+ * namely ACLINT, PLIC, APLIC, and IMSIC depend on number of Sockets,
+ * number of CPUs, and number of IMSIC guest files.
+ *
+ * Various limits defined by VIRT_SOCKETS_MAX_BITS, VIRT_CPUS_MAX_BITS,
+ * and VIRT_IRQCHIP_MAX_GUESTS_BITS are tuned for maximum utilization
+ * of virt machine physical address space.
+ */
+
+#define VIRT_IMSIC_GROUP_MAX_SIZE  (1U << IMSIC_MMIO_GROUP_MIN_SHIFT)
+#if VIRT_IMSIC_GROUP_MAX_SIZE < \
+IMSIC_GROUP_SIZE(VIRT_CPUS_MAX_BITS, VIRT_IRQCHIP_MAX_GUESTS_BITS)
+#error "Can't accomodate single IMSIC group in address space"
+#endif
+
+#define VIRT_IMSIC_MAX_SIZE(VIRT_SOCKETS_MAX * \
+VIRT_IMSIC_GROUP_MAX_SIZE)
+#if 0x400 < VIRT_IMSIC_MAX_SIZE
+#error "Can't accomodate all IMSIC groups in address space"
+#endif
+
 #endif
-- 
2.39.2




[PATCH 05/10] hw/riscv/virt-acpi-build.c: Add AIA support in RINTC

2023-07-12 Thread Sunil V L
Update the RINTC structure in the MADT with AIA-related fields.

Signed-off-by: Sunil V L 
---
 hw/riscv/virt-acpi-build.c | 66 +++---
 1 file changed, 62 insertions(+), 4 deletions(-)

diff --git a/hw/riscv/virt-acpi-build.c b/hw/riscv/virt-acpi-build.c
index 01843e4509..12b8ef0352 100644
--- a/hw/riscv/virt-acpi-build.c
+++ b/hw/riscv/virt-acpi-build.c
@@ -37,6 +37,7 @@
 #include "hw/intc/riscv_aclint.h"
 
 #define ACPI_BUILD_TABLE_SIZE 0x2
+#define ACPI_BUILD_INTC_ID(socket, index) ((socket << 24) | (index))
 
 typedef struct AcpiBuildState {
 /* Copy of table in RAM (for patching) */
@@ -57,18 +58,42 @@ static void acpi_align_size(GArray *blob, unsigned align)
 }
 
 static void riscv_acpi_madt_add_rintc(uint32_t uid,
+  uint32_t local_cpu_id,
   const CPUArchIdList *arch_ids,
-  GArray *entry)
+  GArray *entry,
+  RISCVVirtAIAType aia_type,
+  uint64_t imsic_addr,
+  uint32_t imsic_size)
 {
 uint64_t hart_id = arch_ids->cpus[uid].arch_id;
 
 build_append_int_noprefix(entry, 0x18, 1);   /* Type */
-build_append_int_noprefix(entry, 20, 1); /* Length   */
+build_append_int_noprefix(entry, 36, 1); /* Length   */
 build_append_int_noprefix(entry, 1, 1);  /* Version  */
 build_append_int_noprefix(entry, 0, 1);  /* Reserved */
 build_append_int_noprefix(entry, 0x1, 4);/* Flags*/
 build_append_int_noprefix(entry, hart_id, 8);/* Hart ID  */
 build_append_int_noprefix(entry, uid, 4);/* ACPI Processor UID */
+/* External Interrupt Controller ID */
+if (aia_type == VIRT_AIA_TYPE_APLIC) {
+build_append_int_noprefix(entry,
+  ACPI_BUILD_INTC_ID(
+  arch_ids->cpus[uid].props.node_id,
+  local_cpu_id),
+  4);
+} else {
+build_append_int_noprefix(entry, 0, 4);
+}
+
+if (aia_type == VIRT_AIA_TYPE_APLIC_IMSIC) {
+/* IMSIC Base address */
+build_append_int_noprefix(entry, imsic_addr, 8);
+/* IMSIC Size */
+build_append_int_noprefix(entry, imsic_size, 4);
+} else {
+build_append_int_noprefix(entry, 0, 8);
+build_append_int_noprefix(entry, 0, 4);
+}
 }
 
 static void acpi_dsdt_add_cpus(Aml *scope, RISCVVirtState *s)
@@ -76,6 +101,11 @@ static void acpi_dsdt_add_cpus(Aml *scope, RISCVVirtState *s)
 MachineClass *mc = MACHINE_GET_CLASS(s);
 MachineState *ms = MACHINE(s);
 const CPUArchIdList *arch_ids = mc->possible_cpu_arch_ids(ms);
+uint64_t imsic_socket_addr, imsic_addr;
+uint8_t  guest_index_bits;
+uint32_t imsic_size, local_cpu_id, socket_id;
+
+guest_index_bits = imsic_num_bits(s->aia_guests + 1);
 
 for (int i = 0; i < arch_ids->len; i++) {
 Aml *dev;
@@ -86,8 +116,19 @@ static void acpi_dsdt_add_cpus(Aml *scope, RISCVVirtState *s)
 aml_append(dev, aml_name_decl("_UID",
aml_int(arch_ids->cpus[i].arch_id)));
 
+socket_id = arch_ids->cpus[i].props.node_id;
+local_cpu_id = (arch_ids->cpus[i].arch_id -
+riscv_socket_first_hartid(ms, socket_id)) %
+riscv_socket_hart_count(ms, socket_id);
 /* build _MAT object */
-riscv_acpi_madt_add_rintc(i, arch_ids, madt_buf);
+imsic_socket_addr = s->memmap[VIRT_IMSIC_S].base +
+(socket_id * VIRT_IMSIC_GROUP_MAX_SIZE);
+imsic_addr = imsic_socket_addr +
+ local_cpu_id * IMSIC_HART_SIZE(guest_index_bits);
+imsic_size = IMSIC_HART_SIZE(guest_index_bits);
+
+riscv_acpi_madt_add_rintc(i, local_cpu_id, arch_ids, madt_buf,
+  s->aia_type, imsic_addr, imsic_size);
 aml_append(dev, aml_name_decl("_MAT",
   aml_buffer(madt_buf->len,
   (uint8_t *)madt_buf->data)));
@@ -226,6 +267,7 @@ static void build_dsdt(GArray *table_data,
  * 5.2.12 Multiple APIC Description Table (MADT)
  * REF: https://github.com/riscv-non-isa/riscv-acpi/issues/15
  *  https://drive.google.com/file/d/1R6k4MshhN3WTT-hwqAquu5nX6xSEqK2l/view
+ *  https://drive.google.com/file/d/1oMGPyOD58JaPgMl1pKasT-VKsIKia7zR/view
  */
 static void build_madt(GArray *table_data,
BIOSLinker *linker,
@@ -234,6 +276,12 @@ static void build_madt(GArray *table_data,
 MachineClass *mc = MACHINE_GET_CLASS(s);
 MachineState *ms = MACHINE(s);
 const CPUArchIdList *arch_ids = mc->pos

[PATCH 07/10] hw/riscv/virt-acpi-build.c: Add APLIC in the MADT

2023-07-12 Thread Sunil V L
Add APLIC structures for each socket in the MADT when the
system is configured with APLIC as the external wired
interrupt controller.

Signed-off-by: Sunil V L 
---
 hw/riscv/virt-acpi-build.c | 36 ++--
 1 file changed, 34 insertions(+), 2 deletions(-)

diff --git a/hw/riscv/virt-acpi-build.c b/hw/riscv/virt-acpi-build.c
index ebdc3bffea..9f2d0c92b0 100644
--- a/hw/riscv/virt-acpi-build.c
+++ b/hw/riscv/virt-acpi-build.c
@@ -276,9 +276,9 @@ static void build_madt(GArray *table_data,
 MachineClass *mc = MACHINE_GET_CLASS(s);
 MachineState *ms = MACHINE(s);
 const CPUArchIdList *arch_ids = mc->possible_cpu_arch_ids(ms);
-uint64_t imsic_socket_addr, imsic_addr;
+uint64_t imsic_socket_addr, imsic_addr, aplic_addr;
+uint32_t imsic_size, gsi_base;
 uint8_t  guest_index_bits;
-uint32_t imsic_size;
 uint32_t local_cpu_id, socket_id;
 uint8_t  hart_index_bits, group_index_bits, group_index_shift;
 uint16_t imsic_max_hart_per_socket = 0;
@@ -340,6 +340,38 @@ static void build_madt(GArray *table_data,
 build_append_int_noprefix(table_data, group_index_shift, 1);
 }
 
+if (s->aia_type != VIRT_AIA_TYPE_NONE) {
+/* APLICs */
+for (socket = 0; socket < riscv_socket_count(ms); socket++) {
+aplic_addr = s->memmap[VIRT_APLIC_S].base +
+ s->memmap[VIRT_APLIC_S].size * socket;
+gsi_base = VIRT_IRQCHIP_NUM_SOURCES * socket;
+build_append_int_noprefix(table_data, 0x1A, 1);/* Type */
+build_append_int_noprefix(table_data, 36, 1);  /* Length */
+build_append_int_noprefix(table_data, 1, 1);   /* Version */
+build_append_int_noprefix(table_data, socket, 1);  /* APLIC ID */
+build_append_int_noprefix(table_data, 0, 4);   /* Flags */
+build_append_int_noprefix(table_data, 0, 8);   /* Hardware ID */
+/* Number of IDCs */
+if (s->aia_type == VIRT_AIA_TYPE_APLIC) {
+build_append_int_noprefix(table_data,
+  s->soc[socket].num_harts,
+  2);
+} else {
+build_append_int_noprefix(table_data, 0, 2);
+}
+/* Total External Interrupt Sources Supported */
+build_append_int_noprefix(table_data, VIRT_IRQCHIP_NUM_SOURCES, 2);
+/* Global System Interrupt Base */
+build_append_int_noprefix(table_data, gsi_base, 4);
+/* APLIC Address */
+build_append_int_noprefix(table_data, aplic_addr, 8);
+/* APLIC size */
+build_append_int_noprefix(table_data,
+  s->memmap[VIRT_APLIC_S].size, 4);
+}
+}
+
 acpi_table_end(linker, &table);
 }
 
-- 
2.39.2




[PATCH 09/10] hw/riscv/virt-acpi-build.c: Add MMU node in RHCT

2023-07-12 Thread Sunil V L
MMU type information is communicated via the MMU node in the RHCT.
Add this node to the RHCT.

Signed-off-by: Sunil V L 
---
 hw/riscv/virt-acpi-build.c | 36 
 1 file changed, 36 insertions(+)

diff --git a/hw/riscv/virt-acpi-build.c b/hw/riscv/virt-acpi-build.c
index 2d2bd3b970..25745eee4c 100644
--- a/hw/riscv/virt-acpi-build.c
+++ b/hw/riscv/virt-acpi-build.c
@@ -158,6 +158,8 @@ static void build_rhct(GArray *table_data,
 size_t len, aligned_len;
 uint32_t isa_offset, num_rhct_nodes, cmo_offset = 0;
 RISCVCPU *cpu = &s->soc[0].harts[0];
+uint32_t mmu_offset = 0;
+uint8_t satp_mode_max;
 char *isa;
 
 AcpiTable table = { .sig = "RHCT", .rev = 1, .oem_id = s->oem_id,
@@ -177,6 +179,10 @@ static void build_rhct(GArray *table_data,
 num_rhct_nodes++;
 }
 
+if (cpu->cfg.satp_mode.supported != 0) {
+num_rhct_nodes++;
+}
+
 /* Number of RHCT nodes*/
 build_append_int_noprefix(table_data, num_rhct_nodes, 4);
 
@@ -202,6 +208,26 @@ static void build_rhct(GArray *table_data,
 build_append_int_noprefix(table_data, 0x0, 1);   /* Optional Padding */
 }
 
+/* MMU node structure */
+if (cpu->cfg.satp_mode.supported != 0) {
+satp_mode_max = satp_mode_max_from_map(cpu->cfg.satp_mode.map);
+mmu_offset = table_data->len - table.table_offset;
+build_append_int_noprefix(table_data, 1, 2);/* Type */
+build_append_int_noprefix(table_data, 8, 2);/* Total Length */
+build_append_int_noprefix(table_data, 0x1, 2);  /* Revision */
+build_append_int_noprefix(table_data, 0, 1);/* Reserved */
+/* Virtual Address Scheme */
+if (satp_mode_max == VM_1_10_SV57) {
+build_append_int_noprefix(table_data, 2, 1);/* Sv57 */
+} else if (satp_mode_max == VM_1_10_SV48) {
+build_append_int_noprefix(table_data, 1, 1);/* Sv48 */
+} else if (satp_mode_max == VM_1_10_SV39) {
+build_append_int_noprefix(table_data, 0, 1);/* Sv39 */
+} else {
+assert(1);
+}
+}
+
 /* CMO node */
 if (cpu->cfg.ext_icbom || cpu->cfg.ext_icboz) {
 cmo_offset = table_data->len - table.table_offset;
@@ -244,6 +270,11 @@ static void build_rhct(GArray *table_data,
 num_offsets++;
 }
 
+if (mmu_offset) {
+len += 4;
+num_offsets++;
+}
+
 build_append_int_noprefix(table_data, len, 2);
 build_append_int_noprefix(table_data, 0x1, 2); /* Revision */
 /* Number of offsets */
@@ -252,9 +283,14 @@ static void build_rhct(GArray *table_data,
 
 /* Offsets */
 build_append_int_noprefix(table_data, isa_offset, 4);
+
 if (cmo_offset) {
 build_append_int_noprefix(table_data, cmo_offset, 4);
 }
+
+if (mmu_offset) {
+build_append_int_noprefix(table_data, mmu_offset, 4);
+}
 }
 
 acpi_table_end(linker, &table);
-- 
2.39.2




[PATCH 10/10] hw/riscv/virt-acpi-build.c: Add IO controllers and devices

2023-07-12 Thread Sunil V L
Add basic IO controllers and devices like PCI, VirtIO and UART
in the ACPI namespace.

Signed-off-by: Sunil V L 
---
 hw/riscv/Kconfig   |  1 +
 hw/riscv/virt-acpi-build.c | 87 ++
 2 files changed, 88 insertions(+)

diff --git a/hw/riscv/Kconfig b/hw/riscv/Kconfig
index b6a5eb4452..a50717be87 100644
--- a/hw/riscv/Kconfig
+++ b/hw/riscv/Kconfig
@@ -45,6 +45,7 @@ config RISCV_VIRT
 select FW_CFG_DMA
 select PLATFORM_BUS
 select ACPI
+select ACPI_PCI
 
 config SHAKTI_C
 bool
diff --git a/hw/riscv/virt-acpi-build.c b/hw/riscv/virt-acpi-build.c
index 25745eee4c..91f06fdc97 100644
--- a/hw/riscv/virt-acpi-build.c
+++ b/hw/riscv/virt-acpi-build.c
@@ -27,6 +27,7 @@
 #include "hw/acpi/acpi-defs.h"
 #include "hw/acpi/acpi.h"
 #include "hw/acpi/aml-build.h"
+#include "hw/acpi/pci.h"
 #include "hw/acpi/utils.h"
 #include "qapi/error.h"
 #include "qemu/error-report.h"
@@ -35,6 +36,7 @@
 #include "hw/riscv/virt.h"
 #include "hw/riscv/numa.h"
 #include "hw/intc/riscv_aclint.h"
+#include "hw/pci-host/gpex.h"
 
 #define ACPI_BUILD_TABLE_SIZE 0x2
 #define ACPI_BUILD_INTC_ID(socket, index) ((socket << 24) | (index))
@@ -138,6 +140,55 @@ static void acpi_dsdt_add_cpus(Aml *scope, RISCVVirtState *s)
 }
 }
 
+static void
+acpi_dsdt_add_uart(Aml *scope, const MemMapEntry *uart_memmap,
+uint32_t uart_irq)
+{
+Aml *dev = aml_device("COM0");
+aml_append(dev, aml_name_decl("_HID", aml_string("PNP0501")));
+aml_append(dev, aml_name_decl("_UID", aml_int(0)));
+
+Aml *crs = aml_resource_template();
+aml_append(crs, aml_memory32_fixed(uart_memmap->base,
+ uart_memmap->size, AML_READ_WRITE));
+aml_append(crs,
+aml_interrupt(AML_CONSUMER, AML_LEVEL, AML_ACTIVE_HIGH,
+   AML_EXCLUSIVE, &uart_irq, 1));
+aml_append(dev, aml_name_decl("_CRS", crs));
+
+Aml *pkg = aml_package(2);
+aml_append(pkg, aml_string("clock-frequency"));
+aml_append(pkg, aml_int(3686400));
+
+Aml *UUID = aml_touuid("DAFFD814-6EBA-4D8C-8A91-BC9BBF4AA301");
+
+Aml *pkg1 = aml_package(1);
+aml_append(pkg1, pkg);
+
+Aml *package = aml_package(2);
+aml_append(package, UUID);
+aml_append(package, pkg1);
+
+aml_append(dev, aml_name_decl("_DSD", package));
+aml_append(scope, dev);
+}
+
+static void
+acpi_dsdt_add_pci(Aml *scope, const MemMapEntry *memmap,
+   uint32_t irq, RISCVVirtState *s)
+{
+struct GPEXConfig cfg = {
+.mmio32 = memmap[VIRT_PCIE_MMIO],
+.mmio64 = memmap[VIRT_HIGH_PCIE_MMIO],
+.pio = memmap[VIRT_PCIE_PIO],
+.ecam = memmap[VIRT_PCIE_ECAM],
+.irq = irq,
+.bus = s->bus,
+};
+
+acpi_dsdt_add_gpex(scope, &cfg);
+}
+
 /* RHCT Node[N] starts at offset 56 */
 #define RHCT_NODE_ARRAY_OFFSET 56
 
@@ -318,6 +369,8 @@ static void build_dsdt(GArray *table_data, RISCVVirtState *s)
 {
 Aml *scope, *dsdt;
+MachineState *ms = MACHINE(s);
+uint8_t socket_count;
 const MemMapEntry *memmap = s->memmap;
 AcpiTable table = { .sig = "DSDT", .rev = 2, .oem_id = s->oem_id,
 .oem_table_id = s->oem_table_id };
@@ -337,6 +390,30 @@ static void build_dsdt(GArray *table_data,
 
 acpi_dsdt_add_fw_cfg(scope, &memmap[VIRT_FW_CFG]);
 
+socket_count = riscv_socket_count(ms);
+
+acpi_dsdt_add_uart(scope, &memmap[VIRT_UART0], UART0_IRQ);
+
+if (socket_count == 1) {
+acpi_dsdt_add_virtio(scope, &memmap[VIRT_VIRTIO],
+ VIRTIO_IRQ, VIRTIO_COUNT);
+acpi_dsdt_add_pci(scope, memmap, PCIE_IRQ, s);
+} else if (socket_count == 2) {
+acpi_dsdt_add_virtio(scope, &memmap[VIRT_VIRTIO],
+ VIRTIO_IRQ + VIRT_IRQCHIP_NUM_SOURCES,
+ VIRTIO_COUNT);
+acpi_dsdt_add_pci(scope, memmap,
+  PCIE_IRQ + VIRT_IRQCHIP_NUM_SOURCES,
+  s);
+} else {
+acpi_dsdt_add_virtio(scope, &memmap[VIRT_VIRTIO],
+ VIRTIO_IRQ + VIRT_IRQCHIP_NUM_SOURCES,
+ VIRTIO_COUNT);
+acpi_dsdt_add_pci(scope, memmap,
+  PCIE_IRQ + VIRT_IRQCHIP_NUM_SOURCES * 2,
+  s);
+}
+
 aml_append(dsdt, scope);
 
 /* copy AML table into ACPI tables blob and patch header there */
@@ -486,6 +563,16 @@ static void virt_acpi_build(RISCVVirtState *s, AcpiBuildTables *tables)
 acpi_add_table(table_offsets, tables_blob);
 build_rhct(tables_blob, tables->linker, s);
 
+acpi_add_table(table_offsets, tables_blob);
+{
+AcpiMcfgInfo mcfg = {
+   .base = s->memmap[VIRT_PCIE_MMIO].base,
+   .size = s->memmap[VIRT_PCIE_MMIO].size,
+};
+build_mcfg(tables_blob, tables->linker, &mcfg, s->oem_id,
+   s

[PATCH 06/10] hw/riscv/virt-acpi-build.c: Add IMSIC in the MADT

2023-07-12 Thread Sunil V L
Add IMSIC structure in MADT when IMSIC is configured.

Signed-off-by: Sunil V L 
---
 hw/riscv/virt-acpi-build.c | 34 ++
 1 file changed, 34 insertions(+)

diff --git a/hw/riscv/virt-acpi-build.c b/hw/riscv/virt-acpi-build.c
index 12b8ef0352..ebdc3bffea 100644
--- a/hw/riscv/virt-acpi-build.c
+++ b/hw/riscv/virt-acpi-build.c
@@ -280,8 +280,20 @@ static void build_madt(GArray *table_data,
 uint8_t  guest_index_bits;
 uint32_t imsic_size;
 uint32_t local_cpu_id, socket_id;
+uint8_t  hart_index_bits, group_index_bits, group_index_shift;
+uint16_t imsic_max_hart_per_socket = 0;
+uint8_t  socket;
+
+for (socket = 0; socket < riscv_socket_count(ms); socket++) {
+if (imsic_max_hart_per_socket < s->soc[socket].num_harts) {
+imsic_max_hart_per_socket = s->soc[socket].num_harts;
+}
+}
 
 guest_index_bits = imsic_num_bits(s->aia_guests + 1);
+hart_index_bits = imsic_num_bits(imsic_max_hart_per_socket);
+group_index_bits = imsic_num_bits(riscv_socket_count(ms));
+group_index_shift = IMSIC_MMIO_GROUP_MIN_SHIFT;
 
 AcpiTable table = { .sig = "APIC", .rev = 6, .oem_id = s->oem_id,
 .oem_table_id = s->oem_table_id };
@@ -306,6 +318,28 @@ static void build_madt(GArray *table_data,
   s->aia_type, imsic_addr, imsic_size);
 }
 
+/* IMSIC */
+if (s->aia_type == VIRT_AIA_TYPE_APLIC_IMSIC) {
+/* IMSIC */
+build_append_int_noprefix(table_data, 0x19, 1); /* Type */
+build_append_int_noprefix(table_data, 16, 1);   /* Length */
+build_append_int_noprefix(table_data, 1, 1);/* Version */
+build_append_int_noprefix(table_data, 0, 1);/* Reserved */
+build_append_int_noprefix(table_data, 0, 4);/* Flags */
+/* Number of supervisor mode Interrupt Identities */
+build_append_int_noprefix(table_data, VIRT_IRQCHIP_NUM_MSIS, 2);
+/* Number of guest mode Interrupt Identities */
+build_append_int_noprefix(table_data, VIRT_IRQCHIP_NUM_MSIS, 2);
+/* Guest Index Bits */
+build_append_int_noprefix(table_data, guest_index_bits, 1);
+/* Hart Index Bits */
+build_append_int_noprefix(table_data, hart_index_bits, 1);
+/* Group Index Bits */
+build_append_int_noprefix(table_data, group_index_bits, 1);
+/* Group Index Shift */
+build_append_int_noprefix(table_data, group_index_shift, 1);
+}
+
 acpi_table_end(linker, &table);
 }
 
-- 
2.39.2




[PATCH 01/10] hw/arm/virt-acpi-build.c: Move fw_cfg and virtio to common location

2023-07-12 Thread Sunil V L
The functions which add fw_cfg and virtio to DSDT are same for ARM
and RISC-V. So, instead of duplicating in RISC-V, move them from
hw/arm/virt-acpi-build.c to common aml-build.c.

Signed-off-by: Sunil V L 
---
 hw/acpi/aml-build.c | 41 
 hw/arm/virt-acpi-build.c| 42 -
 hw/riscv/virt-acpi-build.c  | 16 --
 include/hw/acpi/aml-build.h |  6 ++
 4 files changed, 47 insertions(+), 58 deletions(-)

diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
index ea331a20d1..eeb1263c8c 100644
--- a/hw/acpi/aml-build.c
+++ b/hw/acpi/aml-build.c
@@ -2467,3 +2467,44 @@ Aml *aml_i2c_serial_bus_device(uint16_t address, const char *resource_source)
 
 return var;
 }
+
+void acpi_dsdt_add_fw_cfg(Aml *scope, const MemMapEntry *fw_cfg_memmap)
+{
+Aml *dev = aml_device("FWCF");
+aml_append(dev, aml_name_decl("_HID", aml_string("QEMU0002")));
+/* device present, functioning, decoding, not shown in UI */
+aml_append(dev, aml_name_decl("_STA", aml_int(0xB)));
+aml_append(dev, aml_name_decl("_CCA", aml_int(1)));
+
+Aml *crs = aml_resource_template();
+aml_append(crs, aml_memory32_fixed(fw_cfg_memmap->base,
+   fw_cfg_memmap->size, AML_READ_WRITE));
+aml_append(dev, aml_name_decl("_CRS", crs));
+aml_append(scope, dev);
+}
+
+void acpi_dsdt_add_virtio(Aml *scope,
+  const MemMapEntry *virtio_mmio_memmap,
+  uint32_t mmio_irq, int num)
+{
+hwaddr base = virtio_mmio_memmap->base;
+hwaddr size = virtio_mmio_memmap->size;
+int i;
+
+for (i = 0; i < num; i++) {
+uint32_t irq = mmio_irq + i;
+Aml *dev = aml_device("VR%02u", i);
+aml_append(dev, aml_name_decl("_HID", aml_string("LNRO0005")));
+aml_append(dev, aml_name_decl("_UID", aml_int(i)));
+aml_append(dev, aml_name_decl("_CCA", aml_int(1)));
+
+Aml *crs = aml_resource_template();
+aml_append(crs, aml_memory32_fixed(base, size, AML_READ_WRITE));
+aml_append(crs,
+   aml_interrupt(AML_CONSUMER, AML_LEVEL, AML_ACTIVE_HIGH,
+ AML_EXCLUSIVE, &irq, 1));
+aml_append(dev, aml_name_decl("_CRS", crs));
+aml_append(scope, dev);
+base += size;
+}
+}
diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
index 6b674231c2..fdedb68e2b 100644
--- a/hw/arm/virt-acpi-build.c
+++ b/hw/arm/virt-acpi-build.c
@@ -35,7 +35,6 @@
 #include "target/arm/cpu.h"
 #include "hw/acpi/acpi-defs.h"
 #include "hw/acpi/acpi.h"
-#include "hw/nvram/fw_cfg.h"
 #include "hw/acpi/bios-linker-loader.h"
 #include "hw/acpi/aml-build.h"
 #include "hw/acpi/utils.h"
@@ -94,21 +93,6 @@ static void acpi_dsdt_add_uart(Aml *scope, const MemMapEntry *uart_memmap,
 aml_append(scope, dev);
 }
 
-static void acpi_dsdt_add_fw_cfg(Aml *scope, const MemMapEntry *fw_cfg_memmap)
-{
-Aml *dev = aml_device("FWCF");
-aml_append(dev, aml_name_decl("_HID", aml_string("QEMU0002")));
-/* device present, functioning, decoding, not shown in UI */
-aml_append(dev, aml_name_decl("_STA", aml_int(0xB)));
-aml_append(dev, aml_name_decl("_CCA", aml_int(1)));
-
-Aml *crs = aml_resource_template();
-aml_append(crs, aml_memory32_fixed(fw_cfg_memmap->base,
-   fw_cfg_memmap->size, AML_READ_WRITE));
-aml_append(dev, aml_name_decl("_CRS", crs));
-aml_append(scope, dev);
-}
-
 static void acpi_dsdt_add_flash(Aml *scope, const MemMapEntry *flash_memmap)
 {
 Aml *dev, *crs;
@@ -133,32 +117,6 @@ static void acpi_dsdt_add_flash(Aml *scope, const MemMapEntry *flash_memmap)
 aml_append(scope, dev);
 }
 
-static void acpi_dsdt_add_virtio(Aml *scope,
- const MemMapEntry *virtio_mmio_memmap,
- uint32_t mmio_irq, int num)
-{
-hwaddr base = virtio_mmio_memmap->base;
-hwaddr size = virtio_mmio_memmap->size;
-int i;
-
-for (i = 0; i < num; i++) {
-uint32_t irq = mmio_irq + i;
-Aml *dev = aml_device("VR%02u", i);
-aml_append(dev, aml_name_decl("_HID", aml_string("LNRO0005")));
-aml_append(dev, aml_name_decl("_UID", aml_int(i)));
-aml_append(dev, aml_name_decl("_CCA", aml_int(1)));
-
-Aml *crs = aml_resource_template();
-aml_append(crs, aml_memory32_fixed(base, size, AML_READ_WRITE));
-aml_append(crs,
-   aml_interrupt(AML_CONSUMER, AML_LEVEL, AML_ACTIVE_HIGH,
- AML_EXCLUSIVE, &irq, 1));
-aml_append(dev, aml_name_decl("_CRS", crs));
-aml_append(scope, dev);
-base += size;
-}
-}
-
 static void acpi_dsdt_add_pci(Aml *scope, const MemMapEntry *memmap,
   uint32_t irq, VirtMachineState *vms)
 {
diff --git a/hw/riscv/virt-acpi-build.c b/hw/riscv/virt-acpi-build.c
index 733

[PATCH 08/10] hw/riscv/virt-acpi-build.c: Add CMO information in RHCT

2023-07-12 Thread Sunil V L
When CMO-related extensions like Zicboz, Zicbom and Zicbop
are enabled, the block size for those extensions needs to be
communicated via the CMO node in the RHCT. Add a CMO node to
the RHCT if any of those CMO extensions are detected.

Signed-off-by: Sunil V L 
---
 hw/riscv/virt-acpi-build.c | 64 +-
 1 file changed, 56 insertions(+), 8 deletions(-)

diff --git a/hw/riscv/virt-acpi-build.c b/hw/riscv/virt-acpi-build.c
index 9f2d0c92b0..2d2bd3b970 100644
--- a/hw/riscv/virt-acpi-build.c
+++ b/hw/riscv/virt-acpi-build.c
@@ -146,6 +146,7 @@ static void acpi_dsdt_add_cpus(Aml *scope, RISCVVirtState *s)
  * 5.2.36 RISC-V Hart Capabilities Table (RHCT)
  * REF: https://github.com/riscv-non-isa/riscv-acpi/issues/16
  *  https://drive.google.com/file/d/1nP3nFiH4jkPMp6COOxP6123DCZKR-tia/view
+ *  https://drive.google.com/file/d/1sKbOa8m1UZw1JkquZYe3F1zQBN1xXsaf/view
  */
 static void build_rhct(GArray *table_data,
BIOSLinker *linker,
@@ -155,8 +156,8 @@ static void build_rhct(GArray *table_data,
 MachineState *ms = MACHINE(s);
 const CPUArchIdList *arch_ids = mc->possible_cpu_arch_ids(ms);
 size_t len, aligned_len;
-uint32_t isa_offset, num_rhct_nodes;
-RISCVCPU *cpu;
+uint32_t isa_offset, num_rhct_nodes, cmo_offset = 0;
+RISCVCPU *cpu = &s->soc[0].harts[0];
 char *isa;
 
 AcpiTable table = { .sig = "RHCT", .rev = 1, .oem_id = s->oem_id,
@@ -172,6 +173,9 @@ static void build_rhct(GArray *table_data,
 
 /* ISA + N hart info */
 num_rhct_nodes = 1 + ms->smp.cpus;
+if (cpu->cfg.ext_icbom || cpu->cfg.ext_icboz) {
+num_rhct_nodes++;
+}
 
 /* Number of RHCT nodes*/
 build_append_int_noprefix(table_data, num_rhct_nodes, 4);
@@ -183,7 +187,6 @@ static void build_rhct(GArray *table_data,
 isa_offset = table_data->len - table.table_offset;
 build_append_int_noprefix(table_data, 0, 2);   /* Type 0 */
 
-cpu = &s->soc[0].harts[0];
 isa = riscv_isa_string(cpu);
 len = 8 + strlen(isa) + 1;
 aligned_len = (len % 2) ? (len + 1) : len;
@@ -199,14 +202,59 @@ static void build_rhct(GArray *table_data,
 build_append_int_noprefix(table_data, 0x0, 1);   /* Optional Padding */
 }
 
+/* CMO node */
+if (cpu->cfg.ext_icbom || cpu->cfg.ext_icboz) {
+cmo_offset = table_data->len - table.table_offset;
+build_append_int_noprefix(table_data, 1, 2);/* Type */
+build_append_int_noprefix(table_data, 10, 2);   /* Total Length */
+build_append_int_noprefix(table_data, 0x1, 2);  /* Revision */
+build_append_int_noprefix(table_data, 0, 1);/* Reserved */
+
+/* CBOM block size */
+if (cpu->cfg.cbom_blocksize) {
+build_append_int_noprefix(table_data,
+  __builtin_ctz(cpu->cfg.cbom_blocksize),
+  1);
+} else {
+build_append_int_noprefix(table_data, 0, 1);
+}
+
+/* CBOP block size */
+build_append_int_noprefix(table_data, 0, 1);
+
+/* CBOZ block size */
+if (cpu->cfg.cboz_blocksize) {
+build_append_int_noprefix(table_data,
+  __builtin_ctz(cpu->cfg.cboz_blocksize),
+  1);
+} else {
+build_append_int_noprefix(table_data, 0, 1);
+}
+}
+
 /* Hart Info Node */
 for (int i = 0; i < arch_ids->len; i++) {
+len = 16;
+int num_offsets = 1;
        build_append_int_noprefix(table_data, 0xFFFF, 2);  /* Type */
-build_append_int_noprefix(table_data, 16, 2);  /* Length */
-build_append_int_noprefix(table_data, 0x1, 2); /* Revision */
-build_append_int_noprefix(table_data, 1, 2);/* Number of offsets */
-build_append_int_noprefix(table_data, i, 4);/* ACPI Processor UID */
-build_append_int_noprefix(table_data, isa_offset, 4); /* Offsets[0] */
+
+/* Length */
+if (cmo_offset) {
+len += 4;
+num_offsets++;
+}
+
+build_append_int_noprefix(table_data, len, 2);
+build_append_int_noprefix(table_data, 0x1, 2); /* Revision */
+/* Number of offsets */
+build_append_int_noprefix(table_data, num_offsets, 2);
+build_append_int_noprefix(table_data, i, 4);   /* ACPI Processor UID */
+
+/* Offsets */
+build_append_int_noprefix(table_data, isa_offset, 4);
+if (cmo_offset) {
+build_append_int_noprefix(table_data, cmo_offset, 4);
+}
 }
 
 acpi_table_end(linker, &table);
-- 
2.39.2




[PATCH 04/10] hw/riscv: virt: Add PCIe HIGHMEM in memmap

2023-07-12 Thread Sunil V L
The PCIe high MMIO base is actually dynamic and fixed at
run time based on the configured RAM. Currently, it is not
part of the memmap and is kept in a separate static variable
in virt.c. However, the ACPI code also needs this information
to populate the DSDT. So, once the base is discovered, merge
it into the final memmap, which can be used to create the
ACPI tables later.

Signed-off-by: Sunil V L 
---
 hw/riscv/virt.c | 31 ++-
 include/hw/riscv/virt.h |  9 +++--
 2 files changed, 37 insertions(+), 3 deletions(-)

diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
index f6067db8ec..7aee06f021 100644
--- a/hw/riscv/virt.c
+++ b/hw/riscv/virt.c
@@ -84,6 +84,22 @@ static const MemMapEntry virt_memmap[] = {
 
 static MemMapEntry virt_high_pcie_memmap;
 
+/*
+ * virt_memmap doesn't include floating High Mem IO address entry. To enable
+ * code organization in multiple files (ex: ACPI), it is better to have single
+ * memmap which has complete information.
+ *
+ * VIRT_HIGH_PCIE_MMIO is always greater than the last memmap entry and hence
+ * full_virt_memmap is capable of holding both virt_memmap and
+ * VIRT_HIGH_PCIE_MMIO entry.
+ *
+ * The values for these floating entries will be updated when top of RAM is
+ * discovered.
+ */
+static MemMapEntry full_virt_memmap[] = {
+[VIRT_HIGH_PCIE_MMIO] = { 0x0, 0 },
+};
+
 #define VIRT_FLASH_SECTOR_SIZE (256 * KiB)
 
 static PFlashCFI01 *virt_flash_create1(RISCVVirtState *s,
@@ -1444,7 +1460,20 @@ static void virt_machine_init(MachineState *machine)
 ROUND_UP(virt_high_pcie_memmap.base, virt_high_pcie_memmap.size);
 }
 
-s->memmap = virt_memmap;
+/*
+ * Initialize the floating values in full memory map
+ */
+full_virt_memmap[VIRT_HIGH_PCIE_MMIO].base = virt_high_pcie_memmap.base;
+full_virt_memmap[VIRT_HIGH_PCIE_MMIO].size = virt_high_pcie_memmap.size;
+
+s->memmap = full_virt_memmap;
+/*
+ * Copy the base virt_memmap entries to full memmap
+ */
+for (i = 0; i < ARRAY_SIZE(virt_memmap); i++) {
+s->memmap[i] = virt_memmap[i];
+}
+
 
 /* register system main memory (actual RAM) */
 memory_region_add_subregion(system_memory, memmap[VIRT_DRAM].base,
diff --git a/include/hw/riscv/virt.h b/include/hw/riscv/virt.h
index 00c22492a7..1d7ddf5df0 100644
--- a/include/hw/riscv/virt.h
+++ b/include/hw/riscv/virt.h
@@ -60,7 +60,7 @@ struct RISCVVirtState {
 char *oem_id;
 char *oem_table_id;
 OnOffAuto acpi;
-const MemMapEntry *memmap;
+MemMapEntry *memmap;
 PCIBus *bus;
 };
 
@@ -84,7 +84,12 @@ enum {
 VIRT_PCIE_MMIO,
 VIRT_PCIE_PIO,
 VIRT_PLATFORM_BUS,
-VIRT_PCIE_ECAM
+VIRT_PCIE_ECAM,
+VIRT_LAST_MEMMAP /* Keep this entry always last */
+};
+
+enum {
+VIRT_HIGH_PCIE_MMIO = VIRT_LAST_MEMMAP,
 };
 
 enum {
-- 
2.39.2




[PATCH 00/10] RISC-V: ACPI: Enable AIA and update RHCT

2023-07-12 Thread Sunil V L
This series primarily enables AIA interrupt controllers in
the ACPI tables for the RISC-V virt platform. It also updates
the RHCT with CMO- and MMU-related information.

The ECRs below for these changes have been approved by the
ASWG and will be available in the next ACPI spec release.

1) MADT (AIA) - 
https://drive.google.com/file/d/1oMGPyOD58JaPgMl1pKasT-VKsIKia7zR/view?usp=sharing
2) RHCT - 
https://drive.google.com/file/d/1sKbOa8m1UZw1JkquZYe3F1zQBN1xXsaf/view?usp=sharing

First patch in this series is to migrate a couple of functions from
ARM architecture to common code so that RISC-V doesn't need to
duplicate the same.

The patch set is based on Alistair's riscv-to-apply.next branch.

These changes are also available in the riscv_acpi_b2_v1 branch at:
https://github.com/vlsunil/qemu/

Sunil V L (10):
  hw/arm/virt-acpi-build.c: Move fw_cfg and virtio to common location
  hw/riscv: virt: Add PCI bus reference in RISCVVirtState
  hw/riscv: virt: Make few IMSIC macros and functions public
  hw/riscv: virt: Add PCIe HIGHMEM in memmap
  hw/riscv/virt-acpi-build.c: Add AIA support in RINTC
  hw/riscv/virt-acpi-build.c: Add IMSIC in the MADT
  hw/riscv/virt-acpi-build.c: Add APLIC in the MADT
  hw/riscv/virt-acpi-build.c: Add CMO information in RHCT
  hw/riscv/virt-acpi-build.c: Add MMU node in RHCT
  hw/riscv/virt-acpi-build.c: Add IO controllers and devices

 hw/acpi/aml-build.c |  41 +
 hw/arm/virt-acpi-build.c|  42 -
 hw/riscv/Kconfig|   1 +
 hw/riscv/virt-acpi-build.c  | 321 +---
 hw/riscv/virt.c |  62 ---
 include/hw/acpi/aml-build.h |   6 +
 include/hw/riscv/virt.h |  35 +++-
 7 files changed, 416 insertions(+), 92 deletions(-)

-- 
2.39.2




Re: [PATCH 1/3] scsi: fetch unit attention when creating the request

2023-07-12 Thread Paolo Bonzini

On 7/12/23 15:43, Stefano Garzarella wrote:

Commit 1880ad4f4e ("virtio-scsi: Batched prepare for cmd reqs") split
calls to scsi_req_new() and scsi_req_enqueue() in the virtio-scsi device.
This had no drawback, until commit 8cc5583abe ("virtio-scsi: Send


More precisely, it was pretty hard to trigger it; it might be possible 
using a CD-ROM, as it can report a MEDIUM_CHANGED unit attention.  I 
will change "This had no drawback" to "No ill effect was reported"



"REPORTED LUNS CHANGED" sense data upon disk hotplug events") added a
bus unit attention.


... that was fairly easy to trigger via SCSI device hotplug/hot-unplug.

Queued the series, thanks for the tests and for applying the cleanups on 
top.



Co-developed-by: Paolo Bonzini 


Heh, I basically only wrote the "if (req->init_req)" statement so that's 
pretty generous, but I'll keep it anyway. :)


Paolo


Having the two calls separated, all requests in the batch were prepared
calling scsi_req_new() to report a sense.
Then only the first one submitted calling scsi_req_enqueue() reported the
right sense and reset it to NO_SENSE.
The others reported NO_SENSE, causing SCSI errors in Linux.





Re: [PATCH v21 13/20] docs/s390x/cpu topology: document s390x cpu topology

2023-07-12 Thread Pierre Morel



On 7/5/23 10:41, Thomas Huth wrote:

On 30/06/2023 11.17, Pierre Morel wrote:

Add some basic examples for the definition of cpu topology
in s390x.

Signed-off-by: Pierre Morel 
---
  MAINTAINERS    |   2 +
  docs/devel/index-internals.rst |   1 +
  docs/devel/s390-cpu-topology.rst   | 170 
  docs/system/s390x/cpu-topology.rst | 240 +
  docs/system/target-s390x.rst   |   1 +
  5 files changed, 414 insertions(+)
  create mode 100644 docs/devel/s390-cpu-topology.rst
  create mode 100644 docs/system/s390x/cpu-topology.rst

diff --git a/MAINTAINERS b/MAINTAINERS
index b8d3e8815c..76f236564c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1703,6 +1703,8 @@ S: Supported
  F: include/hw/s390x/cpu-topology.h
  F: hw/s390x/cpu-topology.c
  F: target/s390x/kvm/stsi-topology.c
+F: docs/devel/s390-cpu-topology.rst
+F: docs/system/s390x/cpu-topology.rst
    X86 Machines
  
diff --git a/docs/devel/index-internals.rst b/docs/devel/index-internals.rst
index e1a93df263..6f81df92bc 100644
--- a/docs/devel/index-internals.rst
+++ b/docs/devel/index-internals.rst
@@ -14,6 +14,7 @@ Details about QEMU's various subsystems including how to add features to them.

 migration
 multi-process
 reset
+   s390-cpu-topology
 s390-dasd-ipl
 tracing
 vfio-migration
diff --git a/docs/devel/s390-cpu-topology.rst b/docs/devel/s390-cpu-topology.rst
new file mode 100644
index 00..cd36476011
--- /dev/null
+++ b/docs/devel/s390-cpu-topology.rst
@@ -0,0 +1,170 @@
+QAPI interface for S390 CPU topology
+
+
+Let's start QEMU with the following command defining 4 CPUs,


Maybe better something like this:

The following sections will explain the S390 CPU topology with the 
help of exemplary output. For this, let's assume that QEMU has been 
started with the following command, defining 4 CPUs.


?


Absolutely better.

Thanks





+CPU[0] defined by the -smp argument will have default values:
+
+.. code-block:: bash
+
+ qemu-system-s390x \
+    -enable-kvm \
+    -cpu z14,ctop=on \
+    -smp 1,drawers=3,books=3,sockets=2,cores=2,maxcpus=36 \
+    \
+    -device z14-s390x-cpu,core-id=19,entitlement=high \
+    -device z14-s390x-cpu,core-id=11,entitlement=low \
+    -device z14-s390x-cpu,core-id=112,entitlement=high \
+   ...
+
+and see the result when using the QAPI interface.

...

+QAPI command: set-cpu-topology
+--
+
+The command set-cpu-topology allows to modify the topology tree
+or the topology modifiers of a vCPU in the configuration.
+
+.. code-block:: QMP
+
+    { "execute": "set-cpu-topology",
+  "arguments": {
+ "core-id": 11,
+ "socket-id": 0,
+ "book-id": 0,
+ "drawer-id": 0,
+ "entitlement": "low",
+ "dedicated": false
+  }
+    }
+    {"return": {}}
+
+The core-id parameter is the only non optional parameter and every
+unspecified parameter keeps its previous value.
+
+QAPI event CPU_POLARIZATION_CHANGE
+--
+
+When a guest is requests a modification of the polarization,


Scratch the word "is".



yes





+QEMU sends a CPU_POLARIZATION_CHANGE event.

...
diff --git a/docs/system/s390x/cpu-topology.rst b/docs/system/s390x/cpu-topology.rst
new file mode 100644
index 00..0535a5d883
--- /dev/null
+++ b/docs/system/s390x/cpu-topology.rst
@@ -0,0 +1,240 @@
+CPU topology on s390x
+=
+
+Since QEMU 8.1, CPU topology on s390x provides up to 3 levels of
+topology containers: drawers, books, sockets, defining a tree shaped
+hierarchy.


"drawers, books and sockets. They define a tree-shaped hierarchy."

?


yes, thx



+The socket container contains one or more CPU entries.


"The socket container has one or more CPU entries." ?



yes thx




+Each of these CPU entries consists of a bitmap and three CPU attributes:
+
+- CPU type
+- entitlement
+- dedication
+
+Each bit set in the bitmap correspond to the core-id of a vCPU with
+matching the three attribute.
+
+This documentation provide general information on S390 CPU topology,
+how to enable it and on the new CPU attributes.
+For information on how to modify the S390 CPU topology and on how to
+monitor the polarization change see ``Developer Information``.


It would be nicer to have a proper link here instead. See commit 
d6359e150dbdf84f67add786473fd277a9a442bb for example how to do this in 
our .rst files.



+Prerequisites
+-
+
+To use the CPU topology, you need to run with KVM on a s390x host that
+uses the Linux kernel v6.0 or newer (which provide the so-called
+``KVM_CAP_S390_CPU_TOPOLOGY`` capability that allows QEMU to signal the
+CPU topology facility via the so-called STFLE bit 11 to the VM).
+
+Enabling CPU topology
+-
+
+Currently, CPU topology is only enabled in the host model by default.
+
+Enabling CPU topology in a CPU model is done by setting 

[PATCH v2] target/ppc: Generate storage interrupts for radix RC changes

2023-07-12 Thread Shawn Anastasio
Change the radix model to always generate a storage interrupt when the
R/C bits are not set appropriately in a PTE, instead of setting the
bits itself. According to the ISA, both behaviors are valid, but in
practice this change more closely matches the behavior observed on the
POWER9 CPU.

From the POWER9 Processor User's Manual, Section 4.10.13.1: "When
performing Radix translation, the POWER9 hardware triggers the
appropriate interrupt ... for the mode and type of access whenever
Reference (R) and Change (C) bits require setting in either the guest or
host page-table entry (PTE)."

Signed-off-by: Shawn Anastasio 
---
Changes in v2:
  - Raise interrupt in ppc_radix64_process_scoped_xlate and
ppc_radix64_partition_scoped_xlate instead of ppc_radix64_check_rc

 target/ppc/mmu-radix64.c | 74 ++--
 1 file changed, 49 insertions(+), 25 deletions(-)

diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c
index 920084bd8f..5823e039e6 100644
--- a/target/ppc/mmu-radix64.c
+++ b/target/ppc/mmu-radix64.c
@@ -219,27 +219,25 @@ static bool ppc_radix64_check_prot(PowerPCCPU *cpu, MMUAccessType access_type,
 return false;
 }

-static void ppc_radix64_set_rc(PowerPCCPU *cpu, MMUAccessType access_type,
-   uint64_t pte, hwaddr pte_addr, int *prot)
+static int ppc_radix64_check_rc(MMUAccessType access_type, uint64_t pte)
 {
-CPUState *cs = CPU(cpu);
-uint64_t npte;
-
-npte = pte | R_PTE_R; /* Always set reference bit */
+switch (access_type) {
+case MMU_DATA_STORE:
+if (!(pte & R_PTE_C)) {
+break;
+}
+/* fall through */
+case MMU_INST_FETCH:
+case MMU_DATA_LOAD:
+if (!(pte & R_PTE_R)) {
+break;
+}

-if (access_type == MMU_DATA_STORE) { /* Store/Write */
-npte |= R_PTE_C; /* Set change bit */
-} else {
-/*
- * Treat the page as read-only for now, so that a later write
- * will pass through this function again to set the C bit.
- */
-*prot &= ~PAGE_WRITE;
+/* R/C bits are already set appropriately for this access */
+return 0;
 }

-if (pte ^ npte) { /* If pte has changed then write it back */
-stq_phys(cs->as, pte_addr, npte);
-}
+return 1;
 }

 static bool ppc_radix64_is_valid_level(int level, int psize, uint64_t nls)
@@ -380,7 +378,8 @@ static int ppc_radix64_partition_scoped_xlate(PowerPCCPU *cpu,
   ppc_v3_pate_t pate,
   hwaddr *h_raddr, int *h_prot,
   int *h_page_size, bool pde_addr,
-  int mmu_idx, bool guest_visible)
+  int mmu_idx, uint64_t lpid,
+  bool guest_visible)
 {
 MMUAccessType access_type = orig_access_type;
 int fault_cause = 0;
@@ -418,7 +417,24 @@ static int ppc_radix64_partition_scoped_xlate(PowerPCCPU *cpu,
 }

 if (guest_visible) {
-ppc_radix64_set_rc(cpu, access_type, pte, pte_addr, h_prot);
+if (ppc_radix64_check_rc(access_type, pte)) {
+/*
+ * Per ISA 3.1 Book III, 7.5.3 and 7.5.5, failure to set R/C during
+ * partition-scoped translation when effLPID = 0 results in normal
+ * (non-Hypervisor) Data and Instruction Storage Interrupts
+ * respectively.
+ *
+ * ISA 3.0 is ambiguous about this, but tests on POWER9 hardware
+ * seem to exhibit the same behavior.
+ */
+if (lpid > 0) {
+ppc_radix64_raise_hsi(cpu, access_type, eaddr, g_raddr,
+  DSISR_ATOMIC_RC);
+} else {
+ppc_radix64_raise_si(cpu, access_type, eaddr, DSISR_ATOMIC_RC);
+}
+return 1;
+}
 }

 return 0;
@@ -447,7 +463,8 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu,
 vaddr eaddr, uint64_t pid,
                                            ppc_v3_pate_t pate, hwaddr *g_raddr,
 int *g_prot, int *g_page_size,
-int mmu_idx, bool guest_visible)
+int mmu_idx, uint64_t lpid,
+bool guest_visible)
 {
 CPUState *cs = CPU(cpu);
 CPUPPCState *env = &cpu->env;
@@ -497,7 +514,7 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu,
 ret = ppc_radix64_partition_scoped_xlate(cpu, access_type, eaddr,
  prtbe_addr, pate, &h_raddr,
  &h_prot, &h_page_size, true,
- 5, guest

Re: [PATCH v21 12/20] qapi/s390x/cpu topology: query-cpu-polarization qmp command

2023-07-12 Thread Pierre Morel



On 7/5/23 09:58, Thomas Huth wrote:

On 30/06/2023 11.17, Pierre Morel wrote:

The query-cpu-polarization qmp command returns the current
CPU polarization of the machine.

Signed-off-by: Pierre Morel 
---
  qapi/machine-target.json | 29 +
  hw/s390x/cpu-topology.c  |  8 
  2 files changed, 37 insertions(+)

diff --git a/qapi/machine-target.json b/qapi/machine-target.json
index 1362e43983..1e4b8976aa 100644
--- a/qapi/machine-target.json
+++ b/qapi/machine-target.json
@@ -445,3 +445,32 @@
    'features': [ 'unstable' ],
    'if': { 'all': [ 'TARGET_S390X', 'CONFIG_KVM' ] }
  }
+
+##
+# @CpuPolarizationInfo:
+#
+# The result of a cpu polarization
+#
+# @polarization: the CPU polarization
+#
+# Since: 8.1
+##
+{ 'struct': 'CpuPolarizationInfo',
+  'data': { 'polarization': 'CpuS390Polarization' },
+  'if': { 'all': [ 'TARGET_S390X', 'CONFIG_KVM' ] }
+}
+
+##
+# @query-cpu-polarization:
+#
+# Features:
+# @unstable: This command may still be modified.
+#
+# Returns: the machine polarization
+#
+# Since: 8.1
+##
+{ 'command': 'query-cpu-polarization', 'returns': 
'CpuPolarizationInfo',


Since this is very specific to s390x, I wonder whether we want to have 
a "s390x" in the command name? 'query-s390x-cpu-polarization'? ... or 
is this getting too long already?


Anyway,
Reviewed-by: Thomas Huth 

I do not know. I prefer short commands, but the interface will mostly be
used by a program, so...


I'll leave it like that unless there is more pressure to change it.

Thanks,

Pierre





Re: [PATCH v21 11/20] qapi/s390x/cpu topology: CPU_POLARIZATION_CHANGE qapi event

2023-07-12 Thread Pierre Morel



On 7/4/23 15:04, Thomas Huth wrote:

On 30/06/2023 11.17, Pierre Morel wrote:

When the guest asks to change the polarization this change
is forwarded to the upper layer using QAPI.
The upper layer is supposed to take according decisions concerning
CPU provisioning.

Signed-off-by: Pierre Morel 
---
  qapi/machine-target.json | 33 +
  hw/s390x/cpu-topology.c  |  2 ++
  2 files changed, 35 insertions(+)


Reviewed-by: Thomas Huth 


Thanks,

Pierre



