date:20210202

Signed-off-by: Chuan Zheng 
Reviewed-by: Dr. David Alan Gilbert 
---
 migration/rdma.c | 52 
 1 file changed, 52 insertions(+)

diff --git a/migration/rdma.c b/migration/rdma.c
index e0ea86d..996afb0 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -4011,6 +4011,48 @@ static void rdma_accept_incoming_migration(void *opaque)
 }
 }
 
+static bool multifd_rdma_load_setup(const char *host_port,
+RDMAContext *rdma, Error **errp)
+{
+int thread_count;
+int i;
+int idx;
+MultiFDRecvParams *multifd_recv_param;
+RDMAContext *multifd_rdma;
+
+if (!migrate_use_multifd()) {
+return true;
+}
+
+if (multifd_load_setup(errp) != 0) {
+/*
+ * We haven't been able to create multifd threads
+ * nothing better to do
+ */
+return false;
+}
+
+thread_count = migrate_multifd_channels();
+for (i = 0; i < thread_count; i++) {
+if (get_multifd_recv_param(i, &multifd_recv_param) < 0) {
+ERROR(errp, "rdma: error getting multifd_recv_param(%d)", i);
+return false;
+}
+
+multifd_rdma = qemu_rdma_data_init(host_port, errp);
+for (idx = 0; idx < RDMA_WRID_MAX; idx++) {
+multifd_rdma->wr_data[idx].control_len = 0;
+multifd_rdma->wr_data[idx].control_curr = NULL;
+}
+/* the CM channel and CM id is shared */
+multifd_rdma->channel = rdma->channel;
+multifd_rdma->listen_id = rdma->listen_id;
+multifd_recv_param->rdma = (void *)multifd_rdma;
+}
+
+return true;
+}
+
 void rdma_start_incoming_migration(const char *host_port, Error **errp)
 {
 int ret;
@@ -4058,6 +4100,16 @@ void rdma_start_incoming_migration(const char 
*host_port, Error **errp)
 qemu_rdma_return_path_dest_init(rdma_return_path, rdma);
 }
 
+/* multifd rdma setup */
+if (!multifd_rdma_load_setup(host_port, rdma, &local_err)) {
+/*
+ * We haven't been able to create multifd threads
+ * nothing better to do
+ */
+error_report_err(local_err);
+goto err;
+}
+
 qemu_set_fd_handler(rdma->channel->fd, rdma_accept_incoming_migration,
 NULL, (void *)(intptr_t)rdma);
 return;
-- 
1.8.3.1

Re: [PATCH] blockjob: Fix crash with IOthread when block commit after snapshot

2021-02-02 Thread Vladimir Sementsov-Ogievskiy


subject should start with [PATCH v5]

03.02.2021 05:40, 08005...@163.com wrote:

From: Michael Qiu 

v5: reformat the commit log with the backtrace of the main thread.
 Add a boolean variable so that the main thread can re-acquire
 the aio_context on the success path.

v4: rebase to latest code

v3: reformat the commit log, remove duplicate content


patch history shouldn't go into commit message. So you should place it under 
'---' [*], after calling git format-patch



Currently, if the guest has workloads, the IO thread will acquire the
aio_context lock before doing io_submit, which leads to a segmentation fault
when doing a block commit after a snapshot, as shown below:

Program received signal SIGSEGV, Segmentation fault.

[Switching to Thread 0x7f7c7d91f700 (LWP 99907)]
0x5576d0f65aab in bdrv_mirror_top_pwritev at ../block/mirror.c:1437
1437../block/mirror.c: No such file or directory.
(gdb) p s->job
$17 = (MirrorBlockJob *) 0x0
(gdb) p s->stop
$18 = false

Call trace of IO thread:
0  0x5576d0f65aab in bdrv_mirror_top_pwritev at ../block/mirror.c:1437
1  0x5576d0f7f3ab in bdrv_driver_pwritev at ../block/io.c:1174
2  0x5576d0f8139d in bdrv_aligned_pwritev at ../block/io.c:1988
3  0x5576d0f81b65 in bdrv_co_pwritev_part at ../block/io.c:2156
4  0x5576d0f8e6b7 in blk_do_pwritev_part at ../block/block-backend.c:1260
5  0x5576d0f8e84d in blk_aio_write_entry at ../block/block-backend.c:1476
...

Switch to qemu main thread:
0  0x7f903be704ed in __lll_lock_wait at
/lib/../lib64/libpthread.so.0
1  0x7f903be6bde6 in _L_lock_941 at /lib/../lib64/libpthread.so.0
2  0x7f903be6bcdf in pthread_mutex_lock at
/lib/../lib64/libpthread.so.0
3  0x564b21456889 in qemu_mutex_lock_impl at
../util/qemu-thread-posix.c:79
4  0x564b213af8a5 in block_job_add_bdrv at ../blockjob.c:224
5  0x564b213b00ad in block_job_create at ../blockjob.c:440
6  0x564b21357c0a in mirror_start_job at ../block/mirror.c:1622
7  0x564b2135a9af in commit_active_start at ../block/mirror.c:1867
8  0x564b2133d132 in qmp_block_commit at ../blockdev.c:2768
9  0x564b2141fef3 in qmp_marshal_block_commit at
qapi/qapi-commands-block-core.c:346
10 0x564b214503c9 in do_qmp_dispatch_bh at
../qapi/qmp-dispatch.c:110
11 0x564b21451996 in aio_bh_poll at ../util/async.c:164
12 0x564b2146018e in aio_dispatch at ../util/aio-posix.c:381
13 0x564b2145187e in aio_ctx_dispatch at ../util/async.c:306
14 0x7f9040239049 in g_main_context_dispatch at
/lib/../lib64/libglib-2.0.so.0
15 0x564b21447368 in main_loop_wait at ../util/main-loop.c:232
16 0x564b21447368 in main_loop_wait at ../util/main-loop.c:255
17 0x564b21447368 in main_loop_wait at ../util/main-loop.c:531
18 0x564b212304e1 in qemu_main_loop at ../softmmu/runstate.c:721
19 0x564b20f7975e in main at ../softmmu/main.c:50

In the IO thread, when bdrv_mirror_top_pwritev() runs, the job is NULL and the
stop field is false. This means the MirrorBDSOpaque "s" object has not been
initialized yet; this object is initialized by block_job_create(), but the
initialization process is stuck acquiring the lock.

In this situation, the IO thread reaches bdrv_mirror_top_pwritev(), which means
that the mirror-top node has already been inserted into the block graph, but its
bs->opaque->job is not initialized yet.

The root cause is that the qemu main thread does a release/acquire while holding
the lock; at the same time, the IO thread gets the lock after the release stage,
and the crash occurs.

Actually, in this situation, job->job.aio_context will not be equal to
qemu_get_aio_context(), and will be the same as bs->aio_context;
thus, there is no need to release the lock, because bdrv_root_attach_child()
will not change the context.

This patch fixes this issue.

Fixes: 132ada80 "block: Adjust AioContexts when attaching nodes"

Signed-off-by: Michael Qiu 


I feel like there may be more problems (like the fact that the drained section 
should be expanded, and
that expanding doesn't help, as Michael said), but I think that temporarily 
releasing locks is an unsafe
thing, and if we can avoid it in some cases, that's good, especially if it fixes 
some bug:

Reviewed-by: Vladimir Sementsov-Ogievskiy 


---


[*] patch history and anything that you don't want to put into final commit 
message goes here.


  blockjob.c | 10 --
  1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/blockjob.c b/blockjob.c
index db3a21699c..d9dca36f65 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -212,15 +212,21 @@ int block_job_add_bdrv(BlockJob *job, const char *name, 
BlockDriverState *bs,
 uint64_t perm, uint64_t shared_perm, Error **errp)
  {
  BdrvChild *c;
+bool need_context_ops;
  
  bdrv_ref(bs);

-if (job->job.aio_context != qemu_get_aio_context()) {
+
+need_context_ops = bdrv_get_aio_context(bs) != job->job.aio_context;


I'd also put the second condition into same variable, just for less typing. 
Still it should work as is.


+
+if (need_context_ops &&
+job->job.aio_context != qemu_get_

[PATCH v4 16/18] migration/rdma: add rdma_channel into Migrationstate field

Multifd RDMA needs to poll when we send data, so record the channel in use.

Signed-off-by: Chuan Zheng 
---
 migration/migration.c |  1 +
 migration/migration.h |  1 +
 migration/rdma.c  | 14 ++
 3 files changed, 16 insertions(+)

diff --git a/migration/migration.c b/migration/migration.c
index b8f4844..47bd11d 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1926,6 +1926,7 @@ void migrate_init(MigrationState *s)
 s->migration_thread_running = false;
 s->enabled_rdma_migration = false;
 s->host_port = NULL;
+s->rdma_channel = 0;
 error_free(s->error);
 s->error = NULL;
 s->hostname = NULL;
diff --git a/migration/migration.h b/migration/migration.h
index 537ee09..5ff46e6 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -288,6 +288,7 @@ struct MigrationState {
 
 /* Need by Multi-RDMA */
 char *host_port;
+int rdma_channel;
 };
 
 void migrate_set_state(int *state, int old_state, int new_state);
diff --git a/migration/rdma.c b/migration/rdma.c
index f5eb563..2097839 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -183,6 +183,20 @@ typedef struct {
 } RDMAWorkRequestData;
 
 /*
+ * Get the multifd RDMA channel used to send data.
+ */
+static int get_multifd_RDMA_channel(void)
+{
+int thread_count = migrate_multifd_channels();
+MigrationState *s = migrate_get_current();
+
+s->rdma_channel++;
+s->rdma_channel %= thread_count;
+
+return s->rdma_channel;
+}
+
+/*
  * Negotiate RDMA capabilities during connection-setup time.
  */
 typedef struct {
-- 
1.8.3.1

[PATCH v4 08/18] migration/rdma: export getQIOChannel to get QIOchannel in rdma

Signed-off-by: Zhimin Feng 
Signed-off-by: Chuan Zheng 
---
 migration/qemu-file.c | 5 +
 migration/qemu-file.h | 1 +
 2 files changed, 6 insertions(+)

diff --git a/migration/qemu-file.c b/migration/qemu-file.c
index be21518..37f6201 100644
--- a/migration/qemu-file.c
+++ b/migration/qemu-file.c
@@ -260,6 +260,11 @@ void ram_control_before_iterate(QEMUFile *f, uint64_t 
flags)
 }
 }
 
+void *getQIOChannel(QEMUFile *f)
+{
+return f->opaque;
+}
+
 void ram_control_after_iterate(QEMUFile *f, uint64_t flags)
 {
 int ret = 0;
diff --git a/migration/qemu-file.h b/migration/qemu-file.h
index a9b6d6c..4cef043 100644
--- a/migration/qemu-file.h
+++ b/migration/qemu-file.h
@@ -165,6 +165,7 @@ void qemu_file_set_blocking(QEMUFile *f, bool block);
 void ram_control_before_iterate(QEMUFile *f, uint64_t flags);
 void ram_control_after_iterate(QEMUFile *f, uint64_t flags);
 void ram_control_load_hook(QEMUFile *f, uint64_t flags, void *data);
+void *getQIOChannel(QEMUFile *f);
 
 /* Whenever this is found in the data stream, the flags
  * will be passed to ram_control_load_hook in the incoming-migration
-- 
1.8.3.1

[PATCH v4 17/18] migration/rdma: send data for both rdma-pin-all and NOT rdma-pin-all mode

Signed-off-by: Zhimin Feng 
Signed-off-by: Chuan Zheng 
---
 migration/rdma.c | 65 
 1 file changed, 61 insertions(+), 4 deletions(-)

diff --git a/migration/rdma.c b/migration/rdma.c
index 2097839..c19a91f 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -2002,6 +2002,20 @@ static int qemu_rdma_write_one(QEMUFile *f, RDMAContext 
*rdma,
.repeat = 1,
  };
 
+/* use multifd to send data */
+if (migrate_use_multifd()) {
+int channel = get_multifd_RDMA_channel();
+int ret = 0;
+MultiFDSendParams *multifd_send_param = NULL;
+ret = get_multifd_send_param(channel, &multifd_send_param);
+if (ret) {
+error_report("rdma: error getting multifd_send_param(%d)", 
channel);
+return -EINVAL;
+}
+rdma = (RDMAContext *)multifd_send_param->rdma;
+block = &(rdma->local_ram_blocks.block[current_index]);
+}
+
 retry:
 sge.addr = (uintptr_t)(block->local_host_addr +
 (current_addr - block->offset));
@@ -2197,6 +2211,27 @@ retry:
 return 0;
 }
 
+static int multifd_rdma_write_flush(void)
+{
+/* The multifd RDMA threads send data */
+MultiFDSendParams *multifd_send_param = NULL;
+RDMAContext *rdma = NULL;
+MigrationState *s = migrate_get_current();
+int ret = 0;
+
+ret = get_multifd_send_param(s->rdma_channel,
+ &multifd_send_param);
+if (ret) {
+error_report("rdma: error getting multifd_send_param(%d)",
+ s->rdma_channel);
+return ret;
+}
+rdma = (RDMAContext *)(multifd_send_param->rdma);
+rdma->nb_sent++;
+
+return ret;
+}
+
 /*
  * Push out any unwritten RDMA operations.
  *
@@ -2219,8 +2254,15 @@ static int qemu_rdma_write_flush(QEMUFile *f, 
RDMAContext *rdma)
 }
 
 if (ret == 0) {
-rdma->nb_sent++;
-trace_qemu_rdma_write_flush(rdma->nb_sent);
+if (migrate_use_multifd()) {
+ret = multifd_rdma_write_flush();
+if (ret) {
+return ret;
+}
+} else {
+rdma->nb_sent++;
+trace_qemu_rdma_write_flush(rdma->nb_sent);
+}
 }
 
 rdma->current_length = 0;
@@ -4062,6 +4104,7 @@ wait_reg_complete:
 }
 
 qemu_sem_post(&multifd_send_param->sem_sync);
+qemu_sem_wait(&multifd_send_param->sem);
 }
 }
 
@@ -4443,6 +4486,7 @@ static void *multifd_rdma_send_thread(void *opaque)
 Error *local_err = NULL;
 int ret = 0;
 RDMAControlHeader head = { .len = 0, .repeat = 1 };
+RDMAContext *rdma = p->rdma;
 
 trace_multifd_send_thread_start(p->id);
 if (multifd_send_initial_packet(p, &local_err) < 0) {
@@ -4451,7 +4495,7 @@ static void *multifd_rdma_send_thread(void *opaque)
 
 /* wait for semaphore notification to register memory */
 qemu_sem_wait(&p->sem_sync);
-if (qemu_rdma_registration(p->rdma) < 0) {
+if (qemu_rdma_registration(rdma) < 0) {
 goto out;
 }
 /*
@@ -4466,12 +4510,25 @@ static void *multifd_rdma_send_thread(void *opaque)
 break;
 }
 }
+/* To complete polling(CQE) */
+while (rdma->nb_sent) {
+ret = qemu_rdma_block_for_wrid(rdma, RDMA_WRID_RDMA_WRITE, NULL);
+if (ret < 0) {
+error_report("multifd RDMA migration: "
+ "complete polling error!");
+return NULL;
+}
+}
 /* Send FINISHED to the destination */
 head.type = RDMA_CONTROL_REGISTER_FINISHED;
-ret = qemu_rdma_exchange_send(p->rdma, &head, NULL, NULL, NULL, NULL);
+ret = qemu_rdma_exchange_send(rdma, &head, NULL, NULL, NULL, NULL);
 if (ret < 0) {
+error_report("multifd RDMA migration: "
+ "sending remote error!");
 return NULL;
 }
+/* sync main thread */
+qemu_sem_post(&p->sem);
 }
 
 out:
-- 
1.8.3.1

[PATCH v4 10/18] migration/rdma: Create the multifd recv channels for RDMA

We still don't transmit anything through them, and we only build
the RDMA connections.

Signed-off-by: Zhimin Feng 
Signed-off-by: Chuan Zheng 
---
 migration/rdma.c | 69 ++--
 1 file changed, 67 insertions(+), 2 deletions(-)

diff --git a/migration/rdma.c b/migration/rdma.c
index 996afb0..ed8a015 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -3267,6 +3267,40 @@ static void rdma_cm_poll_handler(void *opaque)
 }
 }
 
+static bool qemu_rdma_accept_setup(RDMAContext *rdma)
+{
+RDMAContext *multifd_rdma = NULL;
+int thread_count;
+int i;
+MultiFDRecvParams *multifd_recv_param;
+thread_count = migrate_multifd_channels();
+/* create the multifd channels for RDMA */
+for (i = 0; i < thread_count; i++) {
+if (get_multifd_recv_param(i, &multifd_recv_param) < 0) {
+error_report("rdma: error getting multifd_recv_param(%d)", i);
+return false;
+}
+
+multifd_rdma = (RDMAContext *) multifd_recv_param->rdma;
+if (multifd_rdma->cm_id == NULL) {
+break;
+} else {
+multifd_rdma = NULL;
+}
+}
+
+if (multifd_rdma) {
+qemu_set_fd_handler(rdma->channel->fd,
+rdma_accept_incoming_migration,
+NULL, (void *)(intptr_t)multifd_rdma);
+} else {
+qemu_set_fd_handler(rdma->channel->fd, rdma_cm_poll_handler,
+NULL, rdma);
+}
+
+return true;
+}
+
 static int qemu_rdma_accept(RDMAContext *rdma)
 {
 RDMACapabilities cap;
@@ -3366,6 +3400,10 @@ static int qemu_rdma_accept(RDMAContext *rdma)
 qemu_set_fd_handler(rdma->channel->fd, rdma_accept_incoming_migration,
 NULL,
 (void *)(intptr_t)rdma->return_path);
+} else if (migrate_use_multifd()) {
+if (!qemu_rdma_accept_setup(rdma)) {
+goto err_rdma_dest_wait;
+}
 } else {
 qemu_set_fd_handler(rdma->channel->fd, rdma_cm_poll_handler,
 NULL, rdma);
@@ -3976,6 +4014,34 @@ static QEMUFile *qemu_fopen_rdma(RDMAContext *rdma, 
const char *mode)
 return rioc->file;
 }
 
+static void migration_rdma_process_incoming(QEMUFile *f,
+RDMAContext *rdma, Error **errp)
+{
+MigrationIncomingState *mis = migration_incoming_get_current();
+QIOChannel *ioc = NULL;
+bool start_migration = false;
+
+if (!migrate_use_multifd()) {
+rdma->migration_started_on_destination = 1;
+migration_fd_process_incoming(f, errp);
+return;
+}
+
+if (!mis->from_src_file) {
+mis->from_src_file = f;
+qemu_file_set_blocking(f, false);
+} else {
+ioc = QIO_CHANNEL(getQIOChannel(f));
+/* Multiple connections */
+assert(migrate_use_multifd());
+start_migration = multifd_recv_new_channel(ioc, errp);
+}
+
+if (start_migration) {
+migration_incoming_process();
+}
+}
+
 static void rdma_accept_incoming_migration(void *opaque)
 {
 RDMAContext *rdma = opaque;
@@ -4004,8 +4070,7 @@ static void rdma_accept_incoming_migration(void *opaque)
 return;
 }
 
-rdma->migration_started_on_destination = 1;
-migration_fd_process_incoming(f, &local_err);
+migration_rdma_process_incoming(f, rdma, &local_err);
 if (local_err) {
 error_reportf_err(local_err, "RDMA ERROR:");
 }
-- 
1.8.3.1

[PATCH v4 02/18] migration/rdma: judge whether or not the RDMA is used for migration

Add enabled_rdma_migration into MigrationState to judge
whether or not the RDMA is used for migration.

Signed-off-by: Zhimin Feng 
Signed-off-by: Chuan Zheng 
---
 migration/migration.c | 13 +
 migration/migration.h |  6 ++
 2 files changed, 19 insertions(+)

diff --git a/migration/migration.c b/migration/migration.c
index 447dfb9..129c81a 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -418,11 +418,13 @@ void migrate_add_address(SocketAddress *address)
 static void qemu_start_incoming_migration(const char *uri, Error **errp)
 {
 const char *p = NULL;
+MigrationState *s = migrate_get_current();
 
 if (!yank_register_instance(MIGRATION_YANK_INSTANCE, errp)) {
 return;
 }
 
+s->enabled_rdma_migration = false;
 qapi_event_send_migration(MIGRATION_STATUS_SETUP);
 if (strstart(uri, "tcp:", &p) ||
 strstart(uri, "unix:", NULL) ||
@@ -430,6 +432,7 @@ static void qemu_start_incoming_migration(const char *uri, 
Error **errp)
 socket_start_incoming_migration(p ? p : uri, errp);
 #ifdef CONFIG_RDMA
 } else if (strstart(uri, "rdma:", &p)) {
+s->enabled_rdma_migration = true;
 rdma_start_incoming_migration(p, errp);
 #endif
 } else if (strstart(uri, "exec:", &p)) {
@@ -1921,6 +1924,7 @@ void migrate_init(MigrationState *s)
 s->start_postcopy = false;
 s->postcopy_after_devices = false;
 s->migration_thread_running = false;
+s->enabled_rdma_migration = false;
 error_free(s->error);
 s->error = NULL;
 s->hostname = NULL;
@@ -2162,6 +2166,7 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk,
 socket_start_outgoing_migration(s, p ? p : uri, &local_err);
 #ifdef CONFIG_RDMA
 } else if (strstart(uri, "rdma:", &p)) {
+s->enabled_rdma_migration = true;
 rdma_start_outgoing_migration(s, p, &local_err);
 #endif
 } else if (strstart(uri, "exec:", &p)) {
@@ -2391,6 +2396,14 @@ bool migrate_rdma_pin_all(void)
 return s->enabled_capabilities[MIGRATION_CAPABILITY_RDMA_PIN_ALL];
 }
 
+bool migrate_use_rdma(void)
+{
+MigrationState *s;
+s = migrate_get_current();
+
+return s->enabled_rdma_migration;
+}
+
 bool migrate_use_multifd(void)
 {
 MigrationState *s;
diff --git a/migration/migration.h b/migration/migration.h
index 22b36f3..da5681b 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -280,6 +280,11 @@ struct MigrationState {
  * This save hostname when out-going migration starts
  */
 char *hostname;
+
+/*
+ * Enable RDMA migration
+ */
+bool enabled_rdma_migration;
 };
 
 void migrate_set_state(int *state, int old_state, int new_state);
@@ -317,6 +322,7 @@ bool migrate_validate_uuid(void);
 
 bool migrate_auto_converge(void);
 bool migrate_rdma_pin_all(void);
+bool migrate_use_rdma(void);
 bool migrate_use_multifd(void);
 bool migrate_pause_before_switchover(void);
 int migrate_multifd_channels(void);
-- 
1.8.3.1

[PATCH v4 14/18] migration/rdma: register memory for multifd RDMA channels

Signed-off-by: Zhimin Feng 
Signed-off-by: Chuan Zheng 
---
 migration/multifd.c |  3 ++
 migration/rdma.c| 92 +++--
 2 files changed, 93 insertions(+), 2 deletions(-)

diff --git a/migration/multifd.c b/migration/multifd.c
index 919a414..1186246 100644
--- a/migration/multifd.c
+++ b/migration/multifd.c
@@ -537,6 +537,9 @@ void multifd_send_terminate_threads(Error *err)
 qemu_mutex_lock(&p->mutex);
 p->quit = true;
 qemu_sem_post(&p->sem);
+if (migrate_use_rdma()) {
+qemu_sem_post(&p->sem_sync);
+}
 qemu_mutex_unlock(&p->mutex);
 }
 }
diff --git a/migration/rdma.c b/migration/rdma.c
index 1095a8f..c906cc7 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -3838,6 +3838,19 @@ static int rdma_load_hook(QEMUFile *f, void *opaque, 
uint64_t flags, void *data)
 return rdma_block_notification_handle(opaque, data);
 
 case RAM_CONTROL_HOOK:
+if (migrate_use_multifd()) {
+int i;
+MultiFDRecvParams *multifd_recv_param = NULL;
+int thread_count = migrate_multifd_channels();
+/* Inform dest recv_thread to poll */
+for (i = 0; i < thread_count; i++) {
+if (get_multifd_recv_param(i, &multifd_recv_param)) {
+return -1;
+}
+qemu_sem_post(&multifd_recv_param->sem_sync);
+}
+}
+
 return qemu_rdma_registration_handle(f, opaque);
 
 default:
@@ -3910,6 +3923,24 @@ static int qemu_rdma_registration_stop(QEMUFile *f, void 
*opaque,
 head.type = RDMA_CONTROL_RAM_BLOCKS_REQUEST;
 trace_qemu_rdma_registration_stop_ram();
 
+if (migrate_use_multifd()) {
+/*
+ * Inform the multifd channels to register memory
+ */
+int i;
+int thread_count = migrate_multifd_channels();
+MultiFDSendParams *multifd_send_param = NULL;
+for (i = 0; i < thread_count; i++) {
+ret = get_multifd_send_param(i, &multifd_send_param);
+if (ret) {
+error_report("rdma: error getting multifd(%d)", i);
+return ret;
+}
+
+qemu_sem_post(&multifd_send_param->sem_sync);
+}
+}
+
 /*
  * Make sure that we parallelize the pinning on both sides.
  * For very large guests, doing this serially takes a really
@@ -3968,6 +3999,21 @@ static int qemu_rdma_registration_stop(QEMUFile *f, void 
*opaque,
 rdma->dest_blocks[i].remote_host_addr;
 local->block[i].remote_rkey = rdma->dest_blocks[i].remote_rkey;
 }
+/* Wait for all multifd channels to complete registration */
+if (migrate_use_multifd()) {
+int i;
+int thread_count = migrate_multifd_channels();
+MultiFDSendParams *multifd_send_param = NULL;
+for (i = 0; i < thread_count; i++) {
+ret = get_multifd_send_param(i, &multifd_send_param);
+if (ret) {
+error_report("rdma: error getting multifd(%d)", i);
+return ret;
+}
+
+qemu_sem_wait(&multifd_send_param->sem);
+}
+}
 }
 
 trace_qemu_rdma_registration_stop(flags);
@@ -3979,6 +4025,24 @@ static int qemu_rdma_registration_stop(QEMUFile *f, void 
*opaque,
 goto err;
 }
 
+if (migrate_use_multifd()) {
+/*
+ * Inform src send_thread to send FINISHED signal.
+ * Wait for multifd RDMA send threads to poll the CQE.
+ */
+int i;
+int thread_count = migrate_multifd_channels();
+MultiFDSendParams *multifd_send_param = NULL;
+for (i = 0; i < thread_count; i++) {
+ret = get_multifd_send_param(i, &multifd_send_param);
+if (ret < 0) {
+goto err;
+}
+
+qemu_sem_post(&multifd_send_param->sem_sync);
+}
+}
+
 return 0;
 err:
 rdma->error_state = ret;
@@ -4355,19 +4419,37 @@ static void *multifd_rdma_send_thread(void *opaque)
 {
 MultiFDSendParams *p = opaque;
 Error *local_err = NULL;
+int ret = 0;
+RDMAControlHeader head = { .len = 0, .repeat = 1 };
 
 trace_multifd_send_thread_start(p->id);
 if (multifd_send_initial_packet(p, &local_err) < 0) {
 goto out;
 }
 
+/* wait for semaphore notification to register memory */
+qemu_sem_wait(&p->sem_sync);
+if (qemu_rdma_registration(p->rdma) < 0) {
+goto out;
+}
+/*
+ * Inform the main RDMA thread to run when multifd
+ * RDMA thread have completed registration.
+ */
+qemu_sem_post(&p->sem);
 while (true) {
+qemu_sem_wait(&p->sem_sync);
 WITH_QEMU_LOCK_GUARD(&p->mutex) {
 if (p->quit) {
 br

[PATCH v4 07/18] migration/rdma: add rdma field into multifd send/recv param

Note we do not want to export any rdma struct, so take void * instead.

Signed-off-by: Chuan Zheng 
---
 migration/multifd.h | 8 
 1 file changed, 8 insertions(+)

diff --git a/migration/multifd.h b/migration/multifd.h
index d57756c..b17a2c1 100644
--- a/migration/multifd.h
+++ b/migration/multifd.h
@@ -108,6 +108,10 @@ typedef struct {
 QemuSemaphore sem_sync;
 /* used for compression methods */
 void *data;
+/* used for multifd rdma */
+void *rdma;
+/* communication channel */
+QEMUFile *file;
 }  MultiFDSendParams;
 
 typedef struct {
@@ -147,6 +151,10 @@ typedef struct {
 QemuSemaphore sem_sync;
 /* used for de-compression methods */
 void *data;
+/* used for multifd rdma */
+void *rdma;
+/* communication channel */
+QEMUFile *file;
 } MultiFDRecvParams;
 
 typedef struct {
-- 
1.8.3.1

[PATCH v4 11/18] migration/rdma: record host_port for multifd RDMA

Signed-off-by: Chuan Zheng 
---
 migration/migration.c | 1 +
 migration/migration.h | 3 +++
 migration/rdma.c  | 3 +++
 3 files changed, 7 insertions(+)

diff --git a/migration/migration.c b/migration/migration.c
index 129c81a..b8f4844 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1925,6 +1925,7 @@ void migrate_init(MigrationState *s)
 s->postcopy_after_devices = false;
 s->migration_thread_running = false;
 s->enabled_rdma_migration = false;
+s->host_port = NULL;
 error_free(s->error);
 s->error = NULL;
 s->hostname = NULL;
diff --git a/migration/migration.h b/migration/migration.h
index da5681b..537ee09 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -285,6 +285,9 @@ struct MigrationState {
  * Enable RDMA migration
  */
 bool enabled_rdma_migration;
+
+/* Need by Multi-RDMA */
+char *host_port;
 };
 
 void migrate_set_state(int *state, int old_state, int new_state);
diff --git a/migration/rdma.c b/migration/rdma.c
index ed8a015..9654b87 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -4206,6 +4206,8 @@ void rdma_start_outgoing_migration(void *opaque,
 goto err;
 }
 
+s->host_port = g_strdup(host_port);
+
 ret = qemu_rdma_source_init(rdma,
 s->enabled_capabilities[MIGRATION_CAPABILITY_RDMA_PIN_ALL], errp);
 
@@ -4250,6 +4252,7 @@ void rdma_start_outgoing_migration(void *opaque,
 
 s->to_dst_file = qemu_fopen_rdma(rdma, "wb");
 migrate_fd_connect(s, NULL);
+g_free(s->host_port);
 return;
 return_path_err:
 qemu_rdma_cleanup(rdma);
-- 
1.8.3.1

[PATCH v4 05/18] migration/rdma: do not need sync main for rdma

Signed-off-by: Zhimin Feng 
Signed-off-by: Chuan Zheng 
---
 migration/multifd.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/migration/multifd.c b/migration/multifd.c
index 4820702..5d34950 100644
--- a/migration/multifd.c
+++ b/migration/multifd.c
@@ -583,6 +583,10 @@ void multifd_send_sync_main(QEMUFile *f)
 if (!migrate_use_multifd()) {
 return;
 }
+ /* Do not need sync for rdma */
+if (migrate_use_rdma()) {
+return;
+}
 if (multifd_send_state->pages->used) {
 if (multifd_send_pages(f) < 0) {
 error_report("%s: multifd_send_pages fail", __func__);
@@ -1024,6 +1028,10 @@ void multifd_recv_sync_main(void)
 if (!migrate_use_multifd()) {
 return;
 }
+/* Do not need sync for rdma */
+if (migrate_use_rdma()) {
+return;
+}
 for (i = 0; i < migrate_multifd_channels(); i++) {
 MultiFDRecvParams *p = &multifd_recv_state->params[i];
 
-- 
1.8.3.1

[PATCH v4 15/18] migration/rdma: only register the memory for multifd channels

All data is sent by the multifd channels, so we only register memory for the
multifd channels; the main channel does not register it.

Signed-off-by: Zhimin Feng 
Signed-off-by: Chuan Zheng 
---
 migration/rdma.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/migration/rdma.c b/migration/rdma.c
index c906cc7..f5eb563 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -3939,6 +3939,12 @@ static int qemu_rdma_registration_stop(QEMUFile *f, void 
*opaque,
 
 qemu_sem_post(&multifd_send_param->sem_sync);
 }
+
+/*
+ * Use multifd to migrate, we only register memory for
+ * multifd RDMA channel and main channel don't register it.
+ */
+goto wait_reg_complete;
 }
 
 /*
@@ -3999,6 +4005,8 @@ static int qemu_rdma_registration_stop(QEMUFile *f, void 
*opaque,
 rdma->dest_blocks[i].remote_host_addr;
 local->block[i].remote_rkey = rdma->dest_blocks[i].remote_rkey;
 }
+
+wait_reg_complete:
 /* Wait for all multifd channels to complete registration */
 if (migrate_use_multifd()) {
 int i;
-- 
1.8.3.1

[PATCH v4 13/18] migration/rdma: Add the function for dynamic page registration

Add the 'qemu_rdma_registration' function; multifd send threads
call it to register memory.

Signed-off-by: Zhimin Feng 
Signed-off-by: Chuan Zheng 
---
 migration/rdma.c | 51 +++
 1 file changed, 51 insertions(+)

diff --git a/migration/rdma.c b/migration/rdma.c
index cff9446..1095a8f 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -3739,6 +3739,57 @@ out:
 return ret;
 }
 
+/*
+ * Dynamic page registrations for multifd RDMA threads.
+ */
+static int qemu_rdma_registration(void *opaque)
+{
+RDMAContext *rdma = opaque;
+RDMAControlHeader resp = {.type = RDMA_CONTROL_RAM_BLOCKS_RESULT };
+RDMALocalBlocks *local = &rdma->local_ram_blocks;
+int reg_result_idx, i, nb_dest_blocks;
+RDMAControlHeader head = { .len = 0, .repeat = 1 };
+int ret = 0;
+
+head.type = RDMA_CONTROL_RAM_BLOCKS_REQUEST;
+
+ret = qemu_rdma_exchange_send(rdma, &head, NULL, &resp,
+®_result_idx, rdma->pin_all ?
+qemu_rdma_reg_whole_ram_blocks : NULL);
+if (ret < 0) {
+goto out;
+}
+
+nb_dest_blocks = resp.len / sizeof(RDMADestBlock);
+
+if (local->nb_blocks != nb_dest_blocks) {
+rdma->error_state = -EINVAL;
+ret = -1;
+goto out;
+}
+
+qemu_rdma_move_header(rdma, reg_result_idx, &resp);
+memcpy(rdma->dest_blocks,
+   rdma->wr_data[reg_result_idx].control_curr, resp.len);
+
+for (i = 0; i < nb_dest_blocks; i++) {
+network_to_dest_block(&rdma->dest_blocks[i]);
+
+/* We require that the blocks are in the same order */
+if (rdma->dest_blocks[i].length != local->block[i].length) {
+rdma->error_state = -EINVAL;
+ret = -1;
+goto out;
+}
+local->block[i].remote_host_addr =
+rdma->dest_blocks[i].remote_host_addr;
+local->block[i].remote_rkey = rdma->dest_blocks[i].remote_rkey;
+}
+
+out:
+return ret;
+}
+
 /* Destination:
  * Called via a ram_control_load_hook during the initial RAM load section which
  * lists the RAMBlocks by name.  This lets us know the order of the RAMBlocks
-- 
1.8.3.1

[PATCH v4 18/18] migration/rdma: RDMA cleanup for multifd migration

Signed-off-by: Chuan Zheng 
---
 migration/multifd.c |  6 ++
 migration/multifd.h |  1 +
 migration/rdma.c| 16 +++-
 3 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/migration/multifd.c b/migration/multifd.c
index 1186246..4031648 100644
--- a/migration/multifd.c
+++ b/migration/multifd.c
@@ -577,6 +577,9 @@ void multifd_save_cleanup(void)
 p->packet_len = 0;
 g_free(p->packet);
 p->packet = NULL;
+#ifdef CONFIG_RDMA
+multifd_rdma_cleanup(p->rdma);
+#endif
 multifd_send_state->ops->send_cleanup(p, &local_err);
 if (local_err) {
 migrate_set_error(migrate_get_current(), local_err);
@@ -1039,6 +1042,9 @@ int multifd_load_cleanup(Error **errp)
 p->packet_len = 0;
 g_free(p->packet);
 p->packet = NULL;
+#ifdef CONFIG_RDMA
+multifd_rdma_cleanup(p->rdma);
+#endif
 multifd_recv_state->ops->recv_cleanup(p);
 }
 qemu_sem_destroy(&multifd_recv_state->sem_sync);
diff --git a/migration/multifd.h b/migration/multifd.h
index 26d4489..0ecec5e 100644
--- a/migration/multifd.h
+++ b/migration/multifd.h
@@ -183,6 +183,7 @@ typedef struct {
 
 #ifdef CONFIG_RDMA
 extern MultiFDSetup multifd_rdma_ops;
+void multifd_rdma_cleanup(void *opaque);
 #endif
 void multifd_send_terminate_threads(Error *err);
 int multifd_send_initial_packet(MultiFDSendParams *p, Error **errp);
diff --git a/migration/rdma.c b/migration/rdma.c
index c19a91f..f14357f 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -2369,7 +2369,7 @@ static void qemu_rdma_cleanup(RDMAContext *rdma)
 {
 int idx;
 
-if (rdma->cm_id && rdma->connected) {
+if (rdma->channel && rdma->cm_id && rdma->connected) {
 if ((rdma->error_state ||
  migrate_get_current()->state == MIGRATION_STATUS_CANCELLING) &&
 !rdma->received_error) {
@@ -4599,6 +4599,20 @@ static void multifd_rdma_recv_channel_setup(QIOChannel 
*ioc,
 return;
 }
 
+void multifd_rdma_cleanup(void *opaque)
+{
+RDMAContext *rdma = (RDMAContext *)opaque;
+
+if (!migrate_use_rdma()) {
+return;
+}
+
+rdma->listen_id = NULL;
+rdma->channel = NULL;
+qemu_rdma_cleanup(rdma);
+g_free(rdma);
+}
+
 MultiFDSetup multifd_rdma_ops = {
 .send_thread = multifd_rdma_send_thread,
 .recv_thread = multifd_rdma_recv_thread,
-- 
1.8.3.1

[PATCH v4 03/18] migration/rdma: create multifd_setup_ops for Tx/Rx thread

Create multifd_setup_ops for TxRx thread, no logic change.

Signed-off-by: Chuan Zheng 
---
 migration/multifd.c | 44 +++-
 migration/multifd.h |  7 +++
 2 files changed, 46 insertions(+), 5 deletions(-)

diff --git a/migration/multifd.c b/migration/multifd.c
index 1a1e589..cb1fc01 100644
--- a/migration/multifd.c
+++ b/migration/multifd.c
@@ -386,6 +386,8 @@ struct {
 int exiting;
 /* multifd ops */
 MultiFDMethods *ops;
+/* multifd setup ops */
+MultiFDSetup *setup_ops;
 } *multifd_send_state;
 
 /*
@@ -805,8 +807,9 @@ static bool multifd_channel_connect(MultiFDSendParams *p,
 } else {
 /* update for tls qio channel */
 p->c = ioc;
-qemu_thread_create(&p->thread, p->name, multifd_send_thread, p,
-   QEMU_THREAD_JOINABLE);
+qemu_thread_create(&p->thread, p->name,
+   multifd_send_state->setup_ops->send_thread,
+   p, QEMU_THREAD_JOINABLE);
}
return false;
 }
@@ -854,6 +857,11 @@ cleanup:
 multifd_new_send_channel_cleanup(p, sioc, local_err);
 }
 
+static void multifd_send_channel_setup(MultiFDSendParams *p)
+{
+socket_send_channel_create(multifd_new_send_channel_async, p);
+}
+
 int multifd_save_setup(Error **errp)
 {
 int thread_count;
@@ -871,6 +879,7 @@ int multifd_save_setup(Error **errp)
 multifd_send_state->pages = multifd_pages_init(page_count);
 qemu_sem_init(&multifd_send_state->channels_ready, 0);
 qatomic_set(&multifd_send_state->exiting, 0);
+multifd_send_state->setup_ops = multifd_setup_ops_init();
 multifd_send_state->ops = multifd_ops[migrate_multifd_compression()];
 
 for (i = 0; i < thread_count; i++) {
@@ -890,7 +899,7 @@ int multifd_save_setup(Error **errp)
 p->packet->version = cpu_to_be32(MULTIFD_VERSION);
 p->name = g_strdup_printf("multifdsend_%d", i);
 p->tls_hostname = g_strdup(s->hostname);
-socket_send_channel_create(multifd_new_send_channel_async, p);
+multifd_send_state->setup_ops->send_channel_setup(p);
 }
 
 for (i = 0; i < thread_count; i++) {
@@ -917,6 +926,8 @@ struct {
 uint64_t packet_num;
 /* multifd ops */
 MultiFDMethods *ops;
+/* multifd setup ops */
+MultiFDSetup *setup_ops;
 } *multifd_recv_state;
 
 static void multifd_recv_terminate_threads(Error *err)
@@ -1117,6 +1128,7 @@ int multifd_load_setup(Error **errp)
 multifd_recv_state->params = g_new0(MultiFDRecvParams, thread_count);
 qatomic_set(&multifd_recv_state->count, 0);
 qemu_sem_init(&multifd_recv_state->sem_sync, 0);
+multifd_recv_state->setup_ops = multifd_setup_ops_init();
 multifd_recv_state->ops = multifd_ops[migrate_multifd_compression()];
 
 for (i = 0; i < thread_count; i++) {
@@ -1195,9 +1207,31 @@ bool multifd_recv_new_channel(QIOChannel *ioc, Error 
**errp)
 p->num_packets = 1;
 
 p->running = true;
-qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p,
-   QEMU_THREAD_JOINABLE);
+multifd_recv_state->setup_ops->recv_channel_setup(ioc, p);
+qemu_thread_create(&p->thread, p->name,
+   multifd_recv_state->setup_ops->recv_thread,
+   p, QEMU_THREAD_JOINABLE);
 qatomic_inc(&multifd_recv_state->count);
 return qatomic_read(&multifd_recv_state->count) ==
migrate_multifd_channels();
 }
+
+static void multifd_recv_channel_setup(QIOChannel *ioc, MultiFDRecvParams *p)
+{
+return;
+}
+
+static MultiFDSetup multifd_socket_ops = {
+.send_thread = multifd_send_thread,
+.recv_thread = multifd_recv_thread,
+.send_channel_setup = multifd_send_channel_setup,
+.recv_channel_setup = multifd_recv_channel_setup
+};
+
+MultiFDSetup *multifd_setup_ops_init(void)
+{
+MultiFDSetup *ops;
+
+ops = &multifd_socket_ops;
+return ops;
+}
diff --git a/migration/multifd.h b/migration/multifd.h
index 8d6751f..1d2dc90 100644
--- a/migration/multifd.h
+++ b/migration/multifd.h
@@ -166,6 +166,13 @@ typedef struct {
 int (*recv_pages)(MultiFDRecvParams *p, uint32_t used, Error **errp);
 } MultiFDMethods;
 
+typedef struct {
+void *(*send_thread)(void *opaque);
+void *(*recv_thread)(void *opaque);
+void (*send_channel_setup)(MultiFDSendParams *p);
+void (*recv_channel_setup)(QIOChannel *ioc, MultiFDRecvParams *p);
+} MultiFDSetup;
+
 void multifd_register_ops(int method, MultiFDMethods *ops);
 
 #endif
-- 
1.8.3.1

[PATCH v4 06/18] migration/rdma: export MultiFDSendParams/MultiFDRecvParams

MultiFDSendParams and MultiFDRecvParams are needed for rdma, so export them.

Signed-off-by: Zhimin Feng 
Signed-off-by: Chuan Zheng 
---
 migration/multifd.c | 26 ++
 migration/multifd.h |  2 ++
 2 files changed, 28 insertions(+)

diff --git a/migration/multifd.c b/migration/multifd.c
index 5d34950..ae0b7f0 100644
--- a/migration/multifd.c
+++ b/migration/multifd.c
@@ -390,6 +390,19 @@ struct {
 MultiFDSetup *setup_ops;
 } *multifd_send_state;
 
+int get_multifd_send_param(int id, MultiFDSendParams **param)
+{
+int ret = 0;
+
+if (id < 0 || id >= migrate_multifd_channels()) {
+ret = -1;
+} else {
+*param = &(multifd_send_state->params[id]);
+}
+
+return ret;
+}
+
 /*
  * How we use multifd_send_state->pages and channel->pages?
  *
@@ -934,6 +947,19 @@ struct {
 MultiFDSetup *setup_ops;
 } *multifd_recv_state;
 
+int get_multifd_recv_param(int id, MultiFDRecvParams **param)
+{
+int ret = 0;
+
+if (id < 0 || id >= migrate_multifd_channels()) {
+ret = -1;
+} else {
+*param = &(multifd_recv_state->params[id]);
+}
+
+return ret;
+}
+
 static void multifd_recv_terminate_threads(Error *err)
 {
 int i;
diff --git a/migration/multifd.h b/migration/multifd.h
index e3ab4b0..d57756c 100644
--- a/migration/multifd.h
+++ b/migration/multifd.h
@@ -176,6 +176,8 @@ typedef struct {
 #ifdef CONFIG_RDMA
 extern MultiFDSetup multifd_rdma_ops;
 #endif
+int get_multifd_send_param(int id, MultiFDSendParams **param);
+int get_multifd_recv_param(int id, MultiFDRecvParams **param);
 MultiFDSetup *multifd_setup_ops_init(void);
 
 void multifd_register_ops(int method, MultiFDMethods *ops);
-- 
1.8.3.1

[PATCH v4 12/18] migration/rdma: Create the multifd send channels for RDMA

Signed-off-by: Chuan Zheng 
---
 migration/multifd.c |  4 ++--
 migration/multifd.h |  2 ++
 migration/rdma.c| 57 +
 3 files changed, 61 insertions(+), 2 deletions(-)

diff --git a/migration/multifd.c b/migration/multifd.c
index ae0b7f0..919a414 100644
--- a/migration/multifd.c
+++ b/migration/multifd.c
@@ -176,7 +176,7 @@ void multifd_register_ops(int method, MultiFDMethods *ops)
 multifd_ops[method] = ops;
 }
 
-static int multifd_send_initial_packet(MultiFDSendParams *p, Error **errp)
+int multifd_send_initial_packet(MultiFDSendParams *p, Error **errp)
 {
 MultiFDInit_t msg = {};
 int ret;
@@ -503,7 +503,7 @@ int multifd_queue_page(QEMUFile *f, RAMBlock *block, 
ram_addr_t offset)
 return 1;
 }
 
-static void multifd_send_terminate_threads(Error *err)
+void multifd_send_terminate_threads(Error *err)
 {
 int i;
 
diff --git a/migration/multifd.h b/migration/multifd.h
index b17a2c1..26d4489 100644
--- a/migration/multifd.h
+++ b/migration/multifd.h
@@ -184,6 +184,8 @@ typedef struct {
 #ifdef CONFIG_RDMA
 extern MultiFDSetup multifd_rdma_ops;
 #endif
+void multifd_send_terminate_threads(Error *err);
+int multifd_send_initial_packet(MultiFDSendParams *p, Error **errp);
 int get_multifd_send_param(int id, MultiFDSendParams **param);
 int get_multifd_recv_param(int id, MultiFDRecvParams **param);
 MultiFDSetup *multifd_setup_ops_init(void);
diff --git a/migration/rdma.c b/migration/rdma.c
index 9654b87..cff9446 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -4261,9 +4261,54 @@ err:
 g_free(rdma_return_path);
 }
 
+static int multifd_channel_rdma_connect(void *opaque)
+{
+MultiFDSendParams *p = opaque;
+Error *local_err = NULL;
+int ret = 0;
+MigrationState *s = migrate_get_current();
+
+p->rdma = qemu_rdma_data_init(s->host_port, &local_err);
+if (p->rdma == NULL) {
+goto out;
+}
+
+ret = qemu_rdma_source_init(p->rdma,
+migrate_rdma_pin_all(),
+&local_err);
+if (ret) {
+goto out;
+}
+
+ret = qemu_rdma_connect(p->rdma, &local_err);
+if (ret) {
+goto out;
+}
+
+p->file = qemu_fopen_rdma(p->rdma, "wb");
+if (p->file == NULL) {
+goto out;
+}
+
+p->c = QIO_CHANNEL(getQIOChannel(p->file));
+
+out:
+if (local_err) {
+trace_multifd_send_error(p->id);
+}
+
+return ret;
+}
+
 static void *multifd_rdma_send_thread(void *opaque)
 {
 MultiFDSendParams *p = opaque;
+Error *local_err = NULL;
+
+trace_multifd_send_thread_start(p->id);
+if (multifd_send_initial_packet(p, &local_err) < 0) {
+goto out;
+}
 
 while (true) {
 WITH_QEMU_LOCK_GUARD(&p->mutex) {
@@ -4274,6 +4319,12 @@ static void *multifd_rdma_send_thread(void *opaque)
 qemu_sem_wait(&p->sem);
 }
 
+out:
+if (local_err) {
+trace_multifd_send_error(p->id);
+multifd_send_terminate_threads(local_err);
+}
+
 WITH_QEMU_LOCK_GUARD(&p->mutex) {
 p->running = false;
 }
@@ -4285,6 +4336,12 @@ static void 
multifd_rdma_send_channel_setup(MultiFDSendParams *p)
 {
 Error *local_err = NULL;
 
+if (multifd_channel_rdma_connect(p)) {
+error_setg(&local_err, "multifd: rdma channel %d not established",
+   p->id);
+return ;
+}
+
 if (p->quit) {
 error_setg(&local_err, "multifd: send id %d already quit", p->id);
 return ;
-- 
1.8.3.1

[PATCH v4 00/18] Support Multifd for RDMA migration

The RDMA bandwidth is not fully utilized on NICs of over 25Gigabit because
RDMA migration uses only a single channel. This patch series adds support for
multifd RDMA migration, based on the multifd framework.

The comparison between original and multifd RDMA migration was re-tested for v3.
The VM specifications for migration are as follows:
- VM use 4k page;
- the number of VCPU is 4;
- the total memory is 16Gigabit;
- use 'mempress' tool to pressurize VM(mempress 8000 500);
- use 25Gigabit network card to migrate;

For original RDMA and MultiRDMA migration, the total migration times of
the VM are as follows:
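+-------------+------------------+--------------+
|             | NOT rdma-pin-all | rdma-pin-all |
+-------------+------------------+--------------+
| origin RDMA |       26 s       |     29 s     |
+-------------+------------------+--------------+
|  MultiRDMA  |       16 s       |     17 s     |
+-------------+------------------+--------------+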

Test the multifd RDMA migration like this:
virsh migrate --live --parallel --migrateuri
rdma://192.168.1.100 [VM] --listen-address 0.0.0.0  
qemu+tcp://192.168.1.100/system --verbose

v3 -> v4:
modify some function names
export multifd_rdma_ops instead of a function
fix minor codestyle issues

v2 -> v3:
create multifd ops for both tcp and rdma
do not export rdma to avoid multifd code in mess
fix build issue for non-rdma
fix some codestyle and buggy code

Chuan Zheng (18):
  migration/rdma: add the 'migrate_rdma_pin_all' function
  migration/rdma: judge whether or not the RDMA is used for migration
  migration/rdma: create multifd_setup_ops for Tx/Rx thread
  migration/rdma: add multifd_setup_ops for rdma
  migration/rdma: do not need sync main for rdma
  migration/rdma: export MultiFDSendParams/MultiFDRecvParams
  migration/rdma: add rdma field into multifd send/recv param
  migration/rdma: export getQIOChannel to get QIOchannel in rdma
  migration/rdma: add multifd_rdma_load_setup() to setup multifd rdma
  migration/rdma: Create the multifd recv channels for RDMA
  migration/rdma: record host_port for multifd RDMA
  migration/rdma: Create the multifd send channels for RDMA
  migration/rdma: Add the function for dynamic page registration
  migration/rdma: register memory for multifd RDMA channels
  migration/rdma: only register the memory for multifd channels
  migration/rdma: add rdma_channel into Migrationstate field
  migration/rdma: send data for both rdma-pin-all and NOT rdma-pin-all
mode
  migration/rdma: RDMA cleanup for multifd migration

 migration/migration.c |  24 +++
 migration/migration.h |  11 ++
 migration/multifd.c   |  97 +-
 migration/multifd.h   |  25 +++
 migration/qemu-file.c |   5 +
 migration/qemu-file.h |   1 +
 migration/rdma.c  | 490 +-
 7 files changed, 641 insertions(+), 12 deletions(-)

-- 
1.8.3.1

[PATCH v4 04/18] migration/rdma: add multifd_setup_ops for rdma

Signed-off-by: Chuan Zheng 
---
 migration/multifd.c |  6 +
 migration/multifd.h |  5 
 migration/rdma.c| 71 +
 3 files changed, 82 insertions(+)

diff --git a/migration/multifd.c b/migration/multifd.c
index cb1fc01..4820702 100644
--- a/migration/multifd.c
+++ b/migration/multifd.c
@@ -1232,6 +1232,12 @@ MultiFDSetup *multifd_setup_ops_init(void)
 {
 MultiFDSetup *ops;
 
+#ifdef CONFIG_RDMA
+if (migrate_use_rdma()) {
+ops = &multifd_rdma_ops;
+return ops;
+}
+#endif
 ops = &multifd_socket_ops;
 return ops;
 }
diff --git a/migration/multifd.h b/migration/multifd.h
index 1d2dc90..e3ab4b0 100644
--- a/migration/multifd.h
+++ b/migration/multifd.h
@@ -173,6 +173,11 @@ typedef struct {
 void (*recv_channel_setup)(QIOChannel *ioc, MultiFDRecvParams *p);
 } MultiFDSetup;
 
+#ifdef CONFIG_RDMA
+extern MultiFDSetup multifd_rdma_ops;
+#endif
+MultiFDSetup *multifd_setup_ops_init(void);
+
 void multifd_register_ops(int method, MultiFDMethods *ops);
 
 #endif
diff --git a/migration/rdma.c b/migration/rdma.c
index 00eac34..e0ea86d 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -19,6 +19,7 @@
 #include "qemu/cutils.h"
 #include "rdma.h"
 #include "migration.h"
+#include "multifd.h"
 #include "qemu-file.h"
 #include "ram.h"
 #include "qemu-file-channel.h"
@@ -4139,3 +4140,73 @@ err:
 g_free(rdma);
 g_free(rdma_return_path);
 }
+
+static void *multifd_rdma_send_thread(void *opaque)
+{
+MultiFDSendParams *p = opaque;
+
+while (true) {
+WITH_QEMU_LOCK_GUARD(&p->mutex) {
+if (p->quit) {
+break;
+}
+}
+qemu_sem_wait(&p->sem);
+}
+
+WITH_QEMU_LOCK_GUARD(&p->mutex) {
+p->running = false;
+}
+
+return NULL;
+}
+
+static void multifd_rdma_send_channel_setup(MultiFDSendParams *p)
+{
+Error *local_err = NULL;
+
+if (p->quit) {
+error_setg(&local_err, "multifd: send id %d already quit", p->id);
+return ;
+}
+p->running = true;
+
+qemu_thread_create(&p->thread, p->name, multifd_rdma_send_thread, p,
+   QEMU_THREAD_JOINABLE);
+}
+
+static void *multifd_rdma_recv_thread(void *opaque)
+{
+MultiFDRecvParams *p = opaque;
+
+while (true) {
+WITH_QEMU_LOCK_GUARD(&p->mutex) {
+if (p->quit) {
+break;
+}
+}
+qemu_sem_wait(&p->sem_sync);
+}
+
+WITH_QEMU_LOCK_GUARD(&p->mutex) {
+p->running = false;
+}
+
+return NULL;
+}
+
+static void multifd_rdma_recv_channel_setup(QIOChannel *ioc,
+MultiFDRecvParams *p)
+{
+QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc);
+
+p->file = rioc->file;
+return;
+}
+
+MultiFDSetup multifd_rdma_ops = {
+.send_thread = multifd_rdma_send_thread,
+.recv_thread = multifd_rdma_recv_thread,
+.send_channel_setup = multifd_rdma_send_channel_setup,
+.recv_channel_setup = multifd_rdma_recv_channel_setup
+};
-- 
1.8.3.1

[PATCH v4 01/18] migration/rdma: add the 'migrate_rdma_pin_all' function

Signed-off-by: Zhimin Feng 
Signed-off-by: Chuan Zheng 
Reviewed-by: Dr. David Alan Gilbert 
---
 migration/migration.c | 9 +
 migration/migration.h | 1 +
 2 files changed, 10 insertions(+)

diff --git a/migration/migration.c b/migration/migration.c
index 1986cb8..447dfb9 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -2382,6 +2382,15 @@ bool migrate_use_events(void)
 return s->enabled_capabilities[MIGRATION_CAPABILITY_EVENTS];
 }
 
+bool migrate_rdma_pin_all(void)
+{
+MigrationState *s;
+
+s = migrate_get_current();
+
+return s->enabled_capabilities[MIGRATION_CAPABILITY_RDMA_PIN_ALL];
+}
+
 bool migrate_use_multifd(void)
 {
 MigrationState *s;
diff --git a/migration/migration.h b/migration/migration.h
index d096b77..22b36f3 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -316,6 +316,7 @@ bool migrate_ignore_shared(void);
 bool migrate_validate_uuid(void);
 
 bool migrate_auto_converge(void);
+bool migrate_rdma_pin_all(void);
 bool migrate_use_multifd(void);
 bool migrate_pause_before_switchover(void);
 int migrate_multifd_channels(void);
-- 
1.8.3.1

Re: [PATCH] replay: rng-builtin support

2021-02-02 Thread Paolo Bonzini


On 03/02/21 07:00, Pavel Dovgalyuk wrote:

This patch enables using rng-builtin with record/replay
by making the callbacks deterministic.

Signed-off-by: Pavel Dovgalyuk 
---
  backends/rng-builtin.c |3 ++-
  1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/backends/rng-builtin.c b/backends/rng-builtin.c
index f38dff117d..f367eb665c 100644
--- a/backends/rng-builtin.c
+++ b/backends/rng-builtin.c
@@ -10,6 +10,7 @@
  #include "qemu/main-loop.h"
  #include "qemu/guest-random.h"
  #include "qom/object.h"
+#include "sysemu/replay.h"
  
  OBJECT_DECLARE_SIMPLE_TYPE(RngBuiltin, RNG_BUILTIN)
  
@@ -37,7 +38,7 @@ static void rng_builtin_request_entropy(RngBackend *b, RngRequest *req)

  {
  RngBuiltin *s = RNG_BUILTIN(b);
  
-qemu_bh_schedule(s->bh);

+replay_bh_schedule_event(s->bh);
  }
  
  static void rng_builtin_init(Object *obj)




Queued, thanks.

Paolo

[PATCH] replay: rng-builtin support

2021-02-02 Thread Pavel Dovgalyuk

This patch enables using rng-builtin with record/replay
by making the callbacks deterministic.

Signed-off-by: Pavel Dovgalyuk 
---
 backends/rng-builtin.c |3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/backends/rng-builtin.c b/backends/rng-builtin.c
index f38dff117d..f367eb665c 100644
--- a/backends/rng-builtin.c
+++ b/backends/rng-builtin.c
@@ -10,6 +10,7 @@
 #include "qemu/main-loop.h"
 #include "qemu/guest-random.h"
 #include "qom/object.h"
+#include "sysemu/replay.h"
 
 OBJECT_DECLARE_SIMPLE_TYPE(RngBuiltin, RNG_BUILTIN)
 
@@ -37,7 +38,7 @@ static void rng_builtin_request_entropy(RngBackend *b, 
RngRequest *req)
 {
 RngBuiltin *s = RNG_BUILTIN(b);
 
-qemu_bh_schedule(s->bh);
+replay_bh_schedule_event(s->bh);
 }
 
 static void rng_builtin_init(Object *obj)

[Bug 1914353] Re: QEMU: aarch64: :GIC: out-of-bounds access via interrupt ID

2021-02-02 Thread P J P

CVE requested.

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1914353

Title:
  QEMU: aarch64: :GIC: out-of-bounds access via interrupt ID

Status in QEMU:
  New

Bug description:
  Via [qemu-security] list

  +-- On Sun, 31 Jan 2021, Philippe Mathieu-Daudé wrote --+
  | On 1/31/21 11:34 AM, Philippe Mathieu-Daudé wrote:
  | > Per the ARM Generic Interrupt Controller Architecture specification
  | > (document "ARM IHI 0048B.b (ID072613)"), the SGIINTID field is 4 bit,
  | > not 10:
  | >
  | >- Table 4-21 GICD_SGIR bit assignments
  | >
  | >The Interrupt ID of the SGI to forward to the specified CPU
  | >interfaces. The value of this field is the Interrupt ID, in
  | >the range 0-15, for example a value of 0b0011 specifies
  | >Interrupt ID 3.
  | >
  ...
  | > Correct the irq mask to fix an undefined behavior (which eventually
  | > lead to a heap-buffer-overflow, see [Buglink]):
  | >
  | >$ echo 'writel 0x8000f00 0xff4affb0' | qemu-system-aarch64 -M 
virt,accel=qtest -qtest stdio
  | >[I 1612088147.116987] OPENED
  | >  [R +0.278293] writel 0x8000f00 0xff4affb0
  | >  ../hw/intc/arm_gic.c:1498:13: runtime error: index 944 out of bounds for 
type 'uint8_t [16][8]'
  | >  SUMMARY: UndefinedBehaviorSanitizer: undefined-behavior 
../hw/intc/arm_gic.c:1498:13
  | >
  | > Cc: qemu-sta...@nongnu.org
  | > Fixes: 9ee6e8bb853 ("ARMv7 support.")
  | > Buglink: https://bugs.launchpad.net/qemu/+bug/1913916
  | > Buglink: https://bugs.launchpad.net/qemu/+bug/1913917
  ...

  On 210202 1221, Peter Maydell wrote:
  > In both cases the overrun is on the first writel to 0x8000f00,
  > but the fuzzer has for some reason not reported that but instead
  > blundered on until it happens to trigger some other issue that
  > resulted from the memory corruption it induced with the first write.
  >
  ...
  > On the CVE:
  >
  > Since this can affect systems using KVM, this is a security bug for
  > us. However, it only affects an uncommon configuration:
  > you are only vulnerable if you are using "kernel-irqchip=off"
  > (the default is 'on', and turning it off is an odd thing to do).
  >
  > thanks
  > -- PMM
  >

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1914353/+subscriptions

Re: [PATCH v4 01/23] tcg: Introduce target-specific page data for user-only

On 2/2/21 4:40 PM, Richard Henderson wrote:
> Well, here's the thing: this appears to be v3, reposted.
> 
> All of the work I did for v4 has gone missing.  I went to go fix the single 
> use
> of current_cpu, and it wasn't where I expected it to be, and that's when I
> noticed.  I'm grepping blobs now, but I must have made some horrible git 
> error.
>  :-(

It's not quite that bad.  About half of the changes are here.
But e.g. patch 11 had been split into at least 4, and those are all missing.  I
presume all of the r-b that I collected before that point got lost at the same
time.

r~

[Bug 1914353] [NEW] QEMU: aarch64: :GIC: out-of-bounds access via interrupt ID

2021-02-02 Thread P J P

*** This bug is a security vulnerability ***

Public security bug reported:

Via [qemu-security] list

+-- On Sun, 31 Jan 2021, Philippe Mathieu-Daudé wrote --+
| On 1/31/21 11:34 AM, Philippe Mathieu-Daudé wrote:
| > Per the ARM Generic Interrupt Controller Architecture specification
| > (document "ARM IHI 0048B.b (ID072613)"), the SGIINTID field is 4 bit,
| > not 10:
| >
| >- Table 4-21 GICD_SGIR bit assignments
| >
| >The Interrupt ID of the SGI to forward to the specified CPU
| >interfaces. The value of this field is the Interrupt ID, in
| >the range 0-15, for example a value of 0b0011 specifies
| >Interrupt ID 3.
| >
...
| > Correct the irq mask to fix an undefined behavior (which eventually
| > lead to a heap-buffer-overflow, see [Buglink]):
| >
| >$ echo 'writel 0x8000f00 0xff4affb0' | qemu-system-aarch64 -M 
virt,accel=qtest -qtest stdio
| >[I 1612088147.116987] OPENED
| >  [R +0.278293] writel 0x8000f00 0xff4affb0
| >  ../hw/intc/arm_gic.c:1498:13: runtime error: index 944 out of bounds for 
type 'uint8_t [16][8]'
| >  SUMMARY: UndefinedBehaviorSanitizer: undefined-behavior 
../hw/intc/arm_gic.c:1498:13
| >
| > Cc: qemu-sta...@nongnu.org
| > Fixes: 9ee6e8bb853 ("ARMv7 support.")
| > Buglink: https://bugs.launchpad.net/qemu/+bug/1913916
| > Buglink: https://bugs.launchpad.net/qemu/+bug/1913917
...

On 210202 1221, Peter Maydell wrote:
> In both cases the overrun is on the first writel to 0x8000f00,
> but the fuzzer has for some reason not reported that but instead
> blundered on until it happens to trigger some other issue that
> resulted from the memory corruption it induced with the first write.
>
...
> On the CVE:
>
> Since this can affect systems using KVM, this is a security bug for
> us. However, it only affects an uncommon configuration:
> you are only vulnerable if you are using "kernel-irqchip=off"
> (the default is 'on', and turning it off is an odd thing to do).
>
> thanks
> -- PMM
>

** Affects: qemu
 Importance: Undecided
 Status: New


** Tags: cve security

** Information type changed from Private Security to Public Security

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1914353

Title:
  QEMU: aarch64: :GIC: out-of-bounds access via interrupt ID

Status in QEMU:
  New

Bug description:
  Via [qemu-security] list

  +-- On Sun, 31 Jan 2021, Philippe Mathieu-Daudé wrote --+
  | On 1/31/21 11:34 AM, Philippe Mathieu-Daudé wrote:
  | > Per the ARM Generic Interrupt Controller Architecture specification
  | > (document "ARM IHI 0048B.b (ID072613)"), the SGIINTID field is 4 bit,
  | > not 10:
  | >
  | >- Table 4-21 GICD_SGIR bit assignments
  | >
  | >The Interrupt ID of the SGI to forward to the specified CPU
  | >interfaces. The value of this field is the Interrupt ID, in
  | >the range 0-15, for example a value of 0b0011 specifies
  | >Interrupt ID 3.
  | >
  ...
  | > Correct the irq mask to fix an undefined behavior (which eventually
  | > lead to a heap-buffer-overflow, see [Buglink]):
  | >
  | >$ echo 'writel 0x8000f00 0xff4affb0' | qemu-system-aarch64 -M 
virt,accel=qtest -qtest stdio
  | >[I 1612088147.116987] OPENED
  | >  [R +0.278293] writel 0x8000f00 0xff4affb0
  | >  ../hw/intc/arm_gic.c:1498:13: runtime error: index 944 out of bounds for 
type 'uint8_t [16][8]'
  | >  SUMMARY: UndefinedBehaviorSanitizer: undefined-behavior 
../hw/intc/arm_gic.c:1498:13
  | >
  | > Cc: qemu-sta...@nongnu.org
  | > Fixes: 9ee6e8bb853 ("ARMv7 support.")
  | > Buglink: https://bugs.launchpad.net/qemu/+bug/1913916
  | > Buglink: https://bugs.launchpad.net/qemu/+bug/1913917
  ...

  On 210202 1221, Peter Maydell wrote:
  > In both cases the overrun is on the first writel to 0x8000f00,
  > but the fuzzer has for some reason not reported that but instead
  > blundered on until it happens to trigger some other issue that
  > resulted from the memory corruption it induced with the first write.
  >
  ...
  > On the CVE:
  >
  > Since this can affect systems using KVM, this is a security bug for
  > us. However, it only affects an uncommon configuration:
  > you are only vulnerable if you are using "kernel-irqchip=off"
  > (the default is 'on', and turning it off is an odd thing to do).
  >
  > thanks
  > -- PMM
  >

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1914353/+subscriptions

[Bug 1914353] Re: QEMU: aarch64: :GIC: out-of-bounds access via interrupt ID

2021-02-02 Thread P J P

Upstream patch:
  -> https://lists.gnu.org/archive/html/qemu-devel/2021-02/msg00709.html

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1914353

Title:
  QEMU: aarch64: :GIC: out-of-bounds access via interrupt ID

Status in QEMU:
  New

Bug description:
  Via [qemu-security] list

  +-- On Sun, 31 Jan 2021, Philippe Mathieu-Daudé wrote --+
  | On 1/31/21 11:34 AM, Philippe Mathieu-Daudé wrote:
  | > Per the ARM Generic Interrupt Controller Architecture specification
  | > (document "ARM IHI 0048B.b (ID072613)"), the SGIINTID field is 4 bit,
  | > not 10:
  | >
  | >- Table 4-21 GICD_SGIR bit assignments
  | >
  | >The Interrupt ID of the SGI to forward to the specified CPU
  | >interfaces. The value of this field is the Interrupt ID, in
  | >the range 0-15, for example a value of 0b0011 specifies
  | >Interrupt ID 3.
  | >
  ...
  | > Correct the irq mask to fix an undefined behavior (which eventually
  | > lead to a heap-buffer-overflow, see [Buglink]):
  | >
  | >$ echo 'writel 0x8000f00 0xff4affb0' | qemu-system-aarch64 -M 
virt,accel=qtest -qtest stdio
  | >[I 1612088147.116987] OPENED
  | >  [R +0.278293] writel 0x8000f00 0xff4affb0
  | >  ../hw/intc/arm_gic.c:1498:13: runtime error: index 944 out of bounds for 
type 'uint8_t [16][8]'
  | >  SUMMARY: UndefinedBehaviorSanitizer: undefined-behavior 
../hw/intc/arm_gic.c:1498:13
  | >
  | > Cc: qemu-sta...@nongnu.org
  | > Fixes: 9ee6e8bb853 ("ARMv7 support.")
  | > Buglink: https://bugs.launchpad.net/qemu/+bug/1913916
  | > Buglink: https://bugs.launchpad.net/qemu/+bug/1913917
  ...

  On 210202 1221, Peter Maydell wrote:
  > In both cases the overrun is on the first writel to 0x8000f00,
  > but the fuzzer has for some reason not reported that but instead
  > blundered on until it happens to trigger some other issue that
  > resulted from the memory corruption it induced with the first write.
  >
  ...
  > On the CVE:
  >
  > Since this can affect systems using KVM, this is a security bug for
  > us. However, it only affects an uncommon configuration:
  > you are only vulnerable if you are using "kernel-irqchip=off"
  > (the default is 'on', and turning it off is an odd thing to do).
  >
  > thanks
  > -- PMM
  >

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1914353/+subscriptions

Re: [PATCH] cpu-throttle: Remove timer_mod() from cpu_throttle_set()

2021-02-02 Thread Utkarsh Tripathi

PING

From: Utkarsh Tripathi 
Date: Thursday, 31 December 2020 at 6:43 PM
To: qemu-devel@nongnu.org 
Cc: Utkarsh Tripathi , Paolo Bonzini 

Subject: [PATCH] cpu-throttle: Remove timer_mod() from cpu_throttle_set()
During migrations, after each iteration, cpu_throttle_set() is called,
which irrespective of input, re-arms the timer according to value of
new_throttle_pct. This causes cpu_throttle_thread() to be delayed in
getting scheduled and consequently lets guest run for more time than what
the throttle value should allow. This leads to spikes in guest throughput
at high cpu-throttle percentage whenever cpu_throttle_set() is called.

A solution would be not to modify the timer immediately in
cpu_throttle_set(), instead, only modify throttle_percentage so that the
throttle would automatically adjust to the required percentage when
cpu_throttle_timer_tick() is invoked.

Manually tested the patch using following configuration:

Guest:
Centos7 (3.10.0-123.el7.x86_64)
Total Memory - 64GB , CPUs - 16
Tool used - stress (1.0.4)
Workload - stress --vm 32 --vm-bytes 1G --vm-keep

Migration Parameters:
Network Bandwidth - 500MBPS
cpu-throttle-initial - 99

Results:
With timer_mod(): fails to converge, continues indefinitely
Without timer_mod(): converges in 249 sec

Signed-off-by: Utkarsh Tripathi 
---
 softmmu/cpu-throttle.c | 11 +--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/softmmu/cpu-throttle.c b/softmmu/cpu-throttle.c
index 2ec4b8e..8c2144a 100644
--- a/softmmu/cpu-throttle.c
+++ b/softmmu/cpu-throttle.c
@@ -90,14 +90,21 @@ static void cpu_throttle_timer_tick(void *opaque)

 void cpu_throttle_set(int new_throttle_pct)
 {
+/*
+ * boolean to store whether throttle is already active or not,
+ * before modifying throttle_percentage
+ */
+bool throttle_active = cpu_throttle_active();
+
 /* Ensure throttle percentage is within valid range */
 new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
 new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);

 qatomic_set(&throttle_percentage, new_throttle_pct);

-timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
-   CPU_THROTTLE_TIMESLICE_NS);
+if (!throttle_active) {
+cpu_throttle_timer_tick(NULL);
+}
 }

 void cpu_throttle_stop(void)
--
1.8.3.1

[PATCH v4 4/4] target/arm: Set ID_PFR0.DIT to 1 for "max" 32-bit CPU

Enable FEAT_DIT for the "max" 32-bit CPU.

Signed-off-by: Rebecca Cran 
Reviewed-by: Richard Henderson 
---
 target/arm/cpu.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index 40142ac141e5..c98f44624423 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -2197,6 +2197,10 @@ static void arm_max_initfn(Object *obj)
 t = FIELD_DP32(t, ID_MMFR4, CNP, 1); /* TTCNP */
 t = FIELD_DP32(t, ID_MMFR4, XNX, 1); /* TTS2UXN */
 cpu->isar.id_mmfr4 = t;
+
+t = cpu->isar.id_pfr0;
+t = FIELD_DP32(t, ID_PFR0, DIT, 1);
+cpu->isar.id_pfr0 = t;
 }
 #endif
 }
-- 
2.26.2

[PATCH v4 3/4] target/arm: Set ID_AA64PFR0.DIT and ID_PFR0.DIT to 1 for "max" AA64 CPU

Enable FEAT_DIT for the "max" AARCH64 CPU.

Signed-off-by: Rebecca Cran 
Reviewed-by: Richard Henderson 
---
 target/arm/cpu64.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
index 5e851028c592..9a5cfd4fc632 100644
--- a/target/arm/cpu64.c
+++ b/target/arm/cpu64.c
@@ -666,6 +666,7 @@ static void aarch64_max_initfn(Object *obj)
 t = FIELD_DP64(t, ID_AA64PFR0, FP, 1);
 t = FIELD_DP64(t, ID_AA64PFR0, ADVSIMD, 1);
 t = FIELD_DP64(t, ID_AA64PFR0, SEL2, 1);
+t = FIELD_DP64(t, ID_AA64PFR0, DIT, 1);
 cpu->isar.id_aa64pfr0 = t;
 
 t = cpu->isar.id_aa64pfr1;
@@ -715,6 +716,10 @@ static void aarch64_max_initfn(Object *obj)
 u = FIELD_DP32(u, ID_ISAR6, SPECRES, 1);
 cpu->isar.id_isar6 = u;
 
+u = cpu->isar.id_pfr0;
+u = FIELD_DP32(u, ID_PFR0, DIT, 1);
+cpu->isar.id_pfr0 = u;
+
 u = cpu->isar.id_mmfr3;
 u = FIELD_DP32(u, ID_MMFR3, PAN, 2); /* ATS1E1 */
 cpu->isar.id_mmfr3 = u;
-- 
2.26.2

[PATCH v4 2/4] target/arm: Support AA32 DIT by moving PSTATE_SS from cpsr into env->pstate

cpsr has been treated as being the same as spsr, but it isn't.
Since PSTATE_SS isn't in cpsr, remove it and move it into env->pstate.

This allows us to add support for CPSR_DIT, adding helper functions
to merge SPSR_ELx to and from CPSR.

Signed-off-by: Rebecca Cran 
---
 target/arm/helper-a64.c | 32 +---
 target/arm/helper.c | 27 -
 target/arm/op_helper.c  |  9 +-
 3 files changed, 49 insertions(+), 19 deletions(-)

diff --git a/target/arm/helper-a64.c b/target/arm/helper-a64.c
index c426c23d2c4e..be5d3f6e75cb 100644
--- a/target/arm/helper-a64.c
+++ b/target/arm/helper-a64.c
@@ -945,11 +945,31 @@ static int el_from_spsr(uint32_t spsr)
 }
 }
 
+static void cpsr_write_from_spsr_elx(CPUARMState *env,
+ uint32_t val)
+{
+uint32_t mask;
+
+/* Save SPSR_ELx.SS into PSTATE. */
+env->pstate = (env->pstate & ~PSTATE_SS) | (val & PSTATE_SS);
+val &= ~PSTATE_SS;
+
+/* Move DIT to the correct location for CPSR */
+if (val & PSTATE_DIT) {
+val &= ~PSTATE_DIT;
+val |= CPSR_DIT;
+}
+
+mask = aarch32_cpsr_valid_mask(env->features, \
+&env_archcpu(env)->isar);
+cpsr_write(env, val, mask, CPSRWriteRaw);
+}
+
 void HELPER(exception_return)(CPUARMState *env, uint64_t new_pc)
 {
 int cur_el = arm_current_el(env);
 unsigned int spsr_idx = aarch64_banked_spsr_index(cur_el);
-uint32_t mask, spsr = env->banked_spsr[spsr_idx];
+uint32_t spsr = env->banked_spsr[spsr_idx];
 int new_el;
 bool return_to_aa64 = (spsr & PSTATE_nRW) == 0;
 
@@ -998,11 +1018,13 @@ void HELPER(exception_return)(CPUARMState *env, uint64_t 
new_pc)
  * will sort the register banks out for us, and we've already
  * caught all the bad-mode cases in el_from_spsr().
  */
-mask = aarch32_cpsr_valid_mask(env->features, &env_archcpu(env)->isar);
-cpsr_write(env, spsr, mask, CPSRWriteRaw);
+cpsr_write_from_spsr_elx(env, spsr);
 if (!arm_singlestep_active(env)) {
-env->uncached_cpsr &= ~PSTATE_SS;
+env->pstate &= ~PSTATE_SS;
+} else {
+env->pstate |= PSTATE_SS;
 }
+
 aarch64_sync_64_to_32(env);
 
 if (spsr & CPSR_T) {
@@ -1022,6 +1044,8 @@ void HELPER(exception_return)(CPUARMState *env, uint64_t 
new_pc)
 pstate_write(env, spsr);
 if (!arm_singlestep_active(env)) {
 env->pstate &= ~PSTATE_SS;
+} else {
+env->pstate |= PSTATE_SS;
 }
 aarch64_restore_sp(env, new_el);
 helper_rebuild_hflags_a64(env, new_el);
diff --git a/target/arm/helper.c b/target/arm/helper.c
index 0aad6d79dcb1..a31f37e2a257 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -9420,6 +9420,21 @@ void aarch64_sync_64_to_32(CPUARMState *env)
 env->regs[15] = env->pc;
 }
 
+static uint32_t cpsr_read_for_spsr_elx(CPUARMState *env)
+{
+uint32_t ret = cpsr_read(env);
+
+/* Move DIT to the correct location for SPSR_ELx */
+if (ret & CPSR_DIT) {
+ret &= ~CPSR_DIT;
+ret |= PSTATE_DIT;
+}
+/* Merge PSTATE.SS into SPSR_ELx */
+ret |= env->pstate & PSTATE_SS;
+
+return ret;
+}
+
 static void take_aarch32_exception(CPUARMState *env, int new_mode,
uint32_t mask, uint32_t offset,
uint32_t newpc)
@@ -9433,8 +9448,9 @@ static void take_aarch32_exception(CPUARMState *env, int 
new_mode,
  * For exceptions taken to AArch32 we must clear the SS bit in both
  * PSTATE and in the old-state value we save to SPSR_<mode>, so zero it now.
  */
-env->uncached_cpsr &= ~PSTATE_SS;
-env->spsr = cpsr_read(env);
+env->pstate &= ~PSTATE_SS;
+env->spsr = cpsr_read_for_spsr_elx(env);
+
 /* Clear IT bits.  */
 env->condexec_bits = 0;
 /* Switch to the new mode, and to the correct instruction set.  */
@@ -9911,7 +9927,7 @@ static void arm_cpu_do_interrupt_aarch64(CPUState *cs)
 aarch64_save_sp(env, arm_current_el(env));
 env->elr_el[new_el] = env->pc;
 } else {
-old_mode = cpsr_read(env);
+old_mode = cpsr_read_for_spsr_elx(env);
 env->elr_el[new_el] = env->regs[15];
 
 aarch64_sync_32_to_64(env);
@@ -13201,7 +13217,6 @@ void cpu_get_tb_cpu_state(CPUARMState *env, 
target_ulong *pc,
   target_ulong *cs_base, uint32_t *pflags)
 {
 uint32_t flags = env->hflags;
-uint32_t pstate_for_ss;
 
 *cs_base = 0;
 assert_hflags_rebuild_correctly(env);
@@ -13211,7 +13226,6 @@ void cpu_get_tb_cpu_state(CPUARMState *env, 
target_ulong *pc,
 if (cpu_isar_feature(aa64_bti, env_archcpu(env))) {
 flags = FIELD_DP32(flags, TBFLAG_A64, BTYPE, env->btype);
 }
-pstate_for_ss = env->pstate;
 } else {
 *pc = env->regs[15];
 
@@ -13259,7 +13273,6 @@ void cpu_get_tb_cpu_state(CPUARMState *env, 
target_ulong *pc

[PATCH v4 1/4] target/arm: Add support for FEAT_DIT, Data Independent Timing

Add support for FEAT_DIT. DIT (Data Independent Timing) is a required
feature for ARMv8.4. Since virtual machine execution is largely
nondeterministic and TCG is outside of the security domain, it's
implemented as a NOP.

Signed-off-by: Rebecca Cran 
Reviewed-by: Richard Henderson 
---
 target/arm/cpu.h   | 12 +++
 target/arm/helper.c| 22 
 target/arm/internals.h |  6 ++
 target/arm/translate-a64.c | 12 +++
 4 files changed, 52 insertions(+)

diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index d080239863c0..2e5853928474 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -1243,6 +1243,7 @@ void pmu_init(ARMCPU *cpu);
 #define CPSR_IT_2_7 (0xfc00U)
 #define CPSR_GE (0xfU << 16)
 #define CPSR_IL (1U << 20)
+#define CPSR_DIT (1U << 21)
 #define CPSR_PAN (1U << 22)
 #define CPSR_J (1U << 24)
 #define CPSR_IT_0_1 (3U << 25)
@@ -1310,6 +1311,7 @@ void pmu_init(ARMCPU *cpu);
 #define PSTATE_SS (1U << 21)
 #define PSTATE_PAN (1U << 22)
 #define PSTATE_UAO (1U << 23)
+#define PSTATE_DIT (1U << 24)
 #define PSTATE_TCO (1U << 25)
 #define PSTATE_V (1U << 28)
 #define PSTATE_C (1U << 29)
@@ -3876,6 +3878,11 @@ static inline bool isar_feature_aa32_tts2uxn(const 
ARMISARegisters *id)
 return FIELD_EX32(id->id_mmfr4, ID_MMFR4, XNX) != 0;
 }
 
+static inline bool isar_feature_aa32_dit(const ARMISARegisters *id)
+{
+return FIELD_EX32(id->id_pfr0, ID_PFR0, DIT) != 0;
+}
+
 /*
  * 64-bit feature tests via id registers.
  */
@@ -4120,6 +4127,11 @@ static inline bool isar_feature_aa64_tts2uxn(const 
ARMISARegisters *id)
 return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, XNX) != 0;
 }
 
+static inline bool isar_feature_aa64_dit(const ARMISARegisters *id)
+{
+return FIELD_EX64(id->id_aa64pfr0, ID_AA64PFR0, DIT) != 0;
+}
+
 /*
  * Feature tests for "does this exist in either 32-bit or 64-bit?"
  */
diff --git a/target/arm/helper.c b/target/arm/helper.c
index 47e266d7e64f..0aad6d79dcb1 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -4419,6 +4419,24 @@ static const ARMCPRegInfo uao_reginfo = {
 .readfn = aa64_uao_read, .writefn = aa64_uao_write
 };
 
+static uint64_t aa64_dit_read(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+return env->pstate & PSTATE_DIT;
+}
+
+static void aa64_dit_write(CPUARMState *env, const ARMCPRegInfo *ri,
+   uint64_t value)
+{
+env->pstate = (env->pstate & ~PSTATE_DIT) | (value & PSTATE_DIT);
+}
+
+static const ARMCPRegInfo dit_reginfo = {
+.name = "DIT", .state = ARM_CP_STATE_AA64,
+.opc0 = 3, .opc1 = 3, .crn = 4, .crm = 2, .opc2 = 5,
+.type = ARM_CP_NO_RAW, .access = PL0_RW,
+.readfn = aa64_dit_read, .writefn = aa64_dit_write
+};
+
 static CPAccessResult aa64_cacheop_poc_access(CPUARMState *env,
   const ARMCPRegInfo *ri,
   bool isread)
@@ -8212,6 +8230,10 @@ void register_cp_regs_for_features(ARMCPU *cpu)
 define_one_arm_cp_reg(cpu, &uao_reginfo);
 }
 
+if (cpu_isar_feature(aa64_dit, cpu)) {
+define_one_arm_cp_reg(cpu, &dit_reginfo);
+}
+
 if (arm_feature(env, ARM_FEATURE_EL2) && cpu_isar_feature(aa64_vh, cpu)) {
 define_arm_cp_regs(cpu, vhe_reginfo);
 }
diff --git a/target/arm/internals.h b/target/arm/internals.h
index 853fa88fd616..3d11e42d8e1b 100644
--- a/target/arm/internals.h
+++ b/target/arm/internals.h
@@ -1222,6 +1222,9 @@ static inline uint32_t aarch32_cpsr_valid_mask(uint64_t 
features,
 if (isar_feature_aa32_pan(id)) {
 valid |= CPSR_PAN;
 }
+if (isar_feature_aa32_dit(id)) {
+valid |= CPSR_DIT;
+}
 
 return valid;
 }
@@ -1240,6 +1243,9 @@ static inline uint32_t aarch64_pstate_valid_mask(const 
ARMISARegisters *id)
 if (isar_feature_aa64_uao(id)) {
 valid |= PSTATE_UAO;
 }
+if (isar_feature_aa64_dit(id)) {
+valid |= PSTATE_DIT;
+}
 if (isar_feature_aa64_mte(id)) {
 valid |= PSTATE_TCO;
 }
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index ffc060e5d70c..1c4b8d02f3b8 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -1700,6 +1700,18 @@ static void handle_msr_i(DisasContext *s, uint32_t insn,
 tcg_temp_free_i32(t1);
 break;
 
+case 0x1a: /* DIT */
+if (!dc_isar_feature(aa64_dit, s)) {
+goto do_unallocated;
+}
+if (crm & 1) {
+set_pstate_bits(PSTATE_DIT);
+} else {
+clear_pstate_bits(PSTATE_DIT);
+}
+/* There's no need to rebuild hflags because DIT is a nop */
+break;
+
 case 0x1e: /* DAIFSet */
 t1 = tcg_const_i32(crm);
 gen_helper_msr_i_daifset(cpu_env, t1);
-- 
2.26.2

[PATCH v4 0/4] target/arm: Add support for FEAT_DIT, Data Independent Timing

Add support for FEAT_DIT. DIT (Data Independent Timing) is a required
feature for ARMv8.4.

Changes from v3 to v4:

o Fixed AA32 DIT/PSTATE_SS patch following review feedback.

Rebecca Cran (4):
  target/arm: Add support for FEAT_DIT, Data Independent Timing
  target/arm: Support AA32 DIT by moving PSTATE_SS from cpsr into
env->pstate
  target/arm: Set ID_AA64PFR0.DIT and ID_PFR0.DIT to 1 for "max" AA64
CPU
  target/arm: Set ID_PFR0.DIT to 1 for "max" 32-bit CPU

 target/arm/cpu.c   |  4 ++
 target/arm/cpu.h   | 12 +
 target/arm/cpu64.c |  5 ++
 target/arm/helper-a64.c| 32 +++--
 target/arm/helper.c| 49 +---
 target/arm/internals.h |  6 +++
 target/arm/op_helper.c |  9 +---
 target/arm/translate-a64.c | 12 +
 8 files changed, 110 insertions(+), 19 deletions(-)

-- 
2.26.2

Re: [PATCH v8 12/13] confidential guest support: Alter virtio default properties for protected guests

2021-02-02 Thread David Gibson

On Tue, Feb 02, 2021 at 06:06:34PM -0500, Michael S. Tsirkin wrote:
> On Tue, Feb 02, 2021 at 03:13:14PM +1100, David Gibson wrote:
> > The default behaviour for virtio devices is not to use the platform's normal
> > DMA paths, but instead to use the fact that it's running in a hypervisor
> > to directly access guest memory.  That doesn't work if the guest's memory
> > is protected from hypervisor access, such as with AMD's SEV or POWER's PEF.
> > 
> > So, if a confidential guest mechanism is enabled, then apply the
> > iommu_platform=on option so it will go through normal DMA mechanisms.
> > Those will presumably have some way of marking memory as shared with
> > the hypervisor or hardware so that DMA will work.
> > 
> > Signed-off-by: David Gibson 
> > Reviewed-by: Dr. David Alan Gilbert 
> > Reviewed-by: Cornelia Huck 
> > Reviewed-by: Greg Kurz 
> 
> 
> > ---
> >  hw/core/machine.c | 13 +
> >  1 file changed, 13 insertions(+)
> > 
> > diff --git a/hw/core/machine.c b/hw/core/machine.c
> > index 94194ab82d..497949899b 100644
> > --- a/hw/core/machine.c
> > +++ b/hw/core/machine.c
> > @@ -33,6 +33,8 @@
> >  #include "migration/global_state.h"
> >  #include "migration/vmstate.h"
> >  #include "exec/confidential-guest-support.h"
> > +#include "hw/virtio/virtio.h"
> > +#include "hw/virtio/virtio-pci.h"
> >  
> >  GlobalProperty hw_compat_5_2[] = {};
> >  const size_t hw_compat_5_2_len = G_N_ELEMENTS(hw_compat_5_2);
> > @@ -1196,6 +1198,17 @@ void machine_run_board_init(MachineState *machine)
> >   * areas.
> >   */
> >  machine_set_mem_merge(OBJECT(machine), false, &error_abort);
> > +
> > +/*
> > + * Virtio devices can't count on directly accessing guest
> > + * memory, so they need iommu_platform=on to use normal DMA
> > + * mechanisms.  That requires also disabling legacy virtio
> > + * support for those virtio pci devices which allow it.
> > + */
> > +object_register_sugar_prop(TYPE_VIRTIO_PCI, "disable-legacy",
> > +   "on", true);
> > +object_register_sugar_prop(TYPE_VIRTIO_DEVICE, "iommu_platform",
> > +   "on", false);
> 
> So overriding a boolean property always poses a problem:
> if user does set iommu_platform=off we are ignoring this
> silently.

No, we don't.  That's why this is register_sugar_prop() rather than an
outright set_prop().  An explicitly given option will take precedence.

> Can we change iommu_platform to on/off/auto? Then we can
> change how auto behaves.

I've never had a satisfactory explanation of what the semantics of
"auto" need to be.

> 
> Bonus points for adding "access_platform" and making it
> a synonym of iommu_platform.
> 
> >  }
> >  
> >  machine_class->init(machine);
> 

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature

[Bug 1906156] Re: Host OS Reboot Required, for Guest kext to Load (Fully)

2021-02-02 Thread Launchpad Bug Tracker

[Expired for QEMU because there has been no activity for 60 days.]

** Changed in: qemu
   Status: Incomplete => Expired

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1906156

Title:
  Host OS Reboot Required, for Guest kext to Load (Fully)

Status in QEMU:
  Expired

Bug description:
  Hi,

  Finding this one a bit odd, but I am loading a driver (kext) in a
  macOS guest ... and it works, on the first VM (domain) startup after a
  full / clean host OS boot (or reboot). However, if I even reboot the
  guest OS, then the driver load fails => can be "corrected" by a full
  host OS reboot (which seems very extreme).

  Is this a known issue, and/or is there a workaround?

  FYI, running,
  QEMU emulator version 5.0.0 (Debian 1:5.0-5ubuntu9.1)
  Copyright (c) 2003-2020 Fabrice Bellard and the QEMU Project developers

  This is for a macOS guest, on a Linux host.

  Thanks!

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1906156/+subscriptions

ARM Snapshots Not Backwards-Compatible

2021-02-02 Thread Aaron Lindsay

Hello,

I'm attempting to restore an AArch64 snapshot taken on QEMU 4.1.0 on
QEMU 5.2.0, using system mode. My previous impression, possibly from
https://wiki.qemu.org/Features/Migration/Troubleshooting#Basics was that
this ought to work:

> Note that QEMU supports migrating forward between QEMU versions

Note that I'm using qemu-system-aarch64 with -loadvm.

However, I've run into several issues I thought I should report. The
first of them was that this commit changed the address of CBAR, which
resulted in a mismatch of the register IDs in `cpu_post_load` in
target/arm/machine.c:
https://patchwork.kernel.org/project/qemu-devel/patch/20190927144249.2-2-peter.mayd...@linaro.org/

The second was that several system registers have changed which bits are
allowed to be written in different circumstances, seemingly as a result
of a combination of bugfixes and implementation of additional behavior.
These hit errors detected in `write_list_to_cpustate` in
target/arm/helper.c.

The third is that the meanings of the bits in env->features (as defined by
`enum arm_features` in target/arm/cpu.h) have shifted. For example,
ARM_FEATURE_PXN, ARM_FEATURE_CRC, ARM_FEATURE_VFP, ARM_FEATURE_VFP3,
ARM_FEATURE_VFP4 have all been removed and ARM_FEATURE_V8_1M has been
added since 4.1.0. Heck, even I have added a field there in the past.
Unfortunately, these additions/removals mean that when env->features is
saved on one version and restored on another the bits can mean different
things. Notably, the removal of the *VFP features means that a snapshot
of a CPU reporting it supports ARM_FEATURE_VFP3 on 4.1.0 thinks it's now
ARM_FEATURE_M on 5.2.0!

My guess, given the numerous issues and the additional complexity
required to properly implement backwards-compatible snapshotting, is
that this is not a primary goal. However, if it is a goal, what steps
can/should we take to support it more thoroughly?

Thanks!

-Aaron

p.s. Now for an admission: the snapshots I'm testing with were
originally taken with `-cpu max`. This was unintentional, and I
understand if the response is that I can't expect `-cpu max` checkpoints
to work across QEMU versions... but I also don't think that all of these
issues can be blamed on that (notably CBAR and env->features).

Re: [PATCH v4 01/23] tcg: Introduce target-specific page data for user-only

On 2/2/21 4:29 AM, Peter Maydell wrote:
> On Thu, 28 Jan 2021 at 22:41, Richard Henderson
>  wrote:
>>
>> This data can be allocated by page_alloc_target_data() and
>> released by page_set_flags(start, end, prot | PAGE_RESET).
>>
>> This data will be used to hold tag memory for AArch64 MTE.
>>
>> Signed-off-by: Richard Henderson 
>> ---
>> v3: Add doc comments; tweak alloc so that the !PAGE_VALID case is clear.
>> ---
>>  include/exec/cpu-all.h| 42 +--
>>  accel/tcg/translate-all.c | 28 ++
>>  linux-user/mmap.c |  4 +++-
>>  linux-user/syscall.c  |  4 ++--
>>  4 files changed, 69 insertions(+), 9 deletions(-)
> 
> I reviewed this (and some of the other patches) in v3, but
> you didn't pick up the tags :-(
> 
> Here it is again:
> Reviewed-by: Peter Maydell 

Well, here's the thing: this appears to be v3, reposted.

All of the work I did for v4 has gone missing.  I went to go fix the single use
of current_cpu, and it wasn't where I expected it to be, and that's when I
noticed.  I'm grepping blobs now, but I must have made some horrible git error.
 :-(


r~

[PATCH] blockjob: Fix crash with IOthread when block commit after snapshot

2021-02-02 Thread 08005325

From: Michael Qiu 

v5: reformat the commit log with the backtrace of the main thread.
Add a boolean variable so that the main thread can re-acquire
aio_context on the success path.

v4: rebase to latest code

v3: reformat the commit log, remove duplicate content

Currently, if the guest has workloads, the IO thread will acquire the
aio_context lock before doing io_submit, which leads to a segmentation fault
when doing a block commit after a snapshot, as shown below:

Program received signal SIGSEGV, Segmentation fault.

[Switching to Thread 0x7f7c7d91f700 (LWP 99907)]
0x5576d0f65aab in bdrv_mirror_top_pwritev at ../block/mirror.c:1437
1437../block/mirror.c: No such file or directory.
(gdb) p s->job
$17 = (MirrorBlockJob *) 0x0
(gdb) p s->stop
$18 = false

Call trace of IO thread:
0  0x5576d0f65aab in bdrv_mirror_top_pwritev at ../block/mirror.c:1437
1  0x5576d0f7f3ab in bdrv_driver_pwritev at ../block/io.c:1174
2  0x5576d0f8139d in bdrv_aligned_pwritev at ../block/io.c:1988
3  0x5576d0f81b65 in bdrv_co_pwritev_part at ../block/io.c:2156
4  0x5576d0f8e6b7 in blk_do_pwritev_part at ../block/block-backend.c:1260
5  0x5576d0f8e84d in blk_aio_write_entry at ../block/block-backend.c:1476
...

Switch to qemu main thread:
0  0x7f903be704ed in __lll_lock_wait at
/lib/../lib64/libpthread.so.0
1  0x7f903be6bde6 in _L_lock_941 at /lib/../lib64/libpthread.so.0
2  0x7f903be6bcdf in pthread_mutex_lock at
/lib/../lib64/libpthread.so.0
3  0x564b21456889 in qemu_mutex_lock_impl at
../util/qemu-thread-posix.c:79
4  0x564b213af8a5 in block_job_add_bdrv at ../blockjob.c:224
5  0x564b213b00ad in block_job_create at ../blockjob.c:440
6  0x564b21357c0a in mirror_start_job at ../block/mirror.c:1622
7  0x564b2135a9af in commit_active_start at ../block/mirror.c:1867
8  0x564b2133d132 in qmp_block_commit at ../blockdev.c:2768
9  0x564b2141fef3 in qmp_marshal_block_commit at
qapi/qapi-commands-block-core.c:346
10 0x564b214503c9 in do_qmp_dispatch_bh at
../qapi/qmp-dispatch.c:110
11 0x564b21451996 in aio_bh_poll at ../util/async.c:164
12 0x564b2146018e in aio_dispatch at ../util/aio-posix.c:381
13 0x564b2145187e in aio_ctx_dispatch at ../util/async.c:306
14 0x7f9040239049 in g_main_context_dispatch at
/lib/../lib64/libglib-2.0.so.0
15 0x564b21447368 in main_loop_wait at ../util/main-loop.c:232
16 0x564b21447368 in main_loop_wait at ../util/main-loop.c:255
17 0x564b21447368 in main_loop_wait at ../util/main-loop.c:531
18 0x564b212304e1 in qemu_main_loop at ../softmmu/runstate.c:721
19 0x564b20f7975e in main at ../softmmu/main.c:50

In the IO thread, when bdrv_mirror_top_pwritev() runs, the job is NULL and the
stop field is false. This means the MirrorBDSOpaque "s" object has not been
initialized yet. This object is initialized by block_job_create(), but the
initialization process is stuck acquiring the lock.

In this situation, the IO thread reaches bdrv_mirror_top_pwritev(), which means
that the mirror-top node is already inserted into the block graph, but its
bs->opaque->job is not initialized.

The root cause is that the qemu main thread does a release/acquire while
holding the lock; at the same time, the IO thread gets the lock after the
release stage, and the crash occurs.

Actually, in this situation, job->job.aio_context will not be equal to
qemu_get_aio_context(), and will be the same as bs->aio_context;
thus, there is no need to release the lock, because bdrv_root_attach_child()
will not change the context.

This patch fixes this issue.

Fixes: 132ada80 "block: Adjust AioContexts when attaching nodes"

Signed-off-by: Michael Qiu 
---
 blockjob.c | 10 --
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/blockjob.c b/blockjob.c
index db3a21699c..d9dca36f65 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -212,15 +212,21 @@ int block_job_add_bdrv(BlockJob *job, const char *name, 
BlockDriverState *bs,
uint64_t perm, uint64_t shared_perm, Error **errp)
 {
 BdrvChild *c;
+bool need_context_ops;
 
 bdrv_ref(bs);
-if (job->job.aio_context != qemu_get_aio_context()) {
+
+need_context_ops = bdrv_get_aio_context(bs) != job->job.aio_context;
+
+if (need_context_ops &&
+job->job.aio_context != qemu_get_aio_context()) {
 aio_context_release(job->job.aio_context);
 }
 c = bdrv_root_attach_child(bs, name, &child_job, 0,
job->job.aio_context, perm, shared_perm, job,
errp);
-if (job->job.aio_context != qemu_get_aio_context()) {
+if (need_context_ops &&
+job->job.aio_context != qemu_get_aio_context()) {
 aio_context_acquire(job->job.aio_context);
 }
 if (c == NULL) {
-- 
2.22.0

Re: [PULL 00/24] tcg patch queue

2021-02-02 Thread no-reply

Patchew URL: 
https://patchew.org/QEMU/20210203021550.375058-1-richard.hender...@linaro.org/



Hi,

This series seems to have some coding style problems. See output below for
more information:

Type: series
Message-id: 20210203021550.375058-1-richard.hender...@linaro.org
Subject: [PULL 00/24] tcg patch queue

=== TEST SCRIPT BEGIN ===
#!/bin/bash
git rev-parse base > /dev/null || exit 0
git config --local diff.renamelimit 0
git config --local diff.renames True
git config --local diff.algorithm histogram
./scripts/checkpatch.pl --mailback base..
=== TEST SCRIPT END ===

Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384
From https://github.com/patchew-project/qemu
 - [tag update]  patchew/20210201080348.438095-1-f4...@amsat.org -> 
patchew/20210201080348.438095-1-f4...@amsat.org
 * [new tag] 
patchew/20210203021550.375058-1-richard.hender...@linaro.org -> 
patchew/20210203021550.375058-1-richard.hender...@linaro.org
Switched to a new branch 'test'
7f7ed81 tcg: Remove TCG_TARGET_CON_SET_H
bb97c8f tcg/tci: Split out constraint sets to tcg-target-con-set.h
902c769 tcg/sparc: Split out constraint sets to tcg-target-con-set.h
b8c72bc tcg/s390: Split out constraint sets to tcg-target-con-set.h
9ee5700 tcg/riscv: Split out constraint sets to tcg-target-con-set.h
1740539 tcg/ppc: Split out constraint sets to tcg-target-con-set.h
ab51584 tcg/mips: Split out constraint sets to tcg-target-con-set.h
1d7748d tcg/arm: Split out constraint sets to tcg-target-con-set.h
6341650 tcg/aarch64: Split out constraint sets to tcg-target-con-set.h
169b93c tcg/i386: Split out constraint sets to tcg-target-con-set.h
6151c6c tcg: Remove TCG_TARGET_CON_STR_H
5378f1b tcg/sparc: Split out target constraints to tcg-target-con-str.h
0ac8068 tcg/s390: Split out target constraints to tcg-target-con-str.h
a225412 tcg/riscv: Split out target constraints to tcg-target-con-str.h
80edabe tcg/mips: Split out target constraints to tcg-target-con-str.h
0c5de83 tcg/tci: Split out target constraints to tcg-target-con-str.h
ff92f04 tcg/ppc: Split out target constraints to tcg-target-con-str.h
a625743 tcg/aarch64: Split out target constraints to tcg-target-con-str.h
ca53a0a tcg/arm: Split out target constraints to tcg-target-con-str.h
1163432 tcg/i386: Split out target constraints to tcg-target-con-str.h
5b0a72d tcg/i386: Tidy register constraint definitions
d4aa12b tcg/i386: Move constraint type check to tcg_target_const_match
1335fa6 tcg/tci: Remove TCG_TARGET_HAS_* ifdefs
17e08f6 tcg/tci: Drop L and S constraints

=== OUTPUT BEGIN ===
1/24 Checking commit 17e08f6cd41e (tcg/tci: Drop L and S constraints)
2/24 Checking commit 1335fa6eef88 (tcg/tci: Remove TCG_TARGET_HAS_* ifdefs)
3/24 Checking commit d4aa12bb1bca (tcg/i386: Move constraint type check to 
tcg_target_const_match)
4/24 Checking commit 5b0a72d242f6 (tcg/i386: Tidy register constraint 
definitions)
5/24 Checking commit 1163432e5dee (tcg/i386: Split out target constraints to 
tcg-target-con-str.h)
WARNING: added, moved or deleted file(s), does MAINTAINERS need updating?
#23: 
new file mode 100644

ERROR: Macros with multiple statements should be enclosed in a do - while loop
#192: FILE: tcg/tcg.c:2471:
+#define CONST(CASE, MASK) \
+case CASE: def->args_ct[i].ct |= MASK; ct_str++; break;

ERROR: trailing statements should be on next line
#193: FILE: tcg/tcg.c:2472:
+case CASE: def->args_ct[i].ct |= MASK; ct_str++; break;

ERROR: Macros with multiple statements should be enclosed in a do - while loop
#194: FILE: tcg/tcg.c:2473:
+#define REGS(CASE, MASK) \
+case CASE: def->args_ct[i].regs |= MASK; ct_str++; break;

ERROR: trailing statements should be on next line
#195: FILE: tcg/tcg.c:2474:
+case CASE: def->args_ct[i].regs |= MASK; ct_str++; break;

total: 4 errors, 1 warnings, 175 lines checked

Patch 5/24 has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.

6/24 Checking commit ca53a0a97914 (tcg/arm: Split out target constraints to 
tcg-target-con-str.h)
WARNING: added, moved or deleted file(s), does MAINTAINERS need updating?
#17: 
new file mode 100644

total: 0 errors, 1 warnings, 111 lines checked

Patch 6/24 has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.
7/24 Checking commit a62574360c28 (tcg/aarch64: Split out target constraints to 
tcg-target-con-str.h)
WARNING: added, moved or deleted file(s), does MAINTAINERS need updating?
#17: 
new file mode 100644

total: 0 errors, 1 warnings, 89 lines checked

Patch 7/24 has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.
8/24 Checking commit ff92f04f845a (tcg/ppc: Split out target constraints to 
tcg-target-con-str.h)
WARNING: added, moved or deleted file(s), does MAINTAINERS need updating?
#17: 
new file mode 100644

total: 0 errors, 1 warnings, 121

[PULL 23/24] tcg/tci: Split out constraint sets to tcg-target-con-set.h

This requires finishing the conversion to tcg_target_op_def.

Reviewed-by: Peter Maydell 
Signed-off-by: Richard Henderson 
---
 tcg/tci/tcg-target-con-set.h |  25 
 tcg/tci/tcg-target.h |   2 +
 tcg/tci/tcg-target.c.inc | 279 +--
 3 files changed, 161 insertions(+), 145 deletions(-)
 create mode 100644 tcg/tci/tcg-target-con-set.h

diff --git a/tcg/tci/tcg-target-con-set.h b/tcg/tci/tcg-target-con-set.h
new file mode 100644
index 00..38e82f7535
--- /dev/null
+++ b/tcg/tci/tcg-target-con-set.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * TCI target-specific constraint sets.
+ * Copyright (c) 2021 Linaro
+ */
+
+/*
+ * C_On_Im(...) defines a constraint set with <n> outputs and <m> inputs.
+ * Each operand should be a sequence of constraint letters as defined by
+ * tcg-target-con-str.h; the constraint combination is inclusive or.
+ */
+C_O0_I2(r, r)
+C_O0_I2(r, ri)
+C_O0_I3(r, r, r)
+C_O0_I4(r, r, ri, ri)
+C_O0_I4(r, r, r, r)
+C_O1_I1(r, r)
+C_O1_I2(r, 0, r)
+C_O1_I2(r, ri, ri)
+C_O1_I2(r, r, r)
+C_O1_I2(r, r, ri)
+C_O1_I4(r, r, r, ri, ri)
+C_O2_I1(r, r, r)
+C_O2_I2(r, r, r, r)
+C_O2_I4(r, r, r, r, r, r)
diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
index bb784e018e..1efd8c4fb0 100644
--- a/tcg/tci/tcg-target.h
+++ b/tcg/tci/tcg-target.h
@@ -207,4 +207,6 @@ static inline void tb_target_set_jmp_target(uintptr_t 
tc_ptr, uintptr_t jmp_rx,
 /* no need to flush icache explicitly */
 }
 
+#define TCG_TARGET_CON_SET_H
+
 #endif /* TCG_TARGET_H */
diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
index 493bbf1e39..f0f6b13112 100644
--- a/tcg/tci/tcg-target.c.inc
+++ b/tcg/tci/tcg-target.c.inc
@@ -37,154 +37,143 @@
 /* Bitfield n...m (in 32 bit value). */
 #define BITS(n, m) (((0xU << (31 - n)) >> (31 - n + m)) << m)
 
-/* Macros used in tcg_target_op_defs. */
-#define R   "r"
-#define RI  "ri"
-#if TCG_TARGET_REG_BITS == 32
-# define R64"r", "r"
-#else
-# define R64"r"
-#endif
-#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
-# define L  "r", "r"
-# define S  "r", "r"
-#else
-# define L  "r"
-# define S  "r"
-#endif
-
-/* TODO: documentation. */
-static const TCGTargetOpDef tcg_target_op_defs[] = {
-{ INDEX_op_exit_tb, { NULL } },
-{ INDEX_op_goto_tb, { NULL } },
-{ INDEX_op_br, { NULL } },
-
-{ INDEX_op_ld8u_i32, { R, R } },
-{ INDEX_op_ld8s_i32, { R, R } },
-{ INDEX_op_ld16u_i32, { R, R } },
-{ INDEX_op_ld16s_i32, { R, R } },
-{ INDEX_op_ld_i32, { R, R } },
-{ INDEX_op_st8_i32, { R, R } },
-{ INDEX_op_st16_i32, { R, R } },
-{ INDEX_op_st_i32, { R, R } },
-
-{ INDEX_op_add_i32, { R, RI, RI } },
-{ INDEX_op_sub_i32, { R, RI, RI } },
-{ INDEX_op_mul_i32, { R, RI, RI } },
-{ INDEX_op_div_i32, { R, R, R } },
-{ INDEX_op_divu_i32, { R, R, R } },
-{ INDEX_op_rem_i32, { R, R, R } },
-{ INDEX_op_remu_i32, { R, R, R } },
-/* TODO: Does R, RI, RI result in faster code than R, R, RI?
-   If both operands are constants, we can optimize. */
-{ INDEX_op_and_i32, { R, RI, RI } },
-{ INDEX_op_andc_i32, { R, RI, RI } },
-{ INDEX_op_eqv_i32, { R, RI, RI } },
-{ INDEX_op_nand_i32, { R, RI, RI } },
-{ INDEX_op_nor_i32, { R, RI, RI } },
-{ INDEX_op_or_i32, { R, RI, RI } },
-{ INDEX_op_orc_i32, { R, RI, RI } },
-{ INDEX_op_xor_i32, { R, RI, RI } },
-{ INDEX_op_shl_i32, { R, RI, RI } },
-{ INDEX_op_shr_i32, { R, RI, RI } },
-{ INDEX_op_sar_i32, { R, RI, RI } },
-{ INDEX_op_rotl_i32, { R, RI, RI } },
-{ INDEX_op_rotr_i32, { R, RI, RI } },
-{ INDEX_op_deposit_i32, { R, "0", R } },
-
-{ INDEX_op_brcond_i32, { R, RI } },
-
-{ INDEX_op_setcond_i32, { R, R, RI } },
-{ INDEX_op_setcond_i64, { R, R, RI } },
-
-/* TODO: Support R, R, R, R, RI, RI? Will it be faster? */
-{ INDEX_op_add2_i32, { R, R, R, R, R, R } },
-{ INDEX_op_sub2_i32, { R, R, R, R, R, R } },
-{ INDEX_op_brcond2_i32, { R, R, RI, RI } },
-{ INDEX_op_mulu2_i32, { R, R, R, R } },
-{ INDEX_op_setcond2_i32, { R, R, R, RI, RI } },
-
-{ INDEX_op_not_i32, { R, R } },
-{ INDEX_op_neg_i32, { R, R } },
-
-{ INDEX_op_ld8u_i64, { R, R } },
-{ INDEX_op_ld8s_i64, { R, R } },
-{ INDEX_op_ld16u_i64, { R, R } },
-{ INDEX_op_ld16s_i64, { R, R } },
-{ INDEX_op_ld32u_i64, { R, R } },
-{ INDEX_op_ld32s_i64, { R, R } },
-{ INDEX_op_ld_i64, { R, R } },
-
-{ INDEX_op_st8_i64, { R, R } },
-{ INDEX_op_st16_i64, { R, R } },
-{ INDEX_op_st32_i64, { R, R } },
-{ INDEX_op_st_i64, { R, R } },
-
-{ INDEX_op_add_i64, { R, RI, RI } },
-{ INDEX_op_sub_i64, { R, RI, RI } },
-{ INDEX_op_mul_i64, { R, RI, RI } },
-{ INDEX_op_div_i64, { R, R, R } },
-{ INDEX_op_divu_i64, { R, R, R } },
-{ INDEX_op_rem_i64, { R, R, R } },
-{ INDEX_op_remu_i64, { R, R, R } },
-{ INDEX_op_and_i64, { R, RI, RI } },
-{ INDEX_op_andc_i64, { R, RI, RI } },

[PULL 24/24] tcg: Remove TCG_TARGET_CON_SET_H

All backends have now been converted to tcg-target-con-set.h,
so we can remove the fallback code.

Reviewed-by: Peter Maydell 
Reviewed-by: Alistair Francis 
Signed-off-by: Richard Henderson 
---
 tcg/aarch64/tcg-target.h |  1 -
 tcg/arm/tcg-target.h |  1 -
 tcg/i386/tcg-target.h|  1 -
 tcg/mips/tcg-target.h|  1 -
 tcg/ppc/tcg-target.h |  1 -
 tcg/riscv/tcg-target.h   |  1 -
 tcg/s390/tcg-target.h|  1 -
 tcg/sparc/tcg-target.h   |  1 -
 tcg/tci/tcg-target.h |  2 --
 tcg/tcg.c| 12 
 10 files changed, 22 deletions(-)

diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
index 200e9b5e0e..5ec30dba25 100644
--- a/tcg/aarch64/tcg-target.h
+++ b/tcg/aarch64/tcg-target.h
@@ -155,6 +155,5 @@ void tb_target_set_jmp_target(uintptr_t, uintptr_t, 
uintptr_t, uintptr_t);
 #define TCG_TARGET_NEED_LDST_LABELS
 #endif
 #define TCG_TARGET_NEED_POOL_LABELS
-#define TCG_TARGET_CON_SET_H
 
 #endif /* AARCH64_TCG_TARGET_H */
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
index 4d201b1216..8d1fee6327 100644
--- a/tcg/arm/tcg-target.h
+++ b/tcg/arm/tcg-target.h
@@ -142,6 +142,5 @@ void tb_target_set_jmp_target(uintptr_t, uintptr_t, 
uintptr_t, uintptr_t);
 #define TCG_TARGET_NEED_LDST_LABELS
 #endif
 #define TCG_TARGET_NEED_POOL_LABELS
-#define TCG_TARGET_CON_SET_H
 
 #endif
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index 48a6f2a336..b693d3692d 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -235,6 +235,5 @@ static inline void tb_target_set_jmp_target(uintptr_t 
tc_ptr, uintptr_t jmp_rx,
 #define TCG_TARGET_NEED_LDST_LABELS
 #endif
 #define TCG_TARGET_NEED_POOL_LABELS
-#define TCG_TARGET_CON_SET_H
 
 #endif
diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
index e520a9d6e3..c2c32fb38f 100644
--- a/tcg/mips/tcg-target.h
+++ b/tcg/mips/tcg-target.h
@@ -207,6 +207,5 @@ void tb_target_set_jmp_target(uintptr_t, uintptr_t, 
uintptr_t, uintptr_t);
 #ifdef CONFIG_SOFTMMU
 #define TCG_TARGET_NEED_LDST_LABELS
 #endif
-#define TCG_TARGET_CON_SET_H
 
 #endif
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index 551f8d0fc9..d1339afc66 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -185,6 +185,5 @@ void tb_target_set_jmp_target(uintptr_t, uintptr_t, 
uintptr_t, uintptr_t);
 #define TCG_TARGET_NEED_LDST_LABELS
 #endif
 #define TCG_TARGET_NEED_POOL_LABELS
-#define TCG_TARGET_CON_SET_H
 
 #endif
diff --git a/tcg/riscv/tcg-target.h b/tcg/riscv/tcg-target.h
index a998b951e4..727c8df418 100644
--- a/tcg/riscv/tcg-target.h
+++ b/tcg/riscv/tcg-target.h
@@ -171,6 +171,5 @@ void tb_target_set_jmp_target(uintptr_t, uintptr_t, 
uintptr_t, uintptr_t);
 #define TCG_TARGET_NEED_POOL_LABELS
 
 #define TCG_TARGET_HAS_MEMORY_BSWAP 0
-#define TCG_TARGET_CON_SET_H
 
 #endif
diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h
index 7aafd25a46..641464eea4 100644
--- a/tcg/s390/tcg-target.h
+++ b/tcg/s390/tcg-target.h
@@ -159,6 +159,5 @@ static inline void tb_target_set_jmp_target(uintptr_t 
tc_ptr, uintptr_t jmp_rx,
 #define TCG_TARGET_NEED_LDST_LABELS
 #endif
 #define TCG_TARGET_NEED_POOL_LABELS
-#define TCG_TARGET_CON_SET_H
 
 #endif
diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h
index f50e8d50ee..f66f5d07dc 100644
--- a/tcg/sparc/tcg-target.h
+++ b/tcg/sparc/tcg-target.h
@@ -168,6 +168,5 @@ extern bool use_vis3_instructions;
 void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
 
 #define TCG_TARGET_NEED_POOL_LABELS
-#define TCG_TARGET_CON_SET_H
 
 #endif
diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
index 1efd8c4fb0..bb784e018e 100644
--- a/tcg/tci/tcg-target.h
+++ b/tcg/tci/tcg-target.h
@@ -207,6 +207,4 @@ static inline void tb_target_set_jmp_target(uintptr_t 
tc_ptr, uintptr_t jmp_rx,
 /* no need to flush icache explicitly */
 }
 
-#define TCG_TARGET_CON_SET_H
-
 #endif /* TCG_TARGET_H */
diff --git a/tcg/tcg.c b/tcg/tcg.c
index df9f32763e..63a12b197b 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -69,9 +69,6 @@
 /* Forward declarations for functions declared in tcg-target.c.inc and
used here. */
 static void tcg_target_init(TCGContext *s);
-#ifndef TCG_TARGET_CON_SET_H
-static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode);
-#endif
 static void tcg_target_qemu_prologue(TCGContext *s);
 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
 intptr_t value, intptr_t addend);
@@ -349,7 +346,6 @@ static void set_jmp_reset_offset(TCGContext *s, int which)
 s->tb_jmp_reset_offset[which] = tcg_current_code_size(s);
 }
 
-#ifdef TCG_TARGET_CON_SET_H
 #define C_PFX1(P, A)P##A
 #define C_PFX2(P, A, B) P##A##_##B
 #define C_PFX3(P, A, B, C)  P##A##_##B##_##C
@@ -453,8 +449,6 @@ static const TCGTargetOpDef constraint_sets[] = {
 #define C_O2_I3(O1, O2, I1, I2, I3) C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, 
I4)

[PULL 21/24] tcg/s390: Split out constraint sets to tcg-target-con-set.h

Reviewed-by: Peter Maydell 
Signed-off-by: Richard Henderson 
---
 tcg/s390/tcg-target-con-set.h |  29 
 tcg/s390/tcg-target.h |   1 +
 tcg/s390/tcg-target.c.inc | 121 ++
 3 files changed, 81 insertions(+), 70 deletions(-)
 create mode 100644 tcg/s390/tcg-target-con-set.h

diff --git a/tcg/s390/tcg-target-con-set.h b/tcg/s390/tcg-target-con-set.h
new file mode 100644
index 0000000000..31985e4903
--- /dev/null
+++ b/tcg/s390/tcg-target-con-set.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Define S390 target-specific constraint sets.
+ * Copyright (c) 2021 Linaro
+ */
+
+/*
+ * C_On_Im(...) defines a constraint set with <n> outputs and <m> inputs.
+ * Each operand should be a sequence of constraint letters as defined by
+ * tcg-target-con-str.h; the constraint combination is inclusive or.
+ */
+C_O0_I1(r)
+C_O0_I2(L, L)
+C_O0_I2(r, r)
+C_O0_I2(r, ri)
+C_O1_I1(r, L)
+C_O1_I1(r, r)
+C_O1_I2(r, 0, ri)
+C_O1_I2(r, 0, rI)
+C_O1_I2(r, 0, rJ)
+C_O1_I2(r, r, ri)
+C_O1_I2(r, rZ, r)
+C_O1_I4(r, r, ri, r, 0)
+C_O1_I4(r, r, ri, rI, 0)
+C_O2_I2(b, a, 0, r)
+C_O2_I3(b, a, 0, 1, r)
+C_O2_I4(r, r, 0, 1, rA, r)
+C_O2_I4(r, r, 0, 1, ri, r)
+C_O2_I4(r, r, 0, 1, r, r)
diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h
index 641464eea4..7aafd25a46 100644
--- a/tcg/s390/tcg-target.h
+++ b/tcg/s390/tcg-target.h
@@ -159,5 +159,6 @@ static inline void tb_target_set_jmp_target(uintptr_t 
tc_ptr, uintptr_t jmp_rx,
 #define TCG_TARGET_NEED_LDST_LABELS
 #endif
 #define TCG_TARGET_NEED_POOL_LABELS
+#define TCG_TARGET_CON_SET_H
 
 #endif
diff --git a/tcg/s390/tcg-target.c.inc b/tcg/s390/tcg-target.c.inc
index 3fec7fec5f..b67470137c 100644
--- a/tcg/s390/tcg-target.c.inc
+++ b/tcg/s390/tcg-target.c.inc
@@ -2274,27 +2274,11 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode 
opc,
 }
 }
 
-static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
+static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
 {
-static const TCGTargetOpDef r = { .args_ct_str = { "r" } };
-static const TCGTargetOpDef r_r = { .args_ct_str = { "r", "r" } };
-static const TCGTargetOpDef r_L = { .args_ct_str = { "r", "L" } };
-static const TCGTargetOpDef L_L = { .args_ct_str = { "L", "L" } };
-static const TCGTargetOpDef r_ri = { .args_ct_str = { "r", "ri" } };
-static const TCGTargetOpDef r_r_ri = { .args_ct_str = { "r", "r", "ri" } };
-static const TCGTargetOpDef r_0_ri = { .args_ct_str = { "r", "0", "ri" } };
-static const TCGTargetOpDef r_0_rI = { .args_ct_str = { "r", "0", "rI" } };
-static const TCGTargetOpDef r_0_rJ = { .args_ct_str = { "r", "0", "rJ" } };
-static const TCGTargetOpDef a2_r
-= { .args_ct_str = { "r", "r", "0", "1", "r", "r" } };
-static const TCGTargetOpDef a2_ri
-= { .args_ct_str = { "r", "r", "0", "1", "ri", "r" } };
-static const TCGTargetOpDef a2_rA
-= { .args_ct_str = { "r", "r", "0", "1", "rA", "r" } };
-
 switch (op) {
 case INDEX_op_goto_ptr:
-return &r;
+return C_O0_I1(r);
 
 case INDEX_op_ld8u_i32:
 case INDEX_op_ld8u_i64:
@@ -2308,6 +2292,8 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode 
op)
 case INDEX_op_ld32u_i64:
 case INDEX_op_ld32s_i64:
 case INDEX_op_ld_i64:
+return C_O1_I1(r, r);
+
 case INDEX_op_st8_i32:
 case INDEX_op_st8_i64:
 case INDEX_op_st16_i32:
@@ -2315,11 +2301,22 @@ static const TCGTargetOpDef 
*tcg_target_op_def(TCGOpcode op)
 case INDEX_op_st_i32:
 case INDEX_op_st32_i64:
 case INDEX_op_st_i64:
-return &r_r;
+return C_O0_I2(r, r);
 
 case INDEX_op_add_i32:
 case INDEX_op_add_i64:
-return &r_r_ri;
+case INDEX_op_shl_i64:
+case INDEX_op_shr_i64:
+case INDEX_op_sar_i64:
+case INDEX_op_rotl_i32:
+case INDEX_op_rotl_i64:
+case INDEX_op_rotr_i32:
+case INDEX_op_rotr_i64:
+case INDEX_op_clz_i64:
+case INDEX_op_setcond_i32:
+case INDEX_op_setcond_i64:
+return C_O1_I2(r, r, ri);
+
 case INDEX_op_sub_i32:
 case INDEX_op_sub_i64:
 case INDEX_op_and_i32:
@@ -2328,35 +2325,33 @@ static const TCGTargetOpDef 
*tcg_target_op_def(TCGOpcode op)
 case INDEX_op_or_i64:
 case INDEX_op_xor_i32:
 case INDEX_op_xor_i64:
-return (s390_facilities & FACILITY_DISTINCT_OPS ? &r_r_ri : &r_0_ri);
+return (s390_facilities & FACILITY_DISTINCT_OPS
+? C_O1_I2(r, r, ri)
+: C_O1_I2(r, 0, ri));
 
 case INDEX_op_mul_i32:
 /* If we have the general-instruction-extensions, then we have
MULTIPLY SINGLE IMMEDIATE with a signed 32-bit, otherwise we
have only MULTIPLY HALFWORD IMMEDIATE, with a signed 16-bit.  */
-return (s390_facilities & FACILITY_GEN_INST_EXT ? &r_0_ri : &r_0_rI);
+return (s390_facilities & FACILITY_GEN_INST_EXT
+? C_O1_I2(r, 0, ri)
+: C_O1_I2(r, 0, rI));
+

[PULL 20/24] tcg/riscv: Split out constraint sets to tcg-target-con-set.h

Reviewed-by: Peter Maydell 
Reviewed-by: Alistair Francis 
Signed-off-by: Richard Henderson 
---
 tcg/riscv/tcg-target-con-set.h | 30 
 tcg/riscv/tcg-target.h |  1 +
 tcg/riscv/tcg-target.c.inc | 83 ++
 3 files changed, 54 insertions(+), 60 deletions(-)
 create mode 100644 tcg/riscv/tcg-target-con-set.h

diff --git a/tcg/riscv/tcg-target-con-set.h b/tcg/riscv/tcg-target-con-set.h
new file mode 100644
index 0000000000..cf0ac4d751
--- /dev/null
+++ b/tcg/riscv/tcg-target-con-set.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Define RISC-V target-specific constraint sets.
+ * Copyright (c) 2021 Linaro
+ */
+
+/*
+ * C_On_Im(...) defines a constraint set with <n> outputs and <m> inputs.
+ * Each operand should be a sequence of constraint letters as defined by
+ * tcg-target-con-str.h; the constraint combination is inclusive or.
+ */
+C_O0_I1(r)
+C_O0_I2(LZ, L)
+C_O0_I2(rZ, r)
+C_O0_I2(rZ, rZ)
+C_O0_I3(LZ, L, L)
+C_O0_I3(LZ, LZ, L)
+C_O0_I4(LZ, LZ, L, L)
+C_O0_I4(rZ, rZ, rZ, rZ)
+C_O1_I1(r, L)
+C_O1_I1(r, r)
+C_O1_I2(r, L, L)
+C_O1_I2(r, r, ri)
+C_O1_I2(r, r, rI)
+C_O1_I2(r, rZ, rN)
+C_O1_I2(r, rZ, rZ)
+C_O1_I4(r, rZ, rZ, rZ, rZ)
+C_O2_I1(r, r, L)
+C_O2_I2(r, r, L, L)
+C_O2_I4(r, r, rZ, rZ, rM, rM)
diff --git a/tcg/riscv/tcg-target.h b/tcg/riscv/tcg-target.h
index 727c8df418..a998b951e4 100644
--- a/tcg/riscv/tcg-target.h
+++ b/tcg/riscv/tcg-target.h
@@ -171,5 +171,6 @@ void tb_target_set_jmp_target(uintptr_t, uintptr_t, 
uintptr_t, uintptr_t);
 #define TCG_TARGET_NEED_POOL_LABELS
 
 #define TCG_TARGET_HAS_MEMORY_BSWAP 0
+#define TCG_TARGET_CON_SET_H
 
 #endif
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
index 20d5b5ef01..e700c52067 100644
--- a/tcg/riscv/tcg-target.c.inc
+++ b/tcg/riscv/tcg-target.c.inc
@@ -1543,50 +1543,11 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
 }
 }
 
-static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
+static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
 {
-static const TCGTargetOpDef r
-= { .args_ct_str = { "r" } };
-static const TCGTargetOpDef r_r
-= { .args_ct_str = { "r", "r" } };
-static const TCGTargetOpDef rZ_r
-= { .args_ct_str = { "rZ", "r" } };
-static const TCGTargetOpDef rZ_rZ
-= { .args_ct_str = { "rZ", "rZ" } };
-static const TCGTargetOpDef rZ_rZ_rZ_rZ
-= { .args_ct_str = { "rZ", "rZ", "rZ", "rZ" } };
-static const TCGTargetOpDef r_r_ri
-= { .args_ct_str = { "r", "r", "ri" } };
-static const TCGTargetOpDef r_r_rI
-= { .args_ct_str = { "r", "r", "rI" } };
-static const TCGTargetOpDef r_rZ_rN
-= { .args_ct_str = { "r", "rZ", "rN" } };
-static const TCGTargetOpDef r_rZ_rZ
-= { .args_ct_str = { "r", "rZ", "rZ" } };
-static const TCGTargetOpDef r_rZ_rZ_rZ_rZ
-= { .args_ct_str = { "r", "rZ", "rZ", "rZ", "rZ" } };
-static const TCGTargetOpDef r_L
-= { .args_ct_str = { "r", "L" } };
-static const TCGTargetOpDef r_r_L
-= { .args_ct_str = { "r", "r", "L" } };
-static const TCGTargetOpDef r_L_L
-= { .args_ct_str = { "r", "L", "L" } };
-static const TCGTargetOpDef r_r_L_L
-= { .args_ct_str = { "r", "r", "L", "L" } };
-static const TCGTargetOpDef LZ_L
-= { .args_ct_str = { "LZ", "L" } };
-static const TCGTargetOpDef LZ_L_L
-= { .args_ct_str = { "LZ", "L", "L" } };
-static const TCGTargetOpDef LZ_LZ_L
-= { .args_ct_str = { "LZ", "LZ", "L" } };
-static const TCGTargetOpDef LZ_LZ_L_L
-= { .args_ct_str = { "LZ", "LZ", "L", "L" } };
-static const TCGTargetOpDef r_r_rZ_rZ_rM_rM
-= { .args_ct_str = { "r", "r", "rZ", "rZ", "rM", "rM" } };
-
 switch (op) {
 case INDEX_op_goto_ptr:
-return &r;
+return C_O0_I1(r);
 
 case INDEX_op_ld8u_i32:
 case INDEX_op_ld8s_i32:
@@ -1618,7 +1579,7 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode 
op)
 case INDEX_op_extrl_i64_i32:
 case INDEX_op_extrh_i64_i32:
 case INDEX_op_ext_i32_i64:
-return &r_r;
+return C_O1_I1(r, r);
 
 case INDEX_op_st8_i32:
 case INDEX_op_st16_i32:
@@ -1627,7 +1588,7 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode 
op)
 case INDEX_op_st16_i64:
 case INDEX_op_st32_i64:
 case INDEX_op_st_i64:
-return &rZ_r;
+return C_O0_I2(rZ, r);
 
 case INDEX_op_add_i32:
 case INDEX_op_and_i32:
@@ -1637,11 +1598,11 @@ static const TCGTargetOpDef 
*tcg_target_op_def(TCGOpcode op)
 case INDEX_op_and_i64:
 case INDEX_op_or_i64:
 case INDEX_op_xor_i64:
-return &r_r_rI;
+return C_O1_I2(r, r, rI);
 
 case INDEX_op_sub_i32:
 case INDEX_op_sub_i64:
-return &r_rZ_rN;
+return C_O1_I2(r, rZ, rN);
 
 case INDEX_op_mul_i32:
 case INDEX_op_mulsh_i32:
@@ -1659,7 +1620,7 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode 
op)

[PULL 17/24] tcg/arm: Split out constraint sets to tcg-target-con-set.h

Reviewed-by: Peter Maydell 
Signed-off-by: Richard Henderson 
---
 tcg/arm/tcg-target-con-set.h | 35 ++
 tcg/arm/tcg-target.h |  1 +
 tcg/arm/tcg-target.c.inc | 94 
 3 files changed, 68 insertions(+), 62 deletions(-)
 create mode 100644 tcg/arm/tcg-target-con-set.h

diff --git a/tcg/arm/tcg-target-con-set.h b/tcg/arm/tcg-target-con-set.h
new file mode 100644
index 0000000000..ab63e089c2
--- /dev/null
+++ b/tcg/arm/tcg-target-con-set.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Define Arm target-specific constraint sets.
+ * Copyright (c) 2021 Linaro
+ */
+
+/*
+ * C_On_Im(...) defines a constraint set with <n> outputs and <m> inputs.
+ * Each operand should be a sequence of constraint letters as defined by
+ * tcg-target-con-str.h; the constraint combination is inclusive or.
+ */
+C_O0_I1(r)
+C_O0_I2(r, r)
+C_O0_I2(r, rIN)
+C_O0_I2(s, s)
+C_O0_I3(s, s, s)
+C_O0_I4(r, r, rI, rI)
+C_O0_I4(s, s, s, s)
+C_O1_I1(r, l)
+C_O1_I1(r, r)
+C_O1_I2(r, 0, rZ)
+C_O1_I2(r, l, l)
+C_O1_I2(r, r, r)
+C_O1_I2(r, r, rI)
+C_O1_I2(r, r, rIK)
+C_O1_I2(r, r, rIN)
+C_O1_I2(r, r, ri)
+C_O1_I2(r, rZ, rZ)
+C_O1_I4(r, r, r, rI, rI)
+C_O1_I4(r, r, rIN, rIK, 0)
+C_O2_I1(r, r, l)
+C_O2_I2(r, r, l, l)
+C_O2_I2(r, r, r, r)
+C_O2_I4(r, r, r, r, rIN, rIK)
+C_O2_I4(r, r, rI, rI, rIN, rIK)
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
index 8d1fee6327..4d201b1216 100644
--- a/tcg/arm/tcg-target.h
+++ b/tcg/arm/tcg-target.h
@@ -142,5 +142,6 @@ void tb_target_set_jmp_target(uintptr_t, uintptr_t, 
uintptr_t, uintptr_t);
 #define TCG_TARGET_NEED_LDST_LABELS
 #endif
 #define TCG_TARGET_NEED_POOL_LABELS
+#define TCG_TARGET_CON_SET_H
 
 #endif
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
index bbd41d2491..8457108a87 100644
--- a/tcg/arm/tcg-target.c.inc
+++ b/tcg/arm/tcg-target.c.inc
@@ -2036,57 +2036,17 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode 
opc,
 }
 }
 
-static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
+static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
 {
-static const TCGTargetOpDef r = { .args_ct_str = { "r" } };
-static const TCGTargetOpDef r_r = { .args_ct_str = { "r", "r" } };
-static const TCGTargetOpDef s_s = { .args_ct_str = { "s", "s" } };
-static const TCGTargetOpDef r_l = { .args_ct_str = { "r", "l" } };
-static const TCGTargetOpDef r_r_r = { .args_ct_str = { "r", "r", "r" } };
-static const TCGTargetOpDef r_r_l = { .args_ct_str = { "r", "r", "l" } };
-static const TCGTargetOpDef r_l_l = { .args_ct_str = { "r", "l", "l" } };
-static const TCGTargetOpDef s_s_s = { .args_ct_str = { "s", "s", "s" } };
-static const TCGTargetOpDef r_r_ri = { .args_ct_str = { "r", "r", "ri" } };
-static const TCGTargetOpDef r_r_rI = { .args_ct_str = { "r", "r", "rI" } };
-static const TCGTargetOpDef r_r_rIN
-= { .args_ct_str = { "r", "r", "rIN" } };
-static const TCGTargetOpDef r_r_rIK
-= { .args_ct_str = { "r", "r", "rIK" } };
-static const TCGTargetOpDef r_r_r_r
-= { .args_ct_str = { "r", "r", "r", "r" } };
-static const TCGTargetOpDef r_r_l_l
-= { .args_ct_str = { "r", "r", "l", "l" } };
-static const TCGTargetOpDef s_s_s_s
-= { .args_ct_str = { "s", "s", "s", "s" } };
-static const TCGTargetOpDef br
-= { .args_ct_str = { "r", "rIN" } };
-static const TCGTargetOpDef ext2
-= { .args_ct_str = { "r", "rZ", "rZ" } };
-static const TCGTargetOpDef dep
-= { .args_ct_str = { "r", "0", "rZ" } };
-static const TCGTargetOpDef movc
-= { .args_ct_str = { "r", "r", "rIN", "rIK", "0" } };
-static const TCGTargetOpDef add2
-= { .args_ct_str = { "r", "r", "r", "r", "rIN", "rIK" } };
-static const TCGTargetOpDef sub2
-= { .args_ct_str = { "r", "r", "rI", "rI", "rIN", "rIK" } };
-static const TCGTargetOpDef br2
-= { .args_ct_str = { "r", "r", "rI", "rI" } };
-static const TCGTargetOpDef setc2
-= { .args_ct_str = { "r", "r", "r", "rI", "rI" } };
-
 switch (op) {
 case INDEX_op_goto_ptr:
-return &r;
+return C_O0_I1(r);
 
 case INDEX_op_ld8u_i32:
 case INDEX_op_ld8s_i32:
 case INDEX_op_ld16u_i32:
 case INDEX_op_ld16s_i32:
 case INDEX_op_ld_i32:
-case INDEX_op_st8_i32:
-case INDEX_op_st16_i32:
-case INDEX_op_st_i32:
 case INDEX_op_neg_i32:
 case INDEX_op_not_i32:
 case INDEX_op_bswap16_i32:
@@ -2096,62 +2056,72 @@ static const TCGTargetOpDef 
*tcg_target_op_def(TCGOpcode op)
 case INDEX_op_ext16u_i32:
 case INDEX_op_extract_i32:
 case INDEX_op_sextract_i32:
-return &r_r;
+return C_O1_I1(r, r);
+
+case INDEX_op_st8_i32:
+case INDEX_op_st16_i32:
+case INDEX_op_st_i32:
+return C_O0_I2(r, r);
 
 case INDEX_op_add_i32:
 case INDEX_op_sub_i32:
 case INDEX_op_setcond_i32:
-return &r_r_rIN;
+return C_O1_I2

[PULL 16/24] tcg/aarch64: Split out constraint sets to tcg-target-con-set.h

Reviewed-by: Peter Maydell 
Signed-off-by: Richard Henderson 
---
 tcg/aarch64/tcg-target-con-set.h | 36 +
 tcg/aarch64/tcg-target.h |  1 +
 tcg/aarch64/tcg-target.c.inc | 86 +++-
 3 files changed, 65 insertions(+), 58 deletions(-)
 create mode 100644 tcg/aarch64/tcg-target-con-set.h

diff --git a/tcg/aarch64/tcg-target-con-set.h b/tcg/aarch64/tcg-target-con-set.h
new file mode 100644
index 0000000000..d6c6866878
--- /dev/null
+++ b/tcg/aarch64/tcg-target-con-set.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Define AArch64 target-specific constraint sets.
+ * Copyright (c) 2021 Linaro
+ */
+
+/*
+ * C_On_Im(...) defines a constraint set with <n> outputs and <m> inputs.
+ * Each operand should be a sequence of constraint letters as defined by
+ * tcg-target-con-str.h; the constraint combination is inclusive or.
+ */
+C_O0_I1(r)
+C_O0_I2(lZ, l)
+C_O0_I2(r, rA)
+C_O0_I2(rZ, r)
+C_O0_I2(w, r)
+C_O1_I1(r, l)
+C_O1_I1(r, r)
+C_O1_I1(w, r)
+C_O1_I1(w, w)
+C_O1_I1(w, wr)
+C_O1_I2(r, 0, rZ)
+C_O1_I2(r, r, r)
+C_O1_I2(r, r, rA)
+C_O1_I2(r, r, rAL)
+C_O1_I2(r, r, ri)
+C_O1_I2(r, r, rL)
+C_O1_I2(r, rZ, rZ)
+C_O1_I2(w, 0, w)
+C_O1_I2(w, w, w)
+C_O1_I2(w, w, wN)
+C_O1_I2(w, w, wO)
+C_O1_I2(w, w, wZ)
+C_O1_I3(w, w, w, w)
+C_O1_I4(r, r, rA, rZ, rZ)
+C_O2_I4(r, r, rZ, rZ, rA, rMZ)
diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
index 5ec30dba25..200e9b5e0e 100644
--- a/tcg/aarch64/tcg-target.h
+++ b/tcg/aarch64/tcg-target.h
@@ -155,5 +155,6 @@ void tb_target_set_jmp_target(uintptr_t, uintptr_t, 
uintptr_t, uintptr_t);
 #define TCG_TARGET_NEED_LDST_LABELS
 #endif
 #define TCG_TARGET_NEED_POOL_LABELS
+#define TCG_TARGET_CON_SET_H
 
 #endif /* AARCH64_TCG_TARGET_H */
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
index 42037c98fa..3c1ee39fd4 100644
--- a/tcg/aarch64/tcg-target.c.inc
+++ b/tcg/aarch64/tcg-target.c.inc
@@ -2547,42 +2547,11 @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, 
unsigned vece,
 va_end(va);
 }
 
-static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
+static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
 {
-static const TCGTargetOpDef r = { .args_ct_str = { "r" } };
-static const TCGTargetOpDef r_r = { .args_ct_str = { "r", "r" } };
-static const TCGTargetOpDef w_w = { .args_ct_str = { "w", "w" } };
-static const TCGTargetOpDef w_r = { .args_ct_str = { "w", "r" } };
-static const TCGTargetOpDef w_wr = { .args_ct_str = { "w", "wr" } };
-static const TCGTargetOpDef r_l = { .args_ct_str = { "r", "l" } };
-static const TCGTargetOpDef r_rA = { .args_ct_str = { "r", "rA" } };
-static const TCGTargetOpDef rZ_r = { .args_ct_str = { "rZ", "r" } };
-static const TCGTargetOpDef lZ_l = { .args_ct_str = { "lZ", "l" } };
-static const TCGTargetOpDef r_r_r = { .args_ct_str = { "r", "r", "r" } };
-static const TCGTargetOpDef w_w_w = { .args_ct_str = { "w", "w", "w" } };
-static const TCGTargetOpDef w_0_w = { .args_ct_str = { "w", "0", "w" } };
-static const TCGTargetOpDef w_w_wO = { .args_ct_str = { "w", "w", "wO" } };
-static const TCGTargetOpDef w_w_wN = { .args_ct_str = { "w", "w", "wN" } };
-static const TCGTargetOpDef w_w_wZ = { .args_ct_str = { "w", "w", "wZ" } };
-static const TCGTargetOpDef r_r_ri = { .args_ct_str = { "r", "r", "ri" } };
-static const TCGTargetOpDef r_r_rA = { .args_ct_str = { "r", "r", "rA" } };
-static const TCGTargetOpDef r_r_rL = { .args_ct_str = { "r", "r", "rL" } };
-static const TCGTargetOpDef r_r_rAL
-= { .args_ct_str = { "r", "r", "rAL" } };
-static const TCGTargetOpDef dep
-= { .args_ct_str = { "r", "0", "rZ" } };
-static const TCGTargetOpDef ext2
-= { .args_ct_str = { "r", "rZ", "rZ" } };
-static const TCGTargetOpDef movc
-= { .args_ct_str = { "r", "r", "rA", "rZ", "rZ" } };
-static const TCGTargetOpDef add2
-= { .args_ct_str = { "r", "r", "rZ", "rZ", "rA", "rMZ" } };
-static const TCGTargetOpDef w_w_w_w
-= { .args_ct_str = { "w", "w", "w", "w" } };
-
 switch (op) {
 case INDEX_op_goto_ptr:
-return &r;
+return C_O0_I1(r);
 
 case INDEX_op_ld8u_i32:
 case INDEX_op_ld8s_i32:
@@ -2621,7 +2590,7 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode 
op)
 case INDEX_op_extract_i64:
 case INDEX_op_sextract_i32:
 case INDEX_op_sextract_i64:
-return &r_r;
+return C_O1_I1(r, r);
 
 case INDEX_op_st8_i32:
 case INDEX_op_st16_i32:
@@ -2630,7 +2599,7 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode 
op)
 case INDEX_op_st16_i64:
 case INDEX_op_st32_i64:
 case INDEX_op_st_i64:
-return &rZ_r;
+return C_O0_I2(rZ, r);
 
 case INDEX_op_add_i32:
 case INDEX_op_add_i64:
@@ -2638,7 +2607,7 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode 
op)
 case INDEX_op_sub_i64:
 case INDEX_op_set

[PULL 19/24] tcg/ppc: Split out constraint sets to tcg-target-con-set.h

Reviewed-by: Peter Maydell 
Signed-off-by: Richard Henderson 
---
 tcg/ppc/tcg-target-con-set.h |  42 +++
 tcg/ppc/tcg-target.h |   1 +
 tcg/ppc/tcg-target.c.inc | 136 +++
 3 files changed, 99 insertions(+), 80 deletions(-)
 create mode 100644 tcg/ppc/tcg-target-con-set.h

diff --git a/tcg/ppc/tcg-target-con-set.h b/tcg/ppc/tcg-target-con-set.h
new file mode 100644
index 0000000000..a1a345883d
--- /dev/null
+++ b/tcg/ppc/tcg-target-con-set.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Define PowerPC target-specific constraint sets.
+ * Copyright (c) 2021 Linaro
+ */
+
+/*
+ * C_On_Im(...) defines a constraint set with <n> outputs and <m> inputs.
+ * Each operand should be a sequence of constraint letters as defined by
+ * tcg-target-con-str.h; the constraint combination is inclusive or.
+ */
+C_O0_I1(r)
+C_O0_I2(r, r)
+C_O0_I2(r, ri)
+C_O0_I2(S, S)
+C_O0_I2(v, r)
+C_O0_I3(S, S, S)
+C_O0_I4(r, r, ri, ri)
+C_O0_I4(S, S, S, S)
+C_O1_I1(r, L)
+C_O1_I1(r, r)
+C_O1_I1(v, r)
+C_O1_I1(v, v)
+C_O1_I1(v, vr)
+C_O1_I2(r, 0, rZ)
+C_O1_I2(r, L, L)
+C_O1_I2(r, rI, ri)
+C_O1_I2(r, rI, rT)
+C_O1_I2(r, r, r)
+C_O1_I2(r, r, ri)
+C_O1_I2(r, r, rI)
+C_O1_I2(r, r, rT)
+C_O1_I2(r, r, rU)
+C_O1_I2(r, r, rZW)
+C_O1_I2(v, v, v)
+C_O1_I3(v, v, v, v)
+C_O1_I4(r, r, ri, rZ, rZ)
+C_O1_I4(r, r, r, ri, ri)
+C_O2_I1(L, L, L)
+C_O2_I2(L, L, L, L)
+C_O2_I4(r, r, rI, rZM, r, r)
+C_O2_I4(r, r, r, r, rI, rZM)
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index d1339afc66..551f8d0fc9 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -185,5 +185,6 @@ void tb_target_set_jmp_target(uintptr_t, uintptr_t, 
uintptr_t, uintptr_t);
 #define TCG_TARGET_NEED_LDST_LABELS
 #endif
 #define TCG_TARGET_NEED_POOL_LABELS
+#define TCG_TARGET_CON_SET_H
 
 #endif
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
index e5aa8d2d10..4377d15d62 100644
--- a/tcg/ppc/tcg-target.c.inc
+++ b/tcg/ppc/tcg-target.c.inc
@@ -3456,62 +3456,17 @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, 
unsigned vece,
 va_end(va);
 }
 
-static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
+static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
 {
-static const TCGTargetOpDef r = { .args_ct_str = { "r" } };
-static const TCGTargetOpDef r_r = { .args_ct_str = { "r", "r" } };
-static const TCGTargetOpDef r_L = { .args_ct_str = { "r", "L" } };
-static const TCGTargetOpDef S_S = { .args_ct_str = { "S", "S" } };
-static const TCGTargetOpDef r_ri = { .args_ct_str = { "r", "ri" } };
-static const TCGTargetOpDef r_r_r = { .args_ct_str = { "r", "r", "r" } };
-static const TCGTargetOpDef r_L_L = { .args_ct_str = { "r", "L", "L" } };
-static const TCGTargetOpDef L_L_L = { .args_ct_str = { "L", "L", "L" } };
-static const TCGTargetOpDef S_S_S = { .args_ct_str = { "S", "S", "S" } };
-static const TCGTargetOpDef r_r_ri = { .args_ct_str = { "r", "r", "ri" } };
-static const TCGTargetOpDef r_r_rI = { .args_ct_str = { "r", "r", "rI" } };
-static const TCGTargetOpDef r_r_rT = { .args_ct_str = { "r", "r", "rT" } };
-static const TCGTargetOpDef r_r_rU = { .args_ct_str = { "r", "r", "rU" } };
-static const TCGTargetOpDef r_rI_ri
-= { .args_ct_str = { "r", "rI", "ri" } };
-static const TCGTargetOpDef r_rI_rT
-= { .args_ct_str = { "r", "rI", "rT" } };
-static const TCGTargetOpDef r_r_rZW
-= { .args_ct_str = { "r", "r", "rZW" } };
-static const TCGTargetOpDef L_L_L_L
-= { .args_ct_str = { "L", "L", "L", "L" } };
-static const TCGTargetOpDef S_S_S_S
-= { .args_ct_str = { "S", "S", "S", "S" } };
-static const TCGTargetOpDef movc
-= { .args_ct_str = { "r", "r", "ri", "rZ", "rZ" } };
-static const TCGTargetOpDef dep
-= { .args_ct_str = { "r", "0", "rZ" } };
-static const TCGTargetOpDef br2
-= { .args_ct_str = { "r", "r", "ri", "ri" } };
-static const TCGTargetOpDef setc2
-= { .args_ct_str = { "r", "r", "r", "ri", "ri" } };
-static const TCGTargetOpDef add2
-= { .args_ct_str = { "r", "r", "r", "r", "rI", "rZM" } };
-static const TCGTargetOpDef sub2
-= { .args_ct_str = { "r", "r", "rI", "rZM", "r", "r" } };
-static const TCGTargetOpDef v_r = { .args_ct_str = { "v", "r" } };
-static const TCGTargetOpDef v_vr = { .args_ct_str = { "v", "vr" } };
-static const TCGTargetOpDef v_v = { .args_ct_str = { "v", "v" } };
-static const TCGTargetOpDef v_v_v = { .args_ct_str = { "v", "v", "v" } };
-static const TCGTargetOpDef v_v_v_v
-= { .args_ct_str = { "v", "v", "v", "v" } };
-
 switch (op) {
 case INDEX_op_goto_ptr:
-return &r;
+return C_O0_I1(r);
 
 case INDEX_op_ld8u_i32:
 case INDEX_op_ld8s_i32:
 case INDEX_op_ld16u_i32:
 case INDEX_op_ld16s_i32:
 case INDEX_op_ld_i32:
-case INDEX_op_st8_i32:
-case INDEX_op_st16_i32:
-case INDEX_op_st_i32:

[PULL 15/24] tcg/i386: Split out constraint sets to tcg-target-con-set.h

This exports the constraint sets from tcg_target_op_def to
a place we will be able to manipulate more in future.

Reviewed-by: Peter Maydell 
Signed-off-by: Richard Henderson 
---
 tcg/i386/tcg-target-con-set.h |  55 ++
 tcg/i386/tcg-target.h |   1 +
 tcg/tcg.c | 119 +
 tcg/i386/tcg-target.c.inc | 194 --
 4 files changed, 242 insertions(+), 127 deletions(-)
 create mode 100644 tcg/i386/tcg-target-con-set.h

diff --git a/tcg/i386/tcg-target-con-set.h b/tcg/i386/tcg-target-con-set.h
new file mode 100644
index 0000000000..78774d1005
--- /dev/null
+++ b/tcg/i386/tcg-target-con-set.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Define i386 target-specific constraint sets.
+ * Copyright (c) 2021 Linaro
+ */
+
+/*
+ * C_On_Im(...) defines a constraint set with <n> outputs and <m> inputs.
+ * Each operand should be a sequence of constraint letters as defined by
+ * tcg-target-con-str.h; the constraint combination is inclusive or.
+ *
+ * C_N1_Im(...) defines a constraint set with 1 output and <m> inputs,
+ * except that the output must use a new register.
+ */
+C_O0_I1(r)
+C_O0_I2(L, L)
+C_O0_I2(qi, r)
+C_O0_I2(re, r)
+C_O0_I2(ri, r)
+C_O0_I2(r, re)
+C_O0_I2(s, L)
+C_O0_I2(x, r)
+C_O0_I3(L, L, L)
+C_O0_I3(s, L, L)
+C_O0_I4(L, L, L, L)
+C_O0_I4(r, r, ri, ri)
+C_O1_I1(r, 0)
+C_O1_I1(r, L)
+C_O1_I1(r, q)
+C_O1_I1(r, r)
+C_O1_I1(x, r)
+C_O1_I1(x, x)
+C_O1_I2(Q, 0, Q)
+C_O1_I2(q, r, re)
+C_O1_I2(r, 0, ci)
+C_O1_I2(r, 0, r)
+C_O1_I2(r, 0, re)
+C_O1_I2(r, 0, reZ)
+C_O1_I2(r, 0, ri)
+C_O1_I2(r, 0, rI)
+C_O1_I2(r, L, L)
+C_O1_I2(r, r, re)
+C_O1_I2(r, r, ri)
+C_O1_I2(r, r, rI)
+C_O1_I2(x, x, x)
+C_N1_I2(r, r, r)
+C_N1_I2(r, r, rW)
+C_O1_I3(x, x, x, x)
+C_O1_I4(r, r, re, r, 0)
+C_O1_I4(r, r, r, ri, ri)
+C_O2_I1(r, r, L)
+C_O2_I2(a, d, a, r)
+C_O2_I2(r, r, L, L)
+C_O2_I3(a, d, 0, 1, r)
+C_O2_I4(r, r, 0, 1, re, re)
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index b693d3692d..48a6f2a336 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -235,5 +235,6 @@ static inline void tb_target_set_jmp_target(uintptr_t 
tc_ptr, uintptr_t jmp_rx,
 #define TCG_TARGET_NEED_LDST_LABELS
 #endif
 #define TCG_TARGET_NEED_POOL_LABELS
+#define TCG_TARGET_CON_SET_H
 
 #endif
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 39bcdff8dc..df9f32763e 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -69,7 +69,9 @@
 /* Forward declarations for functions declared in tcg-target.c.inc and
used here. */
 static void tcg_target_init(TCGContext *s);
+#ifndef TCG_TARGET_CON_SET_H
 static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode);
+#endif
 static void tcg_target_qemu_prologue(TCGContext *s);
 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
 intptr_t value, intptr_t addend);
@@ -347,6 +349,112 @@ static void set_jmp_reset_offset(TCGContext *s, int which)
 s->tb_jmp_reset_offset[which] = tcg_current_code_size(s);
 }
 
+#ifdef TCG_TARGET_CON_SET_H
+#define C_PFX1(P, A)P##A
+#define C_PFX2(P, A, B) P##A##_##B
+#define C_PFX3(P, A, B, C)  P##A##_##B##_##C
+#define C_PFX4(P, A, B, C, D)   P##A##_##B##_##C##_##D
+#define C_PFX5(P, A, B, C, D, E)P##A##_##B##_##C##_##D##_##E
+#define C_PFX6(P, A, B, C, D, E, F) P##A##_##B##_##C##_##D##_##E##_##F
+
+/* Define an enumeration for the various combinations. */
+
+#define C_O0_I1(I1) C_PFX1(c_o0_i1_, I1),
+#define C_O0_I2(I1, I2) C_PFX2(c_o0_i2_, I1, I2),
+#define C_O0_I3(I1, I2, I3) C_PFX3(c_o0_i3_, I1, I2, I3),
+#define C_O0_I4(I1, I2, I3, I4) C_PFX4(c_o0_i4_, I1, I2, I3, I4),
+
+#define C_O1_I1(O1, I1) C_PFX2(c_o1_i1_, O1, I1),
+#define C_O1_I2(O1, I1, I2) C_PFX3(c_o1_i2_, O1, I1, I2),
+#define C_O1_I3(O1, I1, I2, I3) C_PFX4(c_o1_i3_, O1, I1, I2, I3),
+#define C_O1_I4(O1, I1, I2, I3, I4) C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),
+
+#define C_N1_I2(O1, I1, I2) C_PFX3(c_n1_i2_, O1, I1, I2),
+
+#define C_O2_I1(O1, O2, I1) C_PFX3(c_o2_i1_, O1, O2, I1),
+#define C_O2_I2(O1, O2, I1, I2) C_PFX4(c_o2_i2_, O1, O2, I1, I2),
+#define C_O2_I3(O1, O2, I1, I2, I3) C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
+#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, 
I4),
+
+typedef enum {
+#include "tcg-target-con-set.h"
+} TCGConstraintSetIndex;
+
+static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);
+
+#undef C_O0_I1
+#undef C_O0_I2
+#undef C_O0_I3
+#undef C_O0_I4
+#undef C_O1_I1
+#undef C_O1_I2
+#undef C_O1_I3
+#undef C_O1_I4
+#undef C_N1_I2
+#undef C_O2_I1
+#undef C_O2_I2
+#undef C_O2_I3
+#undef C_O2_I4
+
+/* Put all of the constraint sets into an array, indexed by the enum. */
+
+#define C_O0_I1(I1) { .args_ct_str = { #I1 } },
+#define C_O0_I2(I1, I2) { .args_ct_str = { #I1, #I2 } },
+#define C_O0_I3(I1, I2, I3) {

[PULL 18/24] tcg/mips: Split out constraint sets to tcg-target-con-set.h

Reviewed-by: Peter Maydell 
Signed-off-by: Richard Henderson 
---
 tcg/mips/tcg-target-con-set.h | 36 +
 tcg/mips/tcg-target.h |  1 +
 tcg/mips/tcg-target.c.inc | 96 +++
 3 files changed, 66 insertions(+), 67 deletions(-)
 create mode 100644 tcg/mips/tcg-target-con-set.h

diff --git a/tcg/mips/tcg-target-con-set.h b/tcg/mips/tcg-target-con-set.h
new file mode 100644
index 0000000000..fe3e868a2f
--- /dev/null
+++ b/tcg/mips/tcg-target-con-set.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Define MIPS target-specific constraint sets.
+ * Copyright (c) 2021 Linaro
+ */
+
+/*
+ * C_On_Im(...) defines a constraint set with <n> outputs and <m> inputs.
+ * Each operand should be a sequence of constraint letters as defined by
+ * tcg-target-con-str.h; the constraint combination is inclusive or.
+ */
+C_O0_I1(r)
+C_O0_I2(rZ, r)
+C_O0_I2(rZ, rZ)
+C_O0_I2(SZ, S)
+C_O0_I3(SZ, S, S)
+C_O0_I3(SZ, SZ, S)
+C_O0_I4(rZ, rZ, rZ, rZ)
+C_O0_I4(SZ, SZ, S, S)
+C_O1_I1(r, L)
+C_O1_I1(r, r)
+C_O1_I2(r, 0, rZ)
+C_O1_I2(r, L, L)
+C_O1_I2(r, r, ri)
+C_O1_I2(r, r, rI)
+C_O1_I2(r, r, rIK)
+C_O1_I2(r, r, rJ)
+C_O1_I2(r, r, rWZ)
+C_O1_I2(r, rZ, rN)
+C_O1_I2(r, rZ, rZ)
+C_O1_I4(r, rZ, rZ, rZ, 0)
+C_O1_I4(r, rZ, rZ, rZ, rZ)
+C_O2_I1(r, r, L)
+C_O2_I2(r, r, L, L)
+C_O2_I2(r, r, r, r)
+C_O2_I4(r, r, rZ, rZ, rN, rN)
diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
index c2c32fb38f..e520a9d6e3 100644
--- a/tcg/mips/tcg-target.h
+++ b/tcg/mips/tcg-target.h
@@ -207,5 +207,6 @@ void tb_target_set_jmp_target(uintptr_t, uintptr_t, 
uintptr_t, uintptr_t);
 #ifdef CONFIG_SOFTMMU
 #define TCG_TARGET_NEED_LDST_LABELS
 #endif
+#define TCG_TARGET_CON_SET_H
 
 #endif
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
index 432d38a010..ab55f3109b 100644
--- a/tcg/mips/tcg-target.c.inc
+++ b/tcg/mips/tcg-target.c.inc
@@ -2112,52 +2112,11 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode 
opc,
 }
 }
 
-static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
+static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
 {
-static const TCGTargetOpDef r = { .args_ct_str = { "r" } };
-static const TCGTargetOpDef r_r = { .args_ct_str = { "r", "r" } };
-static const TCGTargetOpDef r_L = { .args_ct_str = { "r", "L" } };
-static const TCGTargetOpDef rZ_r = { .args_ct_str = { "rZ", "r" } };
-static const TCGTargetOpDef SZ_S = { .args_ct_str = { "SZ", "S" } };
-static const TCGTargetOpDef rZ_rZ = { .args_ct_str = { "rZ", "rZ" } };
-static const TCGTargetOpDef r_r_L = { .args_ct_str = { "r", "r", "L" } };
-static const TCGTargetOpDef r_L_L = { .args_ct_str = { "r", "L", "L" } };
-static const TCGTargetOpDef r_r_ri = { .args_ct_str = { "r", "r", "ri" } };
-static const TCGTargetOpDef r_r_rI = { .args_ct_str = { "r", "r", "rI" } };
-static const TCGTargetOpDef r_r_rJ = { .args_ct_str = { "r", "r", "rJ" } };
-static const TCGTargetOpDef SZ_S_S = { .args_ct_str = { "SZ", "S", "S" } };
-static const TCGTargetOpDef SZ_SZ_S
-= { .args_ct_str = { "SZ", "SZ", "S" } };
-static const TCGTargetOpDef SZ_SZ_S_S
-= { .args_ct_str = { "SZ", "SZ", "S", "S" } };
-static const TCGTargetOpDef r_rZ_rN
-= { .args_ct_str = { "r", "rZ", "rN" } };
-static const TCGTargetOpDef r_rZ_rZ
-= { .args_ct_str = { "r", "rZ", "rZ" } };
-static const TCGTargetOpDef r_r_rIK
-= { .args_ct_str = { "r", "r", "rIK" } };
-static const TCGTargetOpDef r_r_rWZ
-= { .args_ct_str = { "r", "r", "rWZ" } };
-static const TCGTargetOpDef r_r_r_r
-= { .args_ct_str = { "r", "r", "r", "r" } };
-static const TCGTargetOpDef r_r_L_L
-= { .args_ct_str = { "r", "r", "L", "L" } };
-static const TCGTargetOpDef dep
-= { .args_ct_str = { "r", "0", "rZ" } };
-static const TCGTargetOpDef movc
-= { .args_ct_str = { "r", "rZ", "rZ", "rZ", "0" } };
-static const TCGTargetOpDef movc_r6
-= { .args_ct_str = { "r", "rZ", "rZ", "rZ", "rZ" } };
-static const TCGTargetOpDef add2
-= { .args_ct_str = { "r", "r", "rZ", "rZ", "rN", "rN" } };
-static const TCGTargetOpDef br2
-= { .args_ct_str = { "rZ", "rZ", "rZ", "rZ" } };
-static const TCGTargetOpDef setc2
-= { .args_ct_str = { "r", "rZ", "rZ", "rZ", "rZ" } };
-
 switch (op) {
 case INDEX_op_goto_ptr:
-return &r;
+return C_O0_I1(r);
 
 case INDEX_op_ld8u_i32:
 case INDEX_op_ld8s_i32:
@@ -2190,7 +2149,7 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode 
op)
 case INDEX_op_extrl_i64_i32:
 case INDEX_op_extrh_i64_i32:
 case INDEX_op_extract_i64:
-return &r_r;
+return C_O1_I1(r, r);
 
 case INDEX_op_st8_i32:
 case INDEX_op_st16_i32:
@@ -2199,14 +2158,14 @@ static const TCGTargetOpDef 
*tcg_target_op_def(TCGOpcode op)
 case INDEX_op_st16_i64:
 case INDEX_op_st32_i64:
 case INDEX_op_st_i64:
-

[PULL 11/24] tcg/riscv: Split out target constraints to tcg-target-con-str.h

Reviewed-by: Peter Maydell 
Signed-off-by: Richard Henderson 
---
 tcg/riscv/tcg-target-con-str.h | 21 ++
 tcg/riscv/tcg-target.h |  1 +
 tcg/riscv/tcg-target.c.inc | 52 +-
 3 files changed, 35 insertions(+), 39 deletions(-)
 create mode 100644 tcg/riscv/tcg-target-con-str.h

diff --git a/tcg/riscv/tcg-target-con-str.h b/tcg/riscv/tcg-target-con-str.h
new file mode 100644
index 00..8d8afaee53
--- /dev/null
+++ b/tcg/riscv/tcg-target-con-str.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Define RISC-V target-specific operand constraints.
+ * Copyright (c) 2021 Linaro
+ */
+
+/*
+ * Define constraint letters for register sets:
+ * REGS(letter, register_mask)
+ */
+REGS('r', ALL_GENERAL_REGS)
+REGS('L', ALL_GENERAL_REGS & ~SOFTMMU_RESERVE_REGS)
+
+/*
+ * Define constraint letters for constants:
+ * CONST(letter, TCG_CT_CONST_* bit set)
+ */
+CONST('I', TCG_CT_CONST_S12)
+CONST('N', TCG_CT_CONST_N12)
+CONST('M', TCG_CT_CONST_M12)
+CONST('Z', TCG_CT_CONST_ZERO)
diff --git a/tcg/riscv/tcg-target.h b/tcg/riscv/tcg-target.h
index 727c8df418..daf3ef7b5c 100644
--- a/tcg/riscv/tcg-target.h
+++ b/tcg/riscv/tcg-target.h
@@ -171,5 +171,6 @@ void tb_target_set_jmp_target(uintptr_t, uintptr_t, 
uintptr_t, uintptr_t);
 #define TCG_TARGET_NEED_POOL_LABELS
 
 #define TCG_TARGET_HAS_MEMORY_BSWAP 0
+#define TCG_TARGET_CON_STR_H
 
 #endif
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
index 71c0badc02..20d5b5ef01 100644
--- a/tcg/riscv/tcg-target.c.inc
+++ b/tcg/riscv/tcg-target.c.inc
@@ -122,6 +122,19 @@ static const int tcg_target_call_oarg_regs[] = {
 #define TCG_CT_CONST_N12   0x400
 #define TCG_CT_CONST_M12   0x800
 
+#define ALL_GENERAL_REGS  MAKE_64BIT_MASK(0, 32)
+/*
+ * For softmmu, we need to avoid conflicts with the first 5
+ * argument registers to call the helper.  Some of these are
+ * also used for the tlb lookup.
+ */
+#ifdef CONFIG_SOFTMMU
+#define SOFTMMU_RESERVE_REGS  MAKE_64BIT_MASK(TCG_REG_A0, 5)
+#else
+#define SOFTMMU_RESERVE_REGS  0
+#endif
+
+
 static inline tcg_target_long sextreg(tcg_target_long val, int pos, int len)
 {
 if (TCG_TARGET_REG_BITS == 32) {
@@ -131,45 +144,6 @@ static inline tcg_target_long sextreg(tcg_target_long val, 
int pos, int len)
 }
 }
 
-/* parse target specific constraints */
-static const char *target_parse_constraint(TCGArgConstraint *ct,
-   const char *ct_str, TCGType type)
-{
-switch (*ct_str++) {
-case 'r':
-ct->regs = 0x;
-break;
-case 'L':
-/* qemu_ld/qemu_st constraint */
-ct->regs = 0x;
-/* qemu_ld/qemu_st uses TCG_REG_TMP0 */
-#if defined(CONFIG_SOFTMMU)
-tcg_regset_reset_reg(ct->regs, tcg_target_call_iarg_regs[0]);
-tcg_regset_reset_reg(ct->regs, tcg_target_call_iarg_regs[1]);
-tcg_regset_reset_reg(ct->regs, tcg_target_call_iarg_regs[2]);
-tcg_regset_reset_reg(ct->regs, tcg_target_call_iarg_regs[3]);
-tcg_regset_reset_reg(ct->regs, tcg_target_call_iarg_regs[4]);
-#endif
-break;
-case 'I':
-ct->ct |= TCG_CT_CONST_S12;
-break;
-case 'N':
-ct->ct |= TCG_CT_CONST_N12;
-break;
-case 'M':
-ct->ct |= TCG_CT_CONST_M12;
-break;
-case 'Z':
-/* we can use a zero immediate as a zero register argument. */
-ct->ct |= TCG_CT_CONST_ZERO;
-break;
-default:
-return NULL;
-}
-return ct_str;
-}
-
 /* test if a constant matches the constraint */
 static int tcg_target_const_match(tcg_target_long val, TCGType type,
   const TCGArgConstraint *arg_ct)
-- 
2.25.1

[PULL 13/24] tcg/sparc: Split out target constraints to tcg-target-con-str.h

Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Peter Maydell 
Signed-off-by: Richard Henderson 
---
 tcg/sparc/tcg-target-con-str.h | 23 ++
 tcg/sparc/tcg-target.h |  5 +--
 tcg/sparc/tcg-target.c.inc | 81 +-
 3 files changed, 55 insertions(+), 54 deletions(-)
 create mode 100644 tcg/sparc/tcg-target-con-str.h

diff --git a/tcg/sparc/tcg-target-con-str.h b/tcg/sparc/tcg-target-con-str.h
new file mode 100644
index 00..fdb25d9313
--- /dev/null
+++ b/tcg/sparc/tcg-target-con-str.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Define Sparc target-specific operand constraints.
+ * Copyright (c) 2021 Linaro
+ */
+
+/*
+ * Define constraint letters for register sets:
+ * REGS(letter, register_mask)
+ */
+REGS('r', ALL_GENERAL_REGS)
+REGS('R', ALL_GENERAL_REGS64)
+REGS('s', ALL_QLDST_REGS)
+REGS('S', ALL_QLDST_REGS64)
+REGS('A', TARGET_LONG_BITS == 64 ? ALL_QLDST_REGS64 : ALL_QLDST_REGS)
+
+/*
+ * Define constraint letters for constants:
+ * CONST(letter, TCG_CT_CONST_* bit set)
+ */
+CONST('I', TCG_CT_CONST_S11)
+CONST('J', TCG_CT_CONST_S13)
+CONST('Z', TCG_CT_CONST_ZERO)
diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h
index 95ab9af955..5185b00524 100644
--- a/tcg/sparc/tcg-target.h
+++ b/tcg/sparc/tcg-target.h
@@ -66,10 +66,6 @@ typedef enum {
 TCG_REG_I7,
 } TCGReg;
 
-#define TCG_CT_CONST_S11  0x100
-#define TCG_CT_CONST_S13  0x200
-#define TCG_CT_CONST_ZERO 0x400
-
 /* used for function call generation */
 #define TCG_REG_CALL_STACK TCG_REG_O6
 
@@ -172,5 +168,6 @@ extern bool use_vis3_instructions;
 void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
 
 #define TCG_TARGET_NEED_POOL_LABELS
+#define TCG_TARGET_CON_STR_H
 
 #endif
diff --git a/tcg/sparc/tcg-target.c.inc b/tcg/sparc/tcg-target.c.inc
index 28b5b6559a..e291eb0b95 100644
--- a/tcg/sparc/tcg-target.c.inc
+++ b/tcg/sparc/tcg-target.c.inc
@@ -67,18 +67,38 @@ static const char * const 
tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
 # define SPARC64 0
 #endif
 
-/* Note that sparcv8plus can only hold 64 bit quantities in %g and %o
-   registers.  These are saved manually by the kernel in full 64-bit
-   slots.  The %i and %l registers are saved by the register window
-   mechanism, which only allocates space for 32 bits.  Given that this
-   window spill/fill can happen on any signal, we must consider the
-   high bits of the %i and %l registers garbage at all times.  */
-#if SPARC64
-# define ALL_64  0xffffffffu
+#define TCG_CT_CONST_S11  0x100
+#define TCG_CT_CONST_S13  0x200
+#define TCG_CT_CONST_ZERO 0x400
+
+/*
+ * For softmmu, we need to avoid conflicts with the first 3
+ * argument registers to perform the tlb lookup, and to call
+ * the helper function.
+ */
+#ifdef CONFIG_SOFTMMU
+#define SOFTMMU_RESERVE_REGS MAKE_64BIT_MASK(TCG_REG_O0, 3)
 #else
-# define ALL_64  0xffffu
+#define SOFTMMU_RESERVE_REGS 0
 #endif
 
+/*
+ * Note that sparcv8plus can only hold 64 bit quantities in %g and %o
+ * registers.  These are saved manually by the kernel in full 64-bit
+ * slots.  The %i and %l registers are saved by the register window
+ * mechanism, which only allocates space for 32 bits.  Given that this
+ * window spill/fill can happen on any signal, we must consider the
+ * high bits of the %i and %l registers garbage at all times.
+ */
+#define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 32)
+#if SPARC64
+# define ALL_GENERAL_REGS64  ALL_GENERAL_REGS
+#else
+# define ALL_GENERAL_REGS64  MAKE_64BIT_MASK(0, 16)
+#endif
+#define ALL_QLDST_REGS   (ALL_GENERAL_REGS & ~SOFTMMU_RESERVE_REGS)
+#define ALL_QLDST_REGS64 (ALL_GENERAL_REGS64 & ~SOFTMMU_RESERVE_REGS)
+
 /* Define some temporary registers.  T2 is used for constant generation.  */
 #define TCG_REG_T1  TCG_REG_G1
 #define TCG_REG_T2  TCG_REG_O7
@@ -320,45 +340,6 @@ static bool patch_reloc(tcg_insn_unit *src_rw, int type,
 return true;
 }
 
-/* parse target specific constraints */
-static const char *target_parse_constraint(TCGArgConstraint *ct,
-   const char *ct_str, TCGType type)
-{
-switch (*ct_str++) {
-case 'r':
-ct->regs = 0x;
-break;
-case 'R':
-ct->regs = ALL_64;
-break;
-case 'A': /* qemu_ld/st address constraint */
-ct->regs = TARGET_LONG_BITS == 64 ? ALL_64 : 0x;
-reserve_helpers:
-tcg_regset_reset_reg(ct->regs, TCG_REG_O0);
-tcg_regset_reset_reg(ct->regs, TCG_REG_O1);
-tcg_regset_reset_reg(ct->regs, TCG_REG_O2);
-break;
-case 's': /* qemu_st data 32-bit constraint */
-ct->regs = 0x;
-goto reserve_helpers;
-case 'S': /* qemu_st data 64-bit constraint */
-ct->regs = ALL_64;
-goto reserve_helpers;
-case 'I':
-ct->ct |= TCG_CT_CONST_S11;
-break;
-case 'J':
-ct->ct |= TCG_CT_CONST_S13;
-break;
-case 'Z':
-ct->ct |= TCG_CT_CONST_

[PULL 14/24] tcg: Remove TCG_TARGET_CON_STR_H

All backends have now been converted to tcg-target-con-str.h,
so we can remove the fallback code.

Reviewed-by: Peter Maydell 
Reviewed-by: Alistair Francis 
Signed-off-by: Richard Henderson 
---
 tcg/aarch64/tcg-target.h |  1 -
 tcg/arm/tcg-target.h |  1 -
 tcg/i386/tcg-target.h|  1 -
 tcg/mips/tcg-target.h|  1 -
 tcg/ppc/tcg-target.h |  1 -
 tcg/riscv/tcg-target.h   |  1 -
 tcg/s390/tcg-target.h|  1 -
 tcg/sparc/tcg-target.h   |  1 -
 tcg/tci/tcg-target.h |  2 --
 tcg/tcg.c| 16 
 10 files changed, 26 deletions(-)

diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
index 4fc20b58ec..5ec30dba25 100644
--- a/tcg/aarch64/tcg-target.h
+++ b/tcg/aarch64/tcg-target.h
@@ -155,6 +155,5 @@ void tb_target_set_jmp_target(uintptr_t, uintptr_t, 
uintptr_t, uintptr_t);
 #define TCG_TARGET_NEED_LDST_LABELS
 #endif
 #define TCG_TARGET_NEED_POOL_LABELS
-#define TCG_TARGET_CON_STR_H
 
 #endif /* AARCH64_TCG_TARGET_H */
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
index 16336cd545..8d1fee6327 100644
--- a/tcg/arm/tcg-target.h
+++ b/tcg/arm/tcg-target.h
@@ -142,6 +142,5 @@ void tb_target_set_jmp_target(uintptr_t, uintptr_t, 
uintptr_t, uintptr_t);
 #define TCG_TARGET_NEED_LDST_LABELS
 #endif
 #define TCG_TARGET_NEED_POOL_LABELS
-#define TCG_TARGET_CON_STR_H
 
 #endif
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index 77693e13ea..b693d3692d 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -235,6 +235,5 @@ static inline void tb_target_set_jmp_target(uintptr_t 
tc_ptr, uintptr_t jmp_rx,
 #define TCG_TARGET_NEED_LDST_LABELS
 #endif
 #define TCG_TARGET_NEED_POOL_LABELS
-#define TCG_TARGET_CON_STR_H
 
 #endif
diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
index d850200855..c2c32fb38f 100644
--- a/tcg/mips/tcg-target.h
+++ b/tcg/mips/tcg-target.h
@@ -207,6 +207,5 @@ void tb_target_set_jmp_target(uintptr_t, uintptr_t, 
uintptr_t, uintptr_t);
 #ifdef CONFIG_SOFTMMU
 #define TCG_TARGET_NEED_LDST_LABELS
 #endif
-#define TCG_TARGET_CON_STR_H
 
 #endif
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index 40ed4b82dd..d1339afc66 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -185,6 +185,5 @@ void tb_target_set_jmp_target(uintptr_t, uintptr_t, 
uintptr_t, uintptr_t);
 #define TCG_TARGET_NEED_LDST_LABELS
 #endif
 #define TCG_TARGET_NEED_POOL_LABELS
-#define TCG_TARGET_CON_STR_H
 
 #endif
diff --git a/tcg/riscv/tcg-target.h b/tcg/riscv/tcg-target.h
index daf3ef7b5c..727c8df418 100644
--- a/tcg/riscv/tcg-target.h
+++ b/tcg/riscv/tcg-target.h
@@ -171,6 +171,5 @@ void tb_target_set_jmp_target(uintptr_t, uintptr_t, 
uintptr_t, uintptr_t);
 #define TCG_TARGET_NEED_POOL_LABELS
 
 #define TCG_TARGET_HAS_MEMORY_BSWAP 0
-#define TCG_TARGET_CON_STR_H
 
 #endif
diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h
index c43d6aba84..641464eea4 100644
--- a/tcg/s390/tcg-target.h
+++ b/tcg/s390/tcg-target.h
@@ -159,6 +159,5 @@ static inline void tb_target_set_jmp_target(uintptr_t 
tc_ptr, uintptr_t jmp_rx,
 #define TCG_TARGET_NEED_LDST_LABELS
 #endif
 #define TCG_TARGET_NEED_POOL_LABELS
-#define TCG_TARGET_CON_STR_H
 
 #endif
diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h
index 5185b00524..f66f5d07dc 100644
--- a/tcg/sparc/tcg-target.h
+++ b/tcg/sparc/tcg-target.h
@@ -168,6 +168,5 @@ extern bool use_vis3_instructions;
 void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
 
 #define TCG_TARGET_NEED_POOL_LABELS
-#define TCG_TARGET_CON_STR_H
 
 #endif
diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
index ab832aecc3..bb784e018e 100644
--- a/tcg/tci/tcg-target.h
+++ b/tcg/tci/tcg-target.h
@@ -207,6 +207,4 @@ static inline void tb_target_set_jmp_target(uintptr_t 
tc_ptr, uintptr_t jmp_rx,
 /* no need to flush icache explicitly */
 }
 
-#define TCG_TARGET_CON_STR_H
-
 #endif /* TCG_TARGET_H */
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 8cfa28ed84..39bcdff8dc 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -103,10 +103,6 @@ static void tcg_register_jit_int(const void *buf, size_t 
size,
 __attribute__((unused));
 
 /* Forward declarations for functions declared and used in tcg-target.c.inc. */
-#ifndef TCG_TARGET_CON_STR_H
-static const char *target_parse_constraint(TCGArgConstraint *ct,
-   const char *ct_str, TCGType type);
-#endif
 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
intptr_t arg2);
 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
@@ -2464,7 +2460,6 @@ static void process_op_defs(TCGContext *s)
 ct_str++;
 break;
 
-#ifdef TCG_TARGET_CON_STR_H
 /* Include all of the target-specific constraints. */
 
 #undef CONST
@@ -2480,17 +2475,6 @@ static void process_op_defs(TCGContext *s)
 default:
 /* Typo in TCGTargetOpDef constraint. */

[PULL 22/24] tcg/sparc: Split out constraint sets to tcg-target-con-set.h

Reviewed-by: Peter Maydell 
Signed-off-by: Richard Henderson 
---
 tcg/sparc/tcg-target-con-set.h | 32 +++
 tcg/sparc/tcg-target.h |  1 +
 tcg/sparc/tcg-target.c.inc | 75 +++---
 3 files changed, 56 insertions(+), 52 deletions(-)
 create mode 100644 tcg/sparc/tcg-target-con-set.h

diff --git a/tcg/sparc/tcg-target-con-set.h b/tcg/sparc/tcg-target-con-set.h
new file mode 100644
index 00..3b751dc3fb
--- /dev/null
+++ b/tcg/sparc/tcg-target-con-set.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Define Sparc target-specific constraint sets.
+ * Copyright (c) 2021 Linaro
+ */
+
+/*
+ * C_On_Im(...) defines a constraint set with <n> outputs and <m> inputs.
+ * Each operand should be a sequence of constraint letters as defined by
+ * tcg-target-con-str.h; the constraint combination is inclusive or.
+ */
+C_O0_I1(r)
+C_O0_I2(rZ, r)
+C_O0_I2(RZ, r)
+C_O0_I2(rZ, rJ)
+C_O0_I2(RZ, RJ)
+C_O0_I2(sZ, A)
+C_O0_I2(SZ, A)
+C_O1_I1(r, A)
+C_O1_I1(R, A)
+C_O1_I1(r, r)
+C_O1_I1(r, R)
+C_O1_I1(R, r)
+C_O1_I1(R, R)
+C_O1_I2(R, R, R)
+C_O1_I2(r, rZ, rJ)
+C_O1_I2(R, RZ, RJ)
+C_O1_I4(r, rZ, rJ, rI, 0)
+C_O1_I4(R, RZ, RJ, RI, 0)
+C_O2_I2(r, r, rZ, rJ)
+C_O2_I4(R, R, RZ, RZ, RJ, RI)
+C_O2_I4(r, r, rZ, rZ, rJ, rJ)
diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h
index f66f5d07dc..f50e8d50ee 100644
--- a/tcg/sparc/tcg-target.h
+++ b/tcg/sparc/tcg-target.h
@@ -168,5 +168,6 @@ extern bool use_vis3_instructions;
 void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
 
 #define TCG_TARGET_NEED_POOL_LABELS
+#define TCG_TARGET_CON_SET_H
 
 #endif
diff --git a/tcg/sparc/tcg-target.c.inc b/tcg/sparc/tcg-target.c.inc
index e291eb0b95..3d50f985c6 100644
--- a/tcg/sparc/tcg-target.c.inc
+++ b/tcg/sparc/tcg-target.c.inc
@@ -1573,40 +1573,11 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
 }
 }
 
-static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
+static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
 {
-static const TCGTargetOpDef r = { .args_ct_str = { "r" } };
-static const TCGTargetOpDef r_r = { .args_ct_str = { "r", "r" } };
-static const TCGTargetOpDef R_r = { .args_ct_str = { "R", "r" } };
-static const TCGTargetOpDef r_R = { .args_ct_str = { "r", "R" } };
-static const TCGTargetOpDef R_R = { .args_ct_str = { "R", "R" } };
-static const TCGTargetOpDef r_A = { .args_ct_str = { "r", "A" } };
-static const TCGTargetOpDef R_A = { .args_ct_str = { "R", "A" } };
-static const TCGTargetOpDef rZ_r = { .args_ct_str = { "rZ", "r" } };
-static const TCGTargetOpDef RZ_r = { .args_ct_str = { "RZ", "r" } };
-static const TCGTargetOpDef sZ_A = { .args_ct_str = { "sZ", "A" } };
-static const TCGTargetOpDef SZ_A = { .args_ct_str = { "SZ", "A" } };
-static const TCGTargetOpDef rZ_rJ = { .args_ct_str = { "rZ", "rJ" } };
-static const TCGTargetOpDef RZ_RJ = { .args_ct_str = { "RZ", "RJ" } };
-static const TCGTargetOpDef R_R_R = { .args_ct_str = { "R", "R", "R" } };
-static const TCGTargetOpDef r_rZ_rJ
-= { .args_ct_str = { "r", "rZ", "rJ" } };
-static const TCGTargetOpDef R_RZ_RJ
-= { .args_ct_str = { "R", "RZ", "RJ" } };
-static const TCGTargetOpDef r_r_rZ_rJ
-= { .args_ct_str = { "r", "r", "rZ", "rJ" } };
-static const TCGTargetOpDef movc_32
-= { .args_ct_str = { "r", "rZ", "rJ", "rI", "0" } };
-static const TCGTargetOpDef movc_64
-= { .args_ct_str = { "R", "RZ", "RJ", "RI", "0" } };
-static const TCGTargetOpDef add2_32
-= { .args_ct_str = { "r", "r", "rZ", "rZ", "rJ", "rJ" } };
-static const TCGTargetOpDef add2_64
-= { .args_ct_str = { "R", "R", "RZ", "RZ", "RJ", "RI" } };
-
 switch (op) {
 case INDEX_op_goto_ptr:
-return &r;
+return C_O0_I1(r);
 
 case INDEX_op_ld8u_i32:
 case INDEX_op_ld8s_i32:
@@ -1615,12 +1586,12 @@ static const TCGTargetOpDef 
*tcg_target_op_def(TCGOpcode op)
 case INDEX_op_ld_i32:
 case INDEX_op_neg_i32:
 case INDEX_op_not_i32:
-return &r_r;
+return C_O1_I1(r, r);
 
 case INDEX_op_st8_i32:
 case INDEX_op_st16_i32:
 case INDEX_op_st_i32:
-return &rZ_r;
+return C_O0_I2(rZ, r);
 
 case INDEX_op_add_i32:
 case INDEX_op_mul_i32:
@@ -1636,18 +1607,18 @@ static const TCGTargetOpDef 
*tcg_target_op_def(TCGOpcode op)
 case INDEX_op_shr_i32:
 case INDEX_op_sar_i32:
 case INDEX_op_setcond_i32:
-return &r_rZ_rJ;
+return C_O1_I2(r, rZ, rJ);
 
 case INDEX_op_brcond_i32:
-return &rZ_rJ;
+return C_O0_I2(rZ, rJ);
 case INDEX_op_movcond_i32:
-return &movc_32;
+return C_O1_I4(r, rZ, rJ, rI, 0);
 case INDEX_op_add2_i32:
 case INDEX_op_sub2_i32:
-return &add2_32;
+return C_O2_I4(r, r, rZ, rZ, rJ, rJ);
 case INDEX_op_mulu2_i32:
 case INDEX_op_muls2_i32:
-return &r_r_rZ_rJ;
+return

[PULL 10/24] tcg/mips: Split out target constraints to tcg-target-con-str.h

Reviewed-by: Peter Maydell 
Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Richard Henderson 
---
 tcg/mips/tcg-target-con-str.h | 24 +++
 tcg/mips/tcg-target.h |  1 +
 tcg/mips/tcg-target.c.inc | 77 ++-
 3 files changed, 46 insertions(+), 56 deletions(-)
 create mode 100644 tcg/mips/tcg-target-con-str.h

diff --git a/tcg/mips/tcg-target-con-str.h b/tcg/mips/tcg-target-con-str.h
new file mode 100644
index 00..e4b2965c72
--- /dev/null
+++ b/tcg/mips/tcg-target-con-str.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Define MIPS target-specific operand constraints.
+ * Copyright (c) 2021 Linaro
+ */
+
+/*
+ * Define constraint letters for register sets:
+ * REGS(letter, register_mask)
+ */
+REGS('r', ALL_GENERAL_REGS)
+REGS('L', ALL_QLOAD_REGS)
+REGS('S', ALL_QSTORE_REGS)
+
+/*
+ * Define constraint letters for constants:
+ * CONST(letter, TCG_CT_CONST_* bit set)
+ */
+CONST('I', TCG_CT_CONST_U16)
+CONST('J', TCG_CT_CONST_S16)
+CONST('K', TCG_CT_CONST_P2M1)
+CONST('N', TCG_CT_CONST_N16)
+CONST('W', TCG_CT_CONST_WSZ)
+CONST('Z', TCG_CT_CONST_ZERO)
diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
index c2c32fb38f..d850200855 100644
--- a/tcg/mips/tcg-target.h
+++ b/tcg/mips/tcg-target.h
@@ -207,5 +207,6 @@ void tb_target_set_jmp_target(uintptr_t, uintptr_t, 
uintptr_t, uintptr_t);
 #ifdef CONFIG_SOFTMMU
 #define TCG_TARGET_NEED_LDST_LABELS
 #endif
+#define TCG_TARGET_CON_STR_H
 
 #endif
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
index 7293169ab2..432d38a010 100644
--- a/tcg/mips/tcg-target.c.inc
+++ b/tcg/mips/tcg-target.c.inc
@@ -171,67 +171,27 @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
 #define TCG_CT_CONST_N16  0x1000   /* "Negatable" 16-bit: -32767 - 32767 */
 #define TCG_CT_CONST_WSZ  0x2000   /* word size */
 
+#define ALL_GENERAL_REGS  0xffffffffu
+#define NOA0_REGS (ALL_GENERAL_REGS & ~(1 << TCG_REG_A0))
+
+#ifdef CONFIG_SOFTMMU
+#define ALL_QLOAD_REGS \
+(NOA0_REGS & ~((TCG_TARGET_REG_BITS < TARGET_LONG_BITS) << TCG_REG_A2))
+#define ALL_QSTORE_REGS \
+(NOA0_REGS & ~(TCG_TARGET_REG_BITS < TARGET_LONG_BITS   \
+   ? (1 << TCG_REG_A2) | (1 << TCG_REG_A3)  \
+   : (1 << TCG_REG_A1)))
+#else
+#define ALL_QLOAD_REGS   NOA0_REGS
+#define ALL_QSTORE_REGS  NOA0_REGS
+#endif
+
+
 static inline bool is_p2m1(tcg_target_long val)
 {
 return val && ((val + 1) & val) == 0;
 }
 
-/* parse target specific constraints */
-static const char *target_parse_constraint(TCGArgConstraint *ct,
-   const char *ct_str, TCGType type)
-{
-switch(*ct_str++) {
-case 'r':
-ct->regs = 0x;
-break;
-case 'L': /* qemu_ld input arg constraint */
-ct->regs = 0x;
-tcg_regset_reset_reg(ct->regs, TCG_REG_A0);
-#if defined(CONFIG_SOFTMMU)
-if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
-tcg_regset_reset_reg(ct->regs, TCG_REG_A2);
-}
-#endif
-break;
-case 'S': /* qemu_st constraint */
-ct->regs = 0x;
-tcg_regset_reset_reg(ct->regs, TCG_REG_A0);
-#if defined(CONFIG_SOFTMMU)
-if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
-tcg_regset_reset_reg(ct->regs, TCG_REG_A2);
-tcg_regset_reset_reg(ct->regs, TCG_REG_A3);
-} else {
-tcg_regset_reset_reg(ct->regs, TCG_REG_A1);
-}
-#endif
-break;
-case 'I':
-ct->ct |= TCG_CT_CONST_U16;
-break;
-case 'J':
-ct->ct |= TCG_CT_CONST_S16;
-break;
-case 'K':
-ct->ct |= TCG_CT_CONST_P2M1;
-break;
-case 'N':
-ct->ct |= TCG_CT_CONST_N16;
-break;
-case 'W':
-ct->ct |= TCG_CT_CONST_WSZ;
-break;
-case 'Z':
-/* We are cheating a bit here, using the fact that the register
-   ZERO is also the register number 0. Hence there is no need
-   to check for const_args in each instruction. */
-ct->ct |= TCG_CT_CONST_ZERO;
-break;
-default:
-return NULL;
-}
-return ct_str;
-}
-
 /* test if a constant matches the constraint */
 static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
  const TCGArgConstraint *arg_ct)
@@ -1697,6 +1657,11 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode 
opc,
 TCGArg a0, a1, a2;
 int c2;
 
+/*
+ * Note that many operands use the constraint set "rZ".
+ * We make use of the fact that 0 is the ZERO register,
+ * and hence such cases need not check for const_args.
+ */
 a0 = args[0];
 a1 = args[1];
 a2 = args[2];
-- 
2.25.1

[PULL 09/24] tcg/tci: Split out target constraints to tcg-target-con-str.h

Reviewed-by: Peter Maydell 
Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Richard Henderson 
---
 tcg/tci/tcg-target-con-str.h | 11 +++
 tcg/tci/tcg-target.h |  2 ++
 tcg/tci/tcg-target.c.inc | 14 --
 3 files changed, 13 insertions(+), 14 deletions(-)
 create mode 100644 tcg/tci/tcg-target-con-str.h

diff --git a/tcg/tci/tcg-target-con-str.h b/tcg/tci/tcg-target-con-str.h
new file mode 100644
index 00..87c0f19e9c
--- /dev/null
+++ b/tcg/tci/tcg-target-con-str.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Define TCI target-specific operand constraints.
+ * Copyright (c) 2021 Linaro
+ */
+
+/*
+ * Define constraint letters for register sets:
+ * REGS(letter, register_mask)
+ */
+REGS('r', MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS))
diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
index bb784e018e..ab832aecc3 100644
--- a/tcg/tci/tcg-target.h
+++ b/tcg/tci/tcg-target.h
@@ -207,4 +207,6 @@ static inline void tb_target_set_jmp_target(uintptr_t 
tc_ptr, uintptr_t jmp_rx,
 /* no need to flush icache explicitly */
 }
 
+#define TCG_TARGET_CON_STR_H
+
 #endif /* TCG_TARGET_H */
diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
index b62e14d5ce..493bbf1e39 100644
--- a/tcg/tci/tcg-target.c.inc
+++ b/tcg/tci/tcg-target.c.inc
@@ -302,20 +302,6 @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
 return true;
 }
 
-/* Parse target specific constraints. */
-static const char *target_parse_constraint(TCGArgConstraint *ct,
-   const char *ct_str, TCGType type)
-{
-switch (*ct_str++) {
-case 'r':
-ct->regs = BIT(TCG_TARGET_NB_REGS) - 1;
-break;
-default:
-return NULL;
-}
-return ct_str;
-}
-
 #if defined(CONFIG_DEBUG_TCG_INTERPRETER)
 /* Show current bytecode. Used by tcg interpreter. */
 void tci_disas(uint8_t opc)
-- 
2.25.1

[PULL 07/24] tcg/aarch64: Split out target constraints to tcg-target-con-str.h

Reviewed-by: Peter Maydell 
Signed-off-by: Richard Henderson 
---
 tcg/aarch64/tcg-target-con-str.h | 24 +++
 tcg/aarch64/tcg-target.h |  1 +
 tcg/aarch64/tcg-target.c.inc | 51 +---
 3 files changed, 33 insertions(+), 43 deletions(-)
 create mode 100644 tcg/aarch64/tcg-target-con-str.h

diff --git a/tcg/aarch64/tcg-target-con-str.h b/tcg/aarch64/tcg-target-con-str.h
new file mode 100644
index 00..00adb64594
--- /dev/null
+++ b/tcg/aarch64/tcg-target-con-str.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Define AArch64 target-specific operand constraints.
+ * Copyright (c) 2021 Linaro
+ */
+
+/*
+ * Define constraint letters for register sets:
+ * REGS(letter, register_mask)
+ */
+REGS('r', ALL_GENERAL_REGS)
+REGS('l', ALL_QLDST_REGS)
+REGS('w', ALL_VECTOR_REGS)
+
+/*
+ * Define constraint letters for constants:
+ * CONST(letter, TCG_CT_CONST_* bit set)
+ */
+CONST('A', TCG_CT_CONST_AIMM)
+CONST('L', TCG_CT_CONST_LIMM)
+CONST('M', TCG_CT_CONST_MONE)
+CONST('O', TCG_CT_CONST_ORRI)
+CONST('N', TCG_CT_CONST_ANDI)
+CONST('Z', TCG_CT_CONST_ZERO)
diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
index 5ec30dba25..4fc20b58ec 100644
--- a/tcg/aarch64/tcg-target.h
+++ b/tcg/aarch64/tcg-target.h
@@ -155,5 +155,6 @@ void tb_target_set_jmp_target(uintptr_t, uintptr_t, 
uintptr_t, uintptr_t);
 #define TCG_TARGET_NEED_LDST_LABELS
 #endif
 #define TCG_TARGET_NEED_POOL_LABELS
+#define TCG_TARGET_CON_STR_H
 
 #endif /* AARCH64_TCG_TARGET_H */
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
index 23954ec7cf..42037c98fa 100644
--- a/tcg/aarch64/tcg-target.c.inc
+++ b/tcg/aarch64/tcg-target.c.inc
@@ -126,51 +126,16 @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
 #define TCG_CT_CONST_ORRI 0x1000
 #define TCG_CT_CONST_ANDI 0x2000
 
-/* parse target specific constraints */
-static const char *target_parse_constraint(TCGArgConstraint *ct,
-   const char *ct_str, TCGType type)
-{
-switch (*ct_str++) {
-case 'r': /* general registers */
-ct->regs |= 0xu;
-break;
-case 'w': /* advsimd registers */
-ct->regs |= 0xull;
-break;
-case 'l': /* qemu_ld / qemu_st address, data_reg */
-ct->regs = 0xu;
+#define ALL_GENERAL_REGS  0xffffffffu
+#define ALL_VECTOR_REGS   0xffffffff00000000ull
+
 #ifdef CONFIG_SOFTMMU
-/* x0 and x1 will be overwritten when reading the tlb entry,
-   and x2, and x3 for helper args, better to avoid using them. */
-tcg_regset_reset_reg(ct->regs, TCG_REG_X0);
-tcg_regset_reset_reg(ct->regs, TCG_REG_X1);
-tcg_regset_reset_reg(ct->regs, TCG_REG_X2);
-tcg_regset_reset_reg(ct->regs, TCG_REG_X3);
+#define ALL_QLDST_REGS \
+(ALL_GENERAL_REGS & ~((1 << TCG_REG_X0) | (1 << TCG_REG_X1) | \
+  (1 << TCG_REG_X2) | (1 << TCG_REG_X3)))
+#else
+#define ALL_QLDST_REGS   ALL_GENERAL_REGS
 #endif
-break;
-case 'A': /* Valid for arithmetic immediate (positive or negative).  */
-ct->ct |= TCG_CT_CONST_AIMM;
-break;
-case 'L': /* Valid for logical immediate.  */
-ct->ct |= TCG_CT_CONST_LIMM;
-break;
-case 'M': /* minus one */
-ct->ct |= TCG_CT_CONST_MONE;
-break;
-case 'O': /* vector orr/bic immediate */
-ct->ct |= TCG_CT_CONST_ORRI;
-break;
-case 'N': /* vector orr/bic immediate, inverted */
-ct->ct |= TCG_CT_CONST_ANDI;
-break;
-case 'Z': /* zero */
-ct->ct |= TCG_CT_CONST_ZERO;
-break;
-default:
-return NULL;
-}
-return ct_str;
-}
 
 /* Match a constant valid for addition (12-bit, optionally shifted).  */
 static inline bool is_aimm(uint64_t val)
-- 
2.25.1

[PULL 04/24] tcg/i386: Tidy register constraint definitions

Create symbolic constants for all low-byte-addressable
and second-byte-addressable registers.  Create a symbol
for the registers that need reserving for softmmu.

There is no functional change for 's', as this letter is
only used for i386.  The BYTEL name is correct for the
action we wish from the constraint.

Reviewed-by: Peter Maydell 
Signed-off-by: Richard Henderson 
---
 tcg/i386/tcg-target.c.inc | 40 +++
 1 file changed, 20 insertions(+), 20 deletions(-)

diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index 540debdf34..4feb7e2aa1 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -132,6 +132,22 @@ static const int tcg_target_call_oarg_regs[] = {
 # define TCG_REG_L1 TCG_REG_EDX
 #endif
 
+#define ALL_BYTEH_REGS 0x000fu
+#if TCG_TARGET_REG_BITS == 64
+# define ALL_GENERAL_REGS  0xu
+# define ALL_VECTOR_REGS   0xu
+# define ALL_BYTEL_REGSALL_GENERAL_REGS
+#else
+# define ALL_GENERAL_REGS  0x00ffu
+# define ALL_VECTOR_REGS   0x00ffu
+# define ALL_BYTEL_REGSALL_BYTEH_REGS
+#endif
+#ifdef CONFIG_SOFTMMU
+# define SOFTMMU_RESERVE_REGS  ((1 << TCG_REG_L0) | (1 << TCG_REG_L1))
+#else
+# define SOFTMMU_RESERVE_REGS  0
+#endif
+
 /* The host compiler should supply <cpuid.h> to enable runtime features
detection, as we're not going to go so far as our own inline assembly.
If not available, default values will be assumed.  */
@@ -193,14 +209,6 @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
 return true;
 }
 
-#if TCG_TARGET_REG_BITS == 64
-#define ALL_GENERAL_REGS   0xu
-#define ALL_VECTOR_REGS0xu
-#else
-#define ALL_GENERAL_REGS   0x00ffu
-#define ALL_VECTOR_REGS0x00ffu
-#endif
-
 /* parse target specific constraints */
 static const char *target_parse_constraint(TCGArgConstraint *ct,
const char *ct_str, TCGType type)
@@ -226,11 +234,11 @@ static const char 
*target_parse_constraint(TCGArgConstraint *ct,
 break;
 case 'q':
 /* A register that can be used as a byte operand.  */
-ct->regs = TCG_TARGET_REG_BITS == 64 ? 0x : 0xf;
+ct->regs |= ALL_BYTEL_REGS;
 break;
 case 'Q':
 /* A register with an addressable second byte (e.g. %ah).  */
-ct->regs = 0xf;
+ct->regs |= ALL_BYTEH_REGS;
 break;
 case 'r':
 /* A general register.  */
@@ -247,19 +255,11 @@ static const char 
*target_parse_constraint(TCGArgConstraint *ct,
 
 case 'L':
 /* qemu_ld/st data+address constraint */
-ct->regs = TCG_TARGET_REG_BITS == 64 ? 0x : 0xff;
-#ifdef CONFIG_SOFTMMU
-tcg_regset_reset_reg(ct->regs, TCG_REG_L0);
-tcg_regset_reset_reg(ct->regs, TCG_REG_L1);
-#endif
+ct->regs |= ALL_GENERAL_REGS & ~SOFTMMU_RESERVE_REGS;
 break;
 case 's':
 /* qemu_st8_i32 data constraint */
-ct->regs = 0xf;
-#ifdef CONFIG_SOFTMMU
-tcg_regset_reset_reg(ct->regs, TCG_REG_L0);
-tcg_regset_reset_reg(ct->regs, TCG_REG_L1);
-#endif
+ct->regs |= ALL_BYTEL_REGS & ~SOFTMMU_RESERVE_REGS;
 break;
 
 case 'e':
-- 
2.25.1

[PULL 08/24] tcg/ppc: Split out target constraints to tcg-target-con-str.h

Reviewed-by: Peter Maydell 
Signed-off-by: Richard Henderson 
---
 tcg/ppc/tcg-target-con-str.h | 30 +++
 tcg/ppc/tcg-target.h |  1 +
 tcg/ppc/tcg-target.c.inc | 73 
 3 files changed, 46 insertions(+), 58 deletions(-)
 create mode 100644 tcg/ppc/tcg-target-con-str.h

diff --git a/tcg/ppc/tcg-target-con-str.h b/tcg/ppc/tcg-target-con-str.h
new file mode 100644
index 00..298ca20d5b
--- /dev/null
+++ b/tcg/ppc/tcg-target-con-str.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Define PowerPC target-specific operand constraints.
+ * Copyright (c) 2021 Linaro
+ */
+
+/*
+ * Define constraint letters for register sets:
+ * REGS(letter, register_mask)
+ */
+REGS('r', ALL_GENERAL_REGS)
+REGS('v', ALL_VECTOR_REGS)
+REGS('A', 1u << TCG_REG_R3)
+REGS('B', 1u << TCG_REG_R4)
+REGS('C', 1u << TCG_REG_R5)
+REGS('D', 1u << TCG_REG_R6)
+REGS('L', ALL_QLOAD_REGS)
+REGS('S', ALL_QSTORE_REGS)
+
+/*
+ * Define constraint letters for constants:
+ * CONST(letter, TCG_CT_CONST_* bit set)
+ */
+CONST('I', TCG_CT_CONST_S16)
+CONST('J', TCG_CT_CONST_U16)
+CONST('M', TCG_CT_CONST_MONE)
+CONST('T', TCG_CT_CONST_S32)
+CONST('U', TCG_CT_CONST_U32)
+CONST('W', TCG_CT_CONST_WSZ)
+CONST('Z', TCG_CT_CONST_ZERO)
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index d1339afc66..40ed4b82dd 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -185,5 +185,6 @@ void tb_target_set_jmp_target(uintptr_t, uintptr_t, 
uintptr_t, uintptr_t);
 #define TCG_TARGET_NEED_LDST_LABELS
 #endif
 #define TCG_TARGET_NEED_POOL_LABELS
+#define TCG_TARGET_CON_STR_H
 
 #endif
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
index cf64892295..e5aa8d2d10 100644
--- a/tcg/ppc/tcg-target.c.inc
+++ b/tcg/ppc/tcg-target.c.inc
@@ -62,6 +62,21 @@
 #define TCG_CT_CONST_MONE 0x2000
 #define TCG_CT_CONST_WSZ  0x4000
 
+#define ALL_GENERAL_REGS  0xu
+#define ALL_VECTOR_REGS   0xull
+
+#ifdef CONFIG_SOFTMMU
+#define ALL_QLOAD_REGS \
+(ALL_GENERAL_REGS & \
+ ~((1 << TCG_REG_R3) | (1 << TCG_REG_R4) | (1 << TCG_REG_R5)))
+#define ALL_QSTORE_REGS \
+(ALL_GENERAL_REGS & ~((1 << TCG_REG_R3) | (1 << TCG_REG_R4) | \
+  (1 << TCG_REG_R5) | (1 << TCG_REG_R6)))
+#else
+#define ALL_QLOAD_REGS  (ALL_GENERAL_REGS & ~(1 << TCG_REG_R3))
+#define ALL_QSTORE_REGS ALL_QLOAD_REGS
+#endif
+
 TCGPowerISA have_isa;
 static bool have_isel;
 bool have_altivec;
@@ -222,64 +237,6 @@ static bool reloc_pc14(tcg_insn_unit *src_rw, const 
tcg_insn_unit *target)
 return false;
 }
 
-/* parse target specific constraints */
-static const char *target_parse_constraint(TCGArgConstraint *ct,
-   const char *ct_str, TCGType type)
-{
-switch (*ct_str++) {
-case 'A': case 'B': case 'C': case 'D':
-tcg_regset_set_reg(ct->regs, 3 + ct_str[0] - 'A');
-break;
-case 'r':
-ct->regs = 0x;
-break;
-case 'v':
-ct->regs = 0xull;
-break;
-case 'L':   /* qemu_ld constraint */
-ct->regs = 0x;
-tcg_regset_reset_reg(ct->regs, TCG_REG_R3);
-#ifdef CONFIG_SOFTMMU
-tcg_regset_reset_reg(ct->regs, TCG_REG_R4);
-tcg_regset_reset_reg(ct->regs, TCG_REG_R5);
-#endif
-break;
-case 'S':   /* qemu_st constraint */
-ct->regs = 0x;
-tcg_regset_reset_reg(ct->regs, TCG_REG_R3);
-#ifdef CONFIG_SOFTMMU
-tcg_regset_reset_reg(ct->regs, TCG_REG_R4);
-tcg_regset_reset_reg(ct->regs, TCG_REG_R5);
-tcg_regset_reset_reg(ct->regs, TCG_REG_R6);
-#endif
-break;
-case 'I':
-ct->ct |= TCG_CT_CONST_S16;
-break;
-case 'J':
-ct->ct |= TCG_CT_CONST_U16;
-break;
-case 'M':
-ct->ct |= TCG_CT_CONST_MONE;
-break;
-case 'T':
-ct->ct |= TCG_CT_CONST_S32;
-break;
-case 'U':
-ct->ct |= TCG_CT_CONST_U32;
-break;
-case 'W':
-ct->ct |= TCG_CT_CONST_WSZ;
-break;
-case 'Z':
-ct->ct |= TCG_CT_CONST_ZERO;
-break;
-default:
-return NULL;
-}
-return ct_str;
-}
-
 /* test if a constant matches the constraint */
 static int tcg_target_const_match(tcg_target_long val, TCGType type,
   const TCGArgConstraint *arg_ct)
-- 
2.25.1

[PULL 05/24] tcg/i386: Split out target constraints to tcg-target-con-str.h

This eliminates the target-specific function target_parse_constraint
and folds it into the single caller, process_op_defs.  Since this is
done directly into the switch statement, duplicates are compilation
errors rather than silently ignored at runtime.

Reviewed-by: Peter Maydell 
Signed-off-by: Richard Henderson 
---
 tcg/i386/tcg-target-con-str.h | 33 +
 tcg/i386/tcg-target.h |  1 +
 tcg/tcg.c | 33 ++---
 tcg/i386/tcg-target.c.inc | 69 ---
 4 files changed, 62 insertions(+), 74 deletions(-)
 create mode 100644 tcg/i386/tcg-target-con-str.h

diff --git a/tcg/i386/tcg-target-con-str.h b/tcg/i386/tcg-target-con-str.h
new file mode 100644
index 00..24e6bcb80d
--- /dev/null
+++ b/tcg/i386/tcg-target-con-str.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Define i386 target-specific operand constraints.
+ * Copyright (c) 2021 Linaro
+ *
+ */
+
+/*
+ * Define constraint letters for register sets:
+ * REGS(letter, register_mask)
+ */
+REGS('a', 1u << TCG_REG_EAX)
+REGS('b', 1u << TCG_REG_EBX)
+REGS('c', 1u << TCG_REG_ECX)
+REGS('d', 1u << TCG_REG_EDX)
+REGS('S', 1u << TCG_REG_ESI)
+REGS('D', 1u << TCG_REG_EDI)
+
+REGS('r', ALL_GENERAL_REGS)
+REGS('x', ALL_VECTOR_REGS)
+REGS('q', ALL_BYTEL_REGS) /* regs that can be used as a byte operand */
+REGS('Q', ALL_BYTEH_REGS) /* regs with a second byte (e.g. %ah) */
+REGS('L', ALL_GENERAL_REGS & ~SOFTMMU_RESERVE_REGS)  /* qemu_ld/st */
+REGS('s', ALL_BYTEL_REGS & ~SOFTMMU_RESERVE_REGS)/* qemu_st8_i32 data */
+
+/*
+ * Define constraint letters for constants:
+ * CONST(letter, TCG_CT_CONST_* bit set)
+ */
+CONST('e', TCG_CT_CONST_S32)
+CONST('I', TCG_CT_CONST_I32)
+CONST('W', TCG_CT_CONST_WSZ)
+CONST('Z', TCG_CT_CONST_U32)
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index b693d3692d..77693e13ea 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -235,5 +235,6 @@ static inline void tb_target_set_jmp_target(uintptr_t 
tc_ptr, uintptr_t jmp_rx,
 #define TCG_TARGET_NEED_LDST_LABELS
 #endif
 #define TCG_TARGET_NEED_POOL_LABELS
+#define TCG_TARGET_CON_STR_H
 
 #endif
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 9e1b0d73c7..8cfa28ed84 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -103,8 +103,10 @@ static void tcg_register_jit_int(const void *buf, size_t 
size,
 __attribute__((unused));
 
 /* Forward declarations for functions declared and used in tcg-target.c.inc. */
+#ifndef TCG_TARGET_CON_STR_H
 static const char *target_parse_constraint(TCGArgConstraint *ct,
const char *ct_str, TCGType type);
+#endif
 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
intptr_t arg2);
 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
@@ -2415,7 +2417,6 @@ static void process_op_defs(TCGContext *s)
 for (op = 0; op < NB_OPS; op++) {
 TCGOpDef *def = &tcg_op_defs[op];
 const TCGTargetOpDef *tdefs;
-TCGType type;
 int i, nb_args;
 
 if (def->flags & TCG_OPF_NOT_PRESENT) {
@@ -2431,7 +2432,6 @@ static void process_op_defs(TCGContext *s)
 /* Missing TCGTargetOpDef entry. */
 tcg_debug_assert(tdefs != NULL);
 
-type = (def->flags & TCG_OPF_64BIT ? TCG_TYPE_I64 : TCG_TYPE_I32);
 for (i = 0; i < nb_args; i++) {
 const char *ct_str = tdefs->args_ct_str[i];
 /* Incomplete TCGTargetOpDef entry. */
@@ -2463,11 +2463,34 @@ static void process_op_defs(TCGContext *s)
 def->args_ct[i].ct |= TCG_CT_CONST;
 ct_str++;
 break;
+
+#ifdef TCG_TARGET_CON_STR_H
+/* Include all of the target-specific constraints. */
+
+#undef CONST
+#define CONST(CASE, MASK) \
+case CASE: def->args_ct[i].ct |= MASK; ct_str++; break;
+#define REGS(CASE, MASK) \
+case CASE: def->args_ct[i].regs |= MASK; ct_str++; break;
+
+#include "tcg-target-con-str.h"
+
+#undef REGS
+#undef CONST
 default:
-ct_str = target_parse_constraint(&def->args_ct[i],
- ct_str, type);
 /* Typo in TCGTargetOpDef constraint. */
-tcg_debug_assert(ct_str != NULL);
+g_assert_not_reached();
+#else
+default:
+{
+TCGType type = (def->flags & TCG_OPF_64BIT
+? TCG_TYPE_I64 : TCG_TYPE_I32);
+ct_str = target_parse_constraint(&def->args_ct[i],
+ ct_str, type);
+/* Typo in TCGTargetOpDef constraint. */
+tcg_debug_assert(ct_str != NULL);
+}
+#endif
 }
 }
 }
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/

[PULL 12/24] tcg/s390: Split out target constraints to tcg-target-con-str.h

Reviewed-by: Peter Maydell 
Signed-off-by: Richard Henderson 
---
 tcg/s390/tcg-target-con-str.h | 28 ++
 tcg/s390/tcg-target.h |  1 +
 tcg/s390/tcg-target.c.inc | 53 +--
 3 files changed, 42 insertions(+), 40 deletions(-)
 create mode 100644 tcg/s390/tcg-target-con-str.h

diff --git a/tcg/s390/tcg-target-con-str.h b/tcg/s390/tcg-target-con-str.h
new file mode 100644
index 00..892d8f8c06
--- /dev/null
+++ b/tcg/s390/tcg-target-con-str.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Define S390 target-specific operand constraints.
+ * Copyright (c) 2021 Linaro
+ */
+
+/*
+ * Define constraint letters for register sets:
+ * REGS(letter, register_mask)
+ */
+REGS('r', ALL_GENERAL_REGS)
+REGS('L', ALL_GENERAL_REGS & ~SOFTMMU_RESERVE_REGS)
+/*
+ * A (single) even/odd pair for division.
+ * TODO: Add something to the register allocator to allow
+ * this kind of regno+1 pairing to be done more generally.
+ */
+REGS('a', 1u << TCG_REG_R2)
+REGS('b', 1u << TCG_REG_R3)
+
+/*
+ * Define constraint letters for constants:
+ * CONST(letter, TCG_CT_CONST_* bit set)
+ */
+CONST('A', TCG_CT_CONST_S33)
+CONST('I', TCG_CT_CONST_S16)
+CONST('J', TCG_CT_CONST_S32)
+CONST('Z', TCG_CT_CONST_ZERO)
diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h
index 641464eea4..c43d6aba84 100644
--- a/tcg/s390/tcg-target.h
+++ b/tcg/s390/tcg-target.h
@@ -159,5 +159,6 @@ static inline void tb_target_set_jmp_target(uintptr_t 
tc_ptr, uintptr_t jmp_rx,
 #define TCG_TARGET_NEED_LDST_LABELS
 #endif
 #define TCG_TARGET_NEED_POOL_LABELS
+#define TCG_TARGET_CON_STR_H
 
 #endif
diff --git a/tcg/s390/tcg-target.c.inc b/tcg/s390/tcg-target.c.inc
index 8517e55232..3fec7fec5f 100644
--- a/tcg/s390/tcg-target.c.inc
+++ b/tcg/s390/tcg-target.c.inc
@@ -42,6 +42,19 @@
 #define TCG_CT_CONST_S33   0x400
 #define TCG_CT_CONST_ZERO  0x800
 
+#define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 16)
+/*
+ * For softmmu, we need to avoid conflicts with the first 3
+ * argument registers to perform the tlb lookup, and to call
+ * the helper function.
+ */
+#ifdef CONFIG_SOFTMMU
+#define SOFTMMU_RESERVE_REGS MAKE_64BIT_MASK(TCG_REG_R2, 3)
+#else
+#define SOFTMMU_RESERVE_REGS 0
+#endif
+
+
 /* Several places within the instruction set 0 means "no register"
rather than TCG_REG_R0.  */
 #define TCG_REG_NONE0
@@ -403,46 +416,6 @@ static bool patch_reloc(tcg_insn_unit *src_rw, int type,
 return false;
 }
 
-/* parse target specific constraints */
-static const char *target_parse_constraint(TCGArgConstraint *ct,
-   const char *ct_str, TCGType type)
-{
-switch (*ct_str++) {
-case 'r':  /* all registers */
-ct->regs = 0x;
-break;
-case 'L':  /* qemu_ld/st constraint */
-ct->regs = 0x;
-tcg_regset_reset_reg(ct->regs, TCG_REG_R2);
-tcg_regset_reset_reg(ct->regs, TCG_REG_R3);
-tcg_regset_reset_reg(ct->regs, TCG_REG_R4);
-break;
-case 'a':  /* force R2 for division */
-ct->regs = 0;
-tcg_regset_set_reg(ct->regs, TCG_REG_R2);
-break;
-case 'b':  /* force R3 for division */
-ct->regs = 0;
-tcg_regset_set_reg(ct->regs, TCG_REG_R3);
-break;
-case 'A':
-ct->ct |= TCG_CT_CONST_S33;
-break;
-case 'I':
-ct->ct |= TCG_CT_CONST_S16;
-break;
-case 'J':
-ct->ct |= TCG_CT_CONST_S32;
-break;
-case 'Z':
-ct->ct |= TCG_CT_CONST_ZERO;
-break;
-default:
-return NULL;
-}
-return ct_str;
-}
-
 /* Test if a constant matches the constraint. */
 static int tcg_target_const_match(tcg_target_long val, TCGType type,
   const TCGArgConstraint *arg_ct)
-- 
2.25.1

[PULL 02/24] tcg/tci: Remove TCG_TARGET_HAS_* ifdefs

The opcodes always exist, regardless of whether or not they
are enabled.  Remove the unnecessary ifdefs.

Signed-off-by: Richard Henderson 
---
 tcg/tci/tcg-target.c.inc | 82 
 1 file changed, 82 deletions(-)

diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
index 9c45f5f88f..b62e14d5ce 100644
--- a/tcg/tci/tcg-target.c.inc
+++ b/tcg/tci/tcg-target.c.inc
@@ -71,70 +71,42 @@ static const TCGTargetOpDef tcg_target_op_defs[] = {
 { INDEX_op_add_i32, { R, RI, RI } },
 { INDEX_op_sub_i32, { R, RI, RI } },
 { INDEX_op_mul_i32, { R, RI, RI } },
-#if TCG_TARGET_HAS_div_i32
 { INDEX_op_div_i32, { R, R, R } },
 { INDEX_op_divu_i32, { R, R, R } },
 { INDEX_op_rem_i32, { R, R, R } },
 { INDEX_op_remu_i32, { R, R, R } },
-#elif TCG_TARGET_HAS_div2_i32
-{ INDEX_op_div2_i32, { R, R, "0", "1", R } },
-{ INDEX_op_divu2_i32, { R, R, "0", "1", R } },
-#endif
 /* TODO: Does R, RI, RI result in faster code than R, R, RI?
If both operands are constants, we can optimize. */
 { INDEX_op_and_i32, { R, RI, RI } },
-#if TCG_TARGET_HAS_andc_i32
 { INDEX_op_andc_i32, { R, RI, RI } },
-#endif
-#if TCG_TARGET_HAS_eqv_i32
 { INDEX_op_eqv_i32, { R, RI, RI } },
-#endif
-#if TCG_TARGET_HAS_nand_i32
 { INDEX_op_nand_i32, { R, RI, RI } },
-#endif
-#if TCG_TARGET_HAS_nor_i32
 { INDEX_op_nor_i32, { R, RI, RI } },
-#endif
 { INDEX_op_or_i32, { R, RI, RI } },
-#if TCG_TARGET_HAS_orc_i32
 { INDEX_op_orc_i32, { R, RI, RI } },
-#endif
 { INDEX_op_xor_i32, { R, RI, RI } },
 { INDEX_op_shl_i32, { R, RI, RI } },
 { INDEX_op_shr_i32, { R, RI, RI } },
 { INDEX_op_sar_i32, { R, RI, RI } },
-#if TCG_TARGET_HAS_rot_i32
 { INDEX_op_rotl_i32, { R, RI, RI } },
 { INDEX_op_rotr_i32, { R, RI, RI } },
-#endif
-#if TCG_TARGET_HAS_deposit_i32
 { INDEX_op_deposit_i32, { R, "0", R } },
-#endif
 
 { INDEX_op_brcond_i32, { R, RI } },
 
 { INDEX_op_setcond_i32, { R, R, RI } },
-#if TCG_TARGET_REG_BITS == 64
 { INDEX_op_setcond_i64, { R, R, RI } },
-#endif /* TCG_TARGET_REG_BITS == 64 */
 
-#if TCG_TARGET_REG_BITS == 32
 /* TODO: Support R, R, R, R, RI, RI? Will it be faster? */
 { INDEX_op_add2_i32, { R, R, R, R, R, R } },
 { INDEX_op_sub2_i32, { R, R, R, R, R, R } },
 { INDEX_op_brcond2_i32, { R, R, RI, RI } },
 { INDEX_op_mulu2_i32, { R, R, R, R } },
 { INDEX_op_setcond2_i32, { R, R, R, RI, RI } },
-#endif
 
-#if TCG_TARGET_HAS_not_i32
 { INDEX_op_not_i32, { R, R } },
-#endif
-#if TCG_TARGET_HAS_neg_i32
 { INDEX_op_neg_i32, { R, R } },
-#endif
 
-#if TCG_TARGET_REG_BITS == 64
 { INDEX_op_ld8u_i64, { R, R } },
 { INDEX_op_ld8s_i64, { R, R } },
 { INDEX_op_ld16u_i64, { R, R } },
@@ -151,81 +123,39 @@ static const TCGTargetOpDef tcg_target_op_defs[] = {
 { INDEX_op_add_i64, { R, RI, RI } },
 { INDEX_op_sub_i64, { R, RI, RI } },
 { INDEX_op_mul_i64, { R, RI, RI } },
-#if TCG_TARGET_HAS_div_i64
 { INDEX_op_div_i64, { R, R, R } },
 { INDEX_op_divu_i64, { R, R, R } },
 { INDEX_op_rem_i64, { R, R, R } },
 { INDEX_op_remu_i64, { R, R, R } },
-#elif TCG_TARGET_HAS_div2_i64
-{ INDEX_op_div2_i64, { R, R, "0", "1", R } },
-{ INDEX_op_divu2_i64, { R, R, "0", "1", R } },
-#endif
 { INDEX_op_and_i64, { R, RI, RI } },
-#if TCG_TARGET_HAS_andc_i64
 { INDEX_op_andc_i64, { R, RI, RI } },
-#endif
-#if TCG_TARGET_HAS_eqv_i64
 { INDEX_op_eqv_i64, { R, RI, RI } },
-#endif
-#if TCG_TARGET_HAS_nand_i64
 { INDEX_op_nand_i64, { R, RI, RI } },
-#endif
-#if TCG_TARGET_HAS_nor_i64
 { INDEX_op_nor_i64, { R, RI, RI } },
-#endif
 { INDEX_op_or_i64, { R, RI, RI } },
-#if TCG_TARGET_HAS_orc_i64
 { INDEX_op_orc_i64, { R, RI, RI } },
-#endif
 { INDEX_op_xor_i64, { R, RI, RI } },
 { INDEX_op_shl_i64, { R, RI, RI } },
 { INDEX_op_shr_i64, { R, RI, RI } },
 { INDEX_op_sar_i64, { R, RI, RI } },
-#if TCG_TARGET_HAS_rot_i64
 { INDEX_op_rotl_i64, { R, RI, RI } },
 { INDEX_op_rotr_i64, { R, RI, RI } },
-#endif
-#if TCG_TARGET_HAS_deposit_i64
 { INDEX_op_deposit_i64, { R, "0", R } },
-#endif
 { INDEX_op_brcond_i64, { R, RI } },
 
-#if TCG_TARGET_HAS_ext8s_i64
 { INDEX_op_ext8s_i64, { R, R } },
-#endif
-#if TCG_TARGET_HAS_ext16s_i64
 { INDEX_op_ext16s_i64, { R, R } },
-#endif
-#if TCG_TARGET_HAS_ext32s_i64
 { INDEX_op_ext32s_i64, { R, R } },
-#endif
-#if TCG_TARGET_HAS_ext8u_i64
 { INDEX_op_ext8u_i64, { R, R } },
-#endif
-#if TCG_TARGET_HAS_ext16u_i64
 { INDEX_op_ext16u_i64, { R, R } },
-#endif
-#if TCG_TARGET_HAS_ext32u_i64
 { INDEX_op_ext32u_i64, { R, R } },
-#endif
 { INDEX_op_ext_i32_i64, { R, R } },
 { INDEX_op_extu_i32_i64, { R, R } },
-#if TCG_TARGET_HAS_bswap16_i64
 { INDEX_op_bswap16_i64, { R, R } },
-#endif
-#if TCG_TARGET_HAS_bswap32_i64
 { INDEX_op_bswap32_i64, { R, R } },
-#endif
-#if TCG_TARGET_HAS_bswap64_i64
 { INDEX_op_bswap64_i64, { R, R } },
-#end

[PULL 06/24] tcg/arm: Split out target constraints to tcg-target-con-str.h

Reviewed-by: Peter Maydell 
Signed-off-by: Richard Henderson 
---
 tcg/arm/tcg-target-con-str.h | 22 +++
 tcg/arm/tcg-target.h |  1 +
 tcg/arm/tcg-target.c.inc | 74 +---
 3 files changed, 41 insertions(+), 56 deletions(-)
 create mode 100644 tcg/arm/tcg-target-con-str.h

diff --git a/tcg/arm/tcg-target-con-str.h b/tcg/arm/tcg-target-con-str.h
new file mode 100644
index 00..a0ab7747db
--- /dev/null
+++ b/tcg/arm/tcg-target-con-str.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Define Arm target-specific operand constraints.
+ * Copyright (c) 2021 Linaro
+ */
+
+/*
+ * Define constraint letters for register sets:
+ * REGS(letter, register_mask)
+ */
+REGS('r', ALL_GENERAL_REGS)
+REGS('l', ALL_QLOAD_REGS)
+REGS('s', ALL_QSTORE_REGS)
+
+/*
+ * Define constraint letters for constants:
+ * CONST(letter, TCG_CT_CONST_* bit set)
+ */
+CONST('I', TCG_CT_CONST_ARM)
+CONST('K', TCG_CT_CONST_INV)
+CONST('N', TCG_CT_CONST_NEG)
+CONST('Z', TCG_CT_CONST_ZERO)
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
index 8d1fee6327..16336cd545 100644
--- a/tcg/arm/tcg-target.h
+++ b/tcg/arm/tcg-target.h
@@ -142,5 +142,6 @@ void tb_target_set_jmp_target(uintptr_t, uintptr_t, 
uintptr_t, uintptr_t);
 #define TCG_TARGET_NEED_LDST_LABELS
 #endif
 #define TCG_TARGET_NEED_POOL_LABELS
+#define TCG_TARGET_CON_STR_H
 
 #endif
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
index c2b26b3c45..bbd41d2491 100644
--- a/tcg/arm/tcg-target.c.inc
+++ b/tcg/arm/tcg-target.c.inc
@@ -237,65 +237,27 @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
 #define TCG_CT_CONST_NEG  0x400
 #define TCG_CT_CONST_ZERO 0x800
 
-/* parse target specific constraints */
-static const char *target_parse_constraint(TCGArgConstraint *ct,
-   const char *ct_str, TCGType type)
-{
-switch (*ct_str++) {
-case 'I':
-ct->ct |= TCG_CT_CONST_ARM;
-break;
-case 'K':
-ct->ct |= TCG_CT_CONST_INV;
-break;
-case 'N': /* The gcc constraint letter is L, already used here.  */
-ct->ct |= TCG_CT_CONST_NEG;
-break;
-case 'Z':
-ct->ct |= TCG_CT_CONST_ZERO;
-break;
+#define ALL_GENERAL_REGS  0xu
 
-case 'r':
-ct->regs = 0x;
-break;
-
-/* qemu_ld address */
-case 'l':
-ct->regs = 0x;
+/*
+ * r0-r2 will be overwritten when reading the tlb entry (softmmu only)
+ * and r0-r1 doing the byte swapping, so don't use these.
+ * r3 is removed for softmmu to avoid clashes with helper arguments.
+ */
 #ifdef CONFIG_SOFTMMU
-/* r0-r2,lr will be overwritten when reading the tlb entry,
-   so don't use these. */
-tcg_regset_reset_reg(ct->regs, TCG_REG_R0);
-tcg_regset_reset_reg(ct->regs, TCG_REG_R1);
-tcg_regset_reset_reg(ct->regs, TCG_REG_R2);
-tcg_regset_reset_reg(ct->regs, TCG_REG_R3);
-tcg_regset_reset_reg(ct->regs, TCG_REG_R14);
+#define ALL_QLOAD_REGS \
+(ALL_GENERAL_REGS & ~((1 << TCG_REG_R0) | (1 << TCG_REG_R1) | \
+  (1 << TCG_REG_R2) | (1 << TCG_REG_R3) | \
+  (1 << TCG_REG_R14)))
+#define ALL_QSTORE_REGS \
+(ALL_GENERAL_REGS & ~((1 << TCG_REG_R0) | (1 << TCG_REG_R1) | \
+  (1 << TCG_REG_R2) | (1 << TCG_REG_R14) | \
+  ((TARGET_LONG_BITS == 64) << TCG_REG_R3)))
+#else
+#define ALL_QLOAD_REGS   ALL_GENERAL_REGS
+#define ALL_QSTORE_REGS \
+(ALL_GENERAL_REGS & ~((1 << TCG_REG_R0) | (1 << TCG_REG_R1)))
 #endif
-break;
-
-/* qemu_st address & data */
-case 's':
-ct->regs = 0x;
-/* r0-r2 will be overwritten when reading the tlb entry (softmmu only)
-   and r0-r1 doing the byte swapping, so don't use these. */
-tcg_regset_reset_reg(ct->regs, TCG_REG_R0);
-tcg_regset_reset_reg(ct->regs, TCG_REG_R1);
-#if defined(CONFIG_SOFTMMU)
-/* Avoid clashes with registers being used for helper args */
-tcg_regset_reset_reg(ct->regs, TCG_REG_R2);
-#if TARGET_LONG_BITS == 64
-/* Avoid clashes with registers being used for helper args */
-tcg_regset_reset_reg(ct->regs, TCG_REG_R3);
-#endif
-tcg_regset_reset_reg(ct->regs, TCG_REG_R14);
-#endif
-break;
-
-default:
-return NULL;
-}
-return ct_str;
-}
 
 static inline uint32_t rotl(uint32_t val, int n)
 {
-- 
2.25.1

[PULL 03/24] tcg/i386: Move constraint type check to tcg_target_const_match

Rather than check the type when filling in the constraint,
check it when matching the constant.  This removes the only
use of the type argument to target_parse_constraint.

Reviewed-by: Peter Maydell 
Signed-off-by: Richard Henderson 
---
 tcg/i386/tcg-target.c.inc | 28 +---
 1 file changed, 17 insertions(+), 11 deletions(-)

diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index 050f3cb0b1..540debdf34 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -263,13 +263,13 @@ static const char 
*target_parse_constraint(TCGArgConstraint *ct,
 break;
 
 case 'e':
-ct->ct |= (type == TCG_TYPE_I32 ? TCG_CT_CONST : TCG_CT_CONST_S32);
+ct->ct |= TCG_CT_CONST_S32;
 break;
 case 'Z':
-ct->ct |= (type == TCG_TYPE_I32 ? TCG_CT_CONST : TCG_CT_CONST_U32);
+ct->ct |= TCG_CT_CONST_U32;
 break;
 case 'I':
-ct->ct |= (type == TCG_TYPE_I32 ? TCG_CT_CONST : TCG_CT_CONST_I32);
+ct->ct |= TCG_CT_CONST_I32;
 break;
 
 default:
@@ -286,14 +286,20 @@ static inline int tcg_target_const_match(tcg_target_long 
val, TCGType type,
 if (ct & TCG_CT_CONST) {
 return 1;
 }
-if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
-return 1;
-}
-if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
-return 1;
-}
-if ((ct & TCG_CT_CONST_I32) && ~val == (int32_t)~val) {
-return 1;
+if (type == TCG_TYPE_I32) {
+if (ct & (TCG_CT_CONST_S32 | TCG_CT_CONST_U32 | TCG_CT_CONST_I32)) {
+return 1;
+}
+} else {
+if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
+return 1;
+}
+if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
+return 1;
+}
+if ((ct & TCG_CT_CONST_I32) && ~val == (int32_t)~val) {
+return 1;
+}
 }
 if ((ct & TCG_CT_CONST_WSZ) && val == (type == TCG_TYPE_I32 ? 32 : 64)) {
 return 1;
-- 
2.25.1

[PULL 00/24] tcg patch queue

The following changes since commit 77f3804ab7ed94b471a14acb260e5aeacf26193f:

  Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging 
(2021-02-02 16:47:51 +0000)

are available in the Git repository at:

  https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20210202

for you to fetch changes up to 0c823e596877a30fd6c17a1ae9f98218a53055ea:

  tcg: Remove TCG_TARGET_CON_SET_H (2021-02-02 12:12:43 -1000)


TCG backend constraints cleanup


Richard Henderson (24):
  tcg/tci: Drop L and S constraints
  tcg/tci: Remove TCG_TARGET_HAS_* ifdefs
  tcg/i386: Move constraint type check to tcg_target_const_match
  tcg/i386: Tidy register constraint definitions
  tcg/i386: Split out target constraints to tcg-target-con-str.h
  tcg/arm: Split out target constraints to tcg-target-con-str.h
  tcg/aarch64: Split out target constraints to tcg-target-con-str.h
  tcg/ppc: Split out target constraints to tcg-target-con-str.h
  tcg/tci: Split out target constraints to tcg-target-con-str.h
  tcg/mips: Split out target constraints to tcg-target-con-str.h
  tcg/riscv: Split out target constraints to tcg-target-con-str.h
  tcg/s390: Split out target constraints to tcg-target-con-str.h
  tcg/sparc: Split out target constraints to tcg-target-con-str.h
  tcg: Remove TCG_TARGET_CON_STR_H
  tcg/i386: Split out constraint sets to tcg-target-con-set.h
  tcg/aarch64: Split out constraint sets to tcg-target-con-set.h
  tcg/arm: Split out constraint sets to tcg-target-con-set.h
  tcg/mips: Split out constraint sets to tcg-target-con-set.h
  tcg/ppc: Split out constraint sets to tcg-target-con-set.h
  tcg/riscv: Split out constraint sets to tcg-target-con-set.h
  tcg/s390: Split out constraint sets to tcg-target-con-set.h
  tcg/sparc: Split out constraint sets to tcg-target-con-set.h
  tcg/tci: Split out constraint sets to tcg-target-con-set.h
  tcg: Remove TCG_TARGET_CON_SET_H

 tcg/aarch64/tcg-target-con-set.h |  36 
 tcg/aarch64/tcg-target-con-str.h |  24 +++
 tcg/arm/tcg-target-con-set.h |  35 
 tcg/arm/tcg-target-con-str.h |  22 +++
 tcg/i386/tcg-target-con-set.h|  55 ++
 tcg/i386/tcg-target-con-str.h|  33 
 tcg/mips/tcg-target-con-set.h|  36 
 tcg/mips/tcg-target-con-str.h|  24 +++
 tcg/ppc/tcg-target-con-set.h |  42 +
 tcg/ppc/tcg-target-con-str.h |  30 
 tcg/riscv/tcg-target-con-set.h   |  30 
 tcg/riscv/tcg-target-con-str.h   |  21 +++
 tcg/s390/tcg-target-con-set.h|  29 
 tcg/s390/tcg-target-con-str.h|  28 +++
 tcg/sparc/tcg-target-con-set.h   |  32 
 tcg/sparc/tcg-target-con-str.h   |  23 +++
 tcg/sparc/tcg-target.h   |   4 -
 tcg/tci/tcg-target-con-set.h |  25 +++
 tcg/tci/tcg-target-con-str.h |  11 ++
 tcg/tcg.c| 136 +--
 tcg/aarch64/tcg-target.c.inc | 137 ---
 tcg/arm/tcg-target.c.inc | 168 ++
 tcg/i386/tcg-target.c.inc| 317 +++---
 tcg/mips/tcg-target.c.inc| 173 ++-
 tcg/ppc/tcg-target.c.inc | 209 ---
 tcg/riscv/tcg-target.c.inc   | 135 ---
 tcg/s390/tcg-target.c.inc| 174 +++
 tcg/sparc/tcg-target.c.inc   | 156 ++---
 tcg/tci/tcg-target.c.inc | 359 ++-
 29 files changed, 1244 insertions(+), 1260 deletions(-)
 create mode 100644 tcg/aarch64/tcg-target-con-set.h
 create mode 100644 tcg/aarch64/tcg-target-con-str.h
 create mode 100644 tcg/arm/tcg-target-con-set.h
 create mode 100644 tcg/arm/tcg-target-con-str.h
 create mode 100644 tcg/i386/tcg-target-con-set.h
 create mode 100644 tcg/i386/tcg-target-con-str.h
 create mode 100644 tcg/mips/tcg-target-con-set.h
 create mode 100644 tcg/mips/tcg-target-con-str.h
 create mode 100644 tcg/ppc/tcg-target-con-set.h
 create mode 100644 tcg/ppc/tcg-target-con-str.h
 create mode 100644 tcg/riscv/tcg-target-con-set.h
 create mode 100644 tcg/riscv/tcg-target-con-str.h
 create mode 100644 tcg/s390/tcg-target-con-set.h
 create mode 100644 tcg/s390/tcg-target-con-str.h
 create mode 100644 tcg/sparc/tcg-target-con-set.h
 create mode 100644 tcg/sparc/tcg-target-con-str.h
 create mode 100644 tcg/tci/tcg-target-con-set.h
 create mode 100644 tcg/tci/tcg-target-con-str.h

[PULL 01/24] tcg/tci: Drop L and S constraints

These are identical to the 'r' constraint.

Reviewed-by: Peter Maydell 
Signed-off-by: Richard Henderson 
---
 tcg/tci/tcg-target.c.inc | 10 --
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
index 15981265db..9c45f5f88f 100644
--- a/tcg/tci/tcg-target.c.inc
+++ b/tcg/tci/tcg-target.c.inc
@@ -46,11 +46,11 @@
 # define R64"r"
 #endif
 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
-# define L  "L", "L"
-# define S  "S", "S"
+# define L  "r", "r"
+# define S  "r", "r"
 #else
-# define L  "L"
-# define S  "S"
+# define L  "r"
+# define S  "r"
 #endif
 
 /* TODO: documentation. */
@@ -390,8 +390,6 @@ static const char *target_parse_constraint(TCGArgConstraint 
*ct,
 {
 switch (*ct_str++) {
 case 'r':
-case 'L':   /* qemu_ld constraint */
-case 'S':   /* qemu_st constraint */
 ct->regs = BIT(TCG_TARGET_NB_REGS) - 1;
 break;
 default:
-- 
2.25.1

Re: [PATCH] target/avr/cpu: Use device_class_set_parent_realize()

On 1/31/21 10:03 PM, Philippe Mathieu-Daudé wrote:
> Change generated automatically using the Coccinelle
> patch included in commit bf853881690 ("qdev: use
> device_class_set_parent_realize/unrealize/reset()")
> 
> Signed-off-by: Philippe Mathieu-Daudé 
> ---
>  target/avr/cpu.c | 4 +---
>  1 file changed, 1 insertion(+), 3 deletions(-)

Reviewed-by: Richard Henderson 

r~

[RFC PATCH v1 00/01] PCIe DOE for PCIe and CXL 2.0

2021-02-02 Thread Chris Browy


An implementation of the PCIe Data Object Exchange (DOE) protocol for PCIe and CXL is available at:


https://gitlab.com/avery-qemu/cxl2.0-v3-doe/

based on Ben Widawsky's CXL QEMU cxl2.0-v3 gitlab branch

https://lore.kernel.org/qemu-devel/20210202005948.241655-1-ben.widaw...@intel.com

which is located at

https://gitlab.com/bwidawsk/qemu

The changes from Ben’s latest cxl-2.0v3 are:

 MAINTAINERS   |   7 +
 hw/cxl/cxl-component-utils.c  | 151 +++
 hw/mem/cxl_type3.c| 121 +
 hw/pci/meson.build|   1 +
 hw/pci/pcie.c |   4 +-
 hw/pci/pcie_doe.c | 360 +
 include/hw/cxl/cxl_component.h| 120 +
 include/hw/cxl/cxl_pci.h  | 428
 ++
 include/hw/pci/pcie.h |   5 +
 include/hw/pci/pcie_doe.h | 130 +
 include/hw/pci/pcie_regs.h|   4 +
 include/standard-headers/linux/pci_regs.h |   3 +-
 12 files changed, 1332 insertions(+), 2 deletions(-)

The DOE protocol defines a mailbox method that allows either UEFI or OS 
methods

to read the device and do further setup of ACPI tables, etc.

There are 2 PCIe DOE protocols (PCI-SIG ECN Data Object Exchange (DOE) March 
2020)

- Discovery
- Component Measurement (CMA)

And 2 CXL specific ones:
- Compliance Mode (Compute Express Link Specification September 2, 2020
  Revision: 2.0, Version 1.0)
- CDAT (Coherent Device Attribute Table (CDAT) Specification
   October 2020 Revision 1.02)

For CXL, the CDAT table describes the memory device so that UEFI or the OS can
read it out of the device using DOE and then configure the system's ACPI
SRAT/HMAT tables for system memory, as well as the DEVSEC, Component, and
Device registers in the CXL device.

The current version provides a fixed CDAT table defined in the CXL Type3 device
model. Updates are planned shortly to allow the user to provide CDAT tables
through a -device option property, so that they can vary from run to run. The
format will be ASCII with structure/field-value pairs that are read by the
device during initialization, as shown here:

-device 
cxl-type3,bus=rp0,memdev=cxl-mem1,id=cxl-pmem0,size=256M,cdat_file=


For testing, the cxl_app.c user program is enhanced to test all supported DOE
protocols, which consist of sequences of CFG RD/WR accesses to various DOE
capability registers.  The Linux kernel updates and the CXL Type3 device driver
provide sufficient ioctl() support to exercise the DOE protocol. See


https://lore.kernel.org/linux-cxl/20210130002438.1872527-1-ben.widaw...@intel.com

 cxl_app.c
#include 
#include 
#include 
#include 
#include 

#include "cxl_mem_wrapper.h"

const char* help= "\
-h   help message\n\
-query   IOCTL CXL_MEM_QUERY_COMMANDS\n\
-cfg_rd [0xoffset]   IOCTL CXL_MEM_CONFIG_WR Read Hex\n\
-cfg_wr [0xoffset] [0xaddr]  IOCTL CXL_MEM_CONFIG_WR Write Hex\n\
-doe_discovery [0xindex=0-3] IOCTL CXL_MEM_CONFIG_WR Write Hex\n\
-doe_cxl [0xprotocol=0 or 2] [0xreq_code=0,1 for protocol=0]\n\
-doe_cma [0xnum = 0] IOCTL CXL_MEM_CONFIG_WR Write Hex\n\
example:\n\
./cxl_app.exe -cfg_rd 0x00\n\
./cxl_app.exe -cfg_wr 0x10 0x00ff0004\n\
./cxl_app.exe -doe_discovery 0\n\
./cxl_app.exe -doe_cxl 2\n\
./cxl_app.exe -doe_cxl 0 1\n\
./cxl_app.exe -doe_cma 0\n\
";
/*
 * Access-direction constants.  Each preprocessor directive must start on its
 * own line; when both were on one line, READ expanded to "0 #define WRITE 1"
 * and WRITE was never defined.
 */
#define READ  0
#define WRITE 1

int FD;
typedef struct cxl_pdev_config cxl_pdev_config;

/*
 * Query the driver behind the global FD for its command list and print one
 * line (id, flags, size_in, size_out) per command.
 *
 * CXL_MEM_QUERY_COMMANDS is issued twice: first with a zero count so the
 * driver reports how many commands exist, then with a buffer large enough
 * to hold that many entries.
 *
 * Returns 0 on success, -1 if either ioctl or the allocation fails.
 */
int cxl_query() {
    typedef struct cxl_mem_query_commands cxl_mem_query_commands;
    typedef struct cxl_command_info cxl_command_info;
    int n_cmds = 0;

    // QUERY with n_commands == 0 to get command size
    if (ioctl(FD, CXL_MEM_QUERY_COMMANDS, &n_cmds) < 0) {
        perror("CXL_MEM_QUERY_COMMANDS (count)");
        return -1;
    }
    printf("Querying\n");

    /* A negative count would wrap to a huge size in the computation below. */
    if (n_cmds < 0) {
        fprintf(stderr, "CXL_MEM_QUERY_COMMANDS: invalid command count %d\n",
                n_cmds);
        return -1;
    }

    /* Zeroed so that no uninitialized bytes are handed to the ioctl. */
    cxl_mem_query_commands *cmds =
        calloc(1, sizeof(cxl_mem_query_commands)
                  + (size_t)n_cmds * sizeof(cxl_command_info));
    if (cmds == NULL) {
        perror("calloc");
        return -1;
    }
    cmds->n_commands = n_cmds;

    // QUERY with command size & pre-alloc memory
    if (ioctl(FD, CXL_MEM_QUERY_COMMANDS, cmds) < 0) {
        perror("CXL_MEM_QUERY_COMMANDS (list)");
        free(cmds);
        return -1;
    }

    for (int i = 0; i < (int)cmds->n_commands; i++) {
        printf(" id %d", cmds->commands[i].id);
        printf(" flags %d", cmds->commands[i].flags);
        printf(" size_in %d", cmds->commands[i].size_in);
        printf(" size_out %d\n", cmds->commands[i].size_out);
    }

    free(cmds);
    return 0;
}

int cxl_config(char* offset_s, char* data_s) {
int offset, data, is_write;
cxl_pdev_config* config_payload= malloc(sizeof(cxl_pdev_config));
if (data_s == NULL)
is_write= 0;
else {
is_write= 1;
data= strtol(data_s, NULL, 16);
}
offset= strtol(offset_s, NULL, 16);

config_payload->offset= offset;
config_payload->data= data;
config_payload->is_write= is_write;
ioctl(FD, CXL_MEM_CONFIG_WR, config_payload);
printf("CONFIG_WR %s [%0x] ", (is_write)? "write" : "read",
config_payload->offset);
for (int i= 0; i < 32; i += 8) printf("

Re: [PATCH v3 2/4] target/arm: Add support for FEAT_DIT, Data Independent Timing

On 2/2/21 12:21 PM, Rebecca Cran wrote:
> On 1/27/21 10:06 PM, Richard Henderson wrote:
>> In particular: CPSR.DIT (bit 21) -> SPSR_EL1.DIT (bit 24), and merging
>> PSTATE.SS into SPSR_EL1.SS (bit 21).
> 
> Thanks. I _think_ I'm understanding it better now. Would the following work? I
> don't see where I need to map PSTATE.SS into SPSR_EL1.SS though, because isn't
> that handled automatically since PSTATE maps onto SPSR?
> 
> 
> diff --git a/target/arm/helper-a64.c b/target/arm/helper-a64.c
> index a6b162049806..c1ff24d42f32 100644
> --- a/target/arm/helper-a64.c
> +++ b/target/arm/helper-a64.c
> @@ -1003,6 +1003,11 @@ void HELPER(exception_return)(CPUARMState *env, 
> uint64_t
> new_pc)
>  if (!arm_singlestep_active(env)) {
>  env->pstate &= ~PSTATE_SS;
>  }
> +
> +    if (spsr & PSTATE_DIT) {
> +    env->uncached_cpsr |= CPSR_DIT;
> +    }

This is missing the restore of PSTATE_SS for when singlestep *is* active.

> @@ -9426,6 +9426,12 @@ static void take_aarch32_exception(CPUARMState *env, 
> int
> new_mode,
>   */
>  env->pstate &= ~PSTATE_SS;
>  env->spsr = cpsr_read(env);
> +
> +    if (env->uncached_cpsr & CPSR_DIT) {
> +    env->spsr |= PSTATE_DIT;
> +    env->spsr &= ~PSTATE_SS;
> +    }

This one is incorrect because we're not storing to SPSR_ELx format, but SPSR
(the aa32 version), which has DIT at bit 21.

> @@ -9905,6 +9911,11 @@ static void arm_cpu_do_interrupt_aarch64(CPUState *cs)
>  old_mode = cpsr_read(env);
>  env->elr_el[new_el] = env->regs[15];
> 
> +    if (old_mode & CPSR_DIT) {
> +    old_mode |= PSTATE_DIT;
> +    old_mode &= ~PSTATE_SS;

This line would be clearer using CPSR_DIT.  I don't see PSTATE_SS being added
to old_mode.  Is that somewhere else, or simply missing context here?

I think it would be clearer to add some new helpers.  Naming is always
difficult, but how about:

/*
 * Read the AArch32 CPSR and return it laid out as an SPSR_ELx value:
 * DIT is relocated from its CPSR position to its SPSR_ELx position, and
 * PSTATE.SS is folded in.
 */
static uint32_t cpsr_read_for_spsr_elx(CPUARMState *env)
{
    uint32_t spsr = cpsr_read(env);

    /* DIT lives at a different bit in SPSR_ELx than in CPSR; relocate it. */
    if (spsr & CPSR_DIT) {
        spsr = (spsr & ~CPSR_DIT) | PSTATE_DIT;
    }

    /* SS is tracked in env->pstate; fold it into the result. */
    return spsr | (env->pstate & PSTATE_SS);
}

/*
 * Install an SPSR_ELx-format value into the AArch32 CPSR: the SS bit is
 * split off into env->pstate, DIT is relocated from its SPSR_ELx position
 * to its CPSR position, and the remainder is written with CPSRWriteRaw
 * under the mask returned by aarch32_cpsr_valid_mask().
 */
static void cpsr_write_from_spsr_elx(CPUARMState *env,
                                     uint32_t val)
{
    const uint32_t ss = val & PSTATE_SS;
    uint32_t cpsr = val & ~PSTATE_SS;

    /* SPSR_ELx.SS is kept in PSTATE, not in the CPSR value. */
    env->pstate &= ~PSTATE_SS;
    env->pstate |= ss;

    /* DIT lives at a different bit in CPSR than in SPSR_ELx; relocate it. */
    if (cpsr & PSTATE_DIT) {
        cpsr = (cpsr & ~PSTATE_DIT) | CPSR_DIT;
    }

    cpsr_write(env, cpsr,
               aarch32_cpsr_valid_mask(env->features, &env_archcpu(env)->isar),
               CPSRWriteRaw);
}


r~

Re: [PATCH v2 2/2] sev: update sev-inject-launch-secret to make gpa optional

2021-02-02 Thread James Bottomley

On Tue, 2021-01-26 at 12:32 +, Dr. David Alan Gilbert wrote:
> * James Bottomley (j...@linux.ibm.com) wrote:
> > If the gpa isn't specified, its value is extracted from the OVMF
> > properties table located below the reset vector (and if this
> > doesn't
> > exist, an error is returned).  OVMF has defined the GUID for the
> > SEV
> > secret area as 4c2eb361-7d9b-4cc3-8081-127c90d3d294 and the format
> > of
> > the <data> is: <base>|<size> where both are uint32_t.  We extract
> > <base> and use it as the gpa for the injection.
> > 
> > Note: it is expected that the injected secret will also be GUID
> > described but since qemu can't interpret it, the format is left
> > undefined here.
> > 
> > Signed-off-by: James Bottomley 
> > 
> > ---
> > 
> > v2: fix line length warning, add more comments about sev area
> > ---
> >  qapi/misc-target.json |  2 +-
> >  target/i386/monitor.c | 27 ++-
> >  2 files changed, 27 insertions(+), 2 deletions(-)
> > 
> > diff --git a/qapi/misc-target.json b/qapi/misc-target.json
> > index 06ef8757f0..0c7491cd82 100644
> > --- a/qapi/misc-target.json
> > +++ b/qapi/misc-target.json
> > @@ -216,7 +216,7 @@
> >  #
> >  ##
> >  { 'command': 'sev-inject-launch-secret',
> > -  'data': { 'packet-header': 'str', 'secret': 'str', 'gpa':
> > 'uint64' },
> > +  'data': { 'packet-header': 'str', 'secret': 'str', '*gpa':
> > 'uint64' },
> >'if': 'defined(TARGET_I386)' }
> >  
> >  ##
> > diff --git a/target/i386/monitor.c b/target/i386/monitor.c
> > index 1bc91442b1..11bdb04155 100644
> > --- a/target/i386/monitor.c
> > +++ b/target/i386/monitor.c
> > @@ -34,6 +34,7 @@
> >  #include "sev_i386.h"
> >  #include "qapi/qapi-commands-misc-target.h"
> >  #include "qapi/qapi-commands-misc.h"
> > +#include "hw/i386/pc.h"
> >  
> >  /* Perform linear address sign extension */
> >  static hwaddr addr_canonical(CPUArchState *env, hwaddr addr)
> > @@ -730,9 +731,33 @@ SevCapability
> > *qmp_query_sev_capabilities(Error **errp)
> >  return sev_get_capabilities(errp);
> >  }
> >  
> > +#define SEV_SECRET_GUID "4c2eb361-7d9b-4cc3-8081-127c90d3d294"
> > +struct sev_secret_area {
> > +uint32_t base;
> > +uint32_t size;
> > +};
> > +
> >  void qmp_sev_inject_launch_secret(const char *packet_hdr,
> > -  const char *secret, uint64_t
> > gpa,
> > +  const char *secret,
> > +  bool has_gpa, uint64_t gpa,
> >Error **errp)
> >  {
> > +if (!has_gpa) {
> > +uint8_t *data;
> > +struct sev_secret_area *area;
> > +
> > +/*
> > + * not checking length means that this area can't be
> > versioned
> > + * by length and would have to be replaced if updated
> > + */
> 
> Can you just explain that a bit more?

It's referring back to the original concept that the reset vector
length would tell you what version of the thing you were using.  So if
you were looking for a property at offset 10 and the length came in as
8 the version was too early.  If it was 18 you had a later version and
your property was present.

The current scheme uses guids which can be versioned by length if you
think you'll add extra properties to them.  This one I don't think
would ever get an extra property, so there's no point checking the
length.  Not checking the length means if I'm wrong and we do need an
extra property it will have to be attached to a new guid.

That's a bit confusing to add to the comment ... how about I just leave
out the comment entirely?

> > +if (!pc_system_ovmf_table_find(SEV_SECRET_GUID, &data,
> > NULL)) {
> > +error_setg(errp, "SEV: no secret area found in OVMF,"
> > +   " gpa must be specified.");
> > +return;
> > +}
> > +area = (struct sev_secret_area *)data;
> > +gpa = area->base;
> > +}
> > +
> >  sev_inject_launch_secret(packet_hdr, secret, gpa, errp);
> 
> Other than me not understanding that comment, I think we're fine:

Thanks.

> Reviewed-by: Dr. David Alan Gilbert 
> 
> >  }
> > -- 
> > 2.26.2
> > 
> >

Re: [PATCH v2 1/2] pc: add parser for OVMF reset block

2021-02-02 Thread James Bottomley

On Tue, 2021-01-26 at 12:22 +, Dr. David Alan Gilbert wrote:
> * James Bottomley (j...@linux.ibm.com) wrote:
> > OVMF is developing a mechanism for depositing a GUIDed table just
> > below the known location of the reset vector.  The table goes
> > backwards in memory so all entries are of the form
> > 
> > <data>|len|<guid>
> > 
> > Where <data> is arbitrary size and type, <len> is a uint16_t and
> > describes the entire length of the entry from the beginning of the
> > data to the end of the guid.
> > 
> > The foot of the table is of this form and <len> for this case
> > describes the entire size of the table.  The table foot GUID is
> > defined by OVMF as 96b582de-1fb2-45f7-baea-a366c55a082d and if the
> > table is present this GUID is just below the reset vector, 48 bytes
> > before the end of the firmware file.
> > 
> > Add a parser for the ovmf reset block which takes a copy of the
> > block,
> > if the table foot guid is found, minus the footer and a function
> > for
> > later traversal to return the data area of any specified GUIDs.
> > 
> > Signed-off-by: James Bottomley 
> > 
> > ---
> > 
> > v2: fix brace warnings and return values
> > ---
> >  hw/i386/pc_sysfw.c   | 106
> > +++
> >  include/hw/i386/pc.h |   4 ++
> >  2 files changed, 110 insertions(+)
> > 
> > diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c
> > index 92e90ff013..436b78c587 100644
> > --- a/hw/i386/pc_sysfw.c
> > +++ b/hw/i386/pc_sysfw.c
> > @@ -124,6 +124,107 @@ void
> > pc_system_flash_cleanup_unused(PCMachineState *pcms)
> >  }
> >  }
> >  
> > +#define OVMF_TABLE_FOOTER_GUID "96b582de-1fb2-45f7-baea-
> > a366c55a082d"
> > +
> > +static uint8_t *ovmf_table;
> > +static int ovmf_table_len;
> > +
> > +static void pc_system_parse_ovmf_flash(uint8_t *flash_ptr, int
> > flash_size)
> 
> Maybe size_t for flash_size?

Heh, sure, who knows how big OVMF will get ...  but I get the point
about an int overflow attack.

> > +{
> > +uint8_t *ptr;
> > +QemuUUID guid;
> > +int tot_len;
> > +
> > +/* should only be called once */
> > +if (ovmf_table) {
> > +return;
> > +}
> > +
> > +/*
> > + * if this is OVMF there will be a table footer
> > + * guid 48 bytes before the end of the flash file.  If it's
> > + * not found, silently abort the flash parsing.
> > + */
> > +qemu_uuid_parse(OVMF_TABLE_FOOTER_GUID, &guid);
> > +guid = qemu_uuid_bswap(guid); /* guids are LE */
> > +ptr = flash_ptr + flash_size - 48;
> 
> I think since flash_size is coming from memory_region_size it's
> probably rounded to a page size by now, but perhaps we should always
> check we have enough space before we start moving pointers around

I think OVMF must be at least a page, so I can add that check.

> (Given that the OVMF binary might be provided by the guest owner, we
> have to consider it might be a vector to attack the hypervisor).
> 
> > +if (!qemu_uuid_is_equal((QemuUUID *)ptr, &guid)) {
> > +return;
> > +}
> > +
> > +/* if found, just before is two byte table length */
> > +ptr -= sizeof(uint16_t);
> > +tot_len = le16_to_cpu(*(uint16_t *)ptr) - sizeof(guid) -
> > sizeof(uint16_t);
> > +
> > +if (tot_len <= 0) {
> > +return;
> > +}
> > +
> > +ovmf_table = g_malloc(tot_len);
> > +ovmf_table_len = tot_len;
> > +
> > +/*
> > + * ptr is the foot of the table, so copy it all to the newly
> > + * allocated ovmf_table and then set the ovmf_table pointer
> > + * to the table foot
> > + */
> > +memcpy(ovmf_table, ptr - tot_len, tot_len);
> > +ovmf_table += tot_len;
> > +}
> > +
> > +bool pc_system_ovmf_table_find(const char *entry, uint8_t **data,
> > +   int *data_len)
> > +{
> > +uint8_t *ptr = ovmf_table;
> > +int tot_len = ovmf_table_len;
> > +QemuUUID entry_guid;
> > +
> > +if (qemu_uuid_parse(entry, &entry_guid) < 0) {
> > +return false;
> > +}
> > +
> > +if (!ptr) {
> > +return false;
> > +}
> > +
> > +entry_guid = qemu_uuid_bswap(entry_guid); /* guids are LE */
> > +while (tot_len > 0) {
> > +int len;
> > +QemuUUID *guid;
> > +
> > +/*
> > + * The data structure is
> > + *   arbitrary length data
> > + *   2 byte length of entire entry
> > + *   16 byte guid
> > + */
> > +guid = (QemuUUID *)(ptr - sizeof(QemuUUID));
> > +len = le16_to_cpu(*(uint16_t *)(ptr - sizeof(QemuUUID) -
> > +sizeof(uint16_t)));
> 
> Again I think we need to be checking tot_len > (sizeof(QemuUUID) +
> sizeof(uint16_t)) before doing this dereference.

I can make the loop start

  while (tot_len > sizeof(QemuUUID) + sizeof(uint16_t))

> > +/*
> > + * just in case the table is corrupt, wouldn't want to
> > spin in
> > + * the zero case
> > + */
> > +if (len < sizeof(QemuUUID) + sizeof(uint16_t

Re: [PATCH] hw/net: Add npcm7xx emc model

On Mon, Feb 1, 2021 at 9:11 AM Peter Maydell 
wrote:

> On Fri, 22 Jan 2021 at 00:34, dje--- via  wrote:
> >
> > This is a 10/100 ethernet device that has several features.
> > Only the ones needed by the Linux driver have been implemented.
> > See npcm7xx_emc.c for a list of unimplemented features.
> >
> > Reviewed-by: Hao Wu 
> > Reviewed-by: Avi Fishman 
> > Signed-off-by: Doug Evans 
> > ---
> >  docs/system/arm/nuvoton.rst|   3 +-
> >  hw/arm/npcm7xx.c   |  50 +-
> >  hw/net/meson.build |   1 +
> >  hw/net/npcm7xx_emc.c   | 852 +
> >  hw/net/trace-events|  17 +
> >  include/hw/arm/npcm7xx.h   |   2 +
> >  include/hw/net/npcm7xx_emc.h   | 286 +++
> >  tests/qtest/meson.build|   1 +
> >  tests/qtest/npcm7xx_emc-test.c | 793 ++
> >  9 files changed, 2002 insertions(+), 3 deletions(-)
> >  create mode 100644 hw/net/npcm7xx_emc.c
> >  create mode 100644 include/hw/net/npcm7xx_emc.h
> >  create mode 100644 tests/qtest/npcm7xx_emc-test.c
>
> Hi; could you split this into a multi-patch series so it's
> a bit more digestible to review, please ?
>  patch 1: new device
>  patch 2: add new device to the npcm7xx board
>  patch 3: device test case
>
> thanks
> -- PMM
>


Done, thx.
https://lists.nongnu.org/archive/html/qemu-devel/2021-02/msg00804.html

[PATCH v2 3/3] tests/qtests: Add npcm7xx emc model test

Reviewed-by: Hao Wu 
Reviewed-by: Avi Fishman 
Signed-off-by: Doug Evans 
---
 tests/qtest/meson.build|   1 +
 tests/qtest/npcm7xx_emc-test.c | 793 +
 2 files changed, 794 insertions(+)
 create mode 100644 tests/qtest/npcm7xx_emc-test.c

diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build
index c83bc211b6..f7c369f3d5 100644
--- a/tests/qtest/meson.build
+++ b/tests/qtest/meson.build
@@ -136,6 +136,7 @@ qtests_sparc64 = \
 
 qtests_npcm7xx = \
   ['npcm7xx_adc-test',
+   'npcm7xx_emc-test',
'npcm7xx_gpio-test',
'npcm7xx_pwm-test',
'npcm7xx_rng-test',
diff --git a/tests/qtest/npcm7xx_emc-test.c b/tests/qtest/npcm7xx_emc-test.c
new file mode 100644
index 00..66c706454c
--- /dev/null
+++ b/tests/qtest/npcm7xx_emc-test.c
@@ -0,0 +1,793 @@
+/*
+ * QTests for Nuvoton NPCM7xx EMC Modules.
+ *
+ * Copyright 2020 Google LLC
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "libqos/libqos.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qnum.h"
+#include "qemu/bitops.h"
+#include "qemu/iov.h"
+
+/* Name of the emc device. */
+#define TYPE_NPCM7XX_EMC "npcm7xx-emc"
+
+/* Timeout for various operations, in seconds. */
+#define TIMEOUT_SECONDS 10
+
+/* Address in memory of the descriptor. */
+#define DESC_ADDR (1 << 20) /* 1 MiB */
+
+/* Address in memory of the data packet. */
+#define DATA_ADDR (DESC_ADDR + 4096)
+
+#define CRC_LENGTH 4
+
+#define NUM_TX_DESCRIPTORS 3
+#define NUM_RX_DESCRIPTORS 2
+
+/* Size of tx,rx test buffers. */
+#define TX_DATA_LEN 64
+#define RX_DATA_LEN 64
+
+#define TX_STEP_COUNT 1
+#define RX_STEP_COUNT 1
+
+/* 32-bit register indices. */
+typedef enum NPCM7xxPWMRegister {
+/* Control registers. */
+REG_CAMCMR,
+REG_CAMEN,
+
+/* There are 16 CAMn[ML] registers. */
+REG_CAMM_BASE,
+REG_CAML_BASE,
+
+REG_TXDLSA = 0x22,
+REG_RXDLSA,
+REG_MCMDR,
+REG_MIID,
+REG_MIIDA,
+REG_FFTCR,
+REG_TSDR,
+REG_RSDR,
+REG_DMARFC,
+REG_MIEN,
+
+/* Status registers. */
+REG_MISTA,
+REG_MGSTA,
+REG_MPCNT,
+REG_MRPC,
+REG_MRPCC,
+REG_MREPC,
+REG_DMARFS,
+REG_CTXDSA,
+REG_CTXBSA,
+REG_CRXDSA,
+REG_CRXBSA,
+
+NPCM7XX_NUM_EMC_REGS,
+} NPCM7xxPWMRegister;
+
+enum { NUM_CAMML_REGS = 16 };
+
+/* REG_CAMCMR fields */
+/* Enable CAM Compare */
+#define REG_CAMCMR_ECMP (1 << 4)
+/* Accept Unicast Packet */
+#define REG_CAMCMR_AUP (1 << 0)
+
+/* REG_MCMDR fields */
+/* Software Reset */
+#define REG_MCMDR_SWR (1 << 24)
+/* Frame Transmission On */
+#define REG_MCMDR_TXON (1 << 8)
+/* Accept Long Packet */
+#define REG_MCMDR_ALP (1 << 1)
+/* Frame Reception On */
+#define REG_MCMDR_RXON (1 << 0)
+
+/* REG_MIEN fields */
+/* Enable Transmit Completion Interrupt */
+#define REG_MIEN_ENTXCP (1 << 18)
+/* Enable Transmit Interrupt */
+#define REG_MIEN_ENTXINTR (1 << 16)
+/* Enable Receive Good Interrupt */
+#define REG_MIEN_ENRXGD (1 << 4)
+/* ENable Receive Interrupt */
+#define REG_MIEN_ENRXINTR (1 << 0)
+
+/* REG_MISTA fields */
+/* Transmit Bus Error Interrupt */
+#define REG_MISTA_TXBERR (1 << 24)
+/* Transmit Descriptor Unavailable Interrupt */
+#define REG_MISTA_TDU (1 << 23)
+/* Transmit Completion Interrupt */
+#define REG_MISTA_TXCP (1 << 18)
+/* Transmit Interrupt */
+#define REG_MISTA_TXINTR (1 << 16)
+/* Receive Bus Error Interrupt */
+#define REG_MISTA_RXBERR (1 << 11)
+/* Receive Descriptor Unavailable Interrupt */
+#define REG_MISTA_RDU (1 << 10)
+/* DMA Early Notification Interrupt */
+#define REG_MISTA_DENI (1 << 9)
+/* Maximum Frame Length Interrupt */
+#define REG_MISTA_DFOI (1 << 8)
+/* Receive Good Interrupt */
+#define REG_MISTA_RXGD (1 << 4)
+/* Packet Too Long Interrupt */
+#define REG_MISTA_PTLE (1 << 3)
+/* Receive Interrupt */
+#define REG_MISTA_RXINTR (1 << 0)
+
+typedef struct NPCM7xxEMCTxDesc NPCM7xxEMCTxDesc;
+typedef struct NPCM7xxEMCRxDesc NPCM7xxEMCRxDesc;
+
+struct NPCM7xxEMCTxDesc {
+uint32_t flags;
+uint32_t txbsa;
+uint32_t status_and_length;
+uint32_t ntxdsa;
+};
+
+struct NPCM7xxEMCRxDesc {
+uint32_t status_and_length;
+uint32_t rxbsa;
+uint32_t reserved;
+uint32_t nrxdsa;
+};
+
+/* NPCM7xxEMCTxDesc.flags values */
+/* Owner: 0 = cpu, 1 = emc */
+#define TX_DESC_FLAG_OWNER_MASK (1 << 31)
+/* Transmit interrupt enable */
+#define TX_DESC_FLAG_INTEN (1 << 2)
+
+/* NPCM7xxEMCTxDesc.status_and_length values */
+/* Transmission complete */
+#define

[PATCH v2 1/3] hw/net: Add npcm7xx emc model

This is a 10/100 ethernet device that has several features.
Only the ones needed by the Linux driver have been implemented.
See npcm7xx_emc.c for a list of unimplemented features.

Reviewed-by: Hao Wu 
Reviewed-by: Avi Fishman 
Signed-off-by: Doug Evans 
---
 hw/net/meson.build   |   1 +
 hw/net/npcm7xx_emc.c | 852 +++
 hw/net/trace-events  |  17 +
 include/hw/net/npcm7xx_emc.h | 286 
 4 files changed, 1156 insertions(+)
 create mode 100644 hw/net/npcm7xx_emc.c
 create mode 100644 include/hw/net/npcm7xx_emc.h

diff --git a/hw/net/meson.build b/hw/net/meson.build
index 4a7051b54a..af0749c42b 100644
--- a/hw/net/meson.build
+++ b/hw/net/meson.build
@@ -35,6 +35,7 @@ softmmu_ss.add(when: 'CONFIG_I82596_COMMON', if_true: 
files('i82596.c'))
 softmmu_ss.add(when: 'CONFIG_SUNHME', if_true: files('sunhme.c'))
 softmmu_ss.add(when: 'CONFIG_FTGMAC100', if_true: files('ftgmac100.c'))
 softmmu_ss.add(when: 'CONFIG_SUNGEM', if_true: files('sungem.c'))
+softmmu_ss.add(when: 'CONFIG_NPCM7XX', if_true: files('npcm7xx_emc.c'))
 
 softmmu_ss.add(when: 'CONFIG_ETRAXFS', if_true: files('etraxfs_eth.c'))
 softmmu_ss.add(when: 'CONFIG_COLDFIRE', if_true: files('mcf_fec.c'))
diff --git a/hw/net/npcm7xx_emc.c b/hw/net/npcm7xx_emc.c
new file mode 100644
index 00..d7da1387b3
--- /dev/null
+++ b/hw/net/npcm7xx_emc.c
@@ -0,0 +1,852 @@
+/*
+ * Nuvoton NPCM7xx EMC Module
+ *
+ * Copyright 2020 Google LLC
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * Unsupported/unimplemented features:
+ * - MCMDR.FDUP (full duplex) is ignored, half duplex is not supported
+ * - Only CAM0 is supported, CAM[1-15] are not
+ *   - writes to CAMEN.[1-15] are ignored, these bits always read as zeroes
+ * - MII is not implemented, MIIDA.BUSY and MIID always return zero
+ * - MCMDR.LBK is not implemented
+ * - MCMDR.{OPMOD,ENSQE,AEP,ARP} are not supported
+ * - H/W FIFOs are not supported, MCMDR.FFTCR is ignored
+ * - MGSTA.SQE is not supported
+ * - pause and control frames are not implemented
+ * - MGSTA.CCNT is not supported
+ * - MPCNT, DMARFS are not implemented
+ */
+
+#include "qemu/osdep.h"
+
+/* For crc32 */
+#include 
+
+#include "qemu-common.h"
+#include "hw/irq.h"
+#include "hw/qdev-clock.h"
+#include "hw/qdev-properties.h"
+#include "hw/net/npcm7xx_emc.h"
+#include "net/eth.h"
+#include "migration/vmstate.h"
+#include "qemu/bitops.h"
+#include "qemu/error-report.h"
+#include "qemu/log.h"
+#include "qemu/module.h"
+#include "qemu/units.h"
+#include "sysemu/dma.h"
+#include "trace.h"
+
+#define CRC_LENGTH 4
+
+/*
+ * The maximum size of a (layer 2) ethernet frame as defined by 802.3.
+ * 1518 = 6(dest macaddr) + 6(src macaddr) + 2(proto) + 4(crc) + 1500(payload)
+ * This does not include an additional 4 for the vlan field (802.1q).
+ */
+#define MAX_ETH_FRAME_SIZE 1518
+
+static const char *emc_reg_name(int regno)
+{
+#define REG(name) case REG_ ## name: return #name;
+switch (regno) {
+REG(CAMCMR)
+REG(CAMEN)
+REG(TXDLSA)
+REG(RXDLSA)
+REG(MCMDR)
+REG(MIID)
+REG(MIIDA)
+REG(FFTCR)
+REG(TSDR)
+REG(RSDR)
+REG(DMARFC)
+REG(MIEN)
+REG(MISTA)
+REG(MGSTA)
+REG(MPCNT)
+REG(MRPC)
+REG(MRPCC)
+REG(MREPC)
+REG(DMARFS)
+REG(CTXDSA)
+REG(CTXBSA)
+REG(CRXDSA)
+REG(CRXBSA)
+case REG_CAMM_BASE + 0: return "CAM0M";
+case REG_CAML_BASE + 0: return "CAM0L";
+case REG_CAMM_BASE + 2 ... REG_CAMML_LAST:
+/* Only CAM0 is supported, fold the others into something simple. */
+if (regno & 1) {
+return "CAML";
+} else {
+return "CAMM";
+}
+default: return "UNKNOWN";
+}
+#undef REG
+}
+
+static void emc_reset(NPCM7xxEMCState *emc)
+{
+trace_npcm7xx_emc_reset(emc->emc_num);
+
+memset(&emc->regs[0], 0, sizeof(emc->regs));
+
+/* These regs have non-zero reset values. */
+emc->regs[REG_TXDLSA] = 0xfffc;
+emc->regs[REG_RXDLSA] = 0xfffc;
+emc->regs[REG_MIIDA] = 0x0090;
+emc->regs[REG_FFTCR] = 0x0101;
+emc->regs[REG_DMARFC] = 0x0800;
+emc->regs[REG_MPCNT] = 0x7fff;
+
+emc->tx_active = false;
+emc->rx_active = false;
+
+qemu_set_irq(emc->tx_irq, 0);
+qemu_set_irq(emc->rx_irq, 0);
+}
+
+static void npcm7xx_emc_reset(DeviceState *dev)
+{
+NPCM7xxEMCState *emc = NPCM7XX_EMC(dev);
+emc_reset(emc);
+}
+
+static void emc_soft_reset(NPCM7xxEMCState *emc)
+{
+/*
+ * The docs say at least MCMDR.{LBK,OPM

[PATCH v2 2/3] hw/arm: Add npcm7xx emc model

This is a 10/100 ethernet device that has several features.
Only the ones needed by the Linux driver have been implemented.
See npcm7xx_emc.c for a list of unimplemented features.

Reviewed-by: Hao Wu 
Reviewed-by: Avi Fishman 
Signed-off-by: Doug Evans 
---
 docs/system/arm/nuvoton.rst |  3 ++-
 hw/arm/npcm7xx.c| 50 +++--
 include/hw/arm/npcm7xx.h|  2 ++
 3 files changed, 52 insertions(+), 3 deletions(-)

diff --git a/docs/system/arm/nuvoton.rst b/docs/system/arm/nuvoton.rst
index a1786342e2..c6e9a4c17e 100644
--- a/docs/system/arm/nuvoton.rst
+++ b/docs/system/arm/nuvoton.rst
@@ -43,6 +43,7 @@ Supported devices
  * GPIO controller
  * Analog to Digital Converter (ADC)
  * Pulse Width Modulation (PWM)
+ * Ethernet controller (EMC)
 
 Missing devices
 ---
@@ -56,7 +57,7 @@ Missing devices
* Shared memory (SHM)
* eSPI slave interface
 
- * Ethernet controllers (GMAC and EMC)
+ * Ethernet controller (GMAC)
  * USB device (USBD)
  * SMBus controller (SMBF)
  * Peripheral SPI controller (PSPI)
diff --git a/hw/arm/npcm7xx.c b/hw/arm/npcm7xx.c
index 72040d4079..94b79ff4c0 100644
--- a/hw/arm/npcm7xx.c
+++ b/hw/arm/npcm7xx.c
@@ -82,6 +82,8 @@ enum NPCM7xxInterrupt {
 NPCM7XX_UART1_IRQ,
 NPCM7XX_UART2_IRQ,
 NPCM7XX_UART3_IRQ,
+NPCM7XX_EMC1RX_IRQ  = 15,
+NPCM7XX_EMC1TX_IRQ,
 NPCM7XX_TIMER0_IRQ  = 32,   /* Timer Module 0 */
 NPCM7XX_TIMER1_IRQ,
 NPCM7XX_TIMER2_IRQ,
@@ -104,6 +106,8 @@ enum NPCM7xxInterrupt {
 NPCM7XX_OHCI_IRQ= 62,
 NPCM7XX_PWM0_IRQ= 93,   /* PWM module 0 */
 NPCM7XX_PWM1_IRQ,   /* PWM module 1 */
+NPCM7XX_EMC2RX_IRQ  = 114,
+NPCM7XX_EMC2TX_IRQ,
 NPCM7XX_GPIO0_IRQ   = 116,
 NPCM7XX_GPIO1_IRQ,
 NPCM7XX_GPIO2_IRQ,
@@ -152,6 +156,12 @@ static const hwaddr npcm7xx_pwm_addr[] = {
 0xf0104000,
 };
 
+/* Register base address for each EMC Module */
+static const hwaddr npcm7xx_emc_addr[] = {
+0xf0825000,
+0xf0826000,
+};
+
 static const struct {
 hwaddr regs_addr;
 uint32_t unconnected_pins;
@@ -365,6 +375,10 @@ static void npcm7xx_init(Object *obj)
 for (i = 0; i < ARRAY_SIZE(s->pwm); i++) {
 object_initialize_child(obj, "pwm[*]", &s->pwm[i], TYPE_NPCM7XX_PWM);
 }
+
+for (i = 0; i < ARRAY_SIZE(s->emc); i++) {
+object_initialize_child(obj, "emc[*]", &s->emc[i], TYPE_NPCM7XX_EMC);
+}
 }
 
 static void npcm7xx_realize(DeviceState *dev, Error **errp)
@@ -537,6 +551,40 @@ static void npcm7xx_realize(DeviceState *dev, Error **errp)
 sysbus_connect_irq(sbd, i, npcm7xx_irq(s, NPCM7XX_PWM0_IRQ + i));
 }
 
+/*
+ * EMC Modules. Cannot fail.
+ * The mapping of the device to its netdev backend works as follows:
+ * emc[i] = nd_table[i]
+ * This works around the inability to specify the netdev property for the
+ * emc device: it's not pluggable and thus the -device option can't be
+ * used.
+ */
+QEMU_BUILD_BUG_ON(ARRAY_SIZE(npcm7xx_emc_addr) != ARRAY_SIZE(s->emc));
+QEMU_BUILD_BUG_ON(ARRAY_SIZE(s->emc) != 2);
+for (i = 0; i < ARRAY_SIZE(s->emc); i++) {
+s->emc[i].emc_num = i;
+SysBusDevice *sbd = SYS_BUS_DEVICE(&s->emc[i]);
+if (nd_table[i].used) {
+qemu_check_nic_model(&nd_table[i], TYPE_NPCM7XX_EMC);
+qdev_set_nic_properties(DEVICE(sbd), &nd_table[i]);
+}
+/*
+ * The device exists regardless of whether it's connected to a QEMU
+ * netdev backend. So always instantiate it even if there is no
+ * backend.
+ */
+sysbus_realize(sbd, &error_abort);
+sysbus_mmio_map(sbd, 0, npcm7xx_emc_addr[i]);
+int tx_irq = i == 0 ? NPCM7XX_EMC1TX_IRQ : NPCM7XX_EMC2TX_IRQ;
+int rx_irq = i == 0 ? NPCM7XX_EMC1RX_IRQ : NPCM7XX_EMC2RX_IRQ;
+/*
+ * N.B. The values for the second argument sysbus_connect_irq are
+ * chosen to match the registration order in npcm7xx_emc_realize.
+ */
+sysbus_connect_irq(sbd, 0, npcm7xx_irq(s, tx_irq));
+sysbus_connect_irq(sbd, 1, npcm7xx_irq(s, rx_irq));
+}
+
 /*
  * Flash Interface Unit (FIU). Can fail if incorrect number of chip selects
  * specified, but this is a programming error.
@@ -621,8 +669,6 @@ static void npcm7xx_realize(DeviceState *dev, Error **errp)
 create_unimplemented_device("npcm7xx.vcd",  0xf081,  64 * KiB);
 create_unimplemented_device("npcm7xx.ece",  0xf082,   8 * KiB);
 create_unimplemented_device("npcm7xx.vdma", 0xf0822000,   8 * KiB);
-create_unimplemented_device("npcm7xx.emc1", 0xf0825000,   4 * KiB);
-create_unimplemented_device("npcm7xx.emc2", 0xf0826000,   4 * KiB);
 create_unimplemented_device("npcm7xx.usbd[0]",  0xf083,   4 * KiB);
 create_unimplemented_device("npcm7xx.usbd[1]",  0xf0831000,   4 * KiB

[PATCH v2 0/3] Add npcm7xx emc model

This is a 10/100 ethernet device that has several features.
Only the ones needed by the Linux driver have been implemented.
See npcm7xx_emc.c for a list of unimplemented features.

Doug Evans (3):
  hw/net: Add npcm7xx emc model
  hw/arm: Add npcm7xx emc model
  tests/qtests: Add npcm7xx emc model test

 docs/system/arm/nuvoton.rst|   3 +-
 hw/arm/npcm7xx.c   |  50 +-
 hw/net/meson.build |   1 +
 hw/net/npcm7xx_emc.c   | 852 +
 hw/net/trace-events|  17 +
 include/hw/arm/npcm7xx.h   |   2 +
 include/hw/net/npcm7xx_emc.h   | 286 +++
 tests/qtest/meson.build|   1 +
 tests/qtest/npcm7xx_emc-test.c | 793 ++
 9 files changed, 2002 insertions(+), 3 deletions(-)
 create mode 100644 hw/net/npcm7xx_emc.c
 create mode 100644 include/hw/net/npcm7xx_emc.h
 create mode 100644 tests/qtest/npcm7xx_emc-test.c

-- 
2.30.0.365.g02bc693789-goog

Re: [RFC PATCH 1/4] include/standard-headers/linux/pci_regs: temp hack to add necessary DOE definitions.

On Tue, Feb 02, 2021 at 07:39:51AM -0800, Ben Widawsky wrote:
> On 21-02-01 23:16:26, Jonathan Cameron wrote:
> > Signed-off-by: Jonathan Cameron 
> > ---
> >  include/standard-headers/linux/pci_regs.h | 33 ++-
> >  1 file changed, 32 insertions(+), 1 deletion(-)
> > 
> > diff --git a/include/standard-headers/linux/pci_regs.h 
> > b/include/standard-headers/linux/pci_regs.h
> > index e709ae8235..7e852d3dd0 100644
> > --- a/include/standard-headers/linux/pci_regs.h
> > +++ b/include/standard-headers/linux/pci_regs.h
> > @@ -730,7 +730,8 @@
> >  #define PCI_EXT_CAP_ID_DVSEC   0x23/* Designated Vendor-Specific */
> >  #define PCI_EXT_CAP_ID_DLF 0x25/* Data Link Feature */
> >  #define PCI_EXT_CAP_ID_PL_16GT 0x26/* Physical Layer 16.0 GT/s */
> > -#define PCI_EXT_CAP_ID_MAX PCI_EXT_CAP_ID_PL_16GT
> > +#define PCI_EXT_CAP_ID_DOE 0x2E/* Data Object Exchange */
> > +#define PCI_EXT_CAP_ID_MAX PCI_EXT_CAP_ID_DOE
> >  
> >  #define PCI_EXT_CAP_DSN_SIZEOF 12
> >  #define PCI_EXT_CAP_MCAST_ENDPOINT_SIZEOF 40
> > @@ -1092,4 +1093,34 @@
> >  #define  PCI_PL_16GT_LE_CTRL_USP_TX_PRESET_MASK0x00F0
> >  #define  PCI_PL_16GT_LE_CTRL_USP_TX_PRESET_SHIFT   4
> >  
> > +/* Data Object Exchange */
> > +#define PCI_DOE_CAP0x04
> > +#define  PCI_DOE_CAP_INT_SUPPORT   0x0001
> > +#define  PCI_DOE_CAP_INT_MSG_NUM   0x0FFE
> > +
> > +#define PCI_DOE_CTRL   0x08
> > +#define  PCI_DOE_CTRL_DOE_ABORT0x0001
> > +#define  PCI_DOE_CTRL_DOE_INT_EN   0x0002
> > +#define  PCI_DOE_CTRL_DOE_GO   0x8000
> > +
> > +#define PCI_DOE_STATUS 0x0c
> > +#define  PCI_DOE_STATUS_DOE_BUSY   0x0001
> > +#define  PCI_DOE_STATUS_INT_STATUS 0x0002
> > +#define  PCI_DOE_STATUS_DOE_ERROR  0x0004
> > +#define  PCI_DOE_STATUS_DATA_OBJECT_READY  0x8000
> > +
> > +#define PCI_DOE_WRITE_MAILBOX  0x10
> > +#define PCI_DOE_READ_MAILBOX   0x14
> > +
> > +/* Data Object Format DOE ECN 6.xx.1 */
> > +#define PCI_DATA_OBJ_DW0_VID   0x
> > +#define PCI_DATA_OBJ_DW0_TYPE  0x00ff
> > +#define PCI_DATA_OBJ_DW1_LEN   0x0003
> > +
> > +/* DOE Discover Data Object */
> > +#define PCI_DOE_DIS_OBJ_TYPE0x1
> > +#define PCI_DOE_DIS_REQ_D0_DW0_INDEX   0x00ff
> > +#define PCI_DOE_DIS_RSP_DO_DW0_VID 0x
> > +#define PCI_DOE_DIS_RSP_D0_DW0_PROT0x00ff
> > +#define PCI_DOE_DIS_RSP_D0_DW0_NEXT_INDEX  0xff00
> >  #endif /* LINUX_PCI_REGS_H */
> 
> I think a lot of these should have had _MASK at the end.
> 
> As for the accuracy of the values, lgtm.

Just add them in the source file where they are used.
The standard-headers are overwritten by scripts, so adding
your own macros there won't help.

Re: [PULL 00/20] NBD patches for 2021-02-02

2021-02-02 Thread no-reply

Patchew URL: https://patchew.org/QEMU/20210202224529.642055-1-ebl...@redhat.com/



Hi,

This series seems to have some coding style problems. See output below for
more information:

Type: series
Message-id: 20210202224529.642055-1-ebl...@redhat.com
Subject: [PULL 00/20] NBD patches for 2021-02-02

=== TEST SCRIPT BEGIN ===
#!/bin/bash
git rev-parse base > /dev/null || exit 0
git config --local diff.renamelimit 0
git config --local diff.renames True
git config --local diff.algorithm histogram
./scripts/checkpatch.pl --mailback base..
=== TEST SCRIPT END ===

Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384
From https://github.com/patchew-project/qemu
 - [tag update]  patchew/20210202191207.4103973-1-ehabk...@redhat.com -> 
patchew/20210202191207.4103973-1-ehabk...@redhat.com
 - [tag update]  patchew/20210202205824.1085853-1-phi...@redhat.com -> 
patchew/20210202205824.1085853-1-phi...@redhat.com
 * [new tag] patchew/20210202224529.642055-1-ebl...@redhat.com -> 
patchew/20210202224529.642055-1-ebl...@redhat.com
Switched to a new branch 'test'
b4fc744 nbd: make nbd_read* return -EIO on error
509be46 block/nbd: only enter connection coroutine if it's present
606d6b3 block/nbd: only detach existing iochannel from aio_context
b9ebab5 block/io: use int64_t bytes in copy_range
4d2c686 block/io: support int64_t bytes in read/write wrappers
0f83114 block/io: support int64_t bytes in bdrv_co_p{read, write}v_part()
51bfadf block/io: support int64_t bytes in bdrv_aligned_preadv()
5ddf9ea block/io: support int64_t bytes in bdrv_co_do_copy_on_readv()
cf35854 block/io: support int64_t bytes in bdrv_aligned_pwritev()
53fd620 block/io: support int64_t bytes in bdrv_co_do_pwrite_zeroes()
df70398 block/io: use int64_t bytes in driver wrappers
f9b30ef block: use int64_t as bytes type in tracked requests
184c8b8 block/io: improve bdrv_check_request: check qiov too
848d7be block/throttle-groups: throttle_group_co_io_limits_intercept(): 64bit 
bytes
2958d9e block/io: bdrv_pad_request(): support qemu_iovec_init_extended failure
12700c1 block/io: refactor bdrv_pad_request(): move bdrv_pad_request() up
ff808b0 block: fix theoretical overflow in bdrv_init_padding()
e0a4e42 util/iov: make qemu_iovec_init_extended() honest
fe22159 block: refactor bdrv_check_request: add errp
cae9e91 iotests: Fix expected whitespace for 185

=== OUTPUT BEGIN ===
1/20 Checking commit cae9e912680e (iotests: Fix expected whitespace for 185)
2/20 Checking commit fe2215907a78 (block: refactor bdrv_check_request: add errp)
3/20 Checking commit e0a4e42b0947 (util/iov: make qemu_iovec_init_extended() 
honest)
4/20 Checking commit ff808b00c095 (block: fix theoretical overflow in 
bdrv_init_padding())
5/20 Checking commit 12700c18a28d (block/io: refactor bdrv_pad_request(): move 
bdrv_pad_request() up)
WARNING: added, moved or deleted file(s), does MAINTAINERS need updating?
#73: 
new file mode 100644

ERROR: trailing whitespace
#82: FILE: block/io.c.rej:5:
+ $

ERROR: trailing whitespace
#100: FILE: block/io.c.rej:23:
+ $

ERROR: trailing whitespace
#106: FILE: block/io.c.rej:29:
+ $

total: 3 errors, 1 warnings, 85 lines checked

Patch 5/20 has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.

6/20 Checking commit 2958d9edb34e (block/io: bdrv_pad_request(): support 
qemu_iovec_init_extended failure)
7/20 Checking commit 848d7be2eda3 (block/throttle-groups: 
throttle_group_co_io_limits_intercept(): 64bit bytes)
8/20 Checking commit 184c8b8c73d6 (block/io: improve bdrv_check_request: check 
qiov too)
9/20 Checking commit f9b30ef24e63 (block: use int64_t as bytes type in tracked 
requests)
10/20 Checking commit df70398f87e5 (block/io: use int64_t bytes in driver 
wrappers)
11/20 Checking commit 53fd620fa30f (block/io: support int64_t bytes in 
bdrv_co_do_pwrite_zeroes())
12/20 Checking commit cf358543ec81 (block/io: support int64_t bytes in 
bdrv_aligned_pwritev())
WARNING: added, moved or deleted file(s), does MAINTAINERS need updating?
#134: 
deleted file mode 100644

total: 0 errors, 1 warnings, 73 lines checked

Patch 12/20 has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.
13/20 Checking commit 5ddf9eaa98f0 (block/io: support int64_t bytes in 
bdrv_co_do_copy_on_readv())
14/20 Checking commit 51bfadf931ed (block/io: support int64_t bytes in 
bdrv_aligned_preadv())
15/20 Checking commit 0f8311495774 (block/io: support int64_t bytes in 
bdrv_co_p{read, write}v_part())
16/20 Checking commit 4d2c686f09f5 (block/io: support int64_t bytes in 
read/write wrappers)
17/20 Checking commit b9ebab53f292 (block/io: use int64_t bytes in copy_range)
18/20 Checking commit 606d6b399708 (block/nbd: only detach existing iochannel 
from aio_context)
19/20 Checking commit 509be463b54f (block/nbd: only enter connection coroutine 
if it's present)
20/20 Checking commit b4fc744f9b07 (nbd: m

Re: [PATCH 1/1] docs: fix mistake in dirty bitmap feature description

2021-02-02 Thread Denis V. Lunev

On 2/3/21 1:15 AM, Eric Blake wrote:
> On 1/28/21 11:21 AM, Vladimir Sementsov-Ogievskiy wrote:
>> 28.01.2021 20:13, Denis V. Lunev wrote:
>>> Original specification says that l1 table size is 64 * l1_size, which
>>> is obviously wrong. The size of the l1 entry is 64 _bits_, not bytes.
>>> Thus 64 is to be replaced with 8, as the specification speaks in bytes.
>>>
>>> There is also minor tweak, field name is renamed from l1 to l1_table,
>>> which matches with the later text.
>>>
>>> Signed-off-by: Denis V. Lunev 
>>> CC: Stefan Hajnoczi 
>>> CC: Vladimir Sementsov-Ogievskiy 
>> Reviewed-by: Vladimir Sementsov-Ogievskiy 
>>
> I saw the subject "dirty bitmap", and assumed it would go through my
> dirty bitmap tree.  In reality, it's unrelated to the dirty bitmap code.
>  Would an improved subject line help?
hmm. Actually this is about "how the dirty bitmaps are stored in the
Parallels Image format". The section is called "dirty bitmap feature".

What I can propose? :)

"docs: fix mistake in Parallels Image "dirty bitmap" feature description"

Will this work for you?

Den

>>> ---
>>>   docs/interop/parallels.txt | 2 +-
>>>   1 file changed, 1 insertion(+), 1 deletion(-)
>>>
>>> diff --git a/docs/interop/parallels.txt b/docs/interop/parallels.txt
>>> index e9271eba5d..f15bf35bd1 100644
>>> --- a/docs/interop/parallels.txt
>>> +++ b/docs/interop/parallels.txt
>>> @@ -208,7 +208,7 @@ of its data area are:
>>>     28 - 31:    l1_size
>>>     The number of entries in the L1 table of the bitmap.
>>>   -  variable:   l1 (64 * l1_size bytes)
>>> +  variable:   l1_table (8 * l1_size bytes)
>>>     L1 offset table (in bytes)
>> I don't remember why this "(in bytes)" is here.. What in bytes? L1 table
>> size? But the described field is not L1 table size, but L1 table
>> itself.. It's not in bytes, it's just L1 table :)
>>
>> So, I'd also drop "(in bytes)" while being here. Or the whole line "L1
>> offset table (in bytes)" altogether.
>>
>>>     A dirty bitmap is stored using a one-level structure for the
>>> mapping to host
>>>
>>

Re: [PATCH v8 12/13] confidential guest support: Alter virtio default properties for protected guests

On Tue, Feb 02, 2021 at 03:13:14PM +1100, David Gibson wrote:
> The default behaviour for virtio devices is not to use the platform's normal
> DMA paths, but instead to use the fact that it's running in a hypervisor
> to directly access guest memory.  That doesn't work if the guest's memory
> is protected from hypervisor access, such as with AMD's SEV or POWER's PEF.
> 
> So, if a confidential guest mechanism is enabled, then apply the
> iommu_platform=on option so it will go through normal DMA mechanisms.
> Those will presumably have some way of marking memory as shared with
> the hypervisor or hardware so that DMA will work.
> 
> Signed-off-by: David Gibson 
> Reviewed-by: Dr. David Alan Gilbert 
> Reviewed-by: Cornelia Huck 
> Reviewed-by: Greg Kurz 


> ---
>  hw/core/machine.c | 13 +
>  1 file changed, 13 insertions(+)
> 
> diff --git a/hw/core/machine.c b/hw/core/machine.c
> index 94194ab82d..497949899b 100644
> --- a/hw/core/machine.c
> +++ b/hw/core/machine.c
> @@ -33,6 +33,8 @@
>  #include "migration/global_state.h"
>  #include "migration/vmstate.h"
>  #include "exec/confidential-guest-support.h"
> +#include "hw/virtio/virtio.h"
> +#include "hw/virtio/virtio-pci.h"
>  
>  GlobalProperty hw_compat_5_2[] = {};
>  const size_t hw_compat_5_2_len = G_N_ELEMENTS(hw_compat_5_2);
> @@ -1196,6 +1198,17 @@ void machine_run_board_init(MachineState *machine)
>   * areas.
>   */
>  machine_set_mem_merge(OBJECT(machine), false, &error_abort);
> +
> +/*
> + * Virtio devices can't count on directly accessing guest
> + * memory, so they need iommu_platform=on to use normal DMA
> + * mechanisms.  That requires also disabling legacy virtio
> + * support for those virtio pci devices which allow it.
> + */
> +object_register_sugar_prop(TYPE_VIRTIO_PCI, "disable-legacy",
> +   "on", true);
> +object_register_sugar_prop(TYPE_VIRTIO_DEVICE, "iommu_platform",
> +   "on", false);

So overriding a boolean property always poses a problem:
if user does set iommu_platform=off we are ignoring this
silently.

Can we change iommu_platform to on/off/auto? Then we can
change how auto behaves.

Bonus points for adding "access_platform" and making it
a synonym of iommu_platform.

>  }
>  
>  machine_class->init(machine);
> -- 
> 2.29.2

[PULL 20/20] nbd: make nbd_read* return -EIO on error

From: Roman Kagan 

NBD reconnect logic considers the error code from the functions that
read NBD messages to tell if reconnect should be attempted or not: it is
attempted on -EIO, otherwise the client transitions to NBD_CLIENT_QUIT
state (see nbd_channel_error).  This error code is propagated from the
primitives like nbd_read.

The problem, however, is that nbd_read itself turns every error into -1
rather than -EIO.  As a result, if the NBD server happens to die while
sending the message, the client in QEMU receives less data than it
expects, considers it as a fatal error, and wouldn't attempt
reestablishing the connection.

Fix it by turning every negative return from qio_channel_read_all into
-EIO returned from nbd_read.  Apparently that was the original behavior,
but got broken later.  Also adjust nbd_readXX to follow.

Fixes: e6798f06a6 ("nbd: generalize usage of nbd_read")
Signed-off-by: Roman Kagan 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Message-Id: <20210129073859.683063-4-rvka...@yandex-team.ru>
Signed-off-by: Eric Blake 
---
 include/block/nbd.h | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/include/block/nbd.h b/include/block/nbd.h
index 4a52a43ef598..5f34d23bb037 100644
--- a/include/block/nbd.h
+++ b/include/block/nbd.h
@@ -364,7 +364,7 @@ static inline int nbd_read(QIOChannel *ioc, void *buffer, 
size_t size,
 if (desc) {
 error_prepend(errp, "Failed to read %s: ", desc);
 }
-return -1;
+return ret;
 }

 return 0;
@@ -375,8 +375,9 @@ static inline int nbd_read##bits(QIOChannel *ioc,   
\
  uint##bits##_t *val,   \
  const char *desc, Error **errp)\
 {   \
-if (nbd_read(ioc, val, sizeof(*val), desc, errp) < 0) { \
-return -1;  \
+int ret = nbd_read(ioc, val, sizeof(*val), desc, errp); \
+if (ret < 0) {  \
+return ret; \
 }   \
 *val = be##bits##_to_cpu(*val); \
 return 0;   \
-- 
2.30.0

Re: [PATCH 1/1] docs: fix mistake in dirty bitmap feature description

On 2/2/21 4:50 PM, Denis V. Lunev wrote:
> On 2/3/21 1:15 AM, Eric Blake wrote:
>> On 1/28/21 11:21 AM, Vladimir Sementsov-Ogievskiy wrote:
>>> 28.01.2021 20:13, Denis V. Lunev wrote:
 Original specification says that l1 table size is 64 * l1_size, which
 is obviously wrong. The size of the l1 entry is 64 _bits_, not bytes.
 Thus 64 is to be replaced with 8, as the specification speaks in bytes.

 There is also minor tweak, field name is renamed from l1 to l1_table,
 which matches with the later text.

 Signed-off-by: Denis V. Lunev 
 CC: Stefan Hajnoczi 
 CC: Vladimir Sementsov-Ogievskiy 
>>> Reviewed-by: Vladimir Sementsov-Ogievskiy 
>>>
>> I saw the subject "dirty bitmap", and assumed it would go through my
>> dirty bitmap tree.  In reality, it's unrelated to the dirty bitmap code.
>>  Would an improved subject line help?
> hmm. Actually this is about "how the dirty bitmaps are stored in the
> Parallels Image format". The section is called "dirty bitmap feature".
> 
> What I can propose? :)
> 
> "docs: fix mistake in Parallels Image "dirty bitmap" feature description"
> 
> Will this work for you?

That feels a bit long; maybe just:

docs: fix Parallels Image "dirty bitmap" section


-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3226
Virtualization:  qemu.org | libvirt.org

Re: [PULL 0/9] pc,virtio: fixes, features

On Tue, Feb 02, 2021 at 10:44:28PM +0000, Peter Maydell wrote:
> On Tue, 2 Feb 2021 at 15:12, Michael S. Tsirkin  wrote:
> >
> > The following changes since commit 9cd69f1a270235b652766f00b94114f48a2d603f:
> >
> >   Merge remote-tracking branch 
> > 'remotes/stefanberger/tags/pull-tpm-2021-01-25-1' into staging (2021-01-26 
> > 09:51:02 +0000)
> >
> > are available in the Git repository at:
> >
> >   git://git.kernel.org/pub/scm/virt/kvm/mst/qemu.git tags/for_upstream
> >
> > for you to fetch changes up to 737242ed5be0a7119aad55894148b3f5dec41200:
> >
> >   virtio-pmem: add trace events (2021-01-27 08:02:39 -0500)
> >
> > 
> > pc,virtio: fixes, features
> >
> > Fixes all over the place.
> > Ability to control ACPI OEM ID's.
> >
> > Signed-off-by: Michael S. Tsirkin 
> >
> > 
> > Eugenio Pérez (1):
> >   virtio: Add corresponding memory_listener_unregister to unrealize
> >
> > Laurent Vivier (1):
> >   virtio-mmio: fix guest kernel crash with SHM regions
> >
> > Marian Postevca (5):
> >   tests/acpi: allow updates for expected data files
> >   acpi: Permit OEM ID and OEM table ID fields to be changed
> >   tests/acpi: add OEM ID and OEM TABLE ID test
> >   tests/acpi: update expected data files
> >   tests/acpi: disallow updates for expected data files
> >
> > Pankaj Gupta (1):
> >   virtio-pmem: add trace events
> >
> > Stefano Garzarella (1):
> >   virtio: move 'use-disabled-flag' property to hw_compat_4_2
> 
> Fails to build, aarch64:
> 
> In file included from /usr/include/string.h:495,
>  from /home/pm/qemu/include/qemu/osdep.h:87,
>  from ../../hw/arm/virt.c:31:
> In function ‘strncpy’,
> inlined from ‘virt_set_oem_table_id’ at ../../hw/arm/virt.c:2197:5:
> /usr/include/aarch64-linux-gnu/bits/string_fortified.h:106:10: error:
> ‘__builtin_strncpy’ specified bound depends on the length of the
> source argument [-Werror=stringop-overflow=]
>   106 |   return __builtin___strncpy_chk (__dest, __src, __len, __bos 
> (__dest));
>   |  
> ^~
> ../../hw/arm/virt.c: In function ‘virt_set_oem_table_id’:
> ../../hw/arm/virt.c:2190:18: note: length computed here
>  2190 | size_t len = strlen(value);
>   |  ^
> In file included from /usr/include/string.h:495,
>  from /home/pm/qemu/include/qemu/osdep.h:87,
>  from ../../hw/arm/virt.c:31:
> In function ‘strncpy’,
> inlined from ‘virt_set_oem_id’ at ../../hw/arm/virt.c:2176:5:
> /usr/include/aarch64-linux-gnu/bits/string_fortified.h:106:10: error:
> ‘__builtin_strncpy’ specified bound depends on the length of the
> source argument [-Werror=stringop-overflow=]
>   106 |   return __builtin___strncpy_chk (__dest, __src, __len, __bos 
> (__dest));
>   |  
> ^~
> ../../hw/arm/virt.c: In function ‘virt_set_oem_id’:
> ../../hw/arm/virt.c:2168:18: note: length computed here
>  2168 | size_t len = strlen(value);
>   |  ^
> 

I added a fixup on top, and pushed.


> Also iotest 030 failed on openbsd, which might be an intermittent rather
> than anything to do with this patchset:
> 
>   TEST   iotest-qcow2: 030 [fail]
> QEMU  --
> "/home/qemu/qemu-test.vl8fUt/build/tests/qemu-iotests/../../qemu-system-aarch64"
> -nodefaults -di
> splay none -accel qtest -machine virt
> QEMU_IMG  --
> "/home/qemu/qemu-test.vl8fUt/build/tests/qemu-iotests/../../qemu-img"
> QEMU_IO   --
> "/home/qemu/qemu-test.vl8fUt/build/tests/qemu-iotests/../../qemu-io"
> --cache writeback --aio thr
> eads -f qcow2
> QEMU_NBD  --
> "/home/qemu/qemu-test.vl8fUt/build/tests/qemu-iotests/../../qemu-nbd"
> IMGFMT-- qcow2
> IMGPROTO  -- file
> PLATFORM  -- OpenBSD/amd64 openbsd.localnet 6.8
> TEST_DIR  -- /home/qemu/qemu-test.vl8fUt/build/tests/qemu-iotests/scratch
> SOCK_DIR  -- /tmp/tmpu4236zgh
> SOCKET_SCM_HELPER --
> --- /home/qemu/qemu-test.vl8fUt/src/tests/qemu-iotests/030.out
> +++ 030.out.bad
> @@ -1,5 +1,17 @@
> -...
> +.F.
> +==
> fcntl(): Invalid argument
> +FAIL: test_overlapping_5 (__main__.TestParallelOps)
> +--
> +Traceback (most recent call last):
> +  File "/home/qemu/qemu-test.vl8fUt/src/tests/qemu-iotests/030", line
> 424, in test_overlapping_5
> +self.assert_qmp(result, 'return', {})
> +  File "/home/qemu/qemu-test.vl8fUt/src/tests/qemu-iotests/iotests.py",
> line 925, in assert_qmp
> +result = self.dictpath(d, path)
> +  File "/home/qemu/qemu-test.vl8fUt/src/tests/qemu-iotests/iotests.py",
> line 899, in dictpath
> +s

[PULL 19/20] block/nbd: only enter connection coroutine if it's present

From: Roman Kagan 

When an NBD block driver state is moved from one aio_context to another
(e.g. when doing a drain in a migration thread),
nbd_client_attach_aio_context_bh is executed that enters the connection
coroutine.

However, the assumption that ->connection_co is always present here
appears incorrect: the connection may have encountered an error other
than -EIO in the underlying transport, and thus may have decided to quit
rather than keep trying to reconnect, and therefore it may have
terminated the connection coroutine.  As a result an attempt to reassign
the client in this state (NBD_CLIENT_QUIT) to a different aio_context
leads to a null pointer dereference:

  #0  qio_channel_detach_aio_context (ioc=0x0)
  at /build/qemu-gYtjVn/qemu-5.0.1/io/channel.c:452
  #1  0x562a242824b3 in bdrv_detach_aio_context (bs=0x562a268d6a00)
  at /build/qemu-gYtjVn/qemu-5.0.1/block.c:6151
  #2  bdrv_set_aio_context_ignore (bs=bs@entry=0x562a268d6a00,
  new_context=new_context@entry=0x562a260c9580,
  ignore=ignore@entry=0x7feeadc9b780)
  at /build/qemu-gYtjVn/qemu-5.0.1/block.c:6230
  #3  0x562a24282969 in bdrv_child_try_set_aio_context
  (bs=bs@entry=0x562a268d6a00, ctx=0x562a260c9580,
  ignore_child=, errp=)
  at /build/qemu-gYtjVn/qemu-5.0.1/block.c:6332
  #4  0x562a242bb7db in blk_do_set_aio_context (blk=0x562a2735d0d0,
  new_context=0x562a260c9580,
  update_root_node=update_root_node@entry=true, errp=errp@entry=0x0)
  at /build/qemu-gYtjVn/qemu-5.0.1/block/block-backend.c:1989
  #5  0x562a242be0bd in blk_set_aio_context (blk=,
  new_context=, errp=errp@entry=0x0)
  at /build/qemu-gYtjVn/qemu-5.0.1/block/block-backend.c:2010
  #6  0x562a23fbd953 in virtio_blk_data_plane_stop (vdev=)
  at /build/qemu-gYtjVn/qemu-5.0.1/hw/block/dataplane/virtio-blk.c:292
  #7  0x562a241fc7bf in virtio_bus_stop_ioeventfd (bus=0x562a260dbf08)
  at /build/qemu-gYtjVn/qemu-5.0.1/hw/virtio/virtio-bus.c:245
  #8  0x562a23fefb2e in virtio_vmstate_change (opaque=0x562a260dbf90,
  running=0, state=)
  at /build/qemu-gYtjVn/qemu-5.0.1/hw/virtio/virtio.c:3220
  #9  0x562a2402ebfd in vm_state_notify (running=running@entry=0,
  state=state@entry=RUN_STATE_FINISH_MIGRATE)
  at /build/qemu-gYtjVn/qemu-5.0.1/softmmu/vl.c:1275
  #10 0x562a23f7bc02 in do_vm_stop (state=RUN_STATE_FINISH_MIGRATE,
  send_stop=)
  at /build/qemu-gYtjVn/qemu-5.0.1/cpus.c:1032
  #11 0x562a24209765 in migration_completion (s=0x562a260e83a0)
  at /build/qemu-gYtjVn/qemu-5.0.1/migration/migration.c:2914
  #12 migration_iteration_run (s=0x562a260e83a0)
  at /build/qemu-gYtjVn/qemu-5.0.1/migration/migration.c:3275
  #13 migration_thread (opaque=opaque@entry=0x562a260e83a0)
  at /build/qemu-gYtjVn/qemu-5.0.1/migration/migration.c:3439
  #14 0x562a2435ca96 in qemu_thread_start (args=)
  at /build/qemu-gYtjVn/qemu-5.0.1/util/qemu-thread-posix.c:519
  #15 0x7feed31466ba in start_thread (arg=0x7feeadc9c700)
  at pthread_create.c:333
  #16 0x7feed2e7c41d in __GI___sysctl (name=0x0, nlen=608471908,
  oldval=0x562a2452b138, oldlenp=0x0, newval=0x562a2452c5e0
  <__func__.28102>, newlen=0)
  at ../sysdeps/unix/sysv/linux/sysctl.c:30
  #17 0x in ?? ()

Fix it by checking that the connection coroutine is non-null before
trying to enter it.  If it is null, no entering is needed, as the
connection is probably going down anyway.

Signed-off-by: Roman Kagan 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Message-Id: <20210129073859.683063-3-rvka...@yandex-team.ru>
Signed-off-by: Eric Blake 
---
 block/nbd.c | 16 +---
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/block/nbd.c b/block/nbd.c
index bcd6641e90f5..b3cbbeb4b0cb 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -250,13 +250,15 @@ static void nbd_client_attach_aio_context_bh(void *opaque)
 BlockDriverState *bs = opaque;
 BDRVNBDState *s = (BDRVNBDState *)bs->opaque;

-/*
- * The node is still drained, so we know the coroutine has yielded in
- * nbd_read_eof(), the only place where bs->in_flight can reach 0, or it is
- * entered for the first time. Both places are safe for entering the
- * coroutine.
- */
-qemu_aio_coroutine_enter(bs->aio_context, s->connection_co);
+if (s->connection_co) {
+/*
+ * The node is still drained, so we know the coroutine has yielded in
+ * nbd_read_eof(), the only place where bs->in_flight can reach 0, or
+ * it is entered for the first time. Both places are safe for entering
+ * the coroutine.
+ */
+qemu_aio_coroutine_enter(bs->aio_context, s->connection_co);
+}
 bdrv_dec_in_flight(bs);
 }

-- 
2.30.0

[PATCH] acpi: use constants as strncpy limit

gcc is not smart enough to figure out length was validated before use as
strncpy limit, resulting in this warning:

inlined from ‘virt_set_oem_table_id’ at ../../hw/arm/virt.c:2197:5:
/usr/include/aarch64-linux-gnu/bits/string_fortified.h:106:10: error:
‘__builtin_strncpy’ specified bound depends on the length of the
source argument [-Werror=stringop-overflow=]

Simplify things by using a constant limit instead.

Fixes: 97fc5d507fca ("acpi: Permit OEM ID and OEM table ID fields to be 
changed")
Signed-off-by: Michael S. Tsirkin 
---
 hw/arm/virt.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index ecb0e14816..f538194e32 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -2107,7 +2107,7 @@ static void virt_set_oem_id(Object *obj, const char 
*value, Error **errp)
 return;
 }
 
-strncpy(vms->oem_id, value, len + 1);
+strncpy(vms->oem_id, value, 6);
 }
 
 static char *virt_get_oem_table_id(Object *obj, Error **errp)
@@ -2128,7 +2128,7 @@ static void virt_set_oem_table_id(Object *obj, const char 
*value,
"User specified oem-table-id value is bigger than 8 bytes 
in size");
 return;
 }
-strncpy(vms->oem_table_id, value, len + 1);
+strncpy(vms->oem_table_id, value, 8);
 }
 
 
-- 
MST

[PULL 16/20] block/io: support int64_t bytes in read/write wrappers

From: Vladimir Sementsov-Ogievskiy 

We are generally moving to int64_t for both offset and bytes parameters
on all io paths.

Main motivation is realization of 64-bit write_zeroes operation for
fast zeroing large disk chunks, up to the whole disk.

We chose signed type, to be consistent with off_t (which is signed) and
with possibility for signed return type (where negative value means
error).

Now, since bdrv_co_preadv_part() and bdrv_co_pwritev_part() have been
updated, update all their wrappers.

For all of them the type of 'bytes' is widening, so callers are safe. We
have to update request_fn in blkverify.c simultaneously. Still, it's just a
pointer to one of bdrv_co_pwritev() or bdrv_co_preadv(), and the type is
widening for callers of the request_fn anyway.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Message-Id: <20201211183934.169161-16-vsement...@virtuozzo.com>
Reviewed-by: Eric Blake 
[eblake: grammar tweak]
Signed-off-by: Eric Blake 
---
 include/block/block.h | 11 ++-
 include/block/block_int.h |  4 ++--
 block/io.c| 15 ---
 block/blkverify.c |  2 +-
 block/trace-events|  2 +-
 5 files changed, 18 insertions(+), 16 deletions(-)

diff --git a/include/block/block.h b/include/block/block.h
index 81fcaad5acca..5f28d0d33f5c 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -392,12 +392,13 @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state,
 void bdrv_reopen_commit(BDRVReopenState *reopen_state);
 void bdrv_reopen_abort(BDRVReopenState *reopen_state);
 int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset,
-   int bytes, BdrvRequestFlags flags);
+   int64_t bytes, BdrvRequestFlags flags);
 int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags);
-int bdrv_pread(BdrvChild *child, int64_t offset, void *buf, int bytes);
-int bdrv_pwrite(BdrvChild *child, int64_t offset, const void *buf, int bytes);
+int bdrv_pread(BdrvChild *child, int64_t offset, void *buf, int64_t bytes);
+int bdrv_pwrite(BdrvChild *child, int64_t offset, const void *buf,
+int64_t bytes);
 int bdrv_pwrite_sync(BdrvChild *child, int64_t offset,
- const void *buf, int count);
+ const void *buf, int64_t bytes);
 /*
  * Efficiently zero a region of the disk image.  Note that this is a regular
  * I/O request like read or write and should have a reasonable size.  This
@@ -405,7 +406,7 @@ int bdrv_pwrite_sync(BdrvChild *child, int64_t offset,
  * because it may allocate memory for the entire region.
  */
 int coroutine_fn bdrv_co_pwrite_zeroes(BdrvChild *child, int64_t offset,
-   int bytes, BdrvRequestFlags flags);
+   int64_t bytes, BdrvRequestFlags flags);
 BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
 const char *backing_file);
 void bdrv_refresh_filename(BlockDriverState *bs);
diff --git a/include/block/block_int.h b/include/block/block_int.h
index f2ad8aa771c3..749d1fb9d049 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -1032,13 +1032,13 @@ extern BlockDriver bdrv_raw;
 extern BlockDriver bdrv_qcow2;

 int coroutine_fn bdrv_co_preadv(BdrvChild *child,
-int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
+int64_t offset, int64_t bytes, QEMUIOVector *qiov,
 BdrvRequestFlags flags);
 int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
 int64_t offset, int64_t bytes,
 QEMUIOVector *qiov, size_t qiov_offset, BdrvRequestFlags flags);
 int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
-int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
+int64_t offset, int64_t bytes, QEMUIOVector *qiov,
 BdrvRequestFlags flags);
 int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
 int64_t offset, int64_t bytes,
diff --git a/block/io.c b/block/io.c
index 7b6b0027bc4c..8817c8496663 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1005,7 +1005,7 @@ static int bdrv_check_request32(int64_t offset, int64_t 
bytes,
 }

 int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset,
-   int bytes, BdrvRequestFlags flags)
+   int64_t bytes, BdrvRequestFlags flags)
 {
 return bdrv_pwritev(child, offset, bytes, NULL,
 BDRV_REQ_ZERO_WRITE | flags);
@@ -1053,7 +1053,7 @@ int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags 
flags)
 }

 /* See bdrv_pwrite() for the return codes */
-int bdrv_pread(BdrvChild *child, int64_t offset, void *buf, int bytes)
+int bdrv_pread(BdrvChild *child, int64_t offset, void *buf, int64_t bytes)
 {
 int ret;
 QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
@@ -1073,7 +1073,8 @@ int bdrv_pread(BdrvChild *child, int64_t offset, void 
*buf, int bytes)
   -EINVAL  Invalid offset or number of bytes
   -EACCES  Trying to write a read-only device
 */
-int bdrv_pwrite(BdrvChild *child, int64_t offset, const void *buf, int by

[PULL 18/20] block/nbd: only detach existing iochannel from aio_context

From: Roman Kagan 

When the reconnect in NBD client is in progress, the iochannel used for
NBD connection doesn't exist.  Therefore an attempt to detach it from
the aio_context of the parent BlockDriverState results in a NULL pointer
dereference.

The problem is triggerable, in particular, when an outgoing migration is
about to finish, and stopping the dataplane tries to move the
BlockDriverState from the iothread aio_context to the main loop.  If the
NBD connection is lost before this point, and the NBD client has entered
the reconnect procedure, QEMU crashes:

  #0  qemu_aio_coroutine_enter (ctx=0x5618056c7580, co=0x0)
  at /build/qemu-6MF7tq/qemu-5.0.1/util/qemu-coroutine.c:109
  #1  0x5618034b1b68 in nbd_client_attach_aio_context_bh (
  opaque=0x561805ed4c00) at /build/qemu-6MF7tq/qemu-5.0.1/block/nbd.c:164
  #2  0x56180353116b in aio_wait_bh (opaque=0x7f60e1e63700)
  at /build/qemu-6MF7tq/qemu-5.0.1/util/aio-wait.c:55
  #3  0x561803530633 in aio_bh_call (bh=0x7f60d40a7e80)
  at /build/qemu-6MF7tq/qemu-5.0.1/util/async.c:136
  #4  aio_bh_poll (ctx=ctx@entry=0x5618056c7580)
  at /build/qemu-6MF7tq/qemu-5.0.1/util/async.c:164
  #5  0x561803533e5a in aio_poll (ctx=ctx@entry=0x5618056c7580,
  blocking=blocking@entry=true)
  at /build/qemu-6MF7tq/qemu-5.0.1/util/aio-posix.c:650
  #6  0x56180353128d in aio_wait_bh_oneshot (ctx=0x5618056c7580,
  cb=, opaque=)
  at /build/qemu-6MF7tq/qemu-5.0.1/util/aio-wait.c:71
  #7  0x56180345c50a in bdrv_attach_aio_context (new_context=0x5618056c7580,
  bs=0x561805ed4c00) at /build/qemu-6MF7tq/qemu-5.0.1/block.c:6172
  #8  bdrv_set_aio_context_ignore (bs=bs@entry=0x561805ed4c00,
  new_context=new_context@entry=0x5618056c7580,
  ignore=ignore@entry=0x7f60e1e63780)
  at /build/qemu-6MF7tq/qemu-5.0.1/block.c:6237
  #9  0x56180345c969 in bdrv_child_try_set_aio_context (
  bs=bs@entry=0x561805ed4c00, ctx=0x5618056c7580,
  ignore_child=, errp=)
  at /build/qemu-6MF7tq/qemu-5.0.1/block.c:6332
  #10 0x5618034957db in blk_do_set_aio_context (blk=0x56180695b3f0,
  new_context=0x5618056c7580, update_root_node=update_root_node@entry=true,
  errp=errp@entry=0x0)
  at /build/qemu-6MF7tq/qemu-5.0.1/block/block-backend.c:1989
  #11 0x5618034980bd in blk_set_aio_context (blk=,
  new_context=, errp=errp@entry=0x0)
  at /build/qemu-6MF7tq/qemu-5.0.1/block/block-backend.c:2010
  #12 0x561803197953 in virtio_blk_data_plane_stop (vdev=)
  at /build/qemu-6MF7tq/qemu-5.0.1/hw/block/dataplane/virtio-blk.c:292
  #13 0x5618033d67bf in virtio_bus_stop_ioeventfd (bus=0x5618056d9f08)
  at /build/qemu-6MF7tq/qemu-5.0.1/hw/virtio/virtio-bus.c:245
  #14 0x5618031c9b2e in virtio_vmstate_change (opaque=0x5618056d9f90,
  running=0, state=)
  at /build/qemu-6MF7tq/qemu-5.0.1/hw/virtio/virtio.c:3220
  #15 0x561803208bfd in vm_state_notify (running=running@entry=0,
  state=state@entry=RUN_STATE_FINISH_MIGRATE)
  at /build/qemu-6MF7tq/qemu-5.0.1/softmmu/vl.c:1275
  #16 0x561803155c02 in do_vm_stop (state=RUN_STATE_FINISH_MIGRATE,
  send_stop=) at /build/qemu-6MF7tq/qemu-5.0.1/cpus.c:1032
  #17 0x5618033e3765 in migration_completion (s=0x5618056e6960)
  at /build/qemu-6MF7tq/qemu-5.0.1/migration/migration.c:2914
  #18 migration_iteration_run (s=0x5618056e6960)
  at /build/qemu-6MF7tq/qemu-5.0.1/migration/migration.c:3275
  #19 migration_thread (opaque=opaque@entry=0x5618056e6960)
  at /build/qemu-6MF7tq/qemu-5.0.1/migration/migration.c:3439
  #20 0x561803536ad6 in qemu_thread_start (args=)
  at /build/qemu-6MF7tq/qemu-5.0.1/util/qemu-thread-posix.c:519
  #21 0x7f61085d06ba in start_thread ()
 from /lib/x86_64-linux-gnu/libpthread.so.0
  #22 0x7f610830641d in sysctl () from /lib/x86_64-linux-gnu/libc.so.6
  #23 0x in ?? ()

Fix it by checking that the iochannel is non-null before trying to
detach it from the aio_context.  If it is null, no detaching is needed,
and it will get reattached in the proper aio_context once the connection
is reestablished.

Signed-off-by: Roman Kagan 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Message-Id: <20210129073859.683063-2-rvka...@yandex-team.ru>
Signed-off-by: Eric Blake 
---
 block/nbd.c | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/block/nbd.c b/block/nbd.c
index 42e10c7c93f5..bcd6641e90f5 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -235,7 +235,14 @@ static void nbd_client_detach_aio_context(BlockDriverState 
*bs)

 /* Timer is deleted in nbd_client_co_drain_begin() */
 assert(!s->reconnect_delay_timer);
-qio_channel_detach_aio_context(QIO_CHANNEL(s->ioc));
+/*
+ * If reconnect is in progress we may have no ->ioc.  It will be
+ * re-instantiated in the proper aio context once the connection is
+ * reestablished.
+ */
+if (s->ioc) {
+qio_channel_detach_aio_context(QIO_CHANNEL(s->ioc));
+

[PULL 15/20] block/io: support int64_t bytes in bdrv_co_p{read, write}v_part()

From: Vladimir Sementsov-Ogievskiy 

We are generally moving to int64_t for both offset and bytes parameters
on all io paths.

Main motivation is realization of 64-bit write_zeroes operation for
fast zeroing large disk chunks, up to the whole disk.

We chose signed type, to be consistent with off_t (which is signed) and
with possibility for signed return type (where negative value means
error).

So, prepare bdrv_co_preadv_part() and bdrv_co_pwritev_part() and their
remaining dependencies now.

bdrv_pad_request() is updated simultaneously, as a pointer to bytes is passed
to it both from bdrv_co_pwritev_part() and bdrv_co_preadv_part().

So, all callers of bdrv_pad_request() are updated to pass 64bit bytes.
bdrv_pad_request() is already good for 64bit requests, add
corresponding assertion.

Look at bdrv_co_preadv_part() and bdrv_co_pwritev_part().
Type is widening, so callers are safe. Let's look inside the functions.

In bdrv_co_preadv_part() and bdrv_aligned_pwritev() we only pass bytes
to other already int64_t interfaces (and some obviously safe
calculations), it's OK.

In bdrv_co_do_zero_pwritev() aligned_bytes may become large now, still
it's passed to bdrv_aligned_pwritev which supports int64_t bytes.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Message-Id: <20201211183934.169161-15-vsement...@virtuozzo.com>
Reviewed-by: Eric Blake 
Signed-off-by: Eric Blake 
---
 include/block/block_int.h |  4 ++--
 block/io.c| 14 --
 block/trace-events|  4 ++--
 3 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/include/block/block_int.h b/include/block/block_int.h
index 7f41f0990cc0..f2ad8aa771c3 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -1035,13 +1035,13 @@ int coroutine_fn bdrv_co_preadv(BdrvChild *child,
 int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
 BdrvRequestFlags flags);
 int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
-int64_t offset, unsigned int bytes,
+int64_t offset, int64_t bytes,
 QEMUIOVector *qiov, size_t qiov_offset, BdrvRequestFlags flags);
 int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
 int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
 BdrvRequestFlags flags);
 int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
-int64_t offset, unsigned int bytes,
+int64_t offset, int64_t bytes,
 QEMUIOVector *qiov, size_t qiov_offset, BdrvRequestFlags flags);

 static inline int coroutine_fn bdrv_co_pread(BdrvChild *child,
diff --git a/block/io.c b/block/io.c
index cef284e3a189..7b6b0027bc4c 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1725,11 +1725,13 @@ static void bdrv_padding_destroy(BdrvRequestPadding 
*pad)
  */
 static int bdrv_pad_request(BlockDriverState *bs,
 QEMUIOVector **qiov, size_t *qiov_offset,
-int64_t *offset, unsigned int *bytes,
+int64_t *offset, int64_t *bytes,
 BdrvRequestPadding *pad, bool *padded)
 {
 int ret;

+bdrv_check_qiov_request(*offset, *bytes, *qiov, *qiov_offset, 
&error_abort);
+
 if (!bdrv_init_padding(bs, *offset, *bytes, pad)) {
 if (padded) {
 *padded = false;
@@ -1764,7 +1766,7 @@ int coroutine_fn bdrv_co_preadv(BdrvChild *child,
 }

 int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
-int64_t offset, unsigned int bytes,
+int64_t offset, int64_t bytes,
 QEMUIOVector *qiov, size_t qiov_offset,
 BdrvRequestFlags flags)
 {
@@ -1773,7 +1775,7 @@ int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
 BdrvRequestPadding pad;
 int ret;

-trace_bdrv_co_preadv(bs, offset, bytes, flags);
+trace_bdrv_co_preadv_part(bs, offset, bytes, flags);

 if (!bdrv_is_inserted(bs)) {
 return -ENOMEDIUM;
@@ -2117,7 +2119,7 @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild 
*child,

 static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child,
 int64_t offset,
-unsigned int bytes,
+int64_t bytes,
 BdrvRequestFlags flags,
 BdrvTrackedRequest *req)
 {
@@ -2191,7 +2193,7 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
 }

 int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
-int64_t offset, unsigned int bytes, QEMUIOVector *qiov, size_t qiov_offset,
+int64_t offset, int64_t bytes, QEMUIOVector *qiov, size_t qiov_offset,
 BdrvRequestFlags flags)
 {
 BlockDriverState *bs = child->bs;
@@ -2201,7 +2203,7 @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
 int ret;
 bool padded = false;

-trace_bdrv_co_pwritev(child->bs, offset, bytes, flags);
+trace_bdrv_co_pwritev_part(child->bs, offset, bytes, flags);

 if (!bdrv_is_inserted(bs)) {
 return -ENOMEDIUM;
dif

Re: [PATCH v5 1/2] drivers/misc: sysgenid: add system generation id driver

2021-02-02 Thread Randy Dunlap

Hi--

On 2/1/21 9:24 AM, Adrian Catangiu wrote:
> - Background and problem
> 
> The System Generation ID feature is required in virtualized or
> containerized environments by applications that work with local copies
> or caches of world-unique data such as random values, uuids,
> monotonically increasing counters, etc.

  ... if those applications want to comply with .

> Such applications can be negatively affected by VM or container
> snapshotting when the VM or container is either cloned or returned to
> an earlier point in time.


> Signed-off-by: Adrian Catangiu 
> ---
>  Documentation/misc-devices/sysgenid.rst| 236 
>  Documentation/userspace-api/ioctl/ioctl-number.rst |   1 +
>  MAINTAINERS|   8 +
>  drivers/misc/Kconfig   |  16 ++
>  drivers/misc/Makefile  |   1 +
>  drivers/misc/sysgenid.c| 307 
> +
>  include/uapi/linux/sysgenid.h  |  17 ++
>  7 files changed, 586 insertions(+)
>  create mode 100644 Documentation/misc-devices/sysgenid.rst
>  create mode 100644 drivers/misc/sysgenid.c
>  create mode 100644 include/uapi/linux/sysgenid.h
> 
> diff --git a/Documentation/misc-devices/sysgenid.rst 
> b/Documentation/misc-devices/sysgenid.rst
> new file mode 100644
> index 000..4337ca0
> --- /dev/null
> +++ b/Documentation/misc-devices/sysgenid.rst
> @@ -0,0 +1,236 @@
> +.. SPDX-License-Identifier: GPL-2.0
> +
> +
> +SYSGENID
> +
> +
> +The System Generation ID feature is required in virtualized or
> +containerized environments by applications that work with local copies
> +or caches of world-unique data such as random values, UUIDs,
> +monotonically increasing counters, etc.
> +Such applications can be negatively affected by VM or container
> +snapshotting when the VM or container is either cloned or returned to
> +an earlier point in time.
> +
> +The System Generation ID is a simple concept meant to alleviate the
> +issue by providing a monotonically increasing counter that changes
> +each time the VM or container is restored from a snapshot.
> +The driver for it lives at ``drivers/misc/sysgenid.c``.
> +
> +The ``sysgenid`` driver exposes a monotonic incremental System
> +Generation u32 counter via a char-dev FS interface accessible through

s/FS/filesystem/

> +``/dev/sysgenid`` that provides sync and async SysGen counter update
> +notifications. It also provides SysGen counter retrieval and
> +confirmation mechanisms.
> +
> +The counter starts from zero when the driver is initialized and
> +monotonically increments every time the system generation changes.
> +
> +The ``sysgenid`` driver exports the ``void sysgenid_bump_generation()``
> +symbol which can be used by backend drivers to drive system generation
> +changes based on hardware events.
> +System generation changes can also be driven by userspace software
> +through a dedicated driver ioctl.
> +
> +Userspace applications or libraries can (a)synchronously consume the
> +system generation counter through the provided FS interface, to make

s/FS/filesystem/

> +any necessary internal adjustments following a system generation update.
> +
> +Driver FS interface:
> +
> +``open()``:
> +  When the device is opened, a copy of the current Sys-Gen-Id (counter)
> +  is associated with the open file descriptor. The driver now tracks
> +  this file as an independent *watcher*. The driver tracks how many
> +  watchers are aware of the latest Sys-Gen-Id counter and how many of
> +  them are *outdated*; outdated being those that have lived through
> +  a Sys-Gen-Id change but not yet confirmed the new generation counter.
> +
> +``read()``:
> +  Read is meant to provide the *new* system generation counter when a
> +  generation change takes place. The read operation blocks until the
> +  associated counter is no longer up to date, at which point the new
> +  counter is provided/returned.
> +  Nonblocking ``read()`` uses ``EAGAIN`` to signal that there is no
> +  *new* counter value available. The generation counter is considered
> +  *new* for each open file descriptor that hasn't confirmed the new
> +  value following a generation change. Therefore, once a generation
> +  change takes place, all ``read()`` calls will immediately return the
> +  new generation counter and will continue to do so until the
> +  new value is confirmed back to the driver through ``write()``.
> +  Partial reads are not allowed - read buffer needs to be at least
> +  32 bits in size.
> +
> +``write()``:
> +  Write is used to confirm the up-to-date Sys Gen counter back to the
> +  driver.
> +  Following a VM generation change, all existing watchers are marked
> +  as *outdated*. Each file descriptor will maintain the *outdated*
> +  status until a ``write()`` confirms the up-to-date counter back to
> +  the driver.
> +  Partial writes are not allowed - write buffer should be e

[PULL 12/20] block/io: support int64_t bytes in bdrv_aligned_pwritev()

From: Vladimir Sementsov-Ogievskiy 

We are generally moving to int64_t for both offset and bytes parameters
on all io paths.

The main motivation is the implementation of a 64-bit write_zeroes
operation for fast zeroing of large disk chunks, up to the whole disk.

We chose a signed type, to be consistent with off_t (which is signed) and
with the possibility of a signed return type (where a negative value
means error).

So, prepare bdrv_aligned_pwritev() now and convert the dependencies:
bdrv_co_write_req_prepare() and bdrv_co_write_req_finish() to signed
type bytes.

Conversion of bdrv_co_write_req_prepare() and
bdrv_co_write_req_finish() is definitely safe, as all requests in
block/io must not overflow BDRV_MAX_LENGTH. Still add assertions.

For bdrv_aligned_pwritev() 'bytes' type is widened, so callers are
safe. Let's check usage of the parameter inside the function.

Passing to bdrv_co_write_req_prepare() and bdrv_co_write_req_finish()
is OK.

Passing to qemu_iovec_* is OK after new assertion. All other callees
are already updated to int64_t.

Checking alignment is not changed, offset + bytes and qiov_offset +
bytes calculations are safe (thanks to new assertions).

max_transfer is kept to be int for now. It has a default of INT_MAX
here, and some drivers may rely on it. It's to be refactored later.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Message-Id: <20201211183934.169161-12-vsement...@virtuozzo.com>
Reviewed-by: Eric Blake 
Signed-off-by: Eric Blake 
---
 block/io.c | 21 +
 block/io.c.rej | 16 
 2 files changed, 13 insertions(+), 24 deletions(-)
 delete mode 100644 block/io.c.rej

diff --git a/block/io.c b/block/io.c
index 98d9f5bdf48a..59ae0a110da1 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1932,11 +1932,12 @@ fail:
 }

 static inline int coroutine_fn
-bdrv_co_write_req_prepare(BdrvChild *child, int64_t offset, uint64_t bytes,
+bdrv_co_write_req_prepare(BdrvChild *child, int64_t offset, int64_t bytes,
   BdrvTrackedRequest *req, int flags)
 {
 BlockDriverState *bs = child->bs;
-int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE);
+
+bdrv_check_request(offset, bytes, &error_abort);

 if (bs->read_only) {
 return -EPERM;
@@ -1963,7 +1964,8 @@ bdrv_co_write_req_prepare(BdrvChild *child, int64_t 
offset, uint64_t bytes,

 assert(req->overlap_offset <= offset);
 assert(offset + bytes <= req->overlap_offset + req->overlap_bytes);
-assert(end_sector <= bs->total_sectors || child->perm & BLK_PERM_RESIZE);
+assert(offset + bytes <= bs->total_sectors * BDRV_SECTOR_SIZE ||
+   child->perm & BLK_PERM_RESIZE);

 switch (req->type) {
 case BDRV_TRACKED_WRITE:
@@ -1984,12 +1986,14 @@ bdrv_co_write_req_prepare(BdrvChild *child, int64_t 
offset, uint64_t bytes,
 }

 static inline void coroutine_fn
-bdrv_co_write_req_finish(BdrvChild *child, int64_t offset, uint64_t bytes,
+bdrv_co_write_req_finish(BdrvChild *child, int64_t offset, int64_t bytes,
  BdrvTrackedRequest *req, int ret)
 {
 int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE);
 BlockDriverState *bs = child->bs;

+bdrv_check_request(offset, bytes, &error_abort);
+
 qatomic_inc(&bs->write_gen);

 /*
@@ -2026,16 +2030,18 @@ bdrv_co_write_req_finish(BdrvChild *child, int64_t 
offset, uint64_t bytes,
  * after possibly fragmenting it.
  */
 static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child,
-BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
+BdrvTrackedRequest *req, int64_t offset, int64_t bytes,
 int64_t align, QEMUIOVector *qiov, size_t qiov_offset, int flags)
 {
 BlockDriverState *bs = child->bs;
 BlockDriver *drv = bs->drv;
 int ret;

-uint64_t bytes_remaining = bytes;
+int64_t bytes_remaining = bytes;
 int max_transfer;

+bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, &error_abort);
+
 if (!drv) {
 return -ENOMEDIUM;
 }
@@ -2047,7 +2053,6 @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild 
*child,
 assert(is_power_of_2(align));
 assert((offset & (align - 1)) == 0);
 assert((bytes & (align - 1)) == 0);
-assert(!qiov || qiov_offset + bytes <= qiov->size);
 max_transfer = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_transfer, INT_MAX),
align);

@@ -2146,7 +2151,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild 
*child,
 assert(!bytes || (offset & (align - 1)) == 0);
 if (bytes >= align) {
 /* Write the aligned part in the middle. */
-uint64_t aligned_bytes = bytes & ~(align - 1);
+int64_t aligned_bytes = bytes & ~(align - 1);
 ret = bdrv_aligned_pwritev(child, req, offset, aligned_bytes, align,
NULL, 0, flags);
 if (ret < 0) {
diff --git a/block/io.c.rej b/block/io.c.rej
deleted file mode 100644
index ae2f972d1af9..
--- a/block/io.

[PULL 17/20] block/io: use int64_t bytes in copy_range

From: Vladimir Sementsov-Ogievskiy 

We are generally moving to int64_t for both offset and bytes parameters
on all io paths.

The main motivation is the implementation of a 64-bit write_zeroes
operation for fast zeroing of large disk chunks, up to the whole disk.

We chose a signed type, to be consistent with off_t (which is signed) and
with the possibility of a signed return type (where a negative value
means error).

So, convert now copy_range parameters which are already 64bit to signed
type.

It's safe as we don't work with requests overflowing BDRV_MAX_LENGTH
(which is less than INT64_MAX), and do check the requests in
bdrv_co_copy_range_internal() (by bdrv_check_request32(), which calls
bdrv_check_request()).

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Message-Id: <20201211183934.169161-17-vsement...@virtuozzo.com>
Reviewed-by: Eric Blake 
Signed-off-by: Eric Blake 
---
 include/block/block.h |  6 +++---
 include/block/block_int.h | 12 ++--
 block/io.c| 22 +++---
 block/trace-events|  4 ++--
 4 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/include/block/block.h b/include/block/block.h
index 5f28d0d33f5c..0a9f2c187cdb 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -845,8 +845,8 @@ void bdrv_unregister_buf(BlockDriverState *bs, void *host);
  *
  * Returns: 0 if succeeded; negative error code if failed.
  **/
-int coroutine_fn bdrv_co_copy_range(BdrvChild *src, uint64_t src_offset,
-BdrvChild *dst, uint64_t dst_offset,
-uint64_t bytes, BdrvRequestFlags 
read_flags,
+int coroutine_fn bdrv_co_copy_range(BdrvChild *src, int64_t src_offset,
+BdrvChild *dst, int64_t dst_offset,
+int64_t bytes, BdrvRequestFlags read_flags,
 BdrvRequestFlags write_flags);
 #endif
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 749d1fb9d049..22a2789d3516 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -1357,14 +1357,14 @@ void bdrv_dec_in_flight(BlockDriverState *bs);

 void blockdev_close_all_bdrv_states(void);

-int coroutine_fn bdrv_co_copy_range_from(BdrvChild *src, uint64_t src_offset,
- BdrvChild *dst, uint64_t dst_offset,
- uint64_t bytes,
+int coroutine_fn bdrv_co_copy_range_from(BdrvChild *src, int64_t src_offset,
+ BdrvChild *dst, int64_t dst_offset,
+ int64_t bytes,
  BdrvRequestFlags read_flags,
  BdrvRequestFlags write_flags);
-int coroutine_fn bdrv_co_copy_range_to(BdrvChild *src, uint64_t src_offset,
-   BdrvChild *dst, uint64_t dst_offset,
-   uint64_t bytes,
+int coroutine_fn bdrv_co_copy_range_to(BdrvChild *src, int64_t src_offset,
+   BdrvChild *dst, int64_t dst_offset,
+   int64_t bytes,
BdrvRequestFlags read_flags,
BdrvRequestFlags write_flags);

diff --git a/block/io.c b/block/io.c
index 8817c8496663..b0435ed6707c 100644
--- a/block/io.c
+++ b/block/io.c
@@ -3201,8 +3201,8 @@ void bdrv_unregister_buf(BlockDriverState *bs, void *host)
 }

 static int coroutine_fn bdrv_co_copy_range_internal(
-BdrvChild *src, uint64_t src_offset, BdrvChild *dst,
-uint64_t dst_offset, uint64_t bytes,
+BdrvChild *src, int64_t src_offset, BdrvChild *dst,
+int64_t dst_offset, int64_t bytes,
 BdrvRequestFlags read_flags, BdrvRequestFlags write_flags,
 bool recurse_src)
 {
@@ -3280,9 +3280,9 @@ static int coroutine_fn bdrv_co_copy_range_internal(
  *
  * See the comment of bdrv_co_copy_range for the parameter and return value
  * semantics. */
-int coroutine_fn bdrv_co_copy_range_from(BdrvChild *src, uint64_t src_offset,
- BdrvChild *dst, uint64_t dst_offset,
- uint64_t bytes,
+int coroutine_fn bdrv_co_copy_range_from(BdrvChild *src, int64_t src_offset,
+ BdrvChild *dst, int64_t dst_offset,
+ int64_t bytes,
  BdrvRequestFlags read_flags,
  BdrvRequestFlags write_flags)
 {
@@ -3296,9 +3296,9 @@ int coroutine_fn bdrv_co_copy_range_from(BdrvChild *src, 
uint64_t src_offset,
  *
  * See the comment of bdrv_co_copy_range for the parameter and return value
  * semantics. */
-int coroutine_fn bdrv_co_copy_range_to(BdrvChild *src, uint64_t src_offset,
-   BdrvChild *dst, ui

[PATCH] acpi: use constants as strncpy limit

gcc is not smart enough to figure out length was validated before use as
strncpy limit, resulting in this warning:

inlined from ‘virt_set_oem_table_id’ at ../../hw/arm/virt.c:2197:5:
/usr/include/aarch64-linux-gnu/bits/string_fortified.h:106:10: error:
‘__builtin_strncpy’ specified bound depends on the length of the
source argument [-Werror=stringop-overflow=]

Simplify things by using a constant limit instead.

Fixes: 97fc5d507fca ("acpi: Permit OEM ID and OEM table ID fields to be changed")
Signed-off-by: Michael S. Tsirkin 
---
 hw/arm/virt.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index ecb0e14816..f538194e32 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -2107,7 +2107,7 @@ static void virt_set_oem_id(Object *obj, const char 
*value, Error **errp)
 return;
 }
 
-strncpy(vms->oem_id, value, len + 1);
+strncpy(vms->oem_id, value, 6);
 }
 
 static char *virt_get_oem_table_id(Object *obj, Error **errp)
@@ -2128,7 +2128,7 @@ static void virt_set_oem_table_id(Object *obj, const char 
*value,
"User specified oem-table-id value is bigger than 8 bytes 
in size");
 return;
 }
-strncpy(vms->oem_table_id, value, len + 1);
+strncpy(vms->oem_table_id, value, 8);
 }
 
 
-- 
MST

[PULL 06/20] block/io: bdrv_pad_request(): support qemu_iovec_init_extended failure

From: Vladimir Sementsov-Ogievskiy 

Make bdrv_pad_request() honest: return an error if
qemu_iovec_init_extended() fails.

Also update bdrv_padding_destroy() to clear the structure for safety.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Message-Id: <20201211183934.169161-6-vsement...@virtuozzo.com>
Reviewed-by: Eric Blake 
Signed-off-by: Eric Blake 
---
 block/io.c | 45 +++--
 1 file changed, 31 insertions(+), 14 deletions(-)

diff --git a/block/io.c b/block/io.c
index 3b1aec366ede..39d943c33a39 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1665,6 +1665,7 @@ static void bdrv_padding_destroy(BdrvRequestPadding *pad)
 qemu_vfree(pad->buf);
 qemu_iovec_destroy(&pad->local_qiov);
 }
+memset(pad, 0, sizeof(*pad));
 }

 /*
@@ -1674,33 +1675,42 @@ static void bdrv_padding_destroy(BdrvRequestPadding 
*pad)
  * read of padding, bdrv_padding_rmw_read() should be called separately if
  * needed.
  *
- * All parameters except @bs are in-out: they represent original request at
- * function call and padded (if padding needed) at function finish.
- *
- * Function always succeeds.
+ * Request parameters (@qiov, &qiov_offset, &offset, &bytes) are in-out:
+ *  - on function start they represent original request
+ *  - on failure or when padding is not needed they are unchanged
+ *  - on success when padding is needed they represent padded request
  */
-static bool bdrv_pad_request(BlockDriverState *bs,
- QEMUIOVector **qiov, size_t *qiov_offset,
- int64_t *offset, unsigned int *bytes,
- BdrvRequestPadding *pad)
+static int bdrv_pad_request(BlockDriverState *bs,
+QEMUIOVector **qiov, size_t *qiov_offset,
+int64_t *offset, unsigned int *bytes,
+BdrvRequestPadding *pad, bool *padded)
 {
 int ret;

 if (!bdrv_init_padding(bs, *offset, *bytes, pad)) {
-return false;
+if (padded) {
+*padded = false;
+}
+return 0;
 }

 ret = qemu_iovec_init_extended(&pad->local_qiov, pad->buf, pad->head,
*qiov, *qiov_offset, *bytes,
pad->buf + pad->buf_len - pad->tail,
pad->tail);
-assert(ret == 0);
+if (ret < 0) {
+bdrv_padding_destroy(pad);
+return ret;
+}
 *bytes += pad->head + pad->tail;
 *offset -= pad->head;
 *qiov = &pad->local_qiov;
 *qiov_offset = 0;
+if (padded) {
+*padded = true;
+}

-return true;
+return 0;
 }

 int coroutine_fn bdrv_co_preadv(BdrvChild *child,
@@ -1750,7 +1760,11 @@ int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
 flags |= BDRV_REQ_COPY_ON_READ;
 }

-bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad);
+ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad,
+   NULL);
+if (ret < 0) {
+return ret;
+}

 tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_READ);
 ret = bdrv_aligned_preadv(child, &req, offset, bytes,
@@ -2173,8 +2187,11 @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
  * bdrv_co_do_zero_pwritev() does aligning by itself, so, we do
  * alignment only if there is no ZERO flag.
  */
-padded = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes,
-  &pad);
+ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad,
+   &padded);
+if (ret < 0) {
+return ret;
+}
 }

 bdrv_inc_in_flight(bs);
-- 
2.30.0

[PULL 08/20] block/io: improve bdrv_check_request: check qiov too

From: Vladimir Sementsov-Ogievskiy 

Operations with qiov add more restrictions on bytes, let's cover it.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Message-Id: <20201211183934.169161-8-vsement...@virtuozzo.com>
Reviewed-by: Eric Blake 
Signed-off-by: Eric Blake 
---
 block/io.c | 46 +++---
 1 file changed, 39 insertions(+), 7 deletions(-)

diff --git a/block/io.c b/block/io.c
index 39d943c33a39..b56db913da30 100644
--- a/block/io.c
+++ b/block/io.c
@@ -920,8 +920,14 @@ bool coroutine_fn 
bdrv_make_request_serialising(BdrvTrackedRequest *req,
 return waited;
 }

-int bdrv_check_request(int64_t offset, int64_t bytes, Error **errp)
+static int bdrv_check_qiov_request(int64_t offset, int64_t bytes,
+   QEMUIOVector *qiov, size_t qiov_offset,
+   Error **errp)
 {
+/*
+ * Check generic offset/bytes correctness
+ */
+
 if (offset < 0) {
 error_setg(errp, "offset is negative: %" PRIi64, offset);
 return -EIO;
@@ -951,12 +957,38 @@ int bdrv_check_request(int64_t offset, int64_t bytes, 
Error **errp)
 return -EIO;
 }

+if (!qiov) {
+return 0;
+}
+
+/*
+ * Check qiov and qiov_offset
+ */
+
+if (qiov_offset > qiov->size) {
+error_setg(errp, "qiov_offset(%zu) overflow io vector size(%zu)",
+   qiov_offset, qiov->size);
+return -EIO;
+}
+
+if (bytes > qiov->size - qiov_offset) {
+error_setg(errp, "bytes(%" PRIi64 ") + qiov_offset(%zu) overflow io "
+   "vector size(%zu)", bytes, qiov_offset, qiov->size);
+return -EIO;
+}
+
 return 0;
 }

-static int bdrv_check_request32(int64_t offset, int64_t bytes)
+int bdrv_check_request(int64_t offset, int64_t bytes, Error **errp)
 {
-int ret = bdrv_check_request(offset, bytes, NULL);
+return bdrv_check_qiov_request(offset, bytes, NULL, 0, errp);
+}
+
+static int bdrv_check_request32(int64_t offset, int64_t bytes,
+QEMUIOVector *qiov, size_t qiov_offset)
+{
+int ret = bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, NULL);
 if (ret < 0) {
 return ret;
 }
@@ -1736,7 +1768,7 @@ int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
 return -ENOMEDIUM;
 }

-ret = bdrv_check_request32(offset, bytes);
+ret = bdrv_check_request32(offset, bytes, qiov, qiov_offset);
 if (ret < 0) {
 return ret;
 }
@@ -2157,7 +2189,7 @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
 return -ENOMEDIUM;
 }

-ret = bdrv_check_request32(offset, bytes);
+ret = bdrv_check_request32(offset, bytes, qiov, qiov_offset);
 if (ret < 0) {
 return ret;
 }
@@ -3163,7 +3195,7 @@ static int coroutine_fn bdrv_co_copy_range_internal(
 if (!dst || !dst->bs || !bdrv_is_inserted(dst->bs)) {
 return -ENOMEDIUM;
 }
-ret = bdrv_check_request32(dst_offset, bytes);
+ret = bdrv_check_request32(dst_offset, bytes, NULL, 0);
 if (ret) {
 return ret;
 }
@@ -3174,7 +3206,7 @@ static int coroutine_fn bdrv_co_copy_range_internal(
 if (!src || !src->bs || !bdrv_is_inserted(src->bs)) {
 return -ENOMEDIUM;
 }
-ret = bdrv_check_request32(src_offset, bytes);
+ret = bdrv_check_request32(src_offset, bytes, NULL, 0);
 if (ret) {
 return ret;
 }
-- 
2.30.0

[PULL 07/20] block/throttle-groups: throttle_group_co_io_limits_intercept(): 64bit bytes

From: Vladimir Sementsov-Ogievskiy 

The function is called from 64bit io handlers, and bytes is just passed
to throttle_account() which is 64bit too (unsigned though). So, let's
convert intermediate argument to 64bit too.

This patch is the first in the 64-bit-blocklayer series, in which we are
generally moving to int64_t for both offset and bytes parameters on all
io paths. The main motivation is the implementation of a 64-bit
write_zeroes operation for fast zeroing of large disk chunks, up to the
whole disk.

We chose a signed type, to be consistent with off_t (which is signed) and
with the possibility of a signed return type (where a negative value
means error).

Patch-correctness audit by Eric Blake:

  Caller has 32-bit, this patch now causes widening which is safe:
  block/block-backend.c: blk_do_preadv() passes 'unsigned int'
  block/block-backend.c: blk_do_pwritev_part() passes 'unsigned int'
  block/throttle.c: throttle_co_pwrite_zeroes() passes 'int'
  block/throttle.c: throttle_co_pdiscard() passes 'int'

  Caller has 64-bit, this patch fixes potential bug where pre-patch
  could narrow, except it's easy enough to trace that callers are still
  capped at 2G actions:
  block/throttle.c: throttle_co_preadv() passes 'uint64_t'
  block/throttle.c: throttle_co_pwritev() passes 'uint64_t'

  Implementation in question: block/throttle-groups.c
  throttle_group_co_io_limits_intercept() takes 'unsigned int bytes'
  and uses it: argument to util/throttle.c throttle_account(uint64_t)

  All safe: it patches a latent bug, and does not introduce any 64-bit
  gotchas once throttle_co_p{read,write}v are relaxed, and assuming
  throttle_account() is not buggy.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Eric Blake 
Reviewed-by: Alberto Garcia 
Message-Id: <20201211183934.169161-7-vsement...@virtuozzo.com>
Signed-off-by: Eric Blake 
---
 include/block/throttle-groups.h | 2 +-
 block/throttle-groups.c | 5 -
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/include/block/throttle-groups.h b/include/block/throttle-groups.h
index 8bf7d233fae5..9541b3243280 100644
--- a/include/block/throttle-groups.h
+++ b/include/block/throttle-groups.h
@@ -77,7 +77,7 @@ void throttle_group_unregister_tgm(ThrottleGroupMember *tgm);
 void throttle_group_restart_tgm(ThrottleGroupMember *tgm);

 void coroutine_fn throttle_group_co_io_limits_intercept(ThrottleGroupMember 
*tgm,
-unsigned int bytes,
+int64_t bytes,
 bool is_write);
 void throttle_group_attach_aio_context(ThrottleGroupMember *tgm,
AioContext *new_context);
diff --git a/block/throttle-groups.c b/block/throttle-groups.c
index abd16ed9dbfd..fb203c3ced4a 100644
--- a/block/throttle-groups.c
+++ b/block/throttle-groups.c
@@ -358,12 +358,15 @@ static void schedule_next_request(ThrottleGroupMember 
*tgm, bool is_write)
  * @is_write:  the type of operation (read/write)
  */
 void coroutine_fn throttle_group_co_io_limits_intercept(ThrottleGroupMember 
*tgm,
-unsigned int bytes,
+int64_t bytes,
 bool is_write)
 {
 bool must_wait;
 ThrottleGroupMember *token;
 ThrottleGroup *tg = container_of(tgm->throttle_state, ThrottleGroup, ts);
+
+assert(bytes >= 0);
+
 qemu_mutex_lock(&tg->lock);

 /* First we check if this I/O has to be throttled. */
-- 
2.30.0

[PULL 05/20] block/io: refactor bdrv_pad_request(): move bdrv_pad_request() up

From: Vladimir Sementsov-Ogievskiy 

Prepare for the following patch, in which bdrv_pad_request() will be
able to fail. Update the comments.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Message-Id: <20201211183934.169161-5-vsement...@virtuozzo.com>
Reviewed-by: Eric Blake 
[eblake: grammar tweak]
Signed-off-by: Eric Blake 
---
 block/io.c | 25 +++--
 block/io.c.rej | 40 
 2 files changed, 59 insertions(+), 6 deletions(-)
 create mode 100644 block/io.c.rej

diff --git a/block/io.c b/block/io.c
index c8c9dea55466..3b1aec366ede 100644
--- a/block/io.c
+++ b/block/io.c
@@ -2135,6 +2135,7 @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
 uint64_t align = bs->bl.request_alignment;
 BdrvRequestPadding pad;
 int ret;
+bool padded = false;

 trace_bdrv_co_pwritev(child->bs, offset, bytes, flags);

@@ -2166,20 +2167,32 @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
 return 0;
 }

+if (!(flags & BDRV_REQ_ZERO_WRITE)) {
+/*
+ * Pad request for following read-modify-write cycle.
+ * bdrv_co_do_zero_pwritev() does aligning by itself, so, we do
+ * alignment only if there is no ZERO flag.
+ */
+padded = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes,
+  &pad);
+}
+
 bdrv_inc_in_flight(bs);
-/*
- * Align write if necessary by performing a read-modify-write cycle.
- * Pad qiov with the read parts and be sure to have a tracked request not
- * only for bdrv_aligned_pwritev, but also for the reads of the RMW cycle.
- */
 tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_WRITE);

 if (flags & BDRV_REQ_ZERO_WRITE) {
+assert(!padded);
 ret = bdrv_co_do_zero_pwritev(child, offset, bytes, flags, &req);
 goto out;
 }

-if (bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad)) {
+if (padded) {
+/*
+ * Request was unaligned to request_alignment and therefore
+ * padded.  We are going to do read-modify-write, and must
+ * serialize the request to prevent interactions of the
+ * widened region with other transactions.
+ */
 bdrv_make_request_serialising(&req, align);
 bdrv_padding_rmw_read(child, &req, &pad, false);
 }
diff --git a/block/io.c.rej b/block/io.c.rej
new file mode 100644
index ..f52df016263b
--- /dev/null
+++ b/block/io.c.rej
@@ -0,0 +1,40 @@
+diff a/block/io.c b/block/io.c (rejected hunks)
+@@ -2138,20 +2139,32 @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
+ return 0;
+ }
+ 
++if (!(flags & BDRV_REQ_ZERO_WRITE)) {
++/*
++ * Pad request for following read-modify-write cycle.
++ * bdrv_co_do_zero_pwritev() does aligning by itself, so, we do
++ * alignment only if there is no ZERO flag.
++ */
++padded = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes,
++  &pad);
++}
++
+ bdrv_inc_in_flight(bs);
+-/*
+- * Align write if necessary by performing a read-modify-write cycle.
+- * Pad qiov with the read parts and be sure to have a tracked request not
+- * only for bdrv_aligned_pwritev, but also for the reads of the RMW cycle.
+- */
+ tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_WRITE);
+ 
+ if (flags & BDRV_REQ_ZERO_WRITE) {
++assert(!padded);
+ ret = bdrv_co_do_zero_pwritev(child, offset, bytes, flags, &req);
+ goto out;
+ }
+ 
+-if (bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad)) {
++if (padded) {
++/*
++ * Request was unaligned to request_alignment and therefore padded.
++ * We are going to do read-modify-write. User is not prepared to 
widened
++ * request intersections with other requests, so we serialize the
++ * request.
++ */
+ bdrv_mark_request_serialising(&req, align);
+ bdrv_padding_rmw_read(child, &req, &pad, false);
+ }
-- 
2.30.0

Re: iotest failures in head [was: [PATCH v4 00/16] 64bit block-layer: part I]

2021-02-02 Thread Peter Maydell

On Tue, 2 Feb 2021 at 17:09, Vladimir Sementsov-Ogievskiy
 wrote:
> Note that 30 is known to crash sometimes. Look at
>
> "[PATCH RFC 0/5] Fix accidental crash in iotest 30"
>
> https://patchew.org/QEMU/20201120161622.1537-1-vsement...@virtuozzo.com/

It certainly seems to be the least reliable iotest in my experience.
For example it just fell over on ppc64 on MST's latest pullreq merge:

https://lore.kernel.org/qemu-devel/cafeaca8az6qtljp00fyqyuwtqk0tafyupjw0feeppmmvfou...@mail.gmail.com/

thanks
-- PMM

[PULL 14/20] block/io: support int64_t bytes in bdrv_aligned_preadv()

From: Vladimir Sementsov-Ogievskiy 

We are generally moving to int64_t for both offset and bytes parameters
on all io paths.

The main motivation is the implementation of a 64-bit write_zeroes
operation for fast zeroing of large disk chunks, up to the whole disk.

We chose a signed type, to be consistent with off_t (which is signed) and
with the possibility of a signed return type (where a negative value
means error).

So, prepare bdrv_aligned_preadv() now.

Make the bytes variable in bdrv_padding_rmw_read() int64_t, as it is
only used for pass-through to bdrv_aligned_preadv().

All bdrv_aligned_preadv() callers are safe as type is widening. Let's
look inside:

 - add a new-style assertion that request is good.
 - callees bdrv_is_allocated() and bdrv_co_do_copy_on_readv() support
   int64_t bytes
 - conversion of bytes_remaining is OK, as we never have requests
   overflowing BDRV_MAX_LENGTH
 - looping through bytes_remaining is ok, num is updated to int64_t
   - for bdrv_driver_preadv we have same limit of max_transfer
   - qemu_iovec_memset is OK, as bytes+qiov_offset should not overflow
 qiov->size anyway (thanks to bdrv_check_qiov_request())

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Message-Id: <20201211183934.169161-14-vsement...@virtuozzo.com>
Reviewed-by: Eric Blake 
[eblake: grammar tweak]
Signed-off-by: Eric Blake 
---
 block/io.c | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/block/io.c b/block/io.c
index 63b0fa0e9ed7..cef284e3a189 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1475,15 +1475,16 @@ err:
  * reads; any other features must be implemented by the caller.
  */
 static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child,
-BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
+BdrvTrackedRequest *req, int64_t offset, int64_t bytes,
 int64_t align, QEMUIOVector *qiov, size_t qiov_offset, int flags)
 {
 BlockDriverState *bs = child->bs;
 int64_t total_bytes, max_bytes;
 int ret = 0;
-uint64_t bytes_remaining = bytes;
+int64_t bytes_remaining = bytes;
 int max_transfer;

+bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, &error_abort);
 assert(is_power_of_2(align));
 assert((offset & (align - 1)) == 0);
 assert((bytes & (align - 1)) == 0);
@@ -1545,7 +1546,7 @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild 
*child,
 }

 while (bytes_remaining) {
-int num;
+int64_t num;

 if (max_bytes) {
 num = MIN(bytes_remaining, MIN(max_bytes, max_transfer));
@@ -1652,7 +1653,7 @@ static int bdrv_padding_rmw_read(BdrvChild *child,
 assert(req->serialising && pad->buf);

 if (pad->head || pad->merge_reads) {
-uint64_t bytes = pad->merge_reads ? pad->buf_len : align;
+int64_t bytes = pad->merge_reads ? pad->buf_len : align;

 qemu_iovec_init_buf(&local_qiov, pad->buf, bytes);

-- 
2.30.0

[PULL 03/20] util/iov: make qemu_iovec_init_extended() honest

From: Vladimir Sementsov-Ogievskiy 

Actually, we can't extend the io vector in all cases. Handle possible
MAX_IOV and size_t overflows.

For now, add assertions to the callers (they rely on success anyway)
and fix them in the following patch.

Also add some additional sanity assertions to qemu_iovec_init_slice()
while we are here.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Message-Id: <20201211183934.169161-3-vsement...@virtuozzo.com>
Reviewed-by: Eric Blake 
Signed-off-by: Eric Blake 
---
 include/qemu/iov.h |  2 +-
 block/io.c | 10 +++---
 util/iov.c | 25 +++--
 3 files changed, 31 insertions(+), 6 deletions(-)

diff --git a/include/qemu/iov.h b/include/qemu/iov.h
index b6b283a5e5c1..93307466809b 100644
--- a/include/qemu/iov.h
+++ b/include/qemu/iov.h
@@ -222,7 +222,7 @@ static inline void *qemu_iovec_buf(QEMUIOVector *qiov)

 void qemu_iovec_init(QEMUIOVector *qiov, int alloc_hint);
 void qemu_iovec_init_external(QEMUIOVector *qiov, struct iovec *iov, int niov);
-void qemu_iovec_init_extended(
+int qemu_iovec_init_extended(
 QEMUIOVector *qiov,
 void *head_buf, size_t head_len,
 QEMUIOVector *mid_qiov, size_t mid_offset, size_t mid_len,
diff --git a/block/io.c b/block/io.c
index 23abdae79468..ab953bd58f48 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1680,13 +1680,17 @@ static bool bdrv_pad_request(BlockDriverState *bs,
  int64_t *offset, unsigned int *bytes,
  BdrvRequestPadding *pad)
 {
+int ret;
+
 if (!bdrv_init_padding(bs, *offset, *bytes, pad)) {
 return false;
 }

-qemu_iovec_init_extended(&pad->local_qiov, pad->buf, pad->head,
- *qiov, *qiov_offset, *bytes,
- pad->buf + pad->buf_len - pad->tail, pad->tail);
+ret = qemu_iovec_init_extended(&pad->local_qiov, pad->buf, pad->head,
+   *qiov, *qiov_offset, *bytes,
+   pad->buf + pad->buf_len - pad->tail,
+   pad->tail);
+assert(ret == 0);
 *bytes += pad->head + pad->tail;
 *offset -= pad->head;
 *qiov = &pad->local_qiov;
diff --git a/util/iov.c b/util/iov.c
index f3a9e92a378f..58c7b35f 100644
--- a/util/iov.c
+++ b/util/iov.c
@@ -415,7 +415,7 @@ int qemu_iovec_subvec_niov(QEMUIOVector *qiov, size_t 
offset, size_t len)
  * Compile new iovec, combining @head_buf buffer, sub-qiov of @mid_qiov,
  * and @tail_buf buffer into new qiov.
  */
-void qemu_iovec_init_extended(
+int qemu_iovec_init_extended(
 QEMUIOVector *qiov,
 void *head_buf, size_t head_len,
 QEMUIOVector *mid_qiov, size_t mid_offset, size_t mid_len,
@@ -425,12 +425,24 @@ void qemu_iovec_init_extended(
 int total_niov, mid_niov = 0;
 struct iovec *p, *mid_iov = NULL;

+assert(mid_qiov->niov <= IOV_MAX);
+
+if (SIZE_MAX - head_len < mid_len ||
+SIZE_MAX - head_len - mid_len < tail_len)
+{
+return -EINVAL;
+}
+
 if (mid_len) {
 mid_iov = qiov_slice(mid_qiov, mid_offset, mid_len,
  &mid_head, &mid_tail, &mid_niov);
 }

 total_niov = !!head_len + mid_niov + !!tail_len;
+if (total_niov > IOV_MAX) {
+return -EINVAL;
+}
+
 if (total_niov == 1) {
 qemu_iovec_init_buf(qiov, NULL, 0);
 p = &qiov->local_iov;
@@ -459,6 +471,8 @@ void qemu_iovec_init_extended(
 p->iov_base = tail_buf;
 p->iov_len = tail_len;
 }
+
+return 0;
 }

 /*
@@ -492,7 +506,14 @@ bool qemu_iovec_is_zero(QEMUIOVector *qiov, size_t offset, 
size_t bytes)
 void qemu_iovec_init_slice(QEMUIOVector *qiov, QEMUIOVector *source,
size_t offset, size_t len)
 {
-qemu_iovec_init_extended(qiov, NULL, 0, source, offset, len, NULL, 0);
+int ret;
+
+assert(source->size >= len);
+assert(source->size - len >= offset);
+
+/* We shrink the request, so we can't overflow neither size_t nor MAX_IOV 
*/
+ret = qemu_iovec_init_extended(qiov, NULL, 0, source, offset, len, NULL, 
0);
+assert(ret == 0);
 }

 void qemu_iovec_destroy(QEMUIOVector *qiov)
-- 
2.30.0

[PULL 02/20] block: refactor bdrv_check_request: add errp

From: Vladimir Sementsov-Ogievskiy 

It's better to pass &error_abort than to just assert that the result is
0: on a crash, we'll immediately see the reason in the backtrace.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Message-Id: <20201211183934.169161-2-vsement...@virtuozzo.com>
Reviewed-by: Eric Blake 
[eblake: fix iotest 206 fallout]
Signed-off-by: Eric Blake 
---
 include/block/block_int.h|  2 +-
 block/io.c   | 29 ++---
 block/file-posix.c   |  2 +-
 tests/test-write-threshold.c |  5 +++--
 tests/qemu-iotests/206.out   |  2 +-
 5 files changed, 28 insertions(+), 12 deletions(-)

diff --git a/include/block/block_int.h b/include/block/block_int.h
index d01fc2372028..5bbbf9ee0af9 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -93,7 +93,7 @@ typedef struct BdrvTrackedRequest {
 struct BdrvTrackedRequest *waiting_for;
 } BdrvTrackedRequest;

-int bdrv_check_request(int64_t offset, int64_t bytes);
+int bdrv_check_request(int64_t offset, int64_t bytes, Error **errp);

 struct BlockDriver {
 const char *format_name;
diff --git a/block/io.c b/block/io.c
index d203435a73d6..23abdae79468 100644
--- a/block/io.c
+++ b/block/io.c
@@ -920,17 +920,34 @@ bool coroutine_fn 
bdrv_make_request_serialising(BdrvTrackedRequest *req,
 return waited;
 }

-int bdrv_check_request(int64_t offset, int64_t bytes)
+int bdrv_check_request(int64_t offset, int64_t bytes, Error **errp)
 {
-if (offset < 0 || bytes < 0) {
+if (offset < 0) {
+error_setg(errp, "offset is negative: %" PRIi64, offset);
+return -EIO;
+}
+
+if (bytes < 0) {
+error_setg(errp, "bytes is negative: %" PRIi64, bytes);
 return -EIO;
 }

 if (bytes > BDRV_MAX_LENGTH) {
+error_setg(errp, "bytes(%" PRIi64 ") exceeds maximum(%" PRIi64 ")",
+   bytes, BDRV_MAX_LENGTH);
+return -EIO;
+}
+
+if (offset > BDRV_MAX_LENGTH) {
+error_setg(errp, "offset(%" PRIi64 ") exceeds maximum(%" PRIi64 ")",
+   offset, BDRV_MAX_LENGTH);
 return -EIO;
 }

 if (offset > BDRV_MAX_LENGTH - bytes) {
+error_setg(errp, "sum of offset(%" PRIi64 ") and bytes(%" PRIi64 ") "
+   "exceeds maximum(%" PRIi64 ")", offset, bytes,
+   BDRV_MAX_LENGTH);
 return -EIO;
 }

@@ -939,7 +956,7 @@ int bdrv_check_request(int64_t offset, int64_t bytes)

 static int bdrv_check_request32(int64_t offset, int64_t bytes)
 {
-int ret = bdrv_check_request(offset, bytes);
+int ret = bdrv_check_request(offset, bytes, NULL);
 if (ret < 0) {
 return ret;
 }
@@ -2847,7 +2864,7 @@ int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, 
int64_t offset,
 return -EPERM;
 }

-ret = bdrv_check_request(offset, bytes);
+ret = bdrv_check_request(offset, bytes, NULL);
 if (ret < 0) {
 return ret;
 }
@@ -3249,10 +3266,8 @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, 
int64_t offset, bool exact,
 return -EINVAL;
 }

-ret = bdrv_check_request(offset, 0);
+ret = bdrv_check_request(offset, 0, errp);
 if (ret < 0) {
-error_setg(errp, "Required too big image size, it must be not greater "
-   "than %" PRId64, BDRV_MAX_LENGTH);
 return ret;
 }

diff --git a/block/file-posix.c b/block/file-posix.c
index 11aafa9d82b2..05079b40caee 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -2969,7 +2969,7 @@ raw_do_pwrite_zeroes(BlockDriverState *bs, int64_t 
offset, int bytes,

 req->bytes = BDRV_MAX_LENGTH - req->offset;

-assert(bdrv_check_request(req->offset, req->bytes) == 0);
+bdrv_check_request(req->offset, req->bytes, &error_abort);

 bdrv_make_request_serialising(req, bs->bl.request_alignment);
 }
diff --git a/tests/test-write-threshold.c b/tests/test-write-threshold.c
index 4cf032652dfd..fc1c45a2eb95 100644
--- a/tests/test-write-threshold.c
+++ b/tests/test-write-threshold.c
@@ -7,6 +7,7 @@
  */

 #include "qemu/osdep.h"
+#include "qapi/error.h"
 #include "block/block_int.h"
 #include "block/write-threshold.h"

@@ -64,7 +65,7 @@ static void test_threshold_not_trigger(void)
 req.offset = 1024;
 req.bytes = 1024;

-assert(bdrv_check_request(req.offset, req.bytes) == 0);
+bdrv_check_request(req.offset, req.bytes, &error_abort);

 bdrv_write_threshold_set(&bs, threshold);
 amount = bdrv_write_threshold_exceeded(&bs, &req);
@@ -84,7 +85,7 @@ static void test_threshold_trigger(void)
 req.offset = (4 * 1024 * 1024) - 1024;
 req.bytes = 2 * 1024;

-assert(bdrv_check_request(req.offset, req.bytes) == 0);
+bdrv_check_request(req.offset, req.bytes, &error_abort);

 bdrv_write_threshold_set(&bs, threshold);
 amount = bdrv_write_threshold_exceeded(&bs, &req);
diff --git a/tests/qemu-iotests/206.out b/tests/qemu-iotests/206.out
index e8a36de00bda..5dd589d14e47 100644
--- a/tes

[PULL 13/20] block/io: support int64_t bytes in bdrv_co_do_copy_on_readv()

From: Vladimir Sementsov-Ogievskiy 

We are generally moving to int64_t for both offset and bytes parameters
on all io paths.

The main motivation is the implementation of a 64-bit write_zeroes
operation for fast zeroing of large disk chunks, up to the whole disk.

We chose a signed type to be consistent with off_t (which is signed) and
to allow for a signed return type (where a negative value means an
error).

So, prepare bdrv_co_do_copy_on_readv() now.

The type of 'bytes' is being widened, so callers are safe. Look at the
function itself:

bytes, skip_bytes and progress become int64_t.

bdrv_round_to_clusters() is OK, cluster_bytes now may be large.
trace_bdrv_co_do_copy_on_readv() is OK

looping through cluster_bytes is still OK.

pnum is still capped to max_transfer, and to MAX_BOUNCE_BUFFER when we
are going to do a COR operation. Therefore, the calculations in
qemu_iovec_from_buf() and bdrv_driver_preadv() should not change.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Message-Id: <20201211183934.169161-13-vsement...@virtuozzo.com>
Reviewed-by: Eric Blake 
Signed-off-by: Eric Blake 
---
 block/io.c | 8 +---
 block/trace-events | 2 +-
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/block/io.c b/block/io.c
index 59ae0a110da1..63b0fa0e9ed7 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1311,7 +1311,7 @@ bdrv_driver_pwritev_compressed(BlockDriverState *bs, 
int64_t offset,
 }

 static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child,
-int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
+int64_t offset, int64_t bytes, QEMUIOVector *qiov,
 size_t qiov_offset, int flags)
 {
 BlockDriverState *bs = child->bs;
@@ -1326,13 +1326,15 @@ static int coroutine_fn 
bdrv_co_do_copy_on_readv(BdrvChild *child,
 BlockDriver *drv = bs->drv;
 int64_t cluster_offset;
 int64_t cluster_bytes;
-size_t skip_bytes;
+int64_t skip_bytes;
 int ret;
 int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer,
 BDRV_REQUEST_MAX_BYTES);
-unsigned int progress = 0;
+int64_t progress = 0;
 bool skip_write;

+bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, &error_abort);
+
 if (!drv) {
 return -ENOMEDIUM;
 }
diff --git a/block/trace-events b/block/trace-events
index ecbc32a80a91..82b5dd7cb619 100644
--- a/block/trace-events
+++ b/block/trace-events
@@ -14,7 +14,7 @@ blk_root_detach(void *child, void *blk, void *bs) "child %p 
blk %p bs %p"
 bdrv_co_preadv(void *bs, int64_t offset, int64_t nbytes, unsigned int flags) 
"bs %p offset %"PRId64" nbytes %"PRId64" flags 0x%x"
 bdrv_co_pwritev(void *bs, int64_t offset, int64_t nbytes, unsigned int flags) 
"bs %p offset %"PRId64" nbytes %"PRId64" flags 0x%x"
 bdrv_co_pwrite_zeroes(void *bs, int64_t offset, int count, int flags) "bs %p 
offset %"PRId64" count %d flags 0x%x"
-bdrv_co_do_copy_on_readv(void *bs, int64_t offset, unsigned int bytes, int64_t 
cluster_offset, int64_t cluster_bytes) "bs %p offset %"PRId64" bytes %u 
cluster_offset %"PRId64" cluster_bytes %"PRId64
+bdrv_co_do_copy_on_readv(void *bs, int64_t offset, int64_t bytes, int64_t 
cluster_offset, int64_t cluster_bytes) "bs %p offset %" PRId64 " bytes %" 
PRId64 " cluster_offset %" PRId64 " cluster_bytes %" PRId64
 bdrv_co_copy_range_from(void *src, uint64_t src_offset, void *dst, uint64_t 
dst_offset, uint64_t bytes, int read_flags, int write_flags) "src %p offset 
%"PRIu64" dst %p offset %"PRIu64" bytes %"PRIu64" rw flags 0x%x 0x%x"
 bdrv_co_copy_range_to(void *src, uint64_t src_offset, void *dst, uint64_t 
dst_offset, uint64_t bytes, int read_flags, int write_flags) "src %p offset 
%"PRIu64" dst %p offset %"PRIu64" bytes %"PRIu64" rw flags 0x%x 0x%x"

-- 
2.30.0

[PULL 09/20] block: use int64_t as bytes type in tracked requests