On Mon, Mar 09, 2026 at 09:09:01AM +0000, Tejus GK wrote:
> Currently, the dirty-sync-missed-zero-copy stat is incremented only when
> an entire batch of IO operations fails to use zerocopy and falls back to
> a normal copy. As long as at least one IO in the batch is successfully
> zero-copied, the whole batch is treated as a success. This hides
> individual IO fallbacks and makes the migration stat less accurate than
> it could be.
>
> Make the stat more accurate by reporting at a finer granularity, i.e, by
> incrementing for every individual IO fallback that occurs.
>
> Suggested-by: Peter Xu <[email protected]>
> Signed-off-by: Tejus GK <[email protected]>
> ---
> include/io/channel-socket.h | 6 +-----
> include/io/channel.h | 5 ++---
> io/channel-socket.c | 13 ++++---------
> migration/multifd.c | 8 +++++---
> qapi/migration.json | 3 +--
> 5 files changed, 13 insertions(+), 22 deletions(-)
>
> diff --git a/include/io/channel-socket.h b/include/io/channel-socket.h
> index a1ef3136ea..5c5714668e 100644
> --- a/include/io/channel-socket.h
> +++ b/include/io/channel-socket.h
> @@ -50,11 +50,7 @@ struct QIOChannelSocket {
> ssize_t zero_copy_queued;
> ssize_t zero_copy_sent;
> bool blocking;
> - /**
> - * This flag indicates whether any new data was successfully sent with
> - * zerocopy since the last qio_channel_socket_flush() call.
> - */
> - bool new_zero_copy_sent_success;
> + ssize_t zero_copy_fallback;
> };
>
>
> diff --git a/include/io/channel.h b/include/io/channel.h
> index 1b02350437..70f701ae16 100644
> --- a/include/io/channel.h
> +++ b/include/io/channel.h
> @@ -1013,9 +1013,8 @@ int coroutine_mixed_fn
> qio_channel_writev_full_all(QIOChannel *ioc,
> *
> * If not implemented, acts as a no-op, and returns 0.
> *
> - * Returns -1 if any error is found,
> - * 1 if every send failed to use zero copy.
> - * 0 otherwise.
> + * Returns -1 if any error is found, otherwise returns a non-negative number
> + * indicating the number of IO requests that fell back to copy.
IIUC that is *not* what the code is actually counting though.....
> */
>
> int qio_channel_flush(QIOChannel *ioc,
> diff --git a/io/channel-socket.c b/io/channel-socket.c
> index 3053b35ad8..08b9862074 100644
> --- a/io/channel-socket.c
> +++ b/io/channel-socket.c
> @@ -72,7 +72,7 @@ qio_channel_socket_new(void)
> sioc->zero_copy_queued = 0;
> sioc->zero_copy_sent = 0;
> sioc->blocking = false;
> - sioc->new_zero_copy_sent_success = false;
> + sioc->zero_copy_fallback = 0;
>
> ioc = QIO_CHANNEL(sioc);
> qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN);
> @@ -881,8 +881,8 @@ static int qio_channel_socket_flush_internal(QIOChannel
> *ioc,
> sioc->zero_copy_sent += serr->ee_data - serr->ee_info + 1;
>
> /* If any sendmsg() succeeded using zero copy, mark zerocopy success
> */
> - if (serr->ee_code != SO_EE_CODE_ZEROCOPY_COPIED) {
> - sioc->new_zero_copy_sent_success = true;
> + if (serr->ee_code == SO_EE_CODE_ZEROCOPY_COPIED) {
> + sioc->zero_copy_fallback++;
...this is counting the number of MSG_ERRQUEUE items, which is not
the same as the number of IO requests. That's why we only used it
as a boolean marker originally, rather than making it a counter.
> }
> }
>
> @@ -900,12 +900,7 @@ static int qio_channel_socket_flush(QIOChannel *ioc,
> return ret;
> }
>
> - if (sioc->new_zero_copy_sent_success) {
> - sioc->new_zero_copy_sent_success = false;
> - return 0;
> - }
> -
> - return 1;
> + return sioc->zero_copy_fallback;
> }
>
> #endif /* QEMU_MSG_ZEROCOPY */
> diff --git a/migration/multifd.c b/migration/multifd.c
> index ad6261688f..726e40903d 100644
> --- a/migration/multifd.c
> +++ b/migration/multifd.c
> @@ -593,7 +593,7 @@ void multifd_send_shutdown(void)
>
> static int multifd_zero_copy_flush(QIOChannel *c)
> {
> - int ret;
> + ssize_t ret;
> Error *err = NULL;
>
> ret = qio_channel_flush(c, &err);
> @@ -601,8 +601,10 @@ static int multifd_zero_copy_flush(QIOChannel *c)
> error_report_err(err);
> return -1;
> }
> - if (ret == 1) {
> - qatomic_add(&mig_stats.dirty_sync_missed_zero_copy, 1);
> +
> + if (qatomic_read(&mig_stats.dirty_sync_missed_zero_copy) !=
> (uint64_t)ret) {
> + /* Update statistics if more fallback detected */
> + qatomic_set(&mig_stats.dirty_sync_missed_zero_copy, (uint64_t)ret);
> }
>
> return ret;
> diff --git a/qapi/migration.json b/qapi/migration.json
> index f925e5541b..94977b8810 100644
> --- a/qapi/migration.json
> +++ b/qapi/migration.json
> @@ -58,8 +58,7 @@
> # (since 7.0).
> #
> # @dirty-sync-missed-zero-copy: Number of times dirty RAM
> -# synchronization could not avoid copying dirty pages. This is
> -# between 0 and @dirty-sync-count * @multifd-channels.
> +# synchronization could not avoid copying dirty pages.
> # (since 7.1)
> #
> # Since: 0.14
> --
> 2.43.7
>
With regards,
Daniel
--
|: https://berrange.com ~~ https://hachyderm.io/@berrange :|
|: https://libvirt.org ~~ https://entangle-photo.org :|
|: https://pixelfed.art/berrange ~~ https://fstop138.berrange.com :|