On Mon, Mar 09, 2026 at 09:09:01AM +0000, Tejus GK wrote:
> Currently, the dirty-sync-missed-zero-copy stat is incremented only when
> an entire batch of IO operations fails to use zerocopy and falls back to
> a normal copy. As long as at least one IO in the batch is successfully
> zero-copied, the whole batch is treated as a success. This hides
> individual IO fallbacks and makes the migration stat less accurate than
> it could be.
> 
> Make the stat more accurate by reporting at a finer granularity, i.e, by
> incrementing for every individual IO fallback that occurs.
> 
> Suggested-by: Peter Xu <[email protected]>
> Signed-off-by: Tejus GK <[email protected]>
> ---
>  include/io/channel-socket.h |  6 +-----
>  include/io/channel.h        |  5 ++---
>  io/channel-socket.c         | 13 ++++---------
>  migration/multifd.c         |  8 +++++---
>  qapi/migration.json         |  3 +--
>  5 files changed, 13 insertions(+), 22 deletions(-)
> 
> diff --git a/include/io/channel-socket.h b/include/io/channel-socket.h
> index a1ef3136ea..5c5714668e 100644
> --- a/include/io/channel-socket.h
> +++ b/include/io/channel-socket.h
> @@ -50,11 +50,7 @@ struct QIOChannelSocket {
>      ssize_t zero_copy_queued;
>      ssize_t zero_copy_sent;
>      bool blocking;
> -    /**
> -     * This flag indicates whether any new data was successfully sent with
> -     * zerocopy since the last qio_channel_socket_flush() call.
> -     */
> -    bool new_zero_copy_sent_success;
> +    ssize_t zero_copy_fallback;
>  };
>  
>  
> diff --git a/include/io/channel.h b/include/io/channel.h
> index 1b02350437..70f701ae16 100644
> --- a/include/io/channel.h
> +++ b/include/io/channel.h
> @@ -1013,9 +1013,8 @@ int coroutine_mixed_fn 
> qio_channel_writev_full_all(QIOChannel *ioc,
>   *
>   * If not implemented, acts as a no-op, and returns 0.
>   *
> - * Returns -1 if any error is found,
> - *          1 if every send failed to use zero copy.
> - *          0 otherwise.
> + * Returns -1 if any error is found, otherwise returns a non-negative number
> + * indicating the number of IO requests that fell back to copy.

IIUC that is *not* what the code is actually counting though.....

>   */
>  
>  int qio_channel_flush(QIOChannel *ioc,
> diff --git a/io/channel-socket.c b/io/channel-socket.c
> index 3053b35ad8..08b9862074 100644
> --- a/io/channel-socket.c
> +++ b/io/channel-socket.c
> @@ -72,7 +72,7 @@ qio_channel_socket_new(void)
>      sioc->zero_copy_queued = 0;
>      sioc->zero_copy_sent = 0;
>      sioc->blocking = false;
> -    sioc->new_zero_copy_sent_success = false;
> +    sioc->zero_copy_fallback = 0;
>  
>      ioc = QIO_CHANNEL(sioc);
>      qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN);
> @@ -881,8 +881,8 @@ static int qio_channel_socket_flush_internal(QIOChannel 
> *ioc,
>          sioc->zero_copy_sent += serr->ee_data - serr->ee_info + 1;
>  
>          /* If any sendmsg() succeeded using zero copy, mark zerocopy success 
> */
> -        if (serr->ee_code != SO_EE_CODE_ZEROCOPY_COPIED) {
> -            sioc->new_zero_copy_sent_success = true;
> +        if (serr->ee_code == SO_EE_CODE_ZEROCOPY_COPIED) {
> +            sioc->zero_copy_fallback++;

...this is counting the number of MSG_ERRQUEUE items, which is not
the same as the number of IO requests. That's why we only used it
as a boolean marker originally, rather than making it a counter.

>          }
>      }
>  
> @@ -900,12 +900,7 @@ static int qio_channel_socket_flush(QIOChannel *ioc,
>          return ret;
>      }
>  
> -    if (sioc->new_zero_copy_sent_success) {
> -        sioc->new_zero_copy_sent_success = false;
> -        return 0;
> -    }
> -
> -    return 1;
> +    return sioc->zero_copy_fallback;
>  }
>  
>  #endif /* QEMU_MSG_ZEROCOPY */
> diff --git a/migration/multifd.c b/migration/multifd.c
> index ad6261688f..726e40903d 100644
> --- a/migration/multifd.c
> +++ b/migration/multifd.c
> @@ -593,7 +593,7 @@ void multifd_send_shutdown(void)
>  
>  static int multifd_zero_copy_flush(QIOChannel *c)
>  {
> -    int ret;
> +    ssize_t ret;
>      Error *err = NULL;
>  
>      ret = qio_channel_flush(c, &err);
> @@ -601,8 +601,10 @@ static int multifd_zero_copy_flush(QIOChannel *c)
>          error_report_err(err);
>          return -1;
>      }
> -    if (ret == 1) {
> -        qatomic_add(&mig_stats.dirty_sync_missed_zero_copy, 1);
> +
> +    if (qatomic_read(&mig_stats.dirty_sync_missed_zero_copy) != 
> (uint64_t)ret) {
> +        /* Update statistics if more fallback detected */
> +        qatomic_set(&mig_stats.dirty_sync_missed_zero_copy, (uint64_t)ret);
>      }
>  
>      return ret;
> diff --git a/qapi/migration.json b/qapi/migration.json
> index f925e5541b..94977b8810 100644
> --- a/qapi/migration.json
> +++ b/qapi/migration.json
> @@ -58,8 +58,7 @@
>  #     (since 7.0).
>  #
>  # @dirty-sync-missed-zero-copy: Number of times dirty RAM
> -#     synchronization could not avoid copying dirty pages.  This is
> -#     between 0 and @dirty-sync-count * @multifd-channels.
> +#     synchronization could not avoid copying dirty pages.
>  #     (since 7.1)
>  #
>  # Since: 0.14
> -- 
> 2.43.7
> 

With regards,
Daniel
-- 
|: https://berrange.com       ~~        https://hachyderm.io/@berrange :|
|: https://libvirt.org          ~~          https://entangle-photo.org :|
|: https://pixelfed.art/berrange   ~~    https://fstop138.berrange.com :|


Reply via email to