RE: [PATCH v6 6/7] migration/multifd: implement qpl compression and decompression

2024-05-15 Thread Liu, Yuan1
> -Original Message-
> From: Fabiano Rosas 
> Sent: Tuesday, May 14, 2024 10:08 PM
> To: Liu, Yuan1 ; pet...@redhat.com
> Cc: qemu-devel@nongnu.org; Zou, Nanhai 
> Subject: RE: [PATCH v6 6/7] migration/multifd: implement qpl compression
> and decompression
> 
> "Liu, Yuan1"  writes:
> 
> >> -Original Message-
> >> From: Fabiano Rosas 
> >> Sent: Monday, May 13, 2024 11:14 PM
> >> To: Liu, Yuan1 ; pet...@redhat.com
> >> Cc: qemu-devel@nongnu.org; Liu, Yuan1 ; Zou,
> Nanhai
> >> 
> >> Subject: Re: [PATCH v6 6/7] migration/multifd: implement qpl
> compression
> >> and decompression
> >>
> >> Yuan Liu  writes:
> >>
> >> > each qpl job is used to (de)compress a normal page and it can
> >> > be processed independently by the IAA hardware. All qpl jobs
> >> > are submitted to the hardware at once, and wait for all jobs
> >> > completion. If hardware path(IAA) is not available, use software
> >> > for compression and decompression.
> >> >
> >> > Signed-off-by: Yuan Liu 
> >> > Reviewed-by: Nanhai Zou 
> >> > ---
> >> >  migration/multifd-qpl.c | 284
> +++-
> >> >  1 file changed, 280 insertions(+), 4 deletions(-)
> >> >
> >> > diff --git a/migration/multifd-qpl.c b/migration/multifd-qpl.c
> >> > index 89fa51091a..9a1fddbdd0 100644
> >> > --- a/migration/multifd-qpl.c
> >> > +++ b/migration/multifd-qpl.c
> >> > @@ -13,6 +13,7 @@
> >> >  #include "qemu/osdep.h"
> >> >  #include "qemu/module.h"
> >> >  #include "qapi/error.h"
> >> > +#include "exec/ramblock.h"
> >> >  #include "migration.h"
> >> >  #include "multifd.h"
> >> >  #include "qpl/qpl.h"
> >> > @@ -204,6 +205,139 @@ static void
> >> multifd_qpl_send_cleanup(MultiFDSendParams *p, Error **errp)
> >> >  p->iov = NULL;
> >> >  }
> >> >
> >> > +/**
> >> > + * multifd_qpl_prepare_job: prepare a compression or decompression
> job
> >> > + *
> >> > + * Prepare a compression or decompression job and configure job
> >> attributes
> >> > + * including job compression level and flags.
> >> > + *
> >> > + * @job: pointer to the QplData structure
> >>
> >> qpl_job structure
> >
> > Thanks for the comment, I will fix this next version.
> >
> >> > + * @is_compression: compression or decompression indication
> >> > + * @input: pointer to the input data buffer
> >> > + * @input_len: the length of the input data
> >> > + * @output: pointer to the output data buffer
> >> > + * @output_len: the size of the output data buffer
> >> > + */
> >> > +static void multifd_qpl_prepare_job(qpl_job *job, bool
> is_compression,
> >> > +uint8_t *input, uint32_t
> input_len,
> >> > +uint8_t *output, uint32_t
> >> output_len)
> >> > +{
> >> > +job->op = is_compression ? qpl_op_compress : qpl_op_decompress;
> >> > +job->next_in_ptr = input;
> >> > +job->next_out_ptr = output;
> >> > +job->available_in = input_len;
> >> > +job->available_out = output_len;
> >> > +job->flags = QPL_FLAG_FIRST | QPL_FLAG_LAST |
> QPL_FLAG_OMIT_VERIFY;
> >> > +/* only supports one compression level */
> >> > +job->level = 1;
> >> > +}
> >> > +
> >> > +/**
> >> > + * multifd_qpl_build_packet: build a qpl compressed data packet
> >> > + *
> >> > + * The qpl compressed data packet consists of two parts, one part
> >> stores
> >> > + * the compressed length of each page, and the other part is the
> >> compressed
> >> > + * data of each page. The zbuf_hdr stores the compressed length of
> all
> >> pages,
> >> > + * and use a separate IOV to store the compressed data of each page.
> >> > + *
> >> > + * @qpl: pointer to the QplData structure
> >> > + * @p: Params for the channel that we are using
> >> > + * @idx: The index of the compressed length array
> >> > + * @addr: pointer to the compressed data
> >> > + * @len: 

RE: [PATCH v6 6/7] migration/multifd: implement qpl compression and decompression

2024-05-14 Thread Fabiano Rosas
"Liu, Yuan1"  writes:

>> -Original Message-
>> From: Fabiano Rosas 
>> Sent: Monday, May 13, 2024 11:14 PM
>> To: Liu, Yuan1 ; pet...@redhat.com
>> Cc: qemu-devel@nongnu.org; Liu, Yuan1 ; Zou, Nanhai
>> 
>> Subject: Re: [PATCH v6 6/7] migration/multifd: implement qpl compression
>> and decompression
>> 
>> Yuan Liu  writes:
>> 
>> > each qpl job is used to (de)compress a normal page and it can
>> > be processed independently by the IAA hardware. All qpl jobs
>> > are submitted to the hardware at once, and wait for all jobs
>> > completion. If hardware path(IAA) is not available, use software
>> > for compression and decompression.
>> >
>> > Signed-off-by: Yuan Liu 
>> > Reviewed-by: Nanhai Zou 
>> > ---
>> >  migration/multifd-qpl.c | 284 +++-
>> >  1 file changed, 280 insertions(+), 4 deletions(-)
>> >
>> > diff --git a/migration/multifd-qpl.c b/migration/multifd-qpl.c
>> > index 89fa51091a..9a1fddbdd0 100644
>> > --- a/migration/multifd-qpl.c
>> > +++ b/migration/multifd-qpl.c
>> > @@ -13,6 +13,7 @@
>> >  #include "qemu/osdep.h"
>> >  #include "qemu/module.h"
>> >  #include "qapi/error.h"
>> > +#include "exec/ramblock.h"
>> >  #include "migration.h"
>> >  #include "multifd.h"
>> >  #include "qpl/qpl.h"
>> > @@ -204,6 +205,139 @@ static void
>> multifd_qpl_send_cleanup(MultiFDSendParams *p, Error **errp)
>> >  p->iov = NULL;
>> >  }
>> >
>> > +/**
>> > + * multifd_qpl_prepare_job: prepare a compression or decompression job
>> > + *
>> > + * Prepare a compression or decompression job and configure job
>> attributes
>> > + * including job compression level and flags.
>> > + *
>> > + * @job: pointer to the QplData structure
>> 
>> qpl_job structure
>
> Thanks for the comment, I will fix this next version.
>
>> > + * @is_compression: compression or decompression indication
>> > + * @input: pointer to the input data buffer
>> > + * @input_len: the length of the input data
>> > + * @output: pointer to the output data buffer
>> > + * @output_len: the size of the output data buffer
>> > + */
>> > +static void multifd_qpl_prepare_job(qpl_job *job, bool is_compression,
>> > +uint8_t *input, uint32_t input_len,
>> > +uint8_t *output, uint32_t
>> output_len)
>> > +{
>> > +job->op = is_compression ? qpl_op_compress : qpl_op_decompress;
>> > +job->next_in_ptr = input;
>> > +job->next_out_ptr = output;
>> > +job->available_in = input_len;
>> > +job->available_out = output_len;
>> > +job->flags = QPL_FLAG_FIRST | QPL_FLAG_LAST | QPL_FLAG_OMIT_VERIFY;
>> > +/* only supports one compression level */
>> > +job->level = 1;
>> > +}
>> > +
>> > +/**
>> > + * multifd_qpl_build_packet: build a qpl compressed data packet
>> > + *
>> > + * The qpl compressed data packet consists of two parts, one part
>> stores
>> > + * the compressed length of each page, and the other part is the
>> compressed
>> > + * data of each page. The zbuf_hdr stores the compressed length of all
>> pages,
>> > + * and use a separate IOV to store the compressed data of each page.
>> > + *
>> > + * @qpl: pointer to the QplData structure
>> > + * @p: Params for the channel that we are using
>> > + * @idx: The index of the compressed length array
>> > + * @addr: pointer to the compressed data
>> > + * @len: The length of the compressed data
>> > + */
>> > +static void multifd_qpl_build_packet(QplData *qpl, MultiFDSendParams
>> *p,
>> > + uint32_t idx, uint8_t *addr,
>> uint32_t len)
>> > +{
>> > +qpl->zbuf_hdr[idx] = cpu_to_be32(len);
>> > +p->iov[p->iovs_num].iov_base = addr;
>> > +p->iov[p->iovs_num].iov_len = len;
>> > +p->iovs_num++;
>> > +p->next_packet_size += len;
>> > +}
>> > +
>> > +/**
>> > + * multifd_qpl_compress_pages: compress normal pages
>> > + *
>> > + * Each normal page will be compressed independently, and the
>> compression jobs
>>

RE: [PATCH v6 6/7] migration/multifd: implement qpl compression and decompression

2024-05-14 Thread Liu, Yuan1
> -Original Message-
> From: Fabiano Rosas 
> Sent: Monday, May 13, 2024 11:14 PM
> To: Liu, Yuan1 ; pet...@redhat.com
> Cc: qemu-devel@nongnu.org; Liu, Yuan1 ; Zou, Nanhai
> 
> Subject: Re: [PATCH v6 6/7] migration/multifd: implement qpl compression
> and decompression
> 
> Yuan Liu  writes:
> 
> > each qpl job is used to (de)compress a normal page and it can
> > be processed independently by the IAA hardware. All qpl jobs
> > are submitted to the hardware at once, and wait for all jobs
> > completion. If hardware path(IAA) is not available, use software
> > for compression and decompression.
> >
> > Signed-off-by: Yuan Liu 
> > Reviewed-by: Nanhai Zou 
> > ---
> >  migration/multifd-qpl.c | 284 +++-
> >  1 file changed, 280 insertions(+), 4 deletions(-)
> >
> > diff --git a/migration/multifd-qpl.c b/migration/multifd-qpl.c
> > index 89fa51091a..9a1fddbdd0 100644
> > --- a/migration/multifd-qpl.c
> > +++ b/migration/multifd-qpl.c
> > @@ -13,6 +13,7 @@
> >  #include "qemu/osdep.h"
> >  #include "qemu/module.h"
> >  #include "qapi/error.h"
> > +#include "exec/ramblock.h"
> >  #include "migration.h"
> >  #include "multifd.h"
> >  #include "qpl/qpl.h"
> > @@ -204,6 +205,139 @@ static void
> multifd_qpl_send_cleanup(MultiFDSendParams *p, Error **errp)
> >  p->iov = NULL;
> >  }
> >
> > +/**
> > + * multifd_qpl_prepare_job: prepare a compression or decompression job
> > + *
> > + * Prepare a compression or decompression job and configure job
> attributes
> > + * including job compression level and flags.
> > + *
> > + * @job: pointer to the QplData structure
> 
> qpl_job structure

Thanks for the comment, I will fix this next version.

> > + * @is_compression: compression or decompression indication
> > + * @input: pointer to the input data buffer
> > + * @input_len: the length of the input data
> > + * @output: pointer to the output data buffer
> > + * @output_len: the size of the output data buffer
> > + */
> > +static void multifd_qpl_prepare_job(qpl_job *job, bool is_compression,
> > +uint8_t *input, uint32_t input_len,
> > +uint8_t *output, uint32_t
> output_len)
> > +{
> > +job->op = is_compression ? qpl_op_compress : qpl_op_decompress;
> > +job->next_in_ptr = input;
> > +job->next_out_ptr = output;
> > +job->available_in = input_len;
> > +job->available_out = output_len;
> > +job->flags = QPL_FLAG_FIRST | QPL_FLAG_LAST | QPL_FLAG_OMIT_VERIFY;
> > +/* only supports one compression level */
> > +job->level = 1;
> > +}
> > +
> > +/**
> > + * multifd_qpl_build_packet: build a qpl compressed data packet
> > + *
> > + * The qpl compressed data packet consists of two parts, one part
> stores
> > + * the compressed length of each page, and the other part is the
> compressed
> > + * data of each page. The zbuf_hdr stores the compressed length of all
> pages,
> > + * and use a separate IOV to store the compressed data of each page.
> > + *
> > + * @qpl: pointer to the QplData structure
> > + * @p: Params for the channel that we are using
> > + * @idx: The index of the compressed length array
> > + * @addr: pointer to the compressed data
> > + * @len: The length of the compressed data
> > + */
> > +static void multifd_qpl_build_packet(QplData *qpl, MultiFDSendParams
> *p,
> > + uint32_t idx, uint8_t *addr,
> uint32_t len)
> > +{
> > +qpl->zbuf_hdr[idx] = cpu_to_be32(len);
> > +p->iov[p->iovs_num].iov_base = addr;
> > +p->iov[p->iovs_num].iov_len = len;
> > +p->iovs_num++;
> > +p->next_packet_size += len;
> > +}
> > +
> > +/**
> > + * multifd_qpl_compress_pages: compress normal pages
> > + *
> > + * Each normal page will be compressed independently, and the
> compression jobs
> > + * will be submitted to the IAA hardware in non-blocking mode, waiting
> for all
> > + * jobs to be completed and filling the compressed length and data into
> the
> > + * sending IOVs. If IAA device is not available, the software path is
> used.
> > + *
> > + * Returns 0 for success or -1 for error
> > + *
> > + * @p: Params for the channel that we are using
> > + * @errp: pointer to an error
> > + */
>

Re: [PATCH v6 6/7] migration/multifd: implement qpl compression and decompression

2024-05-13 Thread Fabiano Rosas
Yuan Liu  writes:

> each qpl job is used to (de)compress a normal page and it can
> be processed independently by the IAA hardware. All qpl jobs
> are submitted to the hardware at once, and wait for all jobs
> completion. If hardware path(IAA) is not available, use software
> for compression and decompression.
>
> Signed-off-by: Yuan Liu 
> Reviewed-by: Nanhai Zou 
> ---
>  migration/multifd-qpl.c | 284 +++-
>  1 file changed, 280 insertions(+), 4 deletions(-)
>
> diff --git a/migration/multifd-qpl.c b/migration/multifd-qpl.c
> index 89fa51091a..9a1fddbdd0 100644
> --- a/migration/multifd-qpl.c
> +++ b/migration/multifd-qpl.c
> @@ -13,6 +13,7 @@
>  #include "qemu/osdep.h"
>  #include "qemu/module.h"
>  #include "qapi/error.h"
> +#include "exec/ramblock.h"
>  #include "migration.h"
>  #include "multifd.h"
>  #include "qpl/qpl.h"
> @@ -204,6 +205,139 @@ static void multifd_qpl_send_cleanup(MultiFDSendParams 
> *p, Error **errp)
>  p->iov = NULL;
>  }
>  
> +/**
> + * multifd_qpl_prepare_job: prepare a compression or decompression job
> + *
> + * Prepare a compression or decompression job and configure job attributes
> + * including job compression level and flags.
> + *
> + * @job: pointer to the QplData structure

qpl_job structure

> + * @is_compression: compression or decompression indication
> + * @input: pointer to the input data buffer
> + * @input_len: the length of the input data
> + * @output: pointer to the output data buffer
> + * @output_len: the size of the output data buffer
> + */
> +static void multifd_qpl_prepare_job(qpl_job *job, bool is_compression,
> +uint8_t *input, uint32_t input_len,
> +uint8_t *output, uint32_t output_len)
> +{
> +job->op = is_compression ? qpl_op_compress : qpl_op_decompress;
> +job->next_in_ptr = input;
> +job->next_out_ptr = output;
> +job->available_in = input_len;
> +job->available_out = output_len;
> +job->flags = QPL_FLAG_FIRST | QPL_FLAG_LAST | QPL_FLAG_OMIT_VERIFY;
> +/* only supports one compression level */
> +job->level = 1;
> +}
> +
> +/**
> + * multifd_qpl_build_packet: build a qpl compressed data packet
> + *
> + * The qpl compressed data packet consists of two parts, one part stores
> + * the compressed length of each page, and the other part is the compressed
> + * data of each page. The zbuf_hdr stores the compressed length of all pages,
> + * and use a separate IOV to store the compressed data of each page.
> + *
> + * @qpl: pointer to the QplData structure
> + * @p: Params for the channel that we are using
> + * @idx: The index of the compressed length array
> + * @addr: pointer to the compressed data
> + * @len: The length of the compressed data
> + */
> +static void multifd_qpl_build_packet(QplData *qpl, MultiFDSendParams *p,
> + uint32_t idx, uint8_t *addr, uint32_t 
> len)
> +{
> +qpl->zbuf_hdr[idx] = cpu_to_be32(len);
> +p->iov[p->iovs_num].iov_base = addr;
> +p->iov[p->iovs_num].iov_len = len;
> +p->iovs_num++;
> +p->next_packet_size += len;
> +}
> +
> +/**
> + * multifd_qpl_compress_pages: compress normal pages
> + *
> + * Each normal page will be compressed independently, and the compression 
> jobs
> + * will be submitted to the IAA hardware in non-blocking mode, waiting for 
> all
> + * jobs to be completed and filling the compressed length and data into the
> + * sending IOVs. If IAA device is not available, the software path is used.
> + *
> + * Returns 0 for success or -1 for error
> + *
> + * @p: Params for the channel that we are using
> + * @errp: pointer to an error
> + */
> +static int multifd_qpl_compress_pages(MultiFDSendParams *p, Error **errp)
> +{
> +qpl_status status;
> +QplData *qpl = p->compress_data;
> +MultiFDPages_t *pages = p->pages;
> +uint8_t *zbuf = qpl->zbuf;
> +uint8_t *host = pages->block->host;
> +uint32_t job_num = pages->normal_num;

A bit misleading because job_num is used in the previous patch as a
synonym for page_count. We could change the previous patch to:
multifd_qpl_init(uint32_t page_count, ...

> +qpl_job *job = NULL;
> +
> +assert(job_num <= qpl->total_job_num);
> +/* submit all compression jobs */
> +for (int i = 0; i < job_num; i++) {
> +job = qpl->job_array[i];
> +multifd_qpl_prepare_job(job, true, host + pages->offset[i],
> +p->page_size, zbuf, p->page_size - 1);

Isn't the output buffer size == page size, why the -1?

> +/* if hardware path(IAA) is unavailable, call the software path */

If we're doing the fallback automatically, isn't that what qpl_path_auto
does already? What's the difference betweeen the two approaches?

> +if (!qpl->iaa_avail) {

This function got a bit convoluted, it's probably worth a check at the
start and a branch to different multifd_qpl_compress_pages_slow()
routine 

[PATCH v6 6/7] migration/multifd: implement qpl compression and decompression

2024-05-06 Thread Yuan Liu
each qpl job is used to (de)compress a normal page and it can
be processed independently by the IAA hardware. All qpl jobs
are submitted to the hardware at once, and wait for all jobs
completion. If hardware path(IAA) is not available, use software
for compression and decompression.

Signed-off-by: Yuan Liu 
Reviewed-by: Nanhai Zou 
---
 migration/multifd-qpl.c | 284 +++-
 1 file changed, 280 insertions(+), 4 deletions(-)

diff --git a/migration/multifd-qpl.c b/migration/multifd-qpl.c
index 89fa51091a..9a1fddbdd0 100644
--- a/migration/multifd-qpl.c
+++ b/migration/multifd-qpl.c
@@ -13,6 +13,7 @@
 #include "qemu/osdep.h"
 #include "qemu/module.h"
 #include "qapi/error.h"
+#include "exec/ramblock.h"
 #include "migration.h"
 #include "multifd.h"
 #include "qpl/qpl.h"
@@ -204,6 +205,139 @@ static void multifd_qpl_send_cleanup(MultiFDSendParams 
*p, Error **errp)
 p->iov = NULL;
 }
 
+/**
+ * multifd_qpl_prepare_job: prepare a compression or decompression job
+ *
+ * Prepare a compression or decompression job and configure job attributes
+ * including job compression level and flags.
+ *
+ * @job: pointer to the QplData structure
+ * @is_compression: compression or decompression indication
+ * @input: pointer to the input data buffer
+ * @input_len: the length of the input data
+ * @output: pointer to the output data buffer
+ * @output_len: the size of the output data buffer
+ */
+static void multifd_qpl_prepare_job(qpl_job *job, bool is_compression,
+uint8_t *input, uint32_t input_len,
+uint8_t *output, uint32_t output_len)
+{
+job->op = is_compression ? qpl_op_compress : qpl_op_decompress;
+job->next_in_ptr = input;
+job->next_out_ptr = output;
+job->available_in = input_len;
+job->available_out = output_len;
+job->flags = QPL_FLAG_FIRST | QPL_FLAG_LAST | QPL_FLAG_OMIT_VERIFY;
+/* only supports one compression level */
+job->level = 1;
+}
+
+/**
+ * multifd_qpl_build_packet: build a qpl compressed data packet
+ *
+ * The qpl compressed data packet consists of two parts, one part stores
+ * the compressed length of each page, and the other part is the compressed
+ * data of each page. The zbuf_hdr stores the compressed length of all pages,
+ * and use a separate IOV to store the compressed data of each page.
+ *
+ * @qpl: pointer to the QplData structure
+ * @p: Params for the channel that we are using
+ * @idx: The index of the compressed length array
+ * @addr: pointer to the compressed data
+ * @len: The length of the compressed data
+ */
+static void multifd_qpl_build_packet(QplData *qpl, MultiFDSendParams *p,
+ uint32_t idx, uint8_t *addr, uint32_t len)
+{
+qpl->zbuf_hdr[idx] = cpu_to_be32(len);
+p->iov[p->iovs_num].iov_base = addr;
+p->iov[p->iovs_num].iov_len = len;
+p->iovs_num++;
+p->next_packet_size += len;
+}
+
+/**
+ * multifd_qpl_compress_pages: compress normal pages
+ *
+ * Each normal page will be compressed independently, and the compression jobs
+ * will be submitted to the IAA hardware in non-blocking mode, waiting for all
+ * jobs to be completed and filling the compressed length and data into the
+ * sending IOVs. If IAA device is not available, the software path is used.
+ *
+ * Returns 0 for success or -1 for error
+ *
+ * @p: Params for the channel that we are using
+ * @errp: pointer to an error
+ */
+static int multifd_qpl_compress_pages(MultiFDSendParams *p, Error **errp)
+{
+qpl_status status;
+QplData *qpl = p->compress_data;
+MultiFDPages_t *pages = p->pages;
+uint8_t *zbuf = qpl->zbuf;
+uint8_t *host = pages->block->host;
+uint32_t job_num = pages->normal_num;
+qpl_job *job = NULL;
+
+assert(job_num <= qpl->total_job_num);
+/* submit all compression jobs */
+for (int i = 0; i < job_num; i++) {
+job = qpl->job_array[i];
+multifd_qpl_prepare_job(job, true, host + pages->offset[i],
+p->page_size, zbuf, p->page_size - 1);
+/* if hardware path(IAA) is unavailable, call the software path */
+if (!qpl->iaa_avail) {
+status = qpl_execute_job(job);
+if (status == QPL_STS_OK) {
+multifd_qpl_build_packet(qpl, p, i, zbuf, job->total_out);
+} else if (status == QPL_STS_MORE_OUTPUT_NEEDED) {
+/* compressed length exceeds page size, send page directly */
+multifd_qpl_build_packet(qpl, p, i, host + pages->offset[i],
+ p->page_size);
+} else {
+error_setg(errp, "multifd %u: qpl_execute_job error %d",
+   p->id, status);
+return -1;
+}
+zbuf += p->page_size;
+continue;
+}
+retry:
+status = qpl_submit_job(job);
+if (status == QPL_STS_OK) {
+