Yuan Liu <yuan1....@intel.com> writes: > QPL compression and decompression will use IAA hardware first. > If IAA hardware is not available, it will automatically fall > back to QPL software path, if the software job also fails, > the uncompressed page is sent directly. > > Signed-off-by: Yuan Liu <yuan1....@intel.com> > Reviewed-by: Nanhai Zou <nanhai....@intel.com> > --- > migration/multifd-qpl.c | 412 +++++++++++++++++++++++++++++++++++++++- > 1 file changed, 408 insertions(+), 4 deletions(-) > > diff --git a/migration/multifd-qpl.c b/migration/multifd-qpl.c > index 6791a204d5..18b3384bd5 100644 > --- a/migration/multifd-qpl.c > +++ b/migration/multifd-qpl.c > @@ -13,9 +13,14 @@ > #include "qemu/osdep.h" > #include "qemu/module.h" > #include "qapi/error.h" > +#include "qapi/qapi-types-migration.h" > +#include "exec/ramblock.h" > #include "multifd.h" > #include "qpl/qpl.h" > > +/* Maximum number of retries to resubmit a job if IAA work queues are full */ > +#define MAX_SUBMIT_RETRY_NUM (3) > + > typedef struct { > /* the QPL hardware path job */ > qpl_job *job; > @@ -260,6 +265,219 @@ static void multifd_qpl_send_cleanup(MultiFDSendParams > *p, Error **errp) > p->iov = NULL; > } > > +/** > + * multifd_qpl_prepare_job: prepare the job > + * > + * Set the QPL job parameters and properties. > + * > + * @job: pointer to the qpl_job structure > + * @is_compression: indicates compression and decompression > + * @input: pointer to the input data buffer > + * @input_len: the length of the input data > + * @output: pointer to the output data buffer > + * @output_len: the length of the output data > + */ > +static void multifd_qpl_prepare_job(qpl_job *job, bool is_compression, > + uint8_t *input, uint32_t input_len, > + uint8_t *output, uint32_t output_len) > +{ > + job->op = is_compression ? qpl_op_compress : qpl_op_decompress; > + job->next_in_ptr = input; > + job->next_out_ptr = output; > + job->available_in = input_len; > + job->available_out = output_len; > + job->flags = QPL_FLAG_FIRST | QPL_FLAG_LAST | QPL_FLAG_OMIT_VERIFY; > + /* only supports compression level 1 */ > + job->level = 1; > +} > + > +/** > + * multifd_qpl_prepare_job: prepare the compression job
function name is wrong > + * > + * Set the compression job parameters and properties. > + * > + * @job: pointer to the qpl_job structure > + * @input: pointer to the input data buffer > + * @input_len: the length of the input data > + * @output: pointer to the output data buffer > + * @output_len: the length of the output data > + */ > +static void multifd_qpl_prepare_comp_job(qpl_job *job, uint8_t *input, > + uint32_t input_len, uint8_t *output, > + uint32_t output_len) > +{ > + multifd_qpl_prepare_job(job, true, input, input_len, output, output_len); > +} > + > +/** > + * multifd_qpl_prepare_job: prepare the decompression job here as well > + * > + * Set the decompression job parameters and properties. > + * > + * @job: pointer to the qpl_job structure > + * @input: pointer to the input data buffer > + * @input_len: the length of the input data > + * @output: pointer to the output data buffer > + * @output_len: the length of the output data > + */ > +static void multifd_qpl_prepare_decomp_job(qpl_job *job, uint8_t *input, > + uint32_t input_len, uint8_t > *output, > + uint32_t output_len) > +{ > + multifd_qpl_prepare_job(job, false, input, input_len, output, > output_len); > +} > + > +/** > + * multifd_qpl_fill_iov: fill in the IOV > + * > + * Fill in the QPL packet IOV > + * > + * @p: Params for the channel being used > + * @data: pointer to the IOV data > + * @len: The length of the IOV data > + */ > +static void multifd_qpl_fill_iov(MultiFDSendParams *p, uint8_t *data, > + uint32_t len) > +{ > + p->iov[p->iovs_num].iov_base = data; > + p->iov[p->iovs_num].iov_len = len; > + p->iovs_num++; > + p->next_packet_size += len; > +} > + > +/** > + * multifd_qpl_fill_packet: fill the compressed page into the QPL packet > + * > + * Fill the compressed page length and IOV into the QPL packet > + * > + * @idx: The index of the compressed length array > + * @p: Params for the channel being used > + * @data: pointer to the compressed page buffer > + * @len: The length of the compressed page > + */ > +static void multifd_qpl_fill_packet(uint32_t idx, MultiFDSendParams *p, > + uint8_t *data, uint32_t len) > +{ > + QplData *qpl = p->compress_data; > + > + qpl->zlen[idx] = cpu_to_be32(len); > + multifd_qpl_fill_iov(p, data, len); > +} > + > +/** > + * multifd_qpl_submit_job: submit a job to the hardware > + * > + * Submit a QPL hardware job to the IAA device > + * > + * Returns true if the job is submitted successfully, otherwise false. > + * > + * @job: pointer to the qpl_job structure > + */ > +static bool multifd_qpl_submit_job(qpl_job *job) > +{ > + qpl_status status; > + uint32_t num = 0; > + > +retry: > + status = qpl_submit_job(job); > + if (status == QPL_STS_QUEUES_ARE_BUSY_ERR) { > + if (num < MAX_SUBMIT_RETRY_NUM) { > + num++; > + goto retry; > + } > + } > + return (status == QPL_STS_OK); How often do we expect this to fail? Will the queues be busy frequently or is this an unlikely event? I'm thinking whether we really need to allow a fallback for the hw path. Sorry if this has been discussed already, I don't remember. > +} > + > +/** > + * multifd_qpl_compress_pages_slow_path: compress pages using slow path > + * > + * Compress the pages using software. If compression fails, the page will > + * be sent directly. > + * > + * @p: Params for the channel being used > + */ > +static void multifd_qpl_compress_pages_slow_path(MultiFDSendParams *p) > +{ > + QplData *qpl = p->compress_data; > + uint32_t size = p->page_size; > + qpl_job *job = qpl->sw_job; > + uint8_t *zbuf = qpl->zbuf; > + uint8_t *buf; > + > + for (int i = 0; i < p->pages->normal_num; i++) { > + buf = p->pages->block->host + p->pages->offset[i]; > + /* Set output length to less than the page to reduce decompression */ > + multifd_qpl_prepare_comp_job(job, buf, size, zbuf, size - 1); > + if (qpl_execute_job(job) == QPL_STS_OK) { > + multifd_qpl_fill_packet(i, p, zbuf, job->total_out); > + } else { > + /* send the page directly */ s/directly/uncompressed/ a bit clearer. > + multifd_qpl_fill_packet(i, p, buf, size); > + } > + zbuf += size; > + } > +} > + > +/** > + * multifd_qpl_compress_pages: compress pages > + * > + * Submit the pages to the IAA hardware for compression. If hardware > + * compression fails, it falls back to software compression. If software > + * compression also fails, the page is sent directly > + * > + * @p: Params for the channel being used > + */ > +static void multifd_qpl_compress_pages(MultiFDSendParams *p) > +{ > + QplData *qpl = p->compress_data; > + MultiFDPages_t *pages = p->pages; > + uint32_t size = p->page_size; > + QplHwJob *hw_job; > + uint8_t *buf; > + uint8_t *zbuf; > + Let's document the output size choice more explicitly: /* * Set output length to less than the page size to force the job to * fail in case it compresses to a larger size. We'll send that page * without compression and skip the decompression operation on the * destination. */ out_size = size - 1; you can then omit the other comments. > + for (int i = 0; i < pages->normal_num; i++) { > + buf = pages->block->host + pages->offset[i]; > + zbuf = qpl->zbuf + (size * i); > + hw_job = &qpl->hw_jobs[i]; > + /* Set output length to less than the page to reduce decompression */ > + multifd_qpl_prepare_comp_job(hw_job->job, buf, size, zbuf, size - 1); > + if (multifd_qpl_submit_job(hw_job->job)) { > + hw_job->fallback_sw_path = false; > + } else { > + hw_job->fallback_sw_path = true; > + /* Set output length less than page size to reduce decompression > */ > + multifd_qpl_prepare_comp_job(qpl->sw_job, buf, size, zbuf, > + size - 1); > + if (qpl_execute_job(qpl->sw_job) == QPL_STS_OK) { > + hw_job->sw_output = zbuf; > + hw_job->sw_output_len = qpl->sw_job->total_out; > + } else { > + hw_job->sw_output = buf; > + hw_job->sw_output_len = size; > + } Hmm, these look a bit cumbersome, would it work if we moved the fallback qpl_execute_job() down into the other loop? We could then avoid the extra fields. Something like: static void multifd_qpl_compress_pages(MultiFDSendParams *p) { QplData *qpl = p->compress_data; MultiFDPages_t *pages = p->pages; uint32_t out_size, size = p->page_size; uint8_t *buf, *zbuf; /* * Set output length to less than the page size to force the job to * fail in case it compresses to a larger size. We'll send that page * without compression to skip the decompression operation on the * destination. */ out_size = size - 1; for (int i = 0; i < pages->normal_num; i++) { QplHwJob *hw_job = &qpl->hw_jobs[i]; hw_job->fallback_sw_path = false; buf = pages->block->host + pages->offset[i]; zbuf = qpl->zbuf + (size * i); multifd_qpl_prepare_comp_job(hw_job->job, buf, size, zbuf, out_size); if (!multifd_qpl_submit_job(hw_job->job)) { hw_job->fallback_sw_path = true; } } for (int i = 0; i < pages->normal_num; i++) { QplHwJob *hw_job = &qpl->hw_jobs[i]; qpl_job *job; buf = pages->block->host + pages->offset[i]; zbuf = qpl->zbuf + (size * i); if (hw_job->fallback_sw_path) { job = qpl->sw_job; multifd_qpl_prepare_comp_job(job, buf, size, zbuf, out_size); ret = qpl_execute_job(job); } else { job = hw_job->job; ret = qpl_wait_job(job); } if (ret == QPL_STS_OK) { multifd_qpl_fill_packet(i, p, zbuf, job->total_out); } else { multifd_qpl_fill_packet(i, p, buf, size); } } } > + } > + } > + > + for (int i = 0; i < pages->normal_num; i++) { > + buf = pages->block->host + pages->offset[i]; > + zbuf = qpl->zbuf + (size * i); > + hw_job = &qpl->hw_jobs[i]; > + if (hw_job->fallback_sw_path) { > + multifd_qpl_fill_packet(i, p, hw_job->sw_output, > + hw_job->sw_output_len); > + continue; > + } > + if (qpl_wait_job(hw_job->job) == QPL_STS_OK) { > + multifd_qpl_fill_packet(i, p, zbuf, hw_job->job->total_out); > + } else { > + /* send the page directly */ > + multifd_qpl_fill_packet(i, p, buf, size); > + } > + } > +} > + > /** > * multifd_qpl_send_prepare: prepare data to be able to send > * > @@ -273,8 +491,26 @@ static void multifd_qpl_send_cleanup(MultiFDSendParams > *p, Error **errp) > */ > static int multifd_qpl_send_prepare(MultiFDSendParams *p, Error **errp) > { > - /* Implement in next patch */ > - return -1; > + QplData *qpl = p->compress_data; > + uint32_t len = 0; > + > + if (!multifd_send_prepare_common(p)) { > + goto out; > + } > + > + /* The first IOV is used to store the compressed page lengths */ > + len = p->pages->normal_num * sizeof(uint32_t); > + multifd_qpl_fill_iov(p, (uint8_t *) qpl->zlen, len); > + if (qpl->hw_avail) { > + multifd_qpl_compress_pages(p); > + } else { > + multifd_qpl_compress_pages_slow_path(p); > + } > + > +out: > + p->flags |= MULTIFD_FLAG_QPL; > + multifd_send_fill_packet(p); > + return 0; > } > > /** > @@ -312,6 +548,134 @@ static void multifd_qpl_recv_cleanup(MultiFDRecvParams > *p) > p->compress_data = NULL; > } > > +/** > + * multifd_qpl_process_and_check_job: process and check a QPL job > + * > + * Process the job and check whether the job output length is the > + * same as the specified length > + * > + * Returns true if the job execution succeeded and the output length > + * is equal to the specified length, otherwise false. > + * > + * @job: pointer to the qpl_job structure > + * @is_hardware: indicates whether the job is a hardware job > + * @len: Specified output length > + * @errp: pointer to an error > + */ > +static bool multifd_qpl_process_and_check_job(qpl_job *job, bool is_hardware, > + uint32_t len, Error **errp) > +{ > + qpl_status status; > + > + status = (is_hardware ? qpl_wait_job(job) : qpl_execute_job(job)); > + if (status != QPL_STS_OK) { > + error_setg(errp, "qpl_execute_job failed with error %d", status); The error message should also cover qpl_wait_job(), right? Maybe just use "qpl job failed". > + return false; > + } > + if (job->total_out != len) { > + error_setg(errp, "qpl decompressed len %u, expected len %u", > + job->total_out, len); > + return false; > + } > + return true; > +} > + > +/** > + * multifd_qpl_decompress_pages_slow_path: decompress pages using slow path > + * > + * Decompress the pages using software > + * > + * Returns 0 on success or -1 on error > + * > + * @p: Params for the channel being used > + * @errp: pointer to an error > + */ > +static int multifd_qpl_decompress_pages_slow_path(MultiFDRecvParams *p, > + Error **errp) > +{ > + QplData *qpl = p->compress_data; > + uint32_t size = p->page_size; > + qpl_job *job = qpl->sw_job; > + uint8_t *zbuf = qpl->zbuf; > + uint8_t *addr; > + uint32_t len; > + > + for (int i = 0; i < p->normal_num; i++) { > + len = qpl->zlen[i]; > + addr = p->host + p->normal[i]; > + /* the page is uncompressed, load it */ > + if (len == size) { > + memcpy(addr, zbuf, size); > + zbuf += size; > + continue; > + } > + multifd_qpl_prepare_decomp_job(job, zbuf, len, addr, size); > + if (!multifd_qpl_process_and_check_job(job, false, size, errp)) { > + return -1; > + } > + zbuf += len; > + } > + return 0; > +} > + > +/** > + * multifd_qpl_decompress_pages: decompress pages > + * > + * Decompress the pages using the IAA hardware. If hardware > + * decompression fails, it falls back to software decompression. > + * > + * Returns 0 on success or -1 on error > + * > + * @p: Params for the channel being used > + * @errp: pointer to an error > + */ > +static int multifd_qpl_decompress_pages(MultiFDRecvParams *p, Error **errp) > +{ > + QplData *qpl = p->compress_data; > + uint32_t size = p->page_size; > + uint8_t *zbuf = qpl->zbuf; > + uint8_t *addr; > + uint32_t len; > + qpl_job *job; > + > + for (int i = 0; i < p->normal_num; i++) { > + addr = p->host + p->normal[i]; > + len = qpl->zlen[i]; > + /* the page is uncompressed if received length equals the page size > */ > + if (len == size) { > + memcpy(addr, zbuf, size); > + zbuf += size; > + continue; > + } > + > + job = qpl->hw_jobs[i].job; > + multifd_qpl_prepare_decomp_job(job, zbuf, len, addr, size); > + if (multifd_qpl_submit_job(job)) { > + qpl->hw_jobs[i].fallback_sw_path = false; > + } else { > + qpl->hw_jobs[i].fallback_sw_path = true; > + job = qpl->sw_job; > + multifd_qpl_prepare_decomp_job(job, zbuf, len, addr, size); > + if (!multifd_qpl_process_and_check_job(job, false, size, errp)) { > + return -1; > + } Here the same suggestion applies. You created multifd_qpl_process_and_check_job() but is now calling it twice, which seems to lose the purpose. If the fallback moves to the loop below, then you do it all in one place: for (int i = 0; i < p->normal_num; i++) { bool is_sw = !qpl->hw_jobs[i].fallback_sw_path; if (is_sw) { job = qpl->sw_job; multifd_qpl_prepare_decomp_job(job, zbuf, len, addr, size); } else { job = qpl->hw_jobs[i].job; } if (!multifd_qpl_process_and_check_job(job, !is_sw, size, errp)) { return -1; } } > + } > + zbuf += len; > + } > + > + for (int i = 0; i < p->normal_num; i++) { > + /* ignore pages that have already been processed */ > + if (qpl->zlen[i] == size || qpl->hw_jobs[i].fallback_sw_path) { > + continue; > + } > + > + job = qpl->hw_jobs[i].job; > + if (!multifd_qpl_process_and_check_job(job, true, size, errp)) { > + return -1; > + } > + } > + return 0; > +} > /** > * multifd_qpl_recv: read the data from the channel into actual pages > * > @@ -325,8 +689,48 @@ static void multifd_qpl_recv_cleanup(MultiFDRecvParams > *p) > */ > static int multifd_qpl_recv(MultiFDRecvParams *p, Error **errp) > { > - /* Implement in next patch */ > - return -1; > + QplData *qpl = p->compress_data; > + uint32_t in_size = p->next_packet_size; > + uint32_t flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK; > + uint32_t len = 0; > + uint32_t zbuf_len = 0; > + int ret; > + > + if (flags != MULTIFD_FLAG_QPL) { > + error_setg(errp, "multifd %u: flags received %x flags expected %x", > + p->id, flags, MULTIFD_FLAG_QPL); > + return -1; > + } > + multifd_recv_zero_page_process(p); > + if (!p->normal_num) { > + assert(in_size == 0); > + return 0; > + } > + > + /* read compressed page lengths */ > + len = p->normal_num * sizeof(uint32_t); > + assert(len < in_size); > + ret = qio_channel_read_all(p->c, (void *) qpl->zlen, len, errp); > + if (ret != 0) { > + return ret; > + } > + for (int i = 0; i < p->normal_num; i++) { > + qpl->zlen[i] = be32_to_cpu(qpl->zlen[i]); > + assert(qpl->zlen[i] <= p->page_size); > + zbuf_len += qpl->zlen[i]; > + } > + > + /* read compressed pages */ > + assert(in_size == len + zbuf_len); > + ret = qio_channel_read_all(p->c, (void *) qpl->zbuf, zbuf_len, errp); > + if (ret != 0) { > + return ret; > + } > + > + if (qpl->hw_avail) { > + return multifd_qpl_decompress_pages(p, errp); > + } > + return multifd_qpl_decompress_pages_slow_path(p, errp); > } > > static MultiFDMethods multifd_qpl_ops = {