> -----Original Message-----
> From: Vitaly Kuznetsov [mailto:vkuzn...@redhat.com]
> Sent: Wednesday, September 23, 2015 5:59 AM
> To: linux-s...@vger.kernel.org
> Cc: linux-kernel@vger.kernel.org; de...@linuxdriverproject.org; KY
> Srinivasan <k...@microsoft.com>; Haiyang Zhang
> <haiya...@microsoft.com>; James E.J. Bottomley <jbottom...@odin.com>;
> Radim Krčmář <rkrc...@redhat.com>; Christoph Hellwig
> <h...@infradead.org>
> Subject: [PATCH v2] storvsc: get rid of bounce buffer
> 
> Storvsc driver needs to ensure there are no 'holes' in the presented
> sg list (all segments in the middle of the list need to be of PAGE_SIZE).
> When a hole is detected storvsc driver creates a 'bounce sgl' without
> holes and copies data over with copy_{to,from}_bounce_buffer() functions.
> Setting virt_boundary_mask to PAGE_SIZE - 1 guarantees we'll never see
> such holes so we can significantly simplify the driver. This is also
> supposed to bring us some performance improvement for certain workloads
> as we eliminate copying.
> 
> Reported-by: Radim Krčmář <rkrc...@redhat.com>
> Signed-off-by: Vitaly Kuznetsov <vkuzn...@redhat.com>
> ---
> Changes since v1:
> - This patch is a successor of 'storvsc: get rid of homegrown
>  copy_{to,from}_bounce_buffer()'. Use virt_boundary instead to
>  eliminate the need for bounce buffer completely [Christoph Hellwig].
> ---
>  drivers/scsi/storvsc_drv.c | 286 
> +--------------------------------------------
>  1 file changed, 5 insertions(+), 281 deletions(-)
> 
> diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
> index 40c43ae..eeade40 100644
> --- a/drivers/scsi/storvsc_drv.c
> +++ b/drivers/scsi/storvsc_drv.c
> @@ -393,9 +393,6 @@ static void storvsc_on_channel_callback(void
> *context);
>  struct storvsc_cmd_request {
>       struct scsi_cmnd *cmd;
> 
> -     unsigned int bounce_sgl_count;
> -     struct scatterlist *bounce_sgl;
> -
>       struct hv_device *device;
> 
>       /* Synchronize the request/response if needed */
> @@ -586,241 +583,6 @@ get_in_err:
> 
>  }
> 
> -static void destroy_bounce_buffer(struct scatterlist *sgl,
> -                               unsigned int sg_count)
> -{
> -     int i;
> -     struct page *page_buf;
> -
> -     for (i = 0; i < sg_count; i++) {
> -             page_buf = sg_page((&sgl[i]));
> -             if (page_buf != NULL)
> -                     __free_page(page_buf);
> -     }
> -
> -     kfree(sgl);
> -}
> -
> -static int do_bounce_buffer(struct scatterlist *sgl, unsigned int sg_count)
> -{
> -     int i;
> -
> -     /* No need to check */
> -     if (sg_count < 2)
> -             return -1;
> -
> -     /* We have at least 2 sg entries */
> -     for (i = 0; i < sg_count; i++) {
> -             if (i == 0) {
> -                     /* make sure 1st one does not have hole */
> -                     if (sgl[i].offset + sgl[i].length != PAGE_SIZE)
> -                             return i;
> -             } else if (i == sg_count - 1) {
> -                     /* make sure last one does not have hole */
> -                     if (sgl[i].offset != 0)
> -                             return i;
> -             } else {
> -                     /* make sure no hole in the middle */
> -                     if (sgl[i].length != PAGE_SIZE || sgl[i].offset != 0)
> -                             return i;
> -             }
> -     }
> -     return -1;
> -}
> -
> -static struct scatterlist *create_bounce_buffer(struct scatterlist *sgl,
> -                                             unsigned int sg_count,
> -                                             unsigned int len,
> -                                             int write)
> -{
> -     int i;
> -     int num_pages;
> -     struct scatterlist *bounce_sgl;
> -     struct page *page_buf;
> -     unsigned int buf_len = ((write == WRITE_TYPE) ? 0 : PAGE_SIZE);
> -
> -     num_pages = ALIGN(len, PAGE_SIZE) >> PAGE_SHIFT;
> -
> -     bounce_sgl = kcalloc(num_pages, sizeof(struct scatterlist),
> GFP_ATOMIC);
> -     if (!bounce_sgl)
> -             return NULL;
> -
> -     sg_init_table(bounce_sgl, num_pages);
> -     for (i = 0; i < num_pages; i++) {
> -             page_buf = alloc_page(GFP_ATOMIC);
> -             if (!page_buf)
> -                     goto cleanup;
> -             sg_set_page(&bounce_sgl[i], page_buf, buf_len, 0);
> -     }
> -
> -     return bounce_sgl;
> -
> -cleanup:
> -     destroy_bounce_buffer(bounce_sgl, num_pages);
> -     return NULL;
> -}
> -
> -/* Assume the original sgl has enough room */
> -static unsigned int copy_from_bounce_buffer(struct scatterlist *orig_sgl,
> -                                         struct scatterlist *bounce_sgl,
> -                                         unsigned int orig_sgl_count,
> -                                         unsigned int bounce_sgl_count)
> -{
> -     int i;
> -     int j = 0;
> -     unsigned long src, dest;
> -     unsigned int srclen, destlen, copylen;
> -     unsigned int total_copied = 0;
> -     unsigned long bounce_addr = 0;
> -     unsigned long dest_addr = 0;
> -     unsigned long flags;
> -     struct scatterlist *cur_dest_sgl;
> -     struct scatterlist *cur_src_sgl;
> -
> -     local_irq_save(flags);
> -     cur_dest_sgl = orig_sgl;
> -     cur_src_sgl = bounce_sgl;
> -     for (i = 0; i < orig_sgl_count; i++) {
> -             dest_addr = (unsigned long)
> -                             kmap_atomic(sg_page(cur_dest_sgl)) +
> -                             cur_dest_sgl->offset;
> -             dest = dest_addr;
> -             destlen = cur_dest_sgl->length;
> -
> -             if (bounce_addr == 0)
> -                     bounce_addr = (unsigned long)kmap_atomic(
> -
>       sg_page(cur_src_sgl));
> -
> -             while (destlen) {
> -                     src = bounce_addr + cur_src_sgl->offset;
> -                     srclen = cur_src_sgl->length - cur_src_sgl->offset;
> -
> -                     copylen = min(srclen, destlen);
> -                     memcpy((void *)dest, (void *)src, copylen);
> -
> -                     total_copied += copylen;
> -                     cur_src_sgl->offset += copylen;
> -                     destlen -= copylen;
> -                     dest += copylen;
> -
> -                     if (cur_src_sgl->offset == cur_src_sgl->length) {
> -                             /* full */
> -                             kunmap_atomic((void *)bounce_addr);
> -                             j++;
> -
> -                             /*
> -                              * It is possible that the number of elements
> -                              * in the bounce buffer may not be equal to
> -                              * the number of elements in the original
> -                              * scatter list. Handle this correctly.
> -                              */
> -
> -                             if (j == bounce_sgl_count) {
> -                                     /*
> -                                      * We are done; cleanup and return.
> -                                      */
> -                                     kunmap_atomic((void *)(dest_addr -
> -                                             cur_dest_sgl->offset));
> -                                     local_irq_restore(flags);
> -                                     return total_copied;
> -                             }
> -
> -                             /* if we need to use another bounce buffer
> */
> -                             if (destlen || i != orig_sgl_count - 1) {
> -                                     cur_src_sgl = sg_next(cur_src_sgl);
> -                                     bounce_addr = (unsigned long)
> -                                                     kmap_atomic(
> -
>       sg_page(cur_src_sgl));
> -                             }
> -                     } else if (destlen == 0 && i == orig_sgl_count - 1) {
> -                             /* unmap the last bounce that is < PAGE_SIZE
> */
> -                             kunmap_atomic((void *)bounce_addr);
> -                     }
> -             }
> -
> -             kunmap_atomic((void *)(dest_addr - cur_dest_sgl->offset));
> -             cur_dest_sgl = sg_next(cur_dest_sgl);
> -     }
> -
> -     local_irq_restore(flags);
> -
> -     return total_copied;
> -}
> -
> -/* Assume the bounce_sgl has enough room ie using the
> create_bounce_buffer() */
> -static unsigned int copy_to_bounce_buffer(struct scatterlist *orig_sgl,
> -                                       struct scatterlist *bounce_sgl,
> -                                       unsigned int orig_sgl_count)
> -{
> -     int i;
> -     int j = 0;
> -     unsigned long src, dest;
> -     unsigned int srclen, destlen, copylen;
> -     unsigned int total_copied = 0;
> -     unsigned long bounce_addr = 0;
> -     unsigned long src_addr = 0;
> -     unsigned long flags;
> -     struct scatterlist *cur_src_sgl;
> -     struct scatterlist *cur_dest_sgl;
> -
> -     local_irq_save(flags);
> -
> -     cur_src_sgl = orig_sgl;
> -     cur_dest_sgl = bounce_sgl;
> -
> -     for (i = 0; i < orig_sgl_count; i++) {
> -             src_addr = (unsigned long)
> -                             kmap_atomic(sg_page(cur_src_sgl)) +
> -                             cur_src_sgl->offset;
> -             src = src_addr;
> -             srclen = cur_src_sgl->length;
> -
> -             if (bounce_addr == 0)
> -                     bounce_addr = (unsigned long)
> -
>       kmap_atomic(sg_page(cur_dest_sgl));
> -
> -             while (srclen) {
> -                     /* assume bounce offset always == 0 */
> -                     dest = bounce_addr + cur_dest_sgl->length;
> -                     destlen = PAGE_SIZE - cur_dest_sgl->length;
> -
> -                     copylen = min(srclen, destlen);
> -                     memcpy((void *)dest, (void *)src, copylen);
> -
> -                     total_copied += copylen;
> -                     cur_dest_sgl->length += copylen;
> -                     srclen -= copylen;
> -                     src += copylen;
> -
> -                     if (cur_dest_sgl->length == PAGE_SIZE) {
> -                             /* full..move to next entry */
> -                             kunmap_atomic((void *)bounce_addr);
> -                             bounce_addr = 0;
> -                             j++;
> -                     }
> -
> -                     /* if we need to use another bounce buffer */
> -                     if (srclen && bounce_addr == 0) {
> -                             cur_dest_sgl = sg_next(cur_dest_sgl);
> -                             bounce_addr = (unsigned long)
> -                                             kmap_atomic(
> -                                             sg_page(cur_dest_sgl));
> -                     }
> -
> -             }
> -
> -             kunmap_atomic((void *)(src_addr - cur_src_sgl->offset));
> -             cur_src_sgl = sg_next(cur_src_sgl);
> -     }
> -
> -     if (bounce_addr)
> -             kunmap_atomic((void *)bounce_addr);
> -
> -     local_irq_restore(flags);
> -
> -     return total_copied;
> -}
> -
>  static void handle_sc_creation(struct vmbus_channel *new_sc)
>  {
>       struct hv_device *device = new_sc->primary_channel->device_obj;
> @@ -1171,15 +933,6 @@ static void storvsc_command_completion(struct
> storvsc_cmd_request *cmd_request)
>       host = stor_dev->host;
> 
>       vm_srb = &cmd_request->vstor_packet.vm_srb;
> -     if (cmd_request->bounce_sgl_count) {
> -             if (vm_srb->data_in == READ_TYPE)
> -                     copy_from_bounce_buffer(scsi_sglist(scmnd),
> -                                     cmd_request->bounce_sgl,
> -                                     scsi_sg_count(scmnd),
> -                                     cmd_request->bounce_sgl_count);
> -             destroy_bounce_buffer(cmd_request->bounce_sgl,
> -                                     cmd_request->bounce_sgl_count);
> -     }
> 
>       scmnd->result = vm_srb->scsi_status;
> 
> @@ -1474,6 +1227,9 @@ static int storvsc_device_configure(struct
> scsi_device *sdevice)
> 
>       blk_queue_rq_timeout(sdevice->request_queue, (storvsc_timeout
> * HZ));
> 
> +     /* Ensure there are no gaps in presented sgls */
> +     blk_queue_virt_boundary(sdevice->request_queue, PAGE_SIZE - 1);
> +
>       sdevice->no_write_same = 1;
> 
>       /*
> @@ -1692,40 +1448,13 @@ static int storvsc_queuecommand(struct
> Scsi_Host *host, struct scsi_cmnd *scmnd)
>       payload_sz = sizeof(cmd_request->mpb);
> 
>       if (sg_count) {
> -             /* check if we need to bounce the sgl */
> -             if (do_bounce_buffer(sgl, scsi_sg_count(scmnd)) != -1) {
> -                     cmd_request->bounce_sgl =
> -                             create_bounce_buffer(sgl, sg_count,
> -                                                  length,
> -                                                  vm_srb->data_in);
> -                     if (!cmd_request->bounce_sgl)
> -                             return SCSI_MLQUEUE_HOST_BUSY;
> -
> -                     cmd_request->bounce_sgl_count =
> -                             ALIGN(length, PAGE_SIZE) >> PAGE_SHIFT;
> -
> -                     if (vm_srb->data_in == WRITE_TYPE)
> -                             copy_to_bounce_buffer(sgl,
> -                                     cmd_request->bounce_sgl,
> sg_count);
> -
> -                     sgl = cmd_request->bounce_sgl;
> -                     sg_count = cmd_request->bounce_sgl_count;
> -             }
> -
> -
>               if (sg_count > MAX_PAGE_BUFFER_COUNT) {
> 
>                       payload_sz = (sg_count * sizeof(void *) +
>                                     sizeof(struct vmbus_packet_mpb_array));
>                       payload = kmalloc(payload_sz, GFP_ATOMIC);
> -                     if (!payload) {
> -                             if (cmd_request->bounce_sgl_count)
> -                                     destroy_bounce_buffer(
> -                                     cmd_request->bounce_sgl,
> -                                     cmd_request->bounce_sgl_count);
> -
> -                                     return
> SCSI_MLQUEUE_DEVICE_BUSY;
> -                     }
> +                     if (!payload)
> +                             return SCSI_MLQUEUE_DEVICE_BUSY;
>               }
> 
>               payload->range.len = length;
> @@ -1754,11 +1483,6 @@ static int storvsc_queuecommand(struct Scsi_Host
> *host, struct scsi_cmnd *scmnd)
> 
>       if (ret == -EAGAIN) {
>               /* no more space */
> -
> -             if (cmd_request->bounce_sgl_count)
> -                     destroy_bounce_buffer(cmd_request->bounce_sgl,
> -                                     cmd_request->bounce_sgl_count);
> -
>               return SCSI_MLQUEUE_DEVICE_BUSY;
>       }
> 
> --
> 2.4.3
Thanks Vitaly and Christoph. We will run this patch through our testing.

Regards,

K. Y

Reply via email to