On 05/07/2017 10:44, Changpeng Liu wrote:
> Currently the virtio-blk driver does not provide a discard feature flag, so
> filesystems built on top of the block device will not send discard
> commands. This is acceptable for an HDD backend, but it impacts performance
> for an SSD backend.
> 
> Add a feature flag VIRTIO_BLK_F_DISCARD and command VIRTIO_BLK_T_DISCARD
> to extend the existing virtio-blk protocol. Define a 16-byte discard
> descriptor for each discard segment; the discard segment definition aligns
> with the SCSI and NVM Express protocols, and the virtio-blk driver will
> support multi-range discard requests as well.
> 
> Signed-off-by: Changpeng Liu <changpeng....@intel.com>

Please include a patch for the specification.  While we are at it, I
would like to have three operations defined using the same descriptor:

- discard (SCSI UNMAP)

- write zeroes (SCSI WRITE SAME without UNMAP flag)

- write zeroes and possibly discard (SCSI WRITE SAME with UNMAP flag)

The last two can use the same command VIRTIO_BLK_T_WRITE_ZEROES, using
the reserved field as a flags field.

Paolo

> ---
>  drivers/block/virtio_blk.c      | 76 +++++++++++++++++++++++++++++++++++++++--
>  include/uapi/linux/virtio_blk.h | 19 +++++++++++
>  2 files changed, 92 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
> index 0297ad7..8f0c614 100644
> --- a/drivers/block/virtio_blk.c
> +++ b/drivers/block/virtio_blk.c
> @@ -172,10 +172,52 @@ static int virtblk_add_req(struct virtqueue *vq, struct 
> virtblk_req *vbr,
>       return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC);
>  }
>  
> +static inline int virtblk_setup_discard(struct request *req)
> +{
> +     unsigned short segments = blk_rq_nr_discard_segments(req), n = 0;
> +     u32 block_size = queue_logical_block_size(req->q);
> +     struct virtio_blk_discard *range;
> +     struct bio *bio;
> +
> +     if (block_size < 512 || !block_size)
> +             return -1;
> +
> +     range = kmalloc_array(segments, sizeof(*range), GFP_ATOMIC);
> +     if (!range)
> +             return -1;
> +
> +     __rq_for_each_bio(bio, req) {
> +             u64 slba = (bio->bi_iter.bi_sector << 9) / block_size;
> +             u32 nlb = bio->bi_iter.bi_size / block_size;
> +
> +             range[n].reserved = cpu_to_le32(0);
> +             range[n].nlba = cpu_to_le32(nlb);
> +             range[n].slba = cpu_to_le64(slba);
> +             n++;
> +     }
> +
> +     if (WARN_ON_ONCE(n != segments)) {
> +             kfree(range);
> +             return -1;
> +     }
> +
> +     req->special_vec.bv_page = virt_to_page(range);
> +     req->special_vec.bv_offset = offset_in_page(range);
> +     req->special_vec.bv_len = sizeof(*range) * segments;
> +     req->rq_flags |= RQF_SPECIAL_PAYLOAD;
> +
> +     return 0;
> +}
> +
>  static inline void virtblk_request_done(struct request *req)
>  {
>       struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
>  
> +     if (req->rq_flags & RQF_SPECIAL_PAYLOAD) {
> +             kfree(page_address(req->special_vec.bv_page) +
> +                   req->special_vec.bv_offset);
> +     }
> +
>       switch (req_op(req)) {
>       case REQ_OP_SCSI_IN:
>       case REQ_OP_SCSI_OUT:
> @@ -237,6 +279,9 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx 
> *hctx,
>       case REQ_OP_FLUSH:
>               type = VIRTIO_BLK_T_FLUSH;
>               break;
> +     case REQ_OP_DISCARD:
> +             type = VIRTIO_BLK_T_DISCARD;
> +             break;
>       case REQ_OP_SCSI_IN:
>       case REQ_OP_SCSI_OUT:
>               type = VIRTIO_BLK_T_SCSI_CMD;
> @@ -256,9 +301,15 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx 
> *hctx,
>  
>       blk_mq_start_request(req);
>  
> +     if (type == VIRTIO_BLK_T_DISCARD) {
> +             err = virtblk_setup_discard(req);
> +             if (err)
> +                     return BLK_STS_IOERR;
> +     }
> +
>       num = blk_rq_map_sg(hctx->queue, req, vbr->sg);
>       if (num) {
> -             if (rq_data_dir(req) == WRITE)
> +             if (rq_data_dir(req) == WRITE || type == VIRTIO_BLK_T_DISCARD)
>                       vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev, 
> VIRTIO_BLK_T_OUT);
>               else
>                       vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev, 
> VIRTIO_BLK_T_IN);
> @@ -767,6 +818,25 @@ static int virtblk_probe(struct virtio_device *vdev)
>       if (!err && opt_io_size)
>               blk_queue_io_opt(q, blk_size * opt_io_size);
>  
> +     if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) {
> +             q->limits.discard_alignment = blk_size;
> +             q->limits.discard_granularity = blk_size;
> +
> +             virtio_cread(vdev, struct virtio_blk_config, max_discard_seg, 
> &v);
> +             if (v)
> +                     blk_queue_max_discard_sectors(q, v);
> +             else
> +                     blk_queue_max_discard_sectors(q, -1U);
> +
> +             virtio_cread(vdev, struct virtio_blk_config, max_discard_num, 
> &v);
> +             if (v)
> +                     blk_queue_max_discard_segments(q, v);
> +             else
> +                     blk_queue_max_discard_segments(q, 256);
> +
> +             queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
> +     }
> +
>       virtio_device_ready(vdev);
>  
>       device_add_disk(&vdev->dev, vblk->disk);
> @@ -874,14 +944,14 @@ static int virtblk_restore(struct virtio_device *vdev)
>       VIRTIO_BLK_F_SCSI,
>  #endif
>       VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE,
> -     VIRTIO_BLK_F_MQ,
> +     VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD,
>  }
>  ;
>  static unsigned int features[] = {
>       VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY,
>       VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE,
>       VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE,
> -     VIRTIO_BLK_F_MQ,
> +     VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD,
>  };
>  
>  static struct virtio_driver virtio_blk = {
> diff --git a/include/uapi/linux/virtio_blk.h b/include/uapi/linux/virtio_blk.h
> index 9ebe4d9..3354cc3 100644
> --- a/include/uapi/linux/virtio_blk.h
> +++ b/include/uapi/linux/virtio_blk.h
> @@ -38,6 +38,7 @@
>  #define VIRTIO_BLK_F_BLK_SIZE        6       /* Block size of disk is 
> available*/
>  #define VIRTIO_BLK_F_TOPOLOGY        10      /* Topology information is 
> available */
>  #define VIRTIO_BLK_F_MQ              12      /* support more than one vq */
> +#define VIRTIO_BLK_F_DISCARD 13      /* DISCARD command is supported */
>  
>  /* Legacy feature bits */
>  #ifndef VIRTIO_BLK_NO_LEGACY
> @@ -86,6 +87,10 @@ struct virtio_blk_config {
>  
>       /* number of vqs, only available when VIRTIO_BLK_F_MQ is set */
>       __u16 num_queues;
> +     /* The maximum segment size (if VIRTIO_BLK_F_DISCARD) */
> +     __u32 max_discard_seg;
> +     /* The maximum number of segments (if VIRTIO_BLK_F_DISCARD) */
> +     __u32 max_discard_num;
>  } __attribute__((packed));
>  
>  /*
> @@ -114,6 +119,9 @@ struct virtio_blk_config {
>  /* Get device ID command */
>  #define VIRTIO_BLK_T_GET_ID    8
>  
> +/* Discard command */
> +#define VIRTIO_BLK_T_DISCARD 16
> +
>  #ifndef VIRTIO_BLK_NO_LEGACY
>  /* Barrier before this op. */
>  #define VIRTIO_BLK_T_BARRIER 0x80000000
> @@ -133,6 +141,17 @@ struct virtio_blk_outhdr {
>       __virtio64 sector;
>  };
>  
> +/*
> + * Array of discard ranges for each request.
> + */
> +struct virtio_blk_discard {
> +     /* start discard lba */
> +     __virtio64 slba;
> +     /* number of discard sectors */
> +     __virtio32 nlba;
> +     __virtio32 reserved;
> +};
> +
>  #ifndef VIRTIO_BLK_NO_LEGACY
>  struct virtio_scsi_inhdr {
>       __virtio32 errors;
> 
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Reply via email to