Re: [Qemu-devel] [PATCH v4] block/nvme: add support for discard

2019-07-07 Thread Maxim Levitsky
On Fri, 2019-07-05 at 15:50 +0200, Max Reitz wrote:
> On 03.07.19 18:07, Maxim Levitsky wrote:
> > Signed-off-by: Maxim Levitsky 
> > ---
> >  block/nvme.c   | 81 ++
> >  block/trace-events |  2 ++
> >  2 files changed, 83 insertions(+)
> > 
> > diff --git a/block/nvme.c b/block/nvme.c
> > index 02e0846643..96a715dcc1 100644
> > --- a/block/nvme.c
> > +++ b/block/nvme.c
> 
> [...]
> 
> > @@ -460,6 +461,7 @@ static void nvme_identify(BlockDriverState *bs, int namespace, Error **errp)
> >s->page_size / sizeof(uint64_t) * s->page_size);
> >  
> >  s->supports_write_zeros = (idctrl->oncs & NVME_ONCS_WRITE_ZEROS) != 0;
> > +s->supports_discard = (idctrl->oncs & NVME_ONCS_DSM) != 0;
> 
> Shouldn’t this be le16_to_cpu(idctrl->oncs)?  Same in the previous
> patch, now that I think about it.

This reminds me of how I basically scrubbed through nvme-mdev looking for
endianness bugs, manually searching for every reference to a
hardware-controlled structure.
Thank you very much!!
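
For clarity, a minimal sketch of the fix being suggested (ONCS is the
little-endian 16-bit Optional NVM Command Support field of the Identify
Controller structure, so it needs a byte swap before the bits are tested):

    /* Convert the little-endian ONCS field once, then test feature bits. */
    uint16_t oncs = le16_to_cpu(idctrl->oncs);

    s->supports_write_zeros = (oncs & NVME_ONCS_WRITE_ZEROS) != 0;
    s->supports_discard = (oncs & NVME_ONCS_DSM) != 0;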


> 
> >  
> >  memset(resp, 0, 4096);
> >  
> > @@ -1149,6 +1151,84 @@ static coroutine_fn int nvme_co_pwrite_zeroes(BlockDriverState *bs,
> >  }
> >  
> >  
> > +static int coroutine_fn nvme_co_pdiscard(BlockDriverState *bs,
> > + int64_t offset,
> > + int bytes)
> > +{
> > +BDRVNVMeState *s = bs->opaque;
> > +NVMeQueuePair *ioq = s->queues[1];
> > +NVMeRequest *req;
> > +NvmeDsmRange *buf;
> > +QEMUIOVector local_qiov;
> > +int r;
> > +
> > +NvmeCmd cmd = {
> > +.opcode = NVME_CMD_DSM,
> > +.nsid = cpu_to_le32(s->nsid),
> > +.cdw10 = 0, /*number of ranges - 0 based*/
> 
> I’d make this cpu_to_le32(0).  Sure, there is no effect for 0, but in
> theory this is a variable value, so...
Fair enough, let it be cpu_to_le32(0).
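
For illustration, the shape being suggested, treating the range count as a
variable value (nr_ranges is a hypothetical name here; this patch always
sends a single range):

    uint32_t nr_ranges = 1; /* hypothetical: number of DSM ranges to send */

    NvmeCmd cmd = {
        .opcode = NVME_CMD_DSM,
        .nsid = cpu_to_le32(s->nsid),
        .cdw10 = cpu_to_le32(nr_ranges - 1), /* NR field is 0-based */
        .cdw11 = cpu_to_le32(1 << 2), /* AD (deallocate) bit */
    };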

> 
> > +.cdw11 = cpu_to_le32(1 << 2), /*deallocate bit*/
> > +};
> > +
> > +NVMeCoData data = {
> > +.ctx = bdrv_get_aio_context(bs),
> > +.ret = -EINPROGRESS,
> > +};
> > +
> > +if (!s->supports_discard) {
> > +return -ENOTSUP;
> > +}
> > +
> > +assert(s->nr_queues > 1);
> > +
> > +buf = qemu_try_blockalign0(bs, 4096);
> 
> I’m not sure whether this needs to be 4096 or whether 16 would suffice,
>  but I suppose this gets us the least trouble.
Exactly. Even better, now that I think about it, would be to use
's->page_size', the device page size.
It is at least 4K (the spec minimum).
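
Something like this (an untested sketch):

    /* Size the DSM range buffer by the device page size instead of
     * hard-coding 4096; s->page_size is at least 4K per the spec. */
    buf = qemu_try_blockalign0(bs, s->page_size);
    if (!buf) {
        return -ENOMEM;
    }

    qemu_iovec_init(&local_qiov, 1);
    qemu_iovec_add(&local_qiov, buf, s->page_size);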

Speaking of which, there is a theoretical bug there - in theory the device
can indicate that its minimal page size is larger than 4K.
The kernel currently rejects such devices, but here the driver just forces a
4K page size in the CC register.
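
A sketch of the kind of check I mean (the register access is illustrative;
per the NVMe spec, CAP bits 51:48 are MPSMIN and the minimum page size is
2^(12 + MPSMIN)):

    uint64_t cap = le64_to_cpu(regs->cap); /* illustrative access to CAP */
    uint32_t min_page_size = 1u << (12 + ((cap >> 48) & 0xf));

    if (min_page_size > 4096) {
        error_setg(errp, "Device minimum page size %u is larger than 4K",
                   min_page_size);
        return;
    }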

> 
> > +if (!buf) {
> > +return -ENOMEM;
> 
> Indentation is off.
True!

> 
> > +}
> > +
> > +buf->nlb = cpu_to_le32(bytes >> s->blkshift);
> > +buf->slba = cpu_to_le64(offset >> s->blkshift);
> > +buf->cattr = 0;
> > +
> > +qemu_iovec_init(&local_qiov, 1);
> > +qemu_iovec_add(&local_qiov, buf, 4096);
> > +
> > +req = nvme_get_free_req(ioq);
> > +assert(req);
> > +
> > +qemu_co_mutex_lock(&s->dma_map_lock);
> > +r = nvme_cmd_map_qiov(bs, &cmd, req, &local_qiov);
> > +qemu_co_mutex_unlock(&s->dma_map_lock);
> > +
> > +if (r) {
> > +req->busy = false;
> > +return r;
> 
> Leaking buf and local_qiov here.
True, fixed.

> 
> > +}
> > +
> > +trace_nvme_dsm(s, offset, bytes);
> > +
> > +nvme_submit_command(s, ioq, req, &cmd, nvme_rw_cb, &data);
> > +
> > +data.co = qemu_coroutine_self();
> > +while (data.ret == -EINPROGRESS) {
> > +qemu_coroutine_yield();
> > +}
> > +
> > +qemu_co_mutex_lock(&s->dma_map_lock);
> > +r = nvme_cmd_unmap_qiov(bs, &local_qiov);
> > +qemu_co_mutex_unlock(&s->dma_map_lock);
> > +if (r) {
> > +return r;
> 
> Leaking buf and local_qiov here, too.
True, fixed - next time I will check the error paths more carefully.
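
The shape of the fix, as an untested sketch - both error paths now go
through common cleanup so buf and local_qiov are always released:

    qemu_co_mutex_lock(&s->dma_map_lock);
    r = nvme_cmd_map_qiov(bs, &cmd, req, &local_qiov);
    qemu_co_mutex_unlock(&s->dma_map_lock);
    if (r) {
        req->busy = false;
        ret = r;
        goto out; /* was: return r, which leaked buf and local_qiov */
    }

    /* ... submit the command, yield, and unmap as before,
     * storing any error in 'ret' ... */

out:
    qemu_iovec_destroy(&local_qiov);
    qemu_vfree(buf);
    return ret;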

> 
> Max
> 
> > +}
> > +
> > +trace_nvme_dsm_done(s, offset, bytes, data.ret);
> > +
> > +qemu_iovec_destroy(&local_qiov);
> > +qemu_vfree(buf);
> > +return data.ret;
> > +
> > +}
> > +
> > +
> >  static int nvme_reopen_prepare(BDRVReopenState *reopen_state,
> > BlockReopenQueue *queue, Error **errp)
> >  {
> 
> 

Thanks for the review,
Best regards,
Maxim Levitsky





[Qemu-devel] [PATCH v4] block/nvme: add support for discard

2019-07-03 Thread Maxim Levitsky
Signed-off-by: Maxim Levitsky 
---
 block/nvme.c   | 81 ++
 block/trace-events |  2 ++
 2 files changed, 83 insertions(+)

diff --git a/block/nvme.c b/block/nvme.c
index 02e0846643..96a715dcc1 100644
--- a/block/nvme.c
+++ b/block/nvme.c
@@ -111,6 +111,7 @@ typedef struct {
 bool plugged;
 
 bool supports_write_zeros;
+bool supports_discard;
 
 CoMutex dma_map_lock;
 CoQueue dma_flush_queue;
@@ -460,6 +461,7 @@ static void nvme_identify(BlockDriverState *bs, int namespace, Error **errp)
   s->page_size / sizeof(uint64_t) * s->page_size);
 
 s->supports_write_zeros = (idctrl->oncs & NVME_ONCS_WRITE_ZEROS) != 0;
+s->supports_discard = (idctrl->oncs & NVME_ONCS_DSM) != 0;
 
 memset(resp, 0, 4096);
 
@@ -1149,6 +1151,84 @@ static coroutine_fn int nvme_co_pwrite_zeroes(BlockDriverState *bs,
 }
 
 
+static int coroutine_fn nvme_co_pdiscard(BlockDriverState *bs,
+ int64_t offset,
+ int bytes)
+{
+BDRVNVMeState *s = bs->opaque;
+NVMeQueuePair *ioq = s->queues[1];
+NVMeRequest *req;
+NvmeDsmRange *buf;
+QEMUIOVector local_qiov;
+int r;
+
+NvmeCmd cmd = {
+.opcode = NVME_CMD_DSM,
+.nsid = cpu_to_le32(s->nsid),
+.cdw10 = 0, /*number of ranges - 0 based*/
+.cdw11 = cpu_to_le32(1 << 2), /*deallocate bit*/
+};
+
+NVMeCoData data = {
+.ctx = bdrv_get_aio_context(bs),
+.ret = -EINPROGRESS,
+};
+
+if (!s->supports_discard) {
+return -ENOTSUP;
+}
+
+assert(s->nr_queues > 1);
+
+buf = qemu_try_blockalign0(bs, 4096);
+if (!buf) {
+return -ENOMEM;
+}
+
+buf->nlb = cpu_to_le32(bytes >> s->blkshift);
+buf->slba = cpu_to_le64(offset >> s->blkshift);
+buf->cattr = 0;
+
+qemu_iovec_init(&local_qiov, 1);
+qemu_iovec_add(&local_qiov, buf, 4096);
+
+req = nvme_get_free_req(ioq);
+assert(req);
+
+qemu_co_mutex_lock(&s->dma_map_lock);
+r = nvme_cmd_map_qiov(bs, &cmd, req, &local_qiov);
+qemu_co_mutex_unlock(&s->dma_map_lock);
+
+if (r) {
+req->busy = false;
+return r;
+}
+
+trace_nvme_dsm(s, offset, bytes);
+
+nvme_submit_command(s, ioq, req, &cmd, nvme_rw_cb, &data);
+
+data.co = qemu_coroutine_self();
+while (data.ret == -EINPROGRESS) {
+qemu_coroutine_yield();
+}
+
+qemu_co_mutex_lock(&s->dma_map_lock);
+r = nvme_cmd_unmap_qiov(bs, &local_qiov);
+qemu_co_mutex_unlock(&s->dma_map_lock);
+if (r) {
+return r;
+}
+
+trace_nvme_dsm_done(s, offset, bytes, data.ret);
+
+qemu_iovec_destroy(&local_qiov);
+qemu_vfree(buf);
+return data.ret;
+
+}
+
+
 static int nvme_reopen_prepare(BDRVReopenState *reopen_state,
BlockReopenQueue *queue, Error **errp)
 {
@@ -1363,6 +1443,7 @@ static BlockDriver bdrv_nvme = {
 .bdrv_co_pwritev  = nvme_co_pwritev,
 
 .bdrv_co_pwrite_zeroes= nvme_co_pwrite_zeroes,
+.bdrv_co_pdiscard = nvme_co_pdiscard,
 
 .bdrv_co_flush_to_disk= nvme_co_flush,
 .bdrv_reopen_prepare  = nvme_reopen_prepare,
diff --git a/block/trace-events b/block/trace-events
index 12f363bb44..f763f79d99 100644
--- a/block/trace-events
+++ b/block/trace-events
@@ -152,6 +152,8 @@ nvme_write_zeros(void *s, uint64_t offset, uint64_t bytes, int flags) "s %p offs
 nvme_qiov_unaligned(const void *qiov, int n, void *base, size_t size, int align) "qiov %p n %d base %p size 0x%zx align 0x%x"
 nvme_prw_buffered(void *s, uint64_t offset, uint64_t bytes, int niov, int is_write) "s %p offset %"PRId64" bytes %"PRId64" niov %d is_write %d"
 nvme_rw_done(void *s, int is_write, uint64_t offset, uint64_t bytes, int ret) "s %p is_write %d offset %"PRId64" bytes %"PRId64" ret %d"
+nvme_dsm(void *s, uint64_t offset, uint64_t bytes) "s %p offset %"PRId64" bytes %"PRId64""
+nvme_dsm_done(void *s, uint64_t offset, uint64_t bytes, int ret) "s %p offset %"PRId64" bytes %"PRId64" ret %d"
 nvme_dma_map_flush(void *s) "s %p"
 nvme_free_req_queue_wait(void *q) "q %p"
 nvme_cmd_map_qiov(void *s, void *cmd, void *req, void *qiov, int entries) "s %p cmd %p req %p qiov %p entries %d"
-- 
2.17.2