[SeaBIOS] Re: [PATCH v2] nvme: Clean up nvme_cmd_readwrite()

2020-11-05 Thread David Woodhouse
On Thu, 2020-11-05 at 16:09 +, David Woodhouse wrote:
> From: David Woodhouse 
> 
> This ended up with an odd mix of recursion (albeit *mostly*
> tail-recursion) and iteration that could have been prettier. In
> addition, while recursing it potentially adjusted op->count which is
> used by callers to see the amount of I/O actually performed.
> 
> Fix it by bringing nvme_build_prpl() into the normal loop using 'i'
> as the offset in the op.
> 
> Fixes: 94f0510dc ("nvme: Split requests by maximum allowed size")
> Signed-off-by: David Woodhouse 
> ---
> v2: Fix my bug with checking a u16 for being < 0.
> Fix the bug I inherited from commit 94f0510dc but made unconditional.
> 
>  src/hw/nvme.c | 77 ++-
>  1 file changed, 33 insertions(+), 44 deletions(-)
> 


Now, on top of that we *could* do something like this...

--- a/src/hw/nvme.c
+++ b/src/hw/nvme.c
@@ -742,6 +742,15 @@ nvme_cmd_readwrite(struct nvme_namespace *ns, struct disk_op_s *op, int write)
             blocks = blocks_remaining < max_blocks ? blocks_remaining
                                                    : max_blocks;
 
+            if (blocks < blocks_remaining && ns->block_size < NVME_PAGE_SIZE &&
+                !(((u32)op_buf) & (ns->block_size-1))) {
+                u32 align = ((u32)op_buf & (NVME_PAGE_SIZE - 1)) / ns->block_size;
+                if (blocks > (max_blocks - align)) {
+                    dprintf(3, "Restrain to %u blocks to align (buf %p size %u/%u)\n", max_blocks - align, op_buf, NVME_PAGE_SIZE, ns->block_size);
+                    blocks = max_blocks - align;
+                }
+            }
+
             if (write) {
                 memcpy(ns->dma_buffer, op_buf, blocks * ns->block_size);
             }

While debugging this I watched a boot sector, after being loaded at
0000:7c00, load another 63 sectors at 0000:7e00. Since 0x7e00 isn't
page-aligned, it didn't get to use the prpl at all, and looked something
like this...

ns 1 read lba 0+1: 0
Booting from 0000:7c00
ns 1 read lba 1+8: 0
ns 1 read lba 9+8: 0
ns 1 read lba 17+8: 0
ns 1 read lba 25+8: 0
ns 1 read lba 33+8: 0
ns 1 read lba 41+8: 0
ns 1 read lba 49+8: 0
ns 1 read lba 57+7: 0

If we make an *attempt* to align it, such as the proof-of-concept shown
above, then it ends up getting back in sync:

ns 1 read lba 0+1: 0
Booting from 0000:7c00
Restrain to 1 blocks to align (buf 0x7e00 size 4096/512)
ns 1 read lba 1+1: 0
ns 1 read lba 1+62: 0
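
To spell out the arithmetic from that run (all the numbers are in the
log above: buf 0x7e00, page size 4096, block size 512):

    max_blocks = 4096 / 512             = 8
    align      = (0x7e00 & 0xfff) / 512 = 7
    blocks     = max_blocks - align     = 1

so the first bounce-buffered transfer is restrained to a single sector,
which leaves the buffer at 0x8000 (page-aligned), and nvme_build_prpl()
can then take the remaining 62 sectors in one request via the PRP list.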

I just don't know that I care enough about the optimisation to make it
worth the ugliness of that special case in the loop, which includes a
division.





[SeaBIOS] [PATCH v2] nvme: Clean up nvme_cmd_readwrite()

2020-11-05 Thread David Woodhouse
From: David Woodhouse 

This ended up with an odd mix of recursion (albeit *mostly*
tail-recursion) and iteration that could have been prettier. In
addition, while recursing it potentially adjusted op->count which is
used by callers to see the amount of I/O actually performed.

Fix it by bringing nvme_build_prpl() into the normal loop using 'i'
as the offset in the op.

Fixes: 94f0510dc ("nvme: Split requests by maximum allowed size")
Signed-off-by: David Woodhouse 
---
v2: Fix my bug with checking a u16 for being < 0.
Fix the bug I inherited from commit 94f0510dc but made unconditional.

 src/hw/nvme.c | 77 ++-
 1 file changed, 33 insertions(+), 44 deletions(-)

diff --git a/src/hw/nvme.c b/src/hw/nvme.c
index cc37bca..f26b811 100644
--- a/src/hw/nvme.c
+++ b/src/hw/nvme.c
@@ -470,30 +470,31 @@ static int nvme_add_prpl(struct nvme_namespace *ns, u64 base)
     return 0;
 }
 
-int nvme_build_prpl(struct nvme_namespace *ns, struct disk_op_s *op)
+static int nvme_build_prpl(struct nvme_namespace *ns, void *op_buf, u16 count)
 {
     int first_page = 1;
-    u32 base = (long)op->buf_fl;
-    s32 size = op->count * ns->block_size;
+    u32 base = (long)op_buf;
+    s32 size;
 
-    if (op->count > ns->max_req_size)
-        return -1;
+    if (count > ns->max_req_size)
+        count = ns->max_req_size;
 
     nvme_reset_prpl(ns);
 
+    size = count * ns->block_size;
     /* Special case for transfers that fit into PRP1, but are unaligned */
     if (((size + (base & ~NVME_PAGE_MASK)) <= NVME_PAGE_SIZE)) {
-        ns->prp1 = op->buf_fl;
-        return 0;
+        ns->prp1 = op_buf;
+        return count;
     }
 
     /* Every request has to be page aligned */
     if (base & ~NVME_PAGE_MASK)
-        return -1;
+        return 0;
 
     /* Make sure a full block fits into the last chunk */
     if (size & (ns->block_size - 1ULL))
-        return -1;
+        return 0;
 
     for (; size > 0; base += NVME_PAGE_SIZE, size -= NVME_PAGE_SIZE) {
         if (first_page) {
@@ -503,10 +504,10 @@ int nvme_build_prpl(struct nvme_namespace *ns, struct disk_op_s *op)
             continue;
         }
         if (nvme_add_prpl(ns, base))
-            return -1;
+            return 0;
     }
 
-    return 0;
+    return count;
 }
 
 static int
@@ -725,46 +726,34 @@ nvme_cmd_readwrite(struct nvme_namespace *ns, struct disk_op_s *op, int write)
 {
     int res = DISK_RET_SUCCESS;
     u16 const max_blocks = NVME_PAGE_SIZE / ns->block_size;
-    u16 i;
-
-    /* Split up requests that are larger than the device can handle */
-    if (op->count > ns->max_req_size) {
-        u16 count = op->count;
-
-        /* Handle the first max_req_size elements */
-        op->count = ns->max_req_size;
-        if (nvme_cmd_readwrite(ns, op, write))
-            return res;
-
-        /* Handle the remainder of the request */
-        op->count = count - ns->max_req_size;
-        op->lba += ns->max_req_size;
-        op->buf_fl += (ns->max_req_size * ns->block_size);
-        return nvme_cmd_readwrite(ns, op, write);
-    }
-
-    if (!nvme_build_prpl(ns, op)) {
-        /* Request goes via PRP List logic */
-        return nvme_io_readwrite(ns, op->lba, ns->prp1, op->count, write);
-    }
+    u16 i, blocks;
 
     for (i = 0; i < op->count && res == DISK_RET_SUCCESS;) {
         u16 blocks_remaining = op->count - i;
-        u16 blocks = blocks_remaining < max_blocks ? blocks_remaining
-                                                   : max_blocks;
         char *op_buf = op->buf_fl + i * ns->block_size;
 
-        if (write) {
-            memcpy(ns->dma_buffer, op_buf, blocks * ns->block_size);
-        }
+        blocks = nvme_build_prpl(ns, op_buf, blocks_remaining);
+        if (blocks) {
+            res = nvme_io_readwrite(ns, op->lba + i, ns->prp1, blocks, write);
+            dprintf(5, "ns %u %s lba %llu+%u: %d\n", ns->ns_id, write ? "write"
+                                                                      : "read",
+                    op->lba, blocks, res);
+        } else {
+            blocks = blocks_remaining < max_blocks ? blocks_remaining
+                                                   : max_blocks;
+
+            if (write) {
+                memcpy(ns->dma_buffer, op_buf, blocks * ns->block_size);
+            }
 
-        res = nvme_io_readwrite(ns, op->lba + i, ns->dma_buffer, blocks, write);
-        dprintf(5, "ns %u %s lba %llu+%u: %d\n", ns->ns_id, write ? "write"
-                                                                  : "read",
-                op->lba + i, blocks, res);
+            res = nvme_io_readwrite(ns, op->lba + i, ns->dma_buffer, blocks, write);
+            dprintf(5, "ns %u %s lba %llu+%u: %d\n", ns->ns_id, write ? "write"
+                                                                      : "read",
+                    op->lba + i, blocks, res);
 
-        if (!write && res == DISK_RET_SUCCESS) {
-

[SeaBIOS] Re: [PATCH] nvme: Clean up nvme_cmd_readwrite()

2020-11-05 Thread David Woodhouse
On Wed, 2020-11-04 at 23:27 +, Graf (AWS), Alexander wrote:
> > On 31.10.2020 at 00:49, David Woodhouse  wrote:
> > 
> > From: David Woodhouse 
> > 
> > This ended up with an odd mix of recursion (albeit *mostly* tail-recursion)
> > and iteration that could have been prettier. Make it prettier by making
> > nvme_build_prpl() return the number of blocks it consumes, and just
> > looping.
> > 
> > If nvme_build_prpl() doesn't want to play, just fall through to the
> > original loop.
> > 
> > Signed-off-by: David Woodhouse 
> 
> Please don't merge just yet. I think we found a bug with this patch,
> but need to find out where exactly.

Found both of them :)

> > @@ -725,34 +726,28 @@ nvme_cmd_readwrite(struct nvme_namespace *ns, struct disk_op_s *op, int write)
> >  {
> >      int res = DISK_RET_SUCCESS;
> >      u16 const max_blocks = NVME_PAGE_SIZE / ns->block_size;
> > -    u16 i;
> > +    u16 i, blocks;
> > 
> > -    /* Split up requests that are larger than the device can handle */
> > -    if (op->count > ns->max_req_size) {
> > -        u16 count = op->count;
> > +    while (op->count && ((blocks = nvme_build_prpl(ns, op)) > 0)) {

1. If nvme_build_prpl() returns -1 that becomes (u16)0xffff and that's > 0.
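
A minimal standalone illustration of that promotion, not part of the
patch (uses a stdint type as a stand-in for the SeaBIOS u16 typedef):

#include <stdio.h>
#include <stdint.h>

typedef uint16_t u16;

int main(void)
{
    u16 blocks;

    /* Storing the int -1 in a u16 truncates it to 0xffff... */
    blocks = -1;

    /* ...and 0xffff promotes back to 65535, so "blocks > 0" holds. */
    printf("blocks = 0x%x, (blocks > 0) = %d\n", blocks, blocks > 0);
    return 0;
}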


> > +        res = nvme_io_readwrite(ns, op->lba, ns->prp1, blocks, write);
> > +        dprintf(5, "ns %u %s lba %llu+%u: %d\n", ns->ns_id, write ? "write"
> > +                                                                  : "read",
> > +                op->lba, blocks, res);
> > 
> > -        /* Handle the first max_req_size elements */
> > -        op->count = ns->max_req_size;
> > -        if (nvme_cmd_readwrite(ns, op, write))
> > +        if (res != DISK_RET_SUCCESS)
> >              return res;
> > 
> > -        /* Handle the remainder of the request */
> > -        op->count = count - ns->max_req_size;
> > -        op->lba += ns->max_req_size;
> > -        op->buf_fl += (ns->max_req_size * ns->block_size);


2. We can't do this. Callers expect op->count to contain the number of
   blocks of I/O actually performed...

> > -        return nvme_cmd_readwrite(ns, op, write);
> > -    }
> > -
> > -    if (!nvme_build_prpl(ns, op)) {
> > -        /* Request goes via PRP List logic */
> > -        return nvme_io_readwrite(ns, op->lba, ns->prp1, op->count, write);
> > +        op->count -= blocks;
> > +        op->lba += blocks;
> > +        op->buf_fl += (blocks * ns->block_size);

   ... which means that when my cleanup made that adjustment happen
   unconditionally, instead of only in the case where the max size had
   been exceeded and it actually had to recurse, the problem started
   showing up in the test runs. But commit 94f0510dc has the problem
   already.
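
To make the op->count contract concrete, here is a toy model of that
failure mode (stand-in types and a made-up max request size, not the
real SeaBIOS code), mirroring the recursive split that the cleanup
removes:

#include <stdio.h>
#include <stdint.h>

typedef uint16_t u16;       /* stand-in for the SeaBIOS u16 typedef */

struct toy_op {
    u16 lba;
    u16 count;              /* in: blocks requested, out: blocks done */
};

#define MAX_REQ 8           /* stand-in for ns->max_req_size */

/* Mimics the old recursive splitting: after it returns, op->count only
 * holds the size of the final chunk, not the total that was done. */
static int toy_readwrite(struct toy_op *op)
{
    if (op->count > MAX_REQ) {
        u16 count = op->count;

        op->count = MAX_REQ;
        toy_readwrite(op);

        op->count = count - MAX_REQ;
        op->lba += MAX_REQ;
        return toy_readwrite(op);
    }
    return 0;   /* "success" */
}

int main(void)
{
    struct toy_op op = { .lba = 1, .count = 63 };

    toy_readwrite(&op);
    /* All 63 blocks were "transferred", but the caller now sees 7. */
    printf("op.count after the call: %u\n", op.count);
    return 0;
}

With the v2 patch the driver never writes op->count at all; the loop
tracks its progress in the local index 'i' instead.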

Running another full test set now, will post the fixed patch when it
completes.


