When extended headers are in use, the server can send us 64-bit extents, even for a 32-bit query (if the server knows the entire image is data, for example). For maximum flexibility, we are thus better off storing 64-bit lengths internally, even if we have to convert them back to 32-bit lengths when invoking the user's 32-bit callback. The next patch will then add a new API for letting the user access the full 64-bit extent information. The goal is to let both APIs work all the time, regardless of the size of the extents that the server actually answered with.
Note that when using the old nbd_block_status() API with a server that lacks extended headers, we now add a double-conversion speed penalty (converting the server's 32-bit answer into 64-bit internally and back to 32-bit for the callback). But the speed penalty will not be a problem for applications using the new nbd_block_status_64() API (we have to give a 64-bit answer no matter what the server answered), and ideally the situation will become less common as more servers learn extended headers. So for now I chose to unconditionally use a 64-bit internal representation; but if it turns out to have noticeable degradation, we could tweak things to conditionally retain 32-bit internal representation for servers lacking extended headers at the expense of more code maintenance. One of the trickier aspects of this patch is auditing that both the user's extent and our malloc'd shim get cleaned up once on all possible paths, so that there is neither a leak nor a double free. --- lib/internal.h | 7 +++- generator/states-reply-structured.c | 31 ++++++++++----- lib/handle.c | 4 +- lib/rw.c | 59 ++++++++++++++++++++++++++++- 4 files changed, 85 insertions(+), 16 deletions(-) diff --git a/lib/internal.h b/lib/internal.h index 06f3a65c..4800df83 100644 --- a/lib/internal.h +++ b/lib/internal.h @@ -75,7 +75,7 @@ struct export { struct command_cb { union { - nbd_extent_callback extent; + nbd_extent64_callback extent; nbd_chunk_callback chunk; nbd_list_callback list; nbd_context_callback context; @@ -286,7 +286,10 @@ struct nbd_handle { /* When receiving block status, this is used. */ uint32_t bs_contextid; - uint32_t *bs_entries; + union { + nbd_extent *normal; /* Our 64-bit preferred internal form */ + uint32_t *narrow; /* 32-bit form of NBD_REPLY_TYPE_BLOCK_STATUS */ + } bs_entries; /* Commands which are waiting to be issued [meaning the request * packet is sent to the server]. 
This is used as a simple linked diff --git a/generator/states-reply-structured.c b/generator/states-reply-structured.c index a3e0e2ac..71c761e9 100644 --- a/generator/states-reply-structured.c +++ b/generator/states-reply-structured.c @@ -494,6 +494,7 @@ STATE_MACHINE { REPLY.STRUCTURED_REPLY.RECV_BS_CONTEXTID: struct command *cmd = h->reply_cmd; uint32_t length; + uint32_t count; switch (recv_into_rbuf (h)) { case -1: SET_NEXT_STATE (%.DEAD); return 0; @@ -508,15 +509,19 @@ STATE_MACHINE { assert (cmd->type == NBD_CMD_BLOCK_STATUS); assert (length >= 12); length -= sizeof h->bs_contextid; + count = length / (2 * sizeof (uint32_t)); - free (h->bs_entries); - h->bs_entries = malloc (length); - if (h->bs_entries == NULL) { + /* Read raw data into a subset of h->bs_entries, then expand it + * into place later during byte-swapping. + */ + free (h->bs_entries.normal); + h->bs_entries.normal = malloc (count * sizeof *h->bs_entries.normal); + if (h->bs_entries.normal == NULL) { SET_NEXT_STATE (%.DEAD); set_error (errno, "malloc"); return 0; } - h->rbuf = h->bs_entries; + h->rbuf = h->bs_entries.narrow; h->rlen = length; SET_NEXT_STATE (%RECV_BS_ENTRIES); } @@ -528,6 +533,7 @@ STATE_MACHINE { uint32_t count; size_t i; uint32_t context_id; + uint32_t *raw; struct meta_context *meta_context; switch (recv_into_rbuf (h)) { @@ -542,15 +548,20 @@ STATE_MACHINE { assert (cmd); /* guaranteed by CHECK */ assert (cmd->type == NBD_CMD_BLOCK_STATUS); assert (CALLBACK_IS_NOT_NULL (cmd->cb.fn.extent)); - assert (h->bs_entries); + assert (h->bs_entries.normal); assert (length >= 12); - count = (length - sizeof h->bs_contextid) / sizeof *h->bs_entries; + count = (length - sizeof h->bs_contextid) / (2 * sizeof (uint32_t)); /* Need to byte-swap the entries returned, but apart from that we - * don't validate them. + * don't validate them. Reverse order is essential, since we are + * expanding in-place from narrow to wider type. 
*/ - for (i = 0; i < count; ++i) - h->bs_entries[i] = be32toh (h->bs_entries[i]); + raw = h->bs_entries.narrow; + for (i = count; i > 0; ) { + --i; + h->bs_entries.normal[i].flags = be32toh (raw[i * 2 + 1]); + h->bs_entries.normal[i].length = be32toh (raw[i * 2]); + } /* Look up the context ID. */ context_id = be32toh (h->bs_contextid); @@ -566,7 +577,7 @@ STATE_MACHINE { if (CALL_CALLBACK (cmd->cb.fn.extent, meta_context->name, cmd->offset, - h->bs_entries, count, + h->bs_entries.normal, count, &error) == -1) if (cmd->error == 0) cmd->error = error ? error : EPROTO; diff --git a/lib/handle.c b/lib/handle.c index cbb37e89..74fe87ec 100644 --- a/lib/handle.c +++ b/lib/handle.c @@ -1,5 +1,5 @@ /* NBD client library in userspace - * Copyright (C) 2013-2020 Red Hat Inc. + * Copyright (C) 2013-2021 Red Hat Inc. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -123,7 +123,7 @@ nbd_close (struct nbd_handle *h) /* Free user callbacks first. */ nbd_unlocked_clear_debug_callback (h); - free (h->bs_entries); + free (h->bs_entries.normal); nbd_internal_reset_size_and_flags (h); nbd_internal_free_option (h); free_cmd_list (h->cmds_to_issue); diff --git a/lib/rw.c b/lib/rw.c index 16c2e848..f36f4e15 100644 --- a/lib/rw.c +++ b/lib/rw.c @@ -42,6 +42,50 @@ wait_for_command (struct nbd_handle *h, int64_t cookie) return r == -1 ? -1 : 0; } +/* Convert from 64-bit to 32-bit extent callback. 
*/ +static int +nbd_convert_extent (void *data, const char *metacontext, uint64_t offset, + nbd_extent *entries, size_t nr_entries, int *error) +{ + nbd_extent_callback *cb = data; + uint32_t *array = malloc (nr_entries * 2 * sizeof *array); + size_t i; + int ret; + + if (array == NULL) { + set_error (*error = errno, "malloc"); + return -1; + } + + for (i = 0; i < nr_entries; i++) { + array[i * 2] = entries[i].length; + array[i * 2 + 1] = entries[i].flags; + /* If an extent is larger than 32 bits, silently truncate the rest + * of the server's response. Technically, such a server was + * non-compliant if the client did not negotiate extended headers, + * but it is easier to let the caller make progress than to make + * the call fail. Rather than track the connection's alignment, + * just blindly truncate the large extent to 4G-64M. + */ + if (entries[i].length > UINT32_MAX) { + array[i++ * 2] = -MAX_REQUEST_SIZE; + break; + } + } + + ret = CALL_CALLBACK (*cb, metacontext, offset, array, i * 2, error); + free (array); + return ret; +} + +static void +nbd_convert_extent_free (void *data) +{ + nbd_extent_callback *cb = data; + FREE_CALLBACK (*cb); + free (cb); +} + /* Issue a read command and wait for the reply. 
*/ int nbd_unlocked_pread (struct nbd_handle *h, void *buf, @@ -469,12 +513,23 @@ nbd_unlocked_aio_block_status (struct nbd_handle *h, nbd_completion_callback *completion, uint32_t flags) { - struct command_cb cb = { .fn.extent = *extent, + nbd_extent_callback *shim = malloc (sizeof *shim); + struct command_cb cb = { .fn.extent.callback = nbd_convert_extent, + .fn.extent.user_data = shim, + .fn.extent.free = nbd_convert_extent_free, .completion = *completion }; + if (shim == NULL) { + set_error (errno, "malloc"); + return -1; + } + *shim = *extent; + SET_CALLBACK_TO_NULL (*extent); + if (h->strict & LIBNBD_STRICT_COMMANDS) { if (!h->structured_replies) { set_error (ENOTSUP, "server does not support structured replies"); + FREE_CALLBACK (cb.fn.extent); return -1; } @@ -482,11 +537,11 @@ nbd_unlocked_aio_block_status (struct nbd_handle *h, set_error (ENOTSUP, "did not negotiate any metadata contexts, " "either you did not call nbd_add_meta_context before " "connecting or the server does not support it"); + FREE_CALLBACK (cb.fn.extent); return -1; } } - SET_CALLBACK_TO_NULL (*extent); SET_CALLBACK_TO_NULL (*completion); return nbd_internal_command_common (h, flags, NBD_CMD_BLOCK_STATUS, offset, count, EINVAL, NULL, &cb); -- 2.33.1