Re: [Qemu-block] [Qemu-devel] [PATCH v3 44/44] nbd: Implement NBD_OPT_BLOCK_SIZE on client

2016-04-25 Thread Eric Blake
On 04/25/2016 06:19 AM, Alex Bligh wrote:
> 
> On 23 Apr 2016, at 00:40, Eric Blake  wrote:
> 
>> The upstream NBD Protocol has defined a new extension to allow
>> the server to advertise block sizes to the client, as well as
>> a way for the client to inform the server that it intends to
>> obey block sizes.
>>
>> Pass any received sizes on to the block layer.
>>
>> Use the minimum block size as the sector size we pass to the
>> kernel - which also has the nice effect of cooperating with
>> (non-qemu) servers that don't do read-modify-write when exposing
>> a block device with 4k sectors; it can also allow us to visit a
>> file larger than 2T on a 32-bit kernel.
>>

>> +be32_to_cpus(>max_block);
>> +TRACE("Block sizes are 0x%" PRIx32 ", 0x%" PRIx32 ", 0x%" 
>> PRIx32,
>> +  info->min_block, info->opt_block, info->max_block);
>> +break;
>> +
> 
> You should probably check min_block <= opt_block <= max_block here

opt_block > max_block is possible if max_block is clamped to export size
(in the degenerate case where you have a small export that is too small
for the granularity of a hole or efficient I/O).  But yes, some sanity
checks that the server isn't horribly broken might be worthwhile.

> 
> Also should there be a check that BDRV_SECTOR_SIZE >= min_block?

No, because we take the server's min_block and feed it into
bs->request_align (which forces the block layer to comply with a minimum
alignment larger than 512, using code already tested on physical block
drives with 4k sectors), see the hunk in nbd-client.c.

>> int nbd_init(int fd, QIOChannelSocket *sioc, NbdExportInfo *info)
>> {
>> -unsigned long sectors = info->size / BDRV_SECTOR_SIZE;
>> -if (info->size / BDRV_SECTOR_SIZE != sectors) {
>> +unsigned long sector_size = MAX(BDRV_SECTOR_SIZE, info->min_block);
>> +unsigned long sectors = info->size / sector_size;
>> +if (info->size / sector_size != sectors) {
>> LOG("Export size %" PRId64 " too large for 32-bit kernel", 
>> info->size);
>> return -E2BIG;
>> }
>> @@ -724,18 +784,18 @@ int nbd_init(int fd, QIOChannelSocket *sioc, 
>> NbdExportInfo *info)
>> return -serrno;
>> }
>>
>> -TRACE("Setting block size to %lu", (unsigned long)BDRV_SECTOR_SIZE);
>> +TRACE("Setting block size to %lu", sector_size);
>>
>> -if (ioctl(fd, NBD_SET_BLKSIZE, (unsigned long)BDRV_SECTOR_SIZE) < 0) {
>> +if (ioctl(fd, NBD_SET_BLKSIZE, sector_size) < 0) {

We also feed the maximum of 512 or the advertised minimum block size
into the kernel when using ioctl() for the kernel to take over
transmission phase; although I'm not certain whether the kernel obeys
NBD_SET_BLKSIZE as a hint rather than a hard rule - but if that needs
patching, it needs patching in the kernel implementation, not in qemu.

-- 
Eric Blake   eblake redhat com+1-919-301-3266
Libvirt virtualization library http://libvirt.org



signature.asc
Description: OpenPGP digital signature


Re: [Qemu-block] [Qemu-devel] [PATCH v3 44/44] nbd: Implement NBD_OPT_BLOCK_SIZE on client

2016-04-25 Thread Alex Bligh

On 23 Apr 2016, at 00:40, Eric Blake  wrote:

> The upstream NBD Protocol has defined a new extension to allow
> the server to advertise block sizes to the client, as well as
> a way for the client to inform the server that it intends to
> obey block sizes.
> 
> Pass any received sizes on to the block layer.
> 
> Use the minimum block size as the sector size we pass to the
> kernel - which also has the nice effect of cooperating with
> (non-qemu) servers that don't do read-modify-write when exposing
> a block device with 4k sectors; it can also allow us to visit a
> file larger than 2T on a 32-bit kernel.
> 
> Signed-off-by: Eric Blake 
> ---
> include/block/nbd.h |  3 +++
> block/nbd-client.c  |  3 +++
> block/nbd.c | 17 +---
> nbd/client.c| 74 -
> 4 files changed, 87 insertions(+), 10 deletions(-)
> 
> diff --git a/include/block/nbd.h b/include/block/nbd.h
> index a5c68df..27a6854 100644
> --- a/include/block/nbd.h
> +++ b/include/block/nbd.h
> @@ -133,6 +133,9 @@ enum {
> struct NbdExportInfo {
> uint64_t size;
> uint16_t flags;
> +uint32_t min_block;
> +uint32_t opt_block;
> +uint32_t max_block;
> };
> typedef struct NbdExportInfo NbdExportInfo;
> 
> diff --git a/block/nbd-client.c b/block/nbd-client.c
> index 2b6ac27..602a8ab 100644
> --- a/block/nbd-client.c
> +++ b/block/nbd-client.c
> @@ -443,6 +443,9 @@ int nbd_client_init(BlockDriverState *bs,
> logout("Failed to negotiate with the NBD server\n");
> return ret;
> }
> +if (client->info.min_block > bs->request_alignment) {
> +bs->request_alignment = client->info.min_block;
> +}
> 
> qemu_co_mutex_init(>send_mutex);
> qemu_co_mutex_init(>free_sema);
> diff --git a/block/nbd.c b/block/nbd.c
> index 5172039..bb7df55 100644
> --- a/block/nbd.c
> +++ b/block/nbd.c
> @@ -407,9 +407,20 @@ static int nbd_co_flush(BlockDriverState *bs)
> 
> static void nbd_refresh_limits(BlockDriverState *bs, Error **errp)
> {
> -bs->bl.max_discard = UINT32_MAX >> BDRV_SECTOR_BITS;
> -bs->bl.max_write_zeroes = UINT32_MAX >> BDRV_SECTOR_BITS;
> -bs->bl.max_transfer_length = UINT32_MAX >> BDRV_SECTOR_BITS;
> +NbdClientSession *s = nbd_get_client_session(bs);
> +int max = UINT32_MAX >> BDRV_SECTOR_BITS;
> +
> +if (s->info.max_block) {
> +max = s->info.max_block >> BDRV_SECTOR_BITS;
> +}
> +bs->bl.max_discard = max;
> +bs->bl.max_write_zeroes = max;
> +bs->bl.max_transfer_length = max;
> +
> +if (s->info.opt_block &&
> +s->info.opt_block >> BDRV_SECTOR_BITS > bs->bl.opt_transfer_length) {
> +bs->bl.opt_transfer_length = s->info.opt_block >> BDRV_SECTOR_BITS;
> +}
> }
> 
> static int nbd_co_discard(BlockDriverState *bs, int64_t sector_num,
> diff --git a/nbd/client.c b/nbd/client.c
> index dac4f29..24f6b0b 100644
> --- a/nbd/client.c
> +++ b/nbd/client.c
> @@ -232,6 +232,11 @@ static int nbd_handle_reply_err(QIOChannel *ioc, 
> nbd_opt_reply *reply,
>reply->option);
> break;
> 
> +case NBD_REP_ERR_BLOCK_SIZE_REQD:
> +error_setg(errp, "Server wants OPT_BLOCK_SIZE before option %" 
> PRIx32,
> +   reply->option);
> +break;
> +
> default:
> error_setg(errp, "Unknown error code when asking for option %" PRIx32,
>reply->option);
> @@ -333,6 +338,21 @@ static int nbd_opt_go(QIOChannel *ioc, const char 
> *wantname,
>  * flags still 0 is a witness of a broken server. */
> info->flags = 0;
> 
> +/* Some servers use NBD_OPT_GO to advertise non-default block
> + * sizes, and require that we first use NBD_OPT_BLOCK_SIZE to
> + * agree to that. */
> +TRACE("Attempting NBD_OPT_BLOCK_SIZE");
> +if (nbd_send_option_request(ioc, NBD_OPT_BLOCK_SIZE, 0, NULL, errp) < 0) 
> {
> +return -1;
> +}
> +if (nbd_receive_option_reply(ioc, NBD_OPT_BLOCK_SIZE, , errp) < 0) 
> {
> +return -1;
> +}
> +error = nbd_handle_reply_err(ioc, , errp);
> +if (error < 0) {
> +return error;
> +}
> +
> TRACE("Attempting NBD_OPT_GO for export '%s'", wantname);
> if (nbd_send_option_request(ioc, NBD_OPT_GO, -1, wantname, errp) < 0) {
> return -1;
> @@ -402,6 +422,45 @@ static int nbd_opt_go(QIOChannel *ioc, const char 
> *wantname,
>   info->size, info->flags);
> break;
> 
> +case NBD_INFO_BLOCK_SIZE:
> +if (len != sizeof(info->min_block) * 3) {
> +error_setg(errp, "remaining export info len %" PRIu32
> +   " is unexpected size", len);
> +return -1;
> +}
> +if (read_sync(ioc, >min_block, sizeof(info->min_block)) !=
> +sizeof(info->min_block)) {
> +error_setg(errp, "failed to read info minimum block size");
> +return