Re: [Qemu-devel] [PATCH 08/15] virtio: decrease size of VirtQueueElement
On Thu, Jul 31, 2014 at 5:38 PM, Paolo Bonzini wrote: > Il 31/07/2014 04:07, Ming Lei ha scritto: >> On Wed, Jul 30, 2014 at 9:51 PM, Paolo Bonzini wrote: >>> Il 30/07/2014 13:39, Ming Lei ha scritto: diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h index a60104c..943e72f 100644 --- a/include/hw/virtio/virtio.h +++ b/include/hw/virtio/virtio.h @@ -84,12 +84,17 @@ typedef struct VirtQueue VirtQueue; typedef struct VirtQueueElement { unsigned int index; +unsigned int num; unsigned int out_num; unsigned int in_num; -hwaddr in_addr[VIRTQUEUE_MAX_SIZE]; -hwaddr out_addr[VIRTQUEUE_MAX_SIZE]; -struct iovec in_sg[VIRTQUEUE_MAX_SIZE]; -struct iovec out_sg[VIRTQUEUE_MAX_SIZE]; + +hwaddr *in_addr; +hwaddr *out_addr; +struct iovec *in_sg; +struct iovec *out_sg; + +hwaddr addr[VIRTQUEUE_MAX_SIZE]; +struct iovec sg[VIRTQUEUE_MAX_SIZE]; } VirtQueueElement; #define VIRTIO_PCI_QUEUE_MAX 64 -- >>> >>> since addr and sg aren't used directly, allocate them flexibly like >>> >>> char *p; >>> VirtQueueElement *elem; >>> total_size = ROUND_UP(sizeof(struct VirtQueueElement), >>> __alignof__(elem->addr[0]); >>> addr_offset = total_size; >>> total_size = ROUND_UP(total_size + num * sizeof(elem->addr[0]), >>> __alignof__(elem->sg[0])); >>> sg_offset = total_size; >>> total_size += num * sizeof(elem->sg[0]); >>> >>> elem = p = g_slice_alloc(total_size); >>> elem->size = total_size; >>> elem->in_addr = p + addr_offset; >>> elem->out_addr = elem->in_addr + in_num; >>> elem->in_sg = p + sg_offset; >>> elem->out_sg = elem->in_sg + in_num; >>> >>> ... >>> >>> g_slice_free1(elem->size, elem); >>> >>> The small size will help glib do slab-style allocation and should remove >>> the need for an object pool. >> >> Yes, that should be correct way to do, but can't avoid big chunk allocation >> completely because 'num' is a bit big. > > For typical iops microbenchmarks, num will be 3 (4K I/O) to 18 (64K). That is only in benchmark, in reality, wring is always big size from file system, and read might be big too for readahead or by I/O merge. Also you need to walk virt queue first for figure out how many io vectors in current request first, it is a bit expensive to walk the list since lots of dcache miss is often generated. Thanks,
Re: [Qemu-devel] [PATCH 08/15] virtio: decrease size of VirtQueueElement
Il 31/07/2014 04:07, Ming Lei ha scritto: > On Wed, Jul 30, 2014 at 9:51 PM, Paolo Bonzini wrote: >> Il 30/07/2014 13:39, Ming Lei ha scritto: >>> diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h >>> index a60104c..943e72f 100644 >>> --- a/include/hw/virtio/virtio.h >>> +++ b/include/hw/virtio/virtio.h >>> @@ -84,12 +84,17 @@ typedef struct VirtQueue VirtQueue; >>> typedef struct VirtQueueElement >>> { >>> unsigned int index; >>> +unsigned int num; >>> unsigned int out_num; >>> unsigned int in_num; >>> -hwaddr in_addr[VIRTQUEUE_MAX_SIZE]; >>> -hwaddr out_addr[VIRTQUEUE_MAX_SIZE]; >>> -struct iovec in_sg[VIRTQUEUE_MAX_SIZE]; >>> -struct iovec out_sg[VIRTQUEUE_MAX_SIZE]; >>> + >>> +hwaddr *in_addr; >>> +hwaddr *out_addr; >>> +struct iovec *in_sg; >>> +struct iovec *out_sg; >>> + >>> +hwaddr addr[VIRTQUEUE_MAX_SIZE]; >>> +struct iovec sg[VIRTQUEUE_MAX_SIZE]; >>> } VirtQueueElement; >>> >>> #define VIRTIO_PCI_QUEUE_MAX 64 >>> -- >> >> since addr and sg aren't used directly, allocate them flexibly like >> >> char *p; >> VirtQueueElement *elem; >> total_size = ROUND_UP(sizeof(struct VirtQueueElement), >> __alignof__(elem->addr[0]); >> addr_offset = total_size; >> total_size = ROUND_UP(total_size + num * sizeof(elem->addr[0]), >> __alignof__(elem->sg[0])); >> sg_offset = total_size; >> total_size += num * sizeof(elem->sg[0]); >> >> elem = p = g_slice_alloc(total_size); >> elem->size = total_size; >> elem->in_addr = p + addr_offset; >> elem->out_addr = elem->in_addr + in_num; >> elem->in_sg = p + sg_offset; >> elem->out_sg = elem->in_sg + in_num; >> >> ... >> >> g_slice_free1(elem->size, elem); >> >> The small size will help glib do slab-style allocation and should remove >> the need for an object pool. > > Yes, that should be correct way to do, but can't avoid big chunk allocation > completely because 'num' is a bit big. For typical iops microbenchmarks, num will be 3 (4K I/O) to 18 (64K). > Also this kind of change requires almost all users of elem to be changed, > that need lots of work. > > That is why I choose to take the simple approach to ease memory > preallocation for obj pool. Yeah, that's simpler. Paolo
Re: [Qemu-devel] [PATCH 08/15] virtio: decrease size of VirtQueueElement
On Wed, Jul 30, 2014 at 10:40 PM, Michael S. Tsirkin wrote: > On Wed, Jul 30, 2014 at 03:51:22PM +0200, Paolo Bonzini wrote: >> Il 30/07/2014 13:39, Ming Lei ha scritto: >> > diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h >> > index a60104c..943e72f 100644 >> > --- a/include/hw/virtio/virtio.h >> > +++ b/include/hw/virtio/virtio.h >> > @@ -84,12 +84,17 @@ typedef struct VirtQueue VirtQueue; >> > typedef struct VirtQueueElement >> > { >> > unsigned int index; >> > +unsigned int num; >> > unsigned int out_num; >> > unsigned int in_num; >> > -hwaddr in_addr[VIRTQUEUE_MAX_SIZE]; >> > -hwaddr out_addr[VIRTQUEUE_MAX_SIZE]; >> > -struct iovec in_sg[VIRTQUEUE_MAX_SIZE]; >> > -struct iovec out_sg[VIRTQUEUE_MAX_SIZE]; >> > + >> > +hwaddr *in_addr; >> > +hwaddr *out_addr; >> > +struct iovec *in_sg; >> > +struct iovec *out_sg; >> > + >> > +hwaddr addr[VIRTQUEUE_MAX_SIZE]; >> > +struct iovec sg[VIRTQUEUE_MAX_SIZE]; >> > } VirtQueueElement; >> > >> > #define VIRTIO_PCI_QUEUE_MAX 64 >> > -- >> >> since addr and sg aren't used directly, allocate them flexibly like >> >> char *p; >> VirtQueueElement *elem; >> total_size = ROUND_UP(sizeof(struct VirtQueueElement), >> __alignof__(elem->addr[0]); >> addr_offset = total_size; >> total_size = ROUND_UP(total_size + num * sizeof(elem->addr[0]), >> __alignof__(elem->sg[0])); >> sg_offset = total_size; >> total_size += num * sizeof(elem->sg[0]); >> >> elem = p = g_slice_alloc(total_size); >> elem->size = total_size; >> elem->in_addr = p + addr_offset; >> elem->out_addr = elem->in_addr + in_num; >> elem->in_sg = p + sg_offset; >> elem->out_sg = elem->in_sg + in_num; >> >> ... >> >> g_slice_free1(elem->size, elem); >> >> The small size will help glib do slab-style allocation and should remove >> the need for an object pool. >> >> Paolo > > As long as you relying on this, why not allocate in_sg/out_sg > separately? That won't avoid big allocation(includes the following writing) which is always expensive. Also using each array for in and out is totally wrong and inefficient. > In any case, a bunch of code needs to be audited to make sure > no one assumes we can always stick VIRTQUEUE_MAX_SIZE there. > > > But what really worries me is this is an optimization patch > without any numbers accompanying it. One number is that almost half of sizeof(elem) is saved, it isn't good enough, is it? > I would say split this out from your virtio blk work, > work on this separately. This patch can ease elem preallocation for obj pool, that is related with previous patches. Thanks
Re: [Qemu-devel] [PATCH 08/15] virtio: decrease size of VirtQueueElement
On Wed, Jul 30, 2014 at 9:51 PM, Paolo Bonzini wrote: > Il 30/07/2014 13:39, Ming Lei ha scritto: >> diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h >> index a60104c..943e72f 100644 >> --- a/include/hw/virtio/virtio.h >> +++ b/include/hw/virtio/virtio.h >> @@ -84,12 +84,17 @@ typedef struct VirtQueue VirtQueue; >> typedef struct VirtQueueElement >> { >> unsigned int index; >> +unsigned int num; >> unsigned int out_num; >> unsigned int in_num; >> -hwaddr in_addr[VIRTQUEUE_MAX_SIZE]; >> -hwaddr out_addr[VIRTQUEUE_MAX_SIZE]; >> -struct iovec in_sg[VIRTQUEUE_MAX_SIZE]; >> -struct iovec out_sg[VIRTQUEUE_MAX_SIZE]; >> + >> +hwaddr *in_addr; >> +hwaddr *out_addr; >> +struct iovec *in_sg; >> +struct iovec *out_sg; >> + >> +hwaddr addr[VIRTQUEUE_MAX_SIZE]; >> +struct iovec sg[VIRTQUEUE_MAX_SIZE]; >> } VirtQueueElement; >> >> #define VIRTIO_PCI_QUEUE_MAX 64 >> -- > > since addr and sg aren't used directly, allocate them flexibly like > > char *p; > VirtQueueElement *elem; > total_size = ROUND_UP(sizeof(struct VirtQueueElement), > __alignof__(elem->addr[0]); > addr_offset = total_size; > total_size = ROUND_UP(total_size + num * sizeof(elem->addr[0]), > __alignof__(elem->sg[0])); > sg_offset = total_size; > total_size += num * sizeof(elem->sg[0]); > > elem = p = g_slice_alloc(total_size); > elem->size = total_size; > elem->in_addr = p + addr_offset; > elem->out_addr = elem->in_addr + in_num; > elem->in_sg = p + sg_offset; > elem->out_sg = elem->in_sg + in_num; > > ... > > g_slice_free1(elem->size, elem); > > The small size will help glib do slab-style allocation and should remove > the need for an object pool. Yes, that should be correct way to do, but can't avoid big chunk allocation completely because 'num' is a bit big. Also this kind of change requires almost all users of elem to be changed, that need lots of work. That is why I choose to take the simple approach to ease memory preallocation for obj pool. Thanks,
Re: [Qemu-devel] [PATCH 08/15] virtio: decrease size of VirtQueueElement
Il 30/07/2014 16:40, Michael S. Tsirkin ha scritto: > As long as you relying on this, why not allocate in_sg/out_sg > separately? Because allocations can be expensive. > In any case, a bunch of code needs to be audited to make sure > no one assumes we can always stick VIRTQUEUE_MAX_SIZE there. > > But what really worries me is this is an optimization patch > without any numbers accompanying it. I agree. > I would say split this out from your virtio blk work, > work on this separately. This, too. Paolo
Re: [Qemu-devel] [PATCH 08/15] virtio: decrease size of VirtQueueElement
On Wed, Jul 30, 2014 at 03:51:22PM +0200, Paolo Bonzini wrote: > Il 30/07/2014 13:39, Ming Lei ha scritto: > > diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h > > index a60104c..943e72f 100644 > > --- a/include/hw/virtio/virtio.h > > +++ b/include/hw/virtio/virtio.h > > @@ -84,12 +84,17 @@ typedef struct VirtQueue VirtQueue; > > typedef struct VirtQueueElement > > { > > unsigned int index; > > +unsigned int num; > > unsigned int out_num; > > unsigned int in_num; > > -hwaddr in_addr[VIRTQUEUE_MAX_SIZE]; > > -hwaddr out_addr[VIRTQUEUE_MAX_SIZE]; > > -struct iovec in_sg[VIRTQUEUE_MAX_SIZE]; > > -struct iovec out_sg[VIRTQUEUE_MAX_SIZE]; > > + > > +hwaddr *in_addr; > > +hwaddr *out_addr; > > +struct iovec *in_sg; > > +struct iovec *out_sg; > > + > > +hwaddr addr[VIRTQUEUE_MAX_SIZE]; > > +struct iovec sg[VIRTQUEUE_MAX_SIZE]; > > } VirtQueueElement; > > > > #define VIRTIO_PCI_QUEUE_MAX 64 > > -- > > since addr and sg aren't used directly, allocate them flexibly like > > char *p; > VirtQueueElement *elem; > total_size = ROUND_UP(sizeof(struct VirtQueueElement), > __alignof__(elem->addr[0]); > addr_offset = total_size; > total_size = ROUND_UP(total_size + num * sizeof(elem->addr[0]), > __alignof__(elem->sg[0])); > sg_offset = total_size; > total_size += num * sizeof(elem->sg[0]); > > elem = p = g_slice_alloc(total_size); > elem->size = total_size; > elem->in_addr = p + addr_offset; > elem->out_addr = elem->in_addr + in_num; > elem->in_sg = p + sg_offset; > elem->out_sg = elem->in_sg + in_num; > > ... > > g_slice_free1(elem->size, elem); > > The small size will help glib do slab-style allocation and should remove > the need for an object pool. > > Paolo As long as you relying on this, why not allocate in_sg/out_sg separately? In any case, a bunch of code needs to be audited to make sure no one assumes we can always stick VIRTQUEUE_MAX_SIZE there. But what really worries me is this is an optimization patch without any numbers accompanying it. I would say split this out from your virtio blk work, work on this separately. -- MST
Re: [Qemu-devel] [PATCH 08/15] virtio: decrease size of VirtQueueElement
Il 30/07/2014 13:39, Ming Lei ha scritto: > diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h > index a60104c..943e72f 100644 > --- a/include/hw/virtio/virtio.h > +++ b/include/hw/virtio/virtio.h > @@ -84,12 +84,17 @@ typedef struct VirtQueue VirtQueue; > typedef struct VirtQueueElement > { > unsigned int index; > +unsigned int num; > unsigned int out_num; > unsigned int in_num; > -hwaddr in_addr[VIRTQUEUE_MAX_SIZE]; > -hwaddr out_addr[VIRTQUEUE_MAX_SIZE]; > -struct iovec in_sg[VIRTQUEUE_MAX_SIZE]; > -struct iovec out_sg[VIRTQUEUE_MAX_SIZE]; > + > +hwaddr *in_addr; > +hwaddr *out_addr; > +struct iovec *in_sg; > +struct iovec *out_sg; > + > +hwaddr addr[VIRTQUEUE_MAX_SIZE]; > +struct iovec sg[VIRTQUEUE_MAX_SIZE]; > } VirtQueueElement; > > #define VIRTIO_PCI_QUEUE_MAX 64 > -- since addr and sg aren't used directly, allocate them flexibly like char *p; VirtQueueElement *elem; total_size = ROUND_UP(sizeof(struct VirtQueueElement), __alignof__(elem->addr[0]); addr_offset = total_size; total_size = ROUND_UP(total_size + num * sizeof(elem->addr[0]), __alignof__(elem->sg[0])); sg_offset = total_size; total_size += num * sizeof(elem->sg[0]); elem = p = g_slice_alloc(total_size); elem->size = total_size; elem->in_addr = p + addr_offset; elem->out_addr = elem->in_addr + in_num; elem->in_sg = p + sg_offset; elem->out_sg = elem->in_sg + in_num; ... g_slice_free1(elem->size, elem); The small size will help glib do slab-style allocation and should remove the need for an object pool. Paolo