The VirtQueueElement is a very big structure (> 4k), since it will be initialzed by virtqueue_pop, we can save the expensive zeroing here.
This saves a few nanoseconds per request in my test: [fio-test] rw bs iodepth jobs bw iops latency -------------------------------------------------------------------------------------------- Before read 4k 1 1 110 28269 34 After read 4k 1 1 131 33745 28 virtio-blk read 4k 1 1 217 55673 16 Signed-off-by: Fam Zheng <f...@redhat.com> --- hw/scsi/virtio-scsi.c | 17 ++++++++++------- include/hw/virtio/virtio-scsi.h | 1 + 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c index 86aba88..0792529 100644 --- a/hw/scsi/virtio-scsi.c +++ b/hw/scsi/virtio-scsi.c @@ -24,12 +24,12 @@ typedef struct VirtIOSCSIReq { VirtIOSCSI *dev; VirtQueue *vq; - VirtQueueElement elem; QEMUSGList qsgl; + QEMUIOVector resp_iov; + VirtQueueElement elem; SCSIRequest *sreq; size_t resp_size; enum SCSIXferMode mode; - QEMUIOVector resp_iov; union { VirtIOSCSICmdResp cmd; VirtIOSCSICtrlTMFResp tmf; @@ -44,6 +44,7 @@ typedef struct VirtIOSCSIReq { VirtIOSCSICtrlTMFReq tmf; VirtIOSCSICtrlANReq an; } req; + uint8_t cdb[VIRTIO_SCSI_CDB_SIZE_MAX]; } VirtIOSCSIReq; QEMU_BUILD_BUG_ON(offsetof(VirtIOSCSIReq, req.cdb) != @@ -68,15 +69,16 @@ static inline SCSIDevice *virtio_scsi_device_find(VirtIOSCSI *s, uint8_t *lun) static VirtIOSCSIReq *virtio_scsi_init_req(VirtIOSCSI *s, VirtQueue *vq) { VirtIOSCSIReq *req; - VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s); - - req = g_malloc0(sizeof(*req) + vs->cdb_size); + const size_t zero_skip = offsetof(VirtIOSCSIReq, elem) + + sizeof(VirtQueueElement); + req = g_slice_new(VirtIOSCSIReq); req->vq = vq; req->dev = s; req->sreq = NULL; qemu_sglist_init(&req->qsgl, DEVICE(s), 8, &address_space_memory); qemu_iovec_init(&req->resp_iov, 1); + memset((uint8_t *)req + zero_skip, 0, sizeof(*req) - zero_skip); return req; } @@ -84,7 +86,7 @@ static void virtio_scsi_free_req(VirtIOSCSIReq *req) { qemu_iovec_destroy(&req->resp_iov); qemu_sglist_destroy(&req->qsgl); - g_free(req); + g_slice_free(VirtIOSCSIReq, req); } static void virtio_scsi_complete_req(VirtIOSCSIReq *req) @@ -532,7 +534,8 @@ static void virtio_scsi_set_config(VirtIODevice *vdev, VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(vdev); if ((uint32_t) virtio_ldl_p(vdev, &scsiconf->sense_size) >= 65536 || - (uint32_t) virtio_ldl_p(vdev, &scsiconf->cdb_size) >= 256) { + (uint32_t) virtio_ldl_p(vdev, &scsiconf->cdb_size) + >= VIRTIO_SCSI_CDB_SIZE_MAX) { error_report("bad data written to virtio-scsi configuration space"); exit(1); } diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h index 188a2d9..6e876f4 100644 --- a/include/hw/virtio/virtio-scsi.h +++ b/include/hw/virtio/virtio-scsi.h @@ -37,6 +37,7 @@ #define VIRTIO_SCSI_VQ_SIZE 128 #define VIRTIO_SCSI_CDB_SIZE 32 +#define VIRTIO_SCSI_CDB_SIZE_MAX 256 #define VIRTIO_SCSI_SENSE_SIZE 96 #define VIRTIO_SCSI_MAX_CHANNEL 0 #define VIRTIO_SCSI_MAX_TARGET 255 -- 1.9.3