This avoids useless masking and shifting when a single call to the MemoryRegion ops will do. It cuts 30 cycles off the common case of memory dispatch (out of ~150).
Paolo Bonzini (4): memory: cache min/max_access_size memory: streamline common case for memory dispatch memory: hoist coalesced MMIO flush memory: small tweaks include/exec/memory.h | 2 + memory.c | 114 +++++++++++++++++++++++++++++++++++--------------- 2 files changed, 82 insertions(+), 34 deletions(-) -- 1.8.4.2 >From 8324dd46284285cec1de394ce2fc3fb449e776d6 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini <pbonz...@redhat.com> Date: Fri, 8 Nov 2013 11:29:41 +0100 Subject: [PATCH 1/4] memory: cache min/max_access_size This will simplify the code in the next patch. Signed-off-by: Paolo Bonzini <pbonz...@redhat.com> --- include/exec/memory.h | 2 ++ memory.c | 27 +++++++++++---------------- 2 files changed, 13 insertions(+), 16 deletions(-) diff --git a/include/exec/memory.h b/include/exec/memory.h index 480dfbf..cf63adb 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -163,6 +163,8 @@ struct MemoryRegion { unsigned ioeventfd_nb; MemoryRegionIoeventfd *ioeventfds; NotifierList iommu_notify; + + uint8_t min_access_size, max_access_size; }; typedef struct MemoryListener MemoryListener; diff --git a/memory.c b/memory.c index 28f6449..56e54aa 100644 --- a/memory.c +++ b/memory.c @@ -443,8 +443,6 @@ static void memory_region_write_accessor(MemoryRegion *mr, static void access_with_adjusted_size(hwaddr addr, uint64_t *value, unsigned size, - unsigned access_size_min, - unsigned access_size_max, void (*access)(MemoryRegion *mr, hwaddr addr, uint64_t *value, @@ -457,15 +455,8 @@ static void access_with_adjusted_size(hwaddr addr, unsigned access_size; unsigned i; - if (!access_size_min) { - access_size_min = 1; - } - if (!access_size_max) { - access_size_max = 4; - } - /* FIXME: support unaligned access? 
*/ - access_size = MAX(MIN(size, access_size_max), access_size_min); + access_size = MAX(MIN(size, mr->max_access_size), mr->min_access_size); access_mask = -1ULL >> (64 - access_size * 8); if (memory_region_big_endian(mr)) { for (i = 0; i < size; i += access_size) { @@ -942,11 +933,9 @@ static uint64_t memory_region_dispatch_read1(MemoryRegion *mr, if (mr->ops->read) { access_with_adjusted_size(addr, &data, size, - mr->ops->impl.min_access_size, - mr->ops->impl.max_access_size, memory_region_read_accessor, mr); } else { - access_with_adjusted_size(addr, &data, size, 1, 4, + access_with_adjusted_size(addr, &data, size, memory_region_oldmmio_read_accessor, mr); } @@ -982,11 +971,9 @@ static bool memory_region_dispatch_write(MemoryRegion *mr, if (mr->ops->write) { access_with_adjusted_size(addr, &data, size, - mr->ops->impl.min_access_size, - mr->ops->impl.max_access_size, memory_region_write_accessor, mr); } else { - access_with_adjusted_size(addr, &data, size, 1, 4, + access_with_adjusted_size(addr, &data, size, memory_region_oldmmio_write_accessor, mr); } return false; @@ -1004,6 +991,14 @@ void memory_region_init_io(MemoryRegion *mr, mr->opaque = opaque; mr->terminates = true; mr->ram_addr = ~(ram_addr_t)0; + + if (mr->ops->read) { + mr->min_access_size = mr->ops->impl.min_access_size ? : 1; + mr->max_access_size = mr->ops->impl.max_access_size ? : 4; + } else { + mr->min_access_size = 1; + mr->max_access_size = 4; + } } void memory_region_init_ram(MemoryRegion *mr, -- 1.8.4.2 >From 2afa3d3451dbfc76b3c6a28af1e7cdd3ab50c7ec Mon Sep 17 00:00:00 2001 From: Paolo Bonzini <pbonz...@redhat.com> Date: Fri, 8 Nov 2013 11:39:18 +0100 Subject: [PATCH 2/4] memory: streamline common case for memory dispatch In the common case where there is no combining or splitting, access_with_adjusted_size is adding a lot of overhead. Call the MMIO ops directly in that case. 
Signed-off-by: Paolo Bonzini <pbonz...@redhat.com> --- memory.c | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 56 insertions(+), 12 deletions(-) diff --git a/memory.c b/memory.c index 56e54aa..1ade19c 100644 --- a/memory.c +++ b/memory.c @@ -443,6 +443,7 @@ static void access_with_adjusted_size(hwaddr addr, uint64_t *value, unsigned size, + unsigned access_size, void (*access)(MemoryRegion *mr, hwaddr addr, uint64_t *value, @@ -452,11 +453,9 @@ static void access_with_adjusted_size(hwaddr addr, MemoryRegion *mr) { uint64_t access_mask; - unsigned access_size; unsigned i; /* FIXME: support unaligned access? */ - access_size = MAX(MIN(size, mr->max_access_size), mr->min_access_size); access_mask = -1ULL >> (64 - access_size * 8); if (memory_region_big_endian(mr)) { for (i = 0; i < size; i += access_size) { @@ -929,13 +928,32 @@ static uint64_t memory_region_dispatch_read1(MemoryRegion *mr, hwaddr addr, unsigned size) { + unsigned access_size; uint64_t data = 0; + if (size < mr->min_access_size) { + access_size = mr->min_access_size; + goto adjusted; + } + if (size > mr->max_access_size) { + access_size = mr->max_access_size; + goto adjusted; + } + + if (mr->ops->read) { + data = mr->ops->read(mr->opaque, addr, size); + } else { + data = mr->ops->old_mmio.read[ctz32(size)](mr->opaque, addr); + } + trace_memory_region_ops_read(mr, addr, data, size); + return data; + +adjusted: if (mr->ops->read) { - access_with_adjusted_size(addr, &data, size, + access_with_adjusted_size(addr, &data, size, access_size, memory_region_read_accessor, mr); } else { - access_with_adjusted_size(addr, &data, size, + access_with_adjusted_size(addr, &data, size, access_size, memory_region_oldmmio_read_accessor, mr); } @@ -957,6 +975,39 @@ static bool memory_region_dispatch_read(MemoryRegion *mr, return false; } +static void memory_region_dispatch_write1(MemoryRegion *mr, + hwaddr addr, + uint64_t 
data, + unsigned size) +{ + unsigned access_size; + if (size < mr->min_access_size) { + access_size = mr->min_access_size; + goto adjusted; + } + if (size > mr->max_access_size) { + access_size = mr->max_access_size; + goto adjusted; + } + + trace_memory_region_ops_write(mr, addr, data, size); + if (mr->ops->write) { + mr->ops->write(mr->opaque, addr, data, size); + } else { + mr->ops->old_mmio.write[ctz32(size)](mr->opaque, addr, data); + } + return; + +adjusted: + if (mr->ops->write) { + access_with_adjusted_size(addr, &data, size, access_size, + memory_region_write_accessor, mr); + } else { + access_with_adjusted_size(addr, &data, size, access_size, + memory_region_oldmmio_write_accessor, mr); + } +} + static bool memory_region_dispatch_write(MemoryRegion *mr, hwaddr addr, uint64_t data, @@ -968,14 +1019,7 @@ static bool memory_region_dispatch_write(MemoryRegion *mr, } adjust_endianness(mr, &data, size); - - if (mr->ops->write) { - access_with_adjusted_size(addr, &data, size, - memory_region_write_accessor, mr); - } else { - access_with_adjusted_size(addr, &data, size, - memory_region_oldmmio_write_accessor, mr); - } + memory_region_dispatch_write1(mr, addr, data, size); return false; } -- 1.8.4.2 >From c65bf3cd93264eac93bbfe3a5974b8d15a1599b0 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini <pbonz...@redhat.com> Date: Fri, 8 Nov 2013 11:49:52 +0100 Subject: [PATCH 3/4] memory: hoist coalesced MMIO flush No need to flush the coalesced MMIO buffer multiple times when combining multiple accesses into one. 
Signed-off-by: Paolo Bonzini <pbonz...@redhat.com> --- memory.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/memory.c b/memory.c index 1ade19c..495e693 100644 --- a/memory.c +++ b/memory.c @@ -401,9 +401,6 @@ static void memory_region_read_accessor(MemoryRegion *mr, { uint64_t tmp; - if (mr->flush_coalesced_mmio) { - qemu_flush_coalesced_mmio_buffer(); - } tmp = mr->ops->read(mr->opaque, addr, size); trace_memory_region_ops_read(mr, addr, tmp, size); *value |= (tmp & mask) << shift; @@ -432,9 +429,6 @@ static void memory_region_write_accessor(MemoryRegion *mr, { uint64_t tmp; - if (mr->flush_coalesced_mmio) { - qemu_flush_coalesced_mmio_buffer(); - } tmp = (*value >> shift) & mask; trace_memory_region_ops_write(mr, addr, tmp, size); mr->ops->write(mr->opaque, addr, tmp, size); @@ -965,6 +959,10 @@ static bool memory_region_dispatch_read(MemoryRegion *mr, uint64_t *pval, unsigned size) { + if (mr->flush_coalesced_mmio) { + qemu_flush_coalesced_mmio_buffer(); + } + if (!memory_region_access_valid(mr, addr, size, false)) { *pval = unassigned_mem_read(mr, addr, size); return true; @@ -1013,6 +1011,10 @@ static bool memory_region_dispatch_write(MemoryRegion *mr, uint64_t data, unsigned size) { + if (mr->flush_coalesced_mmio) { + qemu_flush_coalesced_mmio_buffer(); + } + if (!memory_region_access_valid(mr, addr, size, true)) { unassigned_mem_write(mr, addr, data, size); return true; -- 1.8.4.2 >From d13d7f894cb4670568a0bd45d592a9321d9d5dab Mon Sep 17 00:00:00 2001 From: Paolo Bonzini <pbonz...@redhat.com> Date: Fri, 8 Nov 2013 11:48:11 +0100 Subject: [PATCH 4/4] memory: small tweaks Make adjust_endianness inline, and do not use a ctz instruction when a shift will do. 
Signed-off-by: Paolo Bonzini <pbonz...@redhat.com> --- memory.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/memory.c b/memory.c index 495e693..d3b0dce 100644 --- a/memory.c +++ b/memory.c @@ -357,7 +357,7 @@ static bool memory_region_wrong_endianness(MemoryRegion *mr) #endif } -static void adjust_endianness(MemoryRegion *mr, uint64_t *data, unsigned size) +static inline void adjust_endianness(MemoryRegion *mr, uint64_t *data, unsigned size) { if (memory_region_wrong_endianness(mr)) { switch (size) { @@ -378,6 +378,11 @@ static void adjust_endianness(MemoryRegion *mr, uint64_t *data, unsigned size) } } +static inline int ctz3(unsigned size) +{ + return size >> 1; +} + static void memory_region_oldmmio_read_accessor(MemoryRegion *mr, hwaddr addr, uint64_t *value, @@ -387,7 +392,7 @@ static void memory_region_oldmmio_read_accessor(MemoryRegion *mr, { uint64_t tmp; - tmp = mr->ops->old_mmio.read[ctz32(size)](mr->opaque, addr); + tmp = mr->ops->old_mmio.read[ctz3(size)](mr->opaque, addr); trace_memory_region_ops_read(mr, addr, tmp, size); *value |= (tmp & mask) << shift; } @@ -417,7 +422,7 @@ static void memory_region_oldmmio_write_accessor(MemoryRegion *mr, tmp = (*value >> shift) & mask; trace_memory_region_ops_write(mr, addr, tmp, size); - mr->ops->old_mmio.write[ctz32(size)](mr->opaque, addr, tmp); + mr->ops->old_mmio.write[ctz3(size)](mr->opaque, addr, tmp); } static void memory_region_write_accessor(MemoryRegion *mr, @@ -937,7 +942,7 @@ static uint64_t memory_region_dispatch_read1(MemoryRegion *mr, if (mr->ops->read) { data = mr->ops->read(mr->opaque, addr, size); } else { - data = mr->ops->old_mmio.read[ctz32(size)](mr->opaque, addr); + data = mr->ops->old_mmio.read[ctz3(size)](mr->opaque, addr); } trace_memory_region_ops_read(mr, addr, data, size); return data; @@ -992,7 +997,7 @@ static void memory_region_dispatch_write1(MemoryRegion *mr, if (mr->ops->write) { mr->ops->write(mr->opaque, addr, data, size); } else { - 
mr->ops->old_mmio.write[ctz32(size)](mr->opaque, addr, data); + mr->ops->old_mmio.write[ctz3(size)](mr->opaque, addr, data); } return; -- 1.8.4.2