Re: [Libguestfs] [PATCH libnbd] copy: Implement destination preferred block size
On Fri, Jan 28, 2022 at 10:39 PM Richard W.M. Jones wrote: > > [NB: I think this is a failed attempt, so shouldn't go upstream, and > doesn't need to be reviewed.] > > When nbdcopy writes to an NBD server it ignores the server's > minimum/preferred block size. This hasn't caused a problem til now, > but it turns out that in at least one case we care about (writing to > qemu-nbd backed by a compressed qcow2 file) we must obey the minimum & > preferred block size of 64K. > > For the longer story on that see this thread: > https://lists.nongnu.org/archive/html/qemu-devel/2022-01/threads.html#06108 > > This patch attempts to fix this. The uncontroversial part of this > patch adds a global "blocksize" variable which is the destination > preferred block size. > > The tricky part of this patch tries to ensure that writes to the > destination obey this block size constraint. > > Since nbdcopy is driven by the extent map read from the source, the > theory behind this implementation is that we read the extent map and > then try to "adjust" it so that extents which are not aligned to the > block size grow, shrink or are deleted. It proved to be very > difficult to get that right, but you can see the implementation in the > new function "adjust_extents_for_blocksize". > > Unfortunately not only is this difficult to implement, but the theory > is wrong. Read requests are actually split up into smaller > subrequests on write (look for "copy_subcommand" in > multi-thread-copying.c). So this doesn't really solve the problem. I think the theory is right - but this should be solved in 2 steps. The first step is to adapt the extent map to the minimum block size. This generates valid read requests that are always aligned to the minimum block size. 
The read replies are sparsified using --sparse (4k default), but this value can be adjusted to the destination block size automatically - we can treat this as a hint, or document that the value will be aligned to the destination minimum block size. > So I think in my second version I will look at adjusting the NBD > destination driver (nbd-ops.c) directly so that it turns unaligned > writes into buffered read/modify/write operations (which can be > optimized quite a lot because we understand the write pattern and know > that the main program doesn't go backwards within blocks). Doing read-modify-write in nbdcopy sounds like a bad idea. This is best done closer to the storage, avoiding reading blocks from qemu-nbd to nbdcopy just to write them back. Nir ___ Libguestfs mailing list Libguestfs@redhat.com https://listman.redhat.com/mailman/listinfo/libguestfs
Re: [Libguestfs] [PATCH libnbd] copy: Implement destination preferred block size
On Fri, Jan 28, 2022 at 10:37 PM Richard W.M. Jones wrote: > > For destinations, especially NBD, which have a preferred block size, > adjust our write strategy so we always write whole blocks / zeroes of > the preferred size. > --- > copy/file-ops.c | 8 > copy/main.c | 82 - > copy/multi-thread-copying.c | 2 + > copy/nbd-ops.c | 21 ++ > copy/nbdcopy.h | 5 +++ > copy/null-ops.c | 7 > copy/pipe-ops.c | 7 > copy/synch-copying.c| 2 + > 8 files changed, 132 insertions(+), 2 deletions(-) > > diff --git a/copy/file-ops.c b/copy/file-ops.c > index 847043417..3daf55484 100644 > --- a/copy/file-ops.c > +++ b/copy/file-ops.c > @@ -369,6 +369,13 @@ file_is_read_only (struct rw *rw) >return false; > } > > +static uint64_t > +file_get_preferred_block_size (struct rw *rw) > +{ > + /* XXX Could return the filesystem block size here. */ > + return 4096; > +} > + > static bool > file_can_extents (struct rw *rw) > { > @@ -726,6 +733,7 @@ static struct rw_ops file_ops = { >.ops_name = "file_ops", >.close = file_close, >.is_read_only = file_is_read_only, > + .get_preferred_block_size = file_get_preferred_block_size, >.can_extents = file_can_extents, >.can_multi_conn = file_can_multi_conn, >.start_multi_conn = file_start_multi_conn, > diff --git a/copy/main.c b/copy/main.c > index 67788b5d9..82537bafc 100644 > --- a/copy/main.c > +++ b/copy/main.c > @@ -38,12 +38,15 @@ > > #include > > +#include "isaligned.h" > #include "ispowerof2.h" > #include "human-size.h" > +#include "rounding.h" > #include "version.h" > #include "nbdcopy.h" > > bool allocated; /* --allocated flag */ > +uint64_t blocksize; /* destination block size */ > unsigned connections = 4; /* --connections */ > bool destination_is_zero; /* --destination-is-zero flag */ > bool extents = true;/* ! --no-extents flag */ > @@ -367,6 +370,29 @@ main (int argc, char *argv[]) >if (threads < connections) > connections = threads; > > + /* Check the destination preferred block size. 
*/ > + blocksize = dst->ops->get_preferred_block_size (dst); > + if (!is_power_of_2 (blocksize)) { > +fprintf (stderr, "%s: error: " > + "destination block size is not a power of 2: %" PRIu64 "\n", > + prog, blocksize); > +exit (EXIT_FAILURE); > + } > + if (request_size < blocksize) { > +fprintf (stderr, "%s: error: " > + "request size (%u) < destination block size (%" PRIu64 "), " > + "use the --request-size option to pick a larger request size" > + "\n", > + prog, request_size, blocksize); > +exit (EXIT_FAILURE); > + } > + if (blocksize > THREAD_WORK_SIZE) { > +fprintf (stderr, "%s: error: " > + "destination block size is too large for nbdcopy: %" PRIu64 > "\n", > + prog, blocksize); > +exit (EXIT_FAILURE); > + } > + >/* Truncate the destination to the same size as the source. Only > * has an effect on regular files. > */ > @@ -392,9 +418,10 @@ main (int argc, char *argv[]) > print_rw (src, "nbdcopy: src", stderr); > print_rw (dst, "nbdcopy: dst", stderr); > fprintf (stderr, "nbdcopy: connections=%u requests=%u threads=%u " > - "synchronous=%s\n", > + "synchronous=%s blocksize=%" PRIu64 "\n", > connections, max_requests, threads, > - synchronous ? "true" : "false"); > + synchronous ? "true" : "false", > + blocksize); >} > >/* If multi-conn is enabled on either side, then at this point we > @@ -537,6 +564,57 @@ default_get_extents (struct rw *rw, uintptr_t index, >} > } > > +/* Reading extents from the source drives writes and zeroes to the > + * destination. We need to adjust the extents list so that any > + * extents smaller than the destination block size are eliminated. > + */ > +void > +adjust_extents_for_blocksize (extent_list *exts) > +{ > + size_t i, j; > + uint64_t end; > + > + if (blocksize <= 1) > +return; > + > + /* Note we iterate [0..len-2] because the very last extent is > + * permitted to be shorter than a whole block. 
> + */ > + for (i = 0; i < exts->len - 1; ++i) { > +assert (IS_ALIGNED (exts->ptr[i].offset, blocksize)); > +if (IS_ALIGNED (exts->ptr[i].length, blocksize)) > + continue; > + > +if (!exts->ptr[i].zero) /* data */ { > + /* Data extent: round the length up to the next block, delete > + * any completely overlapped extents, and if still needed move > + * up the start of the next extent. > + */ > + exts->ptr[i].length = ROUND_UP (exts->ptr[i].length, blocksize); > + end = exts->ptr[i].offset + exts->ptr[i].length; > + > + for (j = i+1; j < exts->len && exts->ptr[j].offset < end; ++j) { > +if (ext
[Libguestfs] [PATCH libnbd 0/9] golang: Safer, easier to use, and faster AioBuffer
Improve AioBuffer to make it safer, easier to use, and faster when integrating with other Go APIs. New Go specific APIs: - MakeAioBufferZero() - creates a new buffer using calloc(), to make it easy and efficient to use a zeroed buffer. - AioBuffer.Slice() - create a slice backed by the underlying buffer without copying the contents of the buffer. Performance improvements: - FromBytes() is 3 times faster - Code using Bytes() should use Slice() now. aio_copy example shows up to 260% speedup. Improve testing: - New AioBuffer tests - New AioBuffer benchmarks Documentation: - AioBuffer is fully documented now. Nir Soffer (9): golang: tests: Add test for AioBuffer golang: aio_buffer.go: Make it safer to use golang: aio_buffer.go: Add missing documentation golang: aio_buffer.go: Add MakeAioBufferZero() golang: aio_buffer.go: Add Slice() golang: tests: Use AioBuffer.Slice() golang: aio_buffer.go: Speed up FromBytes() golang: aio_buffer.go: Benchmark copy flows golang: examples: aio_copy: Simplify using AioBuffer golang/Makefile.am | 1 + golang/aio_buffer.go | 39 - golang/examples/aio_copy/aio_copy.go | 29 +--- golang/libnbd_500_aio_pread_test.go | 2 +- golang/libnbd_510_aio_pwrite_test.go | 8 +- golang/libnbd_620_aio_buffer_test.go | 236 +++ 6 files changed, 281 insertions(+), 34 deletions(-) create mode 100644 golang/libnbd_620_aio_buffer_test.go -- 2.34.1 ___ Libguestfs mailing list Libguestfs@redhat.com https://listman.redhat.com/mailman/listinfo/libguestfs
[Libguestfs] [PATCH libnbd 1/9] golang: tests: Add test for AioBuffer
Add unit tests and benchmarks for AioBuffer. The tests are trivial but they server as running documentation, and they point out important details about the type. The benchmarks how efficient is allocation a new buffer, zeroing it, and interfacing with Go code. This tests will also ensure that we don't break anything by the next changes. To run the benchmarks use: $ go test -run=xxx -bench=. goos: linux goarch: amd64 pkg: libguestfs.org/libnbd cpu: Intel(R) Core(TM) i7-10850H CPU @ 2.70GHz BenchmarkMakeAioBuffer-126871759 157.2 ns/op BenchmarkAioBufferZero-12 17551 69552 ns/op BenchmarkFromBytes-12 9632139112 ns/op BenchmarkAioBufferBytes-12 69375 16410 ns/op PASS ok libguestfs.org/libnbd 5.843s Signed-off-by: Nir Soffer --- golang/Makefile.am | 1 + golang/libnbd_620_aio_buffer_test.go | 104 +++ 2 files changed, 105 insertions(+) create mode 100644 golang/libnbd_620_aio_buffer_test.go diff --git a/golang/Makefile.am b/golang/Makefile.am index 10fb8934..ae0486dd 100644 --- a/golang/Makefile.am +++ b/golang/Makefile.am @@ -37,20 +37,21 @@ source_files = \ libnbd_300_get_size_test.go \ libnbd_400_pread_test.go \ libnbd_405_pread_structured_test.go \ libnbd_410_pwrite_test.go \ libnbd_460_block_status_test.go \ libnbd_500_aio_pread_test.go \ libnbd_510_aio_pwrite_test.go \ libnbd_590_aio_copy_test.go \ libnbd_600_debug_callback_test.go \ libnbd_610_error_test.go \ + libnbd_620_aio_buffer_test.go \ $(NULL) generator_built = \ bindings.go \ closures.go \ wrappers.go \ wrappers.h \ $(NULL) EXTRA_DIST = \ diff --git a/golang/libnbd_620_aio_buffer_test.go b/golang/libnbd_620_aio_buffer_test.go new file mode 100644 index ..2632f87f --- /dev/null +++ b/golang/libnbd_620_aio_buffer_test.go @@ -0,0 +1,104 @@ +/* libnbd golang tests + * Copyright (C) 2013-2021 Red Hat Inc. 
+ * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +package libnbd + +import ( + "bytes" + "testing" +) + +func TestAioBuffer(t *testing.T) { + /* Create a buffer with unitinialized backing array. */ + buf := MakeAioBuffer(uint(32)) + defer buf.Free() + + /* Initialize backing array contents. */ + for i := uint(0); i < buf.Size; i++ { + *buf.Get(i) = 0 + } + + /* Create a slice by copying the backing arrary contents into Go memory. */ + b := buf.Bytes() + + zeroes := make([]byte, 32) + if !bytes.Equal(b, zeroes) { + t.Fatalf("Expected %v, got %v", zeroes, buf.Bytes()) + } + + /* Modifying returned slice does not modify the buffer. */ + for i := 0; i < len(b); i++ { + b[i] = 42 + } + + /* Bytes() still returns zeroes. */ + if !bytes.Equal(buf.Bytes(), zeroes) { + t.Fatalf("Expected %v, got %v", zeroes, buf.Bytes()) + } + + /* Create a nother buffer from Go slice. */ + buf2 := FromBytes(zeroes) + defer buf2.Free() + + if !bytes.Equal(buf2.Bytes(), zeroes) { + t.Fatalf("Expected %v, got %v", zeroes, buf2.Bytes()) + } +} + +// Typical buffer size. +const bufferSize uint = 256 * 1024 + +// Benchmark creating uninitilized buffer. 
+func BenchmarkMakeAioBuffer(b *testing.B) { + for i := 0; i < b.N; i++ { + buf := MakeAioBuffer(bufferSize) + buf.Free() + } +} + +// Benchmark zeroing a buffer. +func BenchmarkAioBufferZero(b *testing.B) { + for i := 0; i < b.N; i++ { + buf := MakeAioBuffer(bufferSize) + for i := uint(0); i < bufferSize; i++ { + *buf.Get(i) = 0 + } + buf.Free() + } +} + +// Benchmark creating a buffer by coping a Go slice. +func BenchmarkFromBytes(b *testing.B) { + for i := 0; i < b.N; i++ { + zeroes := make([]byte, bufferSize) + buf := FromBytes(zeroes) + buf.Free() + } +} + +// Benchmark creating a slice by copying the contents. +func BenchmarkAioBufferBytes(b *te
[Libguestfs] [PATCH libnbd 2/9] golang: aio_buffer.go: Make it safer to use
If a Go program tries to use AioBuffer after calling AioBuffer.Free(), the program may silently corrupt data, accessing memory that does not belong to the buffer any more, or segfault if the address is not mapped. In the worst case, it can corrupt memory silently. Calling Free() twice may silently free unrelated memory. Make the buffer safer to use by Freeing only on the first call and setting the pointer to nil. This makes multiple calls to Free() harmless, just like the underlying C.free(). Trying to access Bytes() and Get() after calling Free() will always panic now, revealing the bug in the program. Trying to use AioBuffer with libnbd API will likely segfault and panic. I did not try to test this. Signed-off-by: Nir Soffer --- golang/aio_buffer.go | 5 +++- golang/libnbd_620_aio_buffer_test.go | 41 2 files changed, 45 insertions(+), 1 deletion(-) diff --git a/golang/aio_buffer.go b/golang/aio_buffer.go index 2bc69a01..2b77d6ee 100644 --- a/golang/aio_buffer.go +++ b/golang/aio_buffer.go @@ -46,20 +46,23 @@ func MakeAioBuffer(size uint) AioBuffer { func FromBytes(buf []byte) AioBuffer { size := len(buf) ret := MakeAioBuffer(uint(size)) for i := 0; i < len(buf); i++ { *ret.Get(uint(i)) = buf[i] } return ret } func (b *AioBuffer) Free() { - C.free(b.P) + if b.P != nil { + C.free(b.P) + b.P = nil + } } func (b *AioBuffer) Bytes() []byte { return C.GoBytes(b.P, C.int(b.Size)) } func (b *AioBuffer) Get(i uint) *byte { return (*byte)(unsafe.Pointer(uintptr(b.P) + uintptr(i))) } diff --git a/golang/libnbd_620_aio_buffer_test.go b/golang/libnbd_620_aio_buffer_test.go index 2632f87f..89badfc6 100644 --- a/golang/libnbd_620_aio_buffer_test.go +++ b/golang/libnbd_620_aio_buffer_test.go @@ -53,20 +53,61 @@ func TestAioBuffer(t *testing.T) { /* Create a nother buffer from Go slice. 
*/ buf2 := FromBytes(zeroes) defer buf2.Free() if !bytes.Equal(buf2.Bytes(), zeroes) { t.Fatalf("Expected %v, got %v", zeroes, buf2.Bytes()) } } +func TestAioBufferFree(t *testing.T) { + buf := MakeAioBuffer(uint(32)) + + /* Free the underlying C array. */ + buf.Free() + + /* And clear the pointer. */ + if buf.P != nil { + t.Fatal("Dangling pointer after Free()") + } + + /* Additional Free does nothing. */ + buf.Free() +} + +func TestAioBufferBytesAfterFree(t *testing.T) { + buf := MakeAioBuffer(uint(32)) + buf.Free() + + defer func() { + if r := recover(); r == nil { + t.Fatal("Did not recover from panic calling Bytes() after Free()") + } + }() + + buf.Bytes() +} + +func TestAioBufferGetAfterFree(t *testing.T) { + buf := MakeAioBuffer(uint(32)) + buf.Free() + + defer func() { + if r := recover(); r == nil { + t.Fatal("Did not recover from panic calling Get() after Free()") + } + }() + + *buf.Get(0) = 42 +} + // Typical buffer size. const bufferSize uint = 256 * 1024 // Benchmark creating uninitilized buffer. func BenchmarkMakeAioBuffer(b *testing.B) { for i := 0; i < b.N; i++ { buf := MakeAioBuffer(bufferSize) buf.Free() } } -- 2.34.1 ___ Libguestfs mailing list Libguestfs@redhat.com https://listman.redhat.com/mailman/listinfo/libguestfs
[Libguestfs] [PATCH libnbd 3/9] golang: aio_buffer.go: Add missing documentation
Add standard function documentation comments. The documentation should be available here: https://pkg.go.dev/libguestfs.org/libnbd#AioBuffer Signed-off-by: Nir Soffer --- golang/aio_buffer.go | 12 1 file changed, 12 insertions(+) diff --git a/golang/aio_buffer.go b/golang/aio_buffer.go index 2b77d6ee..8f33f500 100644 --- a/golang/aio_buffer.go +++ b/golang/aio_buffer.go @@ -32,37 +32,49 @@ package libnbd import "C" import "unsafe" /* Asynchronous I/O buffer. */ type AioBuffer struct { Punsafe.Pointer Size uint } +// MakeAioBuffer makes a new buffer backed by an unitilialized C allocated +// array. func MakeAioBuffer(size uint) AioBuffer { return AioBuffer{C.malloc(C.ulong(size)), size} } +// FromBytes makes a new buffer backed by a C allocated array, initialized by +// copying the given Go slice. func FromBytes(buf []byte) AioBuffer { size := len(buf) ret := MakeAioBuffer(uint(size)) for i := 0; i < len(buf); i++ { *ret.Get(uint(i)) = buf[i] } return ret } +// Free deallocates the underlying C allocated array. Using the buffer after +// Free() will panic. func (b *AioBuffer) Free() { if b.P != nil { C.free(b.P) b.P = nil } } +// Bytes copies the underlying C array to Go allocated memory and return a +// slice. Modifying the returned slice does not modify the unerlying buffer +// backking array. func (b *AioBuffer) Bytes() []byte { return C.GoBytes(b.P, C.int(b.Size)) } +// Get returns a pointer to a byte in the underlying C array. The pointer can +// be used to modify the underlying array. The pointer must not be used after +// caling Free(). func (b *AioBuffer) Get(i uint) *byte { return (*byte)(unsafe.Pointer(uintptr(b.P) + uintptr(i))) } -- 2.34.1 ___ Libguestfs mailing list Libguestfs@redhat.com https://listman.redhat.com/mailman/listinfo/libguestfs
[Libguestfs] [PATCH libnbd 4/9] golang: aio_buffer.go: Add MakeAioBufferZero()
Make it easy to create a zeroed buffer via calloc(), preventing leaking sensitive info from the heap. Benchmarking show that creating a zeroed buffer is much slower compared with uninitialized buffer, but much faster compared with manually initializing the buffer with a loop. BenchmarkMakeAioBuffer-127252674 148.1 ns/op BenchmarkMakeAioBufferZero-12 262107 4181 ns/op BenchmarkAioBufferZero-12 17581 68759 ns/op It is interesting that creating a zeroed buffer is 3 times faster compared with making a new []byte slice: BenchmarkMakeAioBufferZero-12 247710 4440 ns/op BenchmarkMakeByteSlice-12 84117 13733 ns/op Signed-off-by: Nir Soffer --- golang/aio_buffer.go | 6 ++ golang/libnbd_620_aio_buffer_test.go | 16 2 files changed, 22 insertions(+) diff --git a/golang/aio_buffer.go b/golang/aio_buffer.go index 8f33f500..dcb036a5 100644 --- a/golang/aio_buffer.go +++ b/golang/aio_buffer.go @@ -38,20 +38,26 @@ type AioBuffer struct { Punsafe.Pointer Size uint } // MakeAioBuffer makes a new buffer backed by an unitilialized C allocated // array. func MakeAioBuffer(size uint) AioBuffer { return AioBuffer{C.malloc(C.ulong(size)), size} } +// MakeAioBuffer makes a new buffer backed by a C allocated array. The +// underlying buffer is set to zero. +func MakeAioBufferZero(size uint) AioBuffer { + return AioBuffer{C.calloc(C.ulong(1), C.ulong(size)), size} +} + // FromBytes makes a new buffer backed by a C allocated array, initialized by // copying the given Go slice. func FromBytes(buf []byte) AioBuffer { size := len(buf) ret := MakeAioBuffer(uint(size)) for i := 0; i < len(buf); i++ { *ret.Get(uint(i)) = buf[i] } return ret } diff --git a/golang/libnbd_620_aio_buffer_test.go b/golang/libnbd_620_aio_buffer_test.go index 89badfc6..53d6233b 100644 --- a/golang/libnbd_620_aio_buffer_test.go +++ b/golang/libnbd_620_aio_buffer_test.go @@ -51,20 +51,28 @@ func TestAioBuffer(t *testing.T) { t.Fatalf("Expected %v, got %v", zeroes, buf.Bytes()) } /* Create a nother buffer from Go slice. 
*/ buf2 := FromBytes(zeroes) defer buf2.Free() if !bytes.Equal(buf2.Bytes(), zeroes) { t.Fatalf("Expected %v, got %v", zeroes, buf2.Bytes()) } + + /* Crated a zeroed buffer. */ + buf3 := MakeAioBufferZero(uint(32)) + defer buf.Free() + + if !bytes.Equal(buf3.Bytes(), zeroes) { + t.Fatalf("Expected %v, got %v", zeroes, buf2.Bytes()) + } } func TestAioBufferFree(t *testing.T) { buf := MakeAioBuffer(uint(32)) /* Free the underlying C array. */ buf.Free() /* And clear the pointer. */ if buf.P != nil { @@ -105,20 +113,28 @@ func TestAioBufferGetAfterFree(t *testing.T) { const bufferSize uint = 256 * 1024 // Benchmark creating uninitilized buffer. func BenchmarkMakeAioBuffer(b *testing.B) { for i := 0; i < b.N; i++ { buf := MakeAioBuffer(bufferSize) buf.Free() } } +// Benchmark creating zeroed buffer. +func BenchmarkMakeAioBufferZero(b *testing.B) { + for i := 0; i < b.N; i++ { + buf := MakeAioBufferZero(bufferSize) + buf.Free() + } +} + // Benchmark zeroing a buffer. func BenchmarkAioBufferZero(b *testing.B) { for i := 0; i < b.N; i++ { buf := MakeAioBuffer(bufferSize) for i := uint(0); i < bufferSize; i++ { *buf.Get(i) = 0 } buf.Free() } } -- 2.34.1 ___ Libguestfs mailing list Libguestfs@redhat.com https://listman.redhat.com/mailman/listinfo/libguestfs
[Libguestfs] [PATCH libnbd 5/9] golang: aio_buffer.go: Add Slice()
AioBuffer.Bytes() cannot be used for copying images from NBD to other APIs because it copies the entire image. Add a new Slice() function, creating a slice backed by the underlying buffer. Using Slice() is efficient, but less safe, like Get(). The returned slice must be used only before calling Free(). This should not be an issue with typical code. Testing shows that Slice() is much faster than Bytes() for a typical 256k buffer: BenchmarkAioBufferBytes-12 86616 16529 ns/op BenchmarkAioBufferSlice-12 10 0.4630 ns/op I modified the aio_copy example to use AioBuffer and compiled 2 versions, one using Bytes() and one using Slice(). When copying a 6g fully allocated image, the version using Slice() is 1.6 times faster. $ hyperfine -r3 "./aio_copy-bytes $SRC >/dev/null" "./aio_copy-slice $SRC >/dev/null" Benchmark 1: ./aio_copy-bytes nbd+unix:///?socket=/tmp/src.sock >/dev/null Time (mean ± σ): 3.357 s ± 0.039 s[User: 2.656 s, System: 1.162 s] Range (min … max):3.313 s … 3.387 s3 runs Benchmark 2: ./aio_copy-slice nbd+unix:///?socket=/tmp/src.sock >/dev/null Time (mean ± σ): 2.046 s ± 0.009 s[User: 0.423 s, System: 0.892 s] Range (min … max):2.037 s … 2.055 s3 runs Summary './aio_copy-slice nbd+unix:///?socket=/tmp/src.sock >/dev/null' ran 1.64 ± 0.02 times faster than './aio_copy-bytes nbd+unix:///?socket=/tmp/src.sock >/dev/null' When copying a 6g empty image (qemu-nbd sends one hole chunk for every read), the version using Slice() is 2.6 times faster. 
$ hyperfine -r3 "./aio_copy-bytes $SRC >/dev/null" "./aio_copy-slice $SRC >/dev/null" Benchmark 1: ./aio_copy-bytes nbd+unix:///?socket=/tmp/src.sock >/dev/null Time (mean ± σ): 1.210 s ± 0.023 s[User: 1.428 s, System: 0.345 s] Range (min … max):1.191 s … 1.235 s3 runs Benchmark 2: ./aio_copy-slice nbd+unix:///?socket=/tmp/src.sock >/dev/null Time (mean ± σ): 461.4 ms ± 13.1 ms[User: 394.2 ms, System: 76.6 ms] Range (min … max): 450.6 ms … 476.0 ms3 runs Summary './aio_copy-slice nbd+unix:///?socket=/tmp/src.sock >/dev/null' ran 2.62 ± 0.09 times faster than './aio_copy-bytes nbd+unix:///?socket=/tmp/src.sock >/dev/null' Signed-off-by: Nir Soffer --- golang/aio_buffer.go | 9 ++ golang/libnbd_620_aio_buffer_test.go | 41 2 files changed, 50 insertions(+) diff --git a/golang/aio_buffer.go b/golang/aio_buffer.go index dcb036a5..788fe1a8 100644 --- a/golang/aio_buffer.go +++ b/golang/aio_buffer.go @@ -71,16 +71,25 @@ func (b *AioBuffer) Free() { } } // Bytes copies the underlying C array to Go allocated memory and return a // slice. Modifying the returned slice does not modify the unerlying buffer // backking array. func (b *AioBuffer) Bytes() []byte { return C.GoBytes(b.P, C.int(b.Size)) } +// Slice creates a slice backed by the underlying C array. The slice can be +// used to access or modify the contents of the underlying array. The slice +// must not be used after caling Free(). +func (b *AioBuffer) Slice() []byte { + // See https://github.com/golang/go/wiki/cgo#turning-c-arrays-into-go-slices + // TODO: Use unsafe.Slice() when we require Go 1.17. + return (*[1<<30]byte)(b.P)[:b.Size:b.Size] +} + // Get returns a pointer to a byte in the underlying C array. The pointer can // be used to modify the underlying array. The pointer must not be used after // caling Free(). 
func (b *AioBuffer) Get(i uint) *byte { return (*byte)(unsafe.Pointer(uintptr(b.P) + uintptr(i))) } diff --git a/golang/libnbd_620_aio_buffer_test.go b/golang/libnbd_620_aio_buffer_test.go index 53d6233b..38c6c74d 100644 --- a/golang/libnbd_620_aio_buffer_test.go +++ b/golang/libnbd_620_aio_buffer_test.go @@ -44,20 +44,35 @@ func TestAioBuffer(t *testing.T) { /* Modifying returned slice does not modify the buffer. */ for i := 0; i < len(b); i++ { b[i] = 42 } /* Bytes() still returns zeroes. */ if !bytes.Equal(buf.Bytes(), zeroes) { t.Fatalf("Expected %v, got %v", zeroes, buf.Bytes()) } + /* Creating a slice without copying the underlhing buffer. */ + s := buf.Slice() + if !bytes.Equal(s, zeroes) { + t.Fatalf("Expected %v, got %v", zeroes, s) + } + + /* Modifing the slice modifies the underlying buffer. */ + for i := 0; i < len(s); i++ { + s[i] = 42 + } + + if !bytes.Equal(buf.Slice(), s) { + t.Fatalf("Expected %v, got %v", s, buf.Slice()) + } + /* Create a nother buffer from Go slice. */ buf2 := FromBytes(zeroes) defer buf2.Free() if !bytes.Equal(buf2.Bytes(), zeroes) { t.Fatalf("Expected %v, got %v", zeroes, buf2.Bytes()) } /* Crated a zeroed buffer. */ buf3 := MakeA
[Libguestfs] [PATCH libnbd 6/9] golang: tests: Use AioBuffer.Slice()
Slice() is easier to use and faster than Get() or Bytes(). Lets use the new way. Signed-off-by: Nir Soffer --- golang/libnbd_500_aio_pread_test.go | 2 +- golang/libnbd_510_aio_pwrite_test.go | 8 +--- golang/libnbd_620_aio_buffer_test.go | 8 +--- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/golang/libnbd_500_aio_pread_test.go b/golang/libnbd_500_aio_pread_test.go index 0811378c..bd0208ef 100644 --- a/golang/libnbd_500_aio_pread_test.go +++ b/golang/libnbd_500_aio_pread_test.go @@ -55,14 +55,14 @@ func Test500AioPRead(t *testing.T) { } h.Poll(-1) } // Expected data. expected := make([]byte, 512) for i := 0; i < 512; i += 8 { binary.BigEndian.PutUint64(expected[i:i+8], uint64(i)) } - if !bytes.Equal(buf.Bytes(), expected) { + if !bytes.Equal(buf.Slice(), expected) { t.Fatalf("did not read expected data") } } diff --git a/golang/libnbd_510_aio_pwrite_test.go b/golang/libnbd_510_aio_pwrite_test.go index 56cdcb05..493159f2 100644 --- a/golang/libnbd_510_aio_pwrite_test.go +++ b/golang/libnbd_510_aio_pwrite_test.go @@ -32,23 +32,25 @@ func Test510AioPWrite(t *testing.T) { "nbdkit", "-s", "--exit-with-parent", "-v", "memory", "size=512", }) if err != nil { t.Fatalf("could not connect: %s", err) } /* Write a pattern and read it back. */ buf := MakeAioBuffer(512) defer buf.Free() + + s := buf.Slice() for i := 0; i < 512; i += 2 { - *buf.Get(uint(i)) = 0x55 - *buf.Get(uint(i + 1)) = 0xAA + s[i] = 0x55 + s[i+1] = 0xAA } var cookie uint64 cookie, err = h.AioPwrite(buf, 0, nil) if err != nil { t.Fatalf("%s", err) } for { var b bool b, err = h.AioCommandCompleted(cookie) @@ -62,14 +64,14 @@ func Test510AioPWrite(t *testing.T) { } /* We already tested aio_pread, let's just read the data back in the regular synchronous way. 
*/ buf2 := make([]byte, 512) err = h.Pread(buf2, 0, nil) if err != nil { t.Fatalf("%s", err) } - if !bytes.Equal(buf.Bytes(), buf2) { + if !bytes.Equal(buf.Slice(), buf2) { t.Fatalf("did not read back same data as written") } } diff --git a/golang/libnbd_620_aio_buffer_test.go b/golang/libnbd_620_aio_buffer_test.go index 38c6c74d..8f65b558 100644 --- a/golang/libnbd_620_aio_buffer_test.go +++ b/golang/libnbd_620_aio_buffer_test.go @@ -22,22 +22,23 @@ import ( "bytes" "testing" ) func TestAioBuffer(t *testing.T) { /* Create a buffer with unitinialized backing array. */ buf := MakeAioBuffer(uint(32)) defer buf.Free() /* Initialize backing array contents. */ + s := buf.Slice() for i := uint(0); i < buf.Size; i++ { - *buf.Get(i) = 0 + s[i] = 0 } /* Create a slice by copying the backing arrary contents into Go memory. */ b := buf.Bytes() zeroes := make([]byte, 32) if !bytes.Equal(b, zeroes) { t.Fatalf("Expected %v, got %v", zeroes, buf.Bytes()) } @@ -45,21 +46,21 @@ func TestAioBuffer(t *testing.T) { for i := 0; i < len(b); i++ { b[i] = 42 } /* Bytes() still returns zeroes. */ if !bytes.Equal(buf.Bytes(), zeroes) { t.Fatalf("Expected %v, got %v", zeroes, buf.Bytes()) } /* Creating a slice without copying the underlhing buffer. */ - s := buf.Slice() + s = buf.Slice() if !bytes.Equal(s, zeroes) { t.Fatalf("Expected %v, got %v", zeroes, s) } /* Modifing the slice modifies the underlying buffer. */ for i := 0; i < len(s); i++ { s[i] = 42 } if !bytes.Equal(buf.Slice(), s) { @@ -154,22 +155,23 @@ func BenchmarkMakeAioBufferZero(b *testing.B) { for i := 0; i < b.N; i++ { buf := MakeAioBufferZero(bufferSize) buf.Free() } } // Benchmark zeroing a buffer. func BenchmarkAioBufferZero(b *testing.B) { for i := 0; i < b.N; i++ { buf := MakeAioBuffer(bufferSize) + s := buf.Slice() for i := uint(0); i < bufferSize; i++ { - *buf.Get(i) = 0 + s[i] = 0 } buf.Free() } } // Benchmark creating a buffer by coping a Go slice. 
func BenchmarkFromBytes(b *testing.B) { for i := 0; i < b.N; i++ { zeroes := make([]byte, bufferSize) buf := FromBytes(zeroes) -- 2.34.1 ___ Libguestfs maili
[Libguestfs] [PATCH libnbd 7/9] golang: aio_buffer.go: Speed up FromBytes()
Using Slice() we can use builtin copy() instead of a manual loop, which is 4.6 times faster with a typical 256k buffer: Before: BenchmarkFromBytes-12 9806 111474 ns/op After: BenchmarkFromBytes-12 48193 24106 ns/op Signed-off-by: Nir Soffer --- golang/aio_buffer.go | 7 ++- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/golang/aio_buffer.go b/golang/aio_buffer.go index 788fe1a8..7960e554 100644 --- a/golang/aio_buffer.go +++ b/golang/aio_buffer.go @@ -47,25 +47,22 @@ func MakeAioBuffer(size uint) AioBuffer { // MakeAioBuffer makes a new buffer backed by a C allocated array. The // underlying buffer is set to zero. func MakeAioBufferZero(size uint) AioBuffer { return AioBuffer{C.calloc(C.ulong(1), C.ulong(size)), size} } // FromBytes makes a new buffer backed by a C allocated array, initialized by // copying the given Go slice. func FromBytes(buf []byte) AioBuffer { - size := len(buf) - ret := MakeAioBuffer(uint(size)) - for i := 0; i < len(buf); i++ { - *ret.Get(uint(i)) = buf[i] - } + ret := MakeAioBuffer(uint(len(buf))) + copy(ret.Slice(), buf) return ret } // Free deallocates the underlying C allocated array. Using the buffer after // Free() will panic. func (b *AioBuffer) Free() { if b.P != nil { C.free(b.P) b.P = nil } -- 2.34.1 ___ Libguestfs mailing list Libguestfs@redhat.com https://listman.redhat.com/mailman/listinfo/libguestfs
[Libguestfs] [PATCH libnbd 8/9] golang: aio_buffer.go: Benchmark copy flows
Add benchmark for copying a buffer using 3 strategies - reusing the same buffer, making a new uninitialized buffer per copy, and using a zeroed buffer per copy. This benchmark is the worst possible case, copying a buffer to memory. Any real I/O will be much slower hiding the overhead of allocating buffer or zeroed buffer. $ go test -run=AioBuffer -bench=Copy -benchtime=5s goos: linux goarch: amd64 pkg: libguestfs.org/libnbd cpu: Intel(R) Core(TM) i7-10850H CPU @ 2.70GHz BenchmarkAioBufferCopyBaseline-12 1142508 4523 ns/op BenchmarkAioBufferCopyMake-12 100 5320 ns/op BenchmarkAioBufferCopyMakeZero-12 728940 8218 ns/op Signed-off-by: Nir Soffer --- golang/libnbd_620_aio_buffer_test.go | 32 1 file changed, 32 insertions(+) diff --git a/golang/libnbd_620_aio_buffer_test.go b/golang/libnbd_620_aio_buffer_test.go index 8f65b558..9ab411b3 100644 --- a/golang/libnbd_620_aio_buffer_test.go +++ b/golang/libnbd_620_aio_buffer_test.go @@ -195,10 +195,42 @@ func BenchmarkAioBufferBytes(b *testing.B) { func BenchmarkAioBufferSlice(b *testing.B) { buf := MakeAioBuffer(bufferSize) defer buf.Free() var r int b.ResetTimer() for i := 0; i < b.N; i++ { r += len(buf.Slice()) } } + +var data = make([]byte, bufferSize) + +// Benchmark copying into same buffer, used as baseline for CopyMake and +// CopyMakeZero benchmarks. +func BenchmarkAioBufferCopyBaseline(b *testing.B) { + buf := MakeAioBufferZero(bufferSize) + defer buf.Free() + + b.ResetTimer() + for i := 0; i < b.N; i++ { + copy(buf.Slice(), data) + } +} + +// Benchmark overhead of making a new buffer per read. +func BenchmarkAioBufferCopyMake(b *testing.B) { + for i := 0; i < b.N; i++ { + buf := MakeAioBuffer(bufferSize) + copy(buf.Slice(), data) + buf.Free() + } +} + +// Benchmark overhead of making a new zero buffer per read. 
+func BenchmarkAioBufferCopyMakeZero(b *testing.B) { + for i := 0; i < b.N; i++ { + buf := MakeAioBufferZero(bufferSize) + copy(buf.Slice(), data) + buf.Free() + } +} -- 2.34.1 ___ Libguestfs mailing list Libguestfs@redhat.com https://listman.redhat.com/mailman/listinfo/libguestfs
[Libguestfs] [PATCH libnbd 9/9] golang: examples: aio_copy: Simplify using AioBuffer
Now that we have an efficient way to use AioBuffer, we don't need the hacks to create AioBuffer from Go slice. Benchmarking AioBuffer show that allocating a 256k buffer is practically free, so there is no need for the buffer pool. Now we allocate a new buffer per request, keep it in the command, and free it when the request is finished. Signed-off-by: Nir Soffer --- golang/examples/aio_copy/aio_copy.go | 29 +--- 1 file changed, 5 insertions(+), 24 deletions(-) diff --git a/golang/examples/aio_copy/aio_copy.go b/golang/examples/aio_copy/aio_copy.go index b6f5def1..bb20b478 100644 --- a/golang/examples/aio_copy/aio_copy.go +++ b/golang/examples/aio_copy/aio_copy.go @@ -37,53 +37,43 @@ // Example: // // ./aio_copy nbd+unix:///?socket=/tmp.nbd >/dev/null // package main import ( "container/list" "flag" "os" - "sync" "syscall" - "unsafe" "libguestfs.org/libnbd" ) var ( // These options give best performance with fast NVMe drive. requestSize = flag.Uint("request-size", 256*1024, "maximum request size in bytes") requests= flag.Uint("requests", 4, "maximum number of requests in flight") h *libnbd.Libnbd // Keeping commands in a queue ensures commands are written in the right // order, even if they complete out of order. This allows parallel reads // with non-seekable output. queue list.List - - // Buffer pool allocating buffers as needed and reusing them. - bufPool = sync.Pool{ - New: func() interface{} { - return make([]byte, *requestSize) - }, - } ) // command keeps state of single AioPread call while the read is handled by // libnbd, until the command reach the front of the queue and can be writen to // the output. type command struct { - buf[]byte - length uint + buflibnbd.AioBuffer ready bool } func main() { flag.Parse() var err error h, err = libnbd.Create() if err != nil { @@ -139,60 +129,51 @@ func waitForCompletion() { panic(err) } if inflightRequests() < start { break // A read completed. 
} } } func startRead(offset uint64, length uint) { - buf := bufPool.Get().([]byte) - - // Keep buffer in command so we can put it back into the pool when the - // command completes. - cmd := &command{buf: buf, length: length} - - // Create aio buffer from pool buffer to avoid unneeded allocation for - // every read, and unneeded copy when completing the read. - abuf := libnbd.AioBuffer{P: unsafe.Pointer(&buf[0]), Size: length} + cmd := &command{buf: libnbd.MakeAioBuffer(length)} args := libnbd.AioPreadOptargs{ CompletionCallbackSet: true, CompletionCallback: func(error *int) int { if *error != 0 { // This is not documented, but *error is errno value translated // from the the NBD server error. err := syscall.Errno(*error).Error() panic(err) } cmd.ready = true return 1 }, } - _, err := h.AioPread(abuf, offset, &args) + _, err := h.AioPread(cmd.buf, offset, &args) if err != nil { panic(err) } queue.PushBack(cmd) } func readReady() bool { return queue.Len() > 0 && queue.Front().Value.(*command).ready } func finishRead() { e := queue.Front() queue.Remove(e) cmd := e.Value.(*command) - b := cmd.buf[:cmd.length] - _, err := os.Stdout.Write(b) + _, err := os.Stdout.Write(cmd.buf.Slice()) if err != nil { panic(err) } - bufPool.Put(cmd.buf) + cmd.buf.Free() } -- 2.34.1 ___ Libguestfs mailing list Libguestfs@redhat.com https://listman.redhat.com/mailman/listinfo/libguestfs