Re: [Libguestfs] [PATCH libnbd] copy: Implement destination preferred block size

2022-01-29 Thread Nir Soffer
On Fri, Jan 28, 2022 at 10:39 PM Richard W.M. Jones  wrote:
>
> [NB: I think this is a failed attempt, so shouldn't go upstream, and
> doesn't need to be reviewed.]
>
> When nbdcopy writes to an NBD server it ignores the server's
> minimum/preferred block size.  This hasn't caused a problem til now,
> but it turns out that in at least one case we care about (writing to
> qemu-nbd backed by a compressed qcow2 file) we must obey the minimum &
> preferred block size of 64K.
>
> For the longer story on that see this thread:
> https://lists.nongnu.org/archive/html/qemu-devel/2022-01/threads.html#06108
>
> This patch attempts to fix this.  The uncontroversial part of this
> patch adds a global "blocksize" variable which is the destination
> preferred block size.
>
> The tricky part of this patch tries to ensure that writes to the
> destination obey this block size constraint.
>
> Since nbdcopy is driven by the extent map read from the source, the
> theory behind this implementation is that we read the extent map and
> then try to "adjust" it so that extents which are not aligned to the
> block size grow, shrink or are deleted.  It proved to be very
> difficult to get that right, but you can see the implementation in the
> new function "adjust_extents_for_blocksize".
>
> Unfortunately not only is this difficult to implement, but the theory
> is wrong.  Read requests are actually split up into smaller
> subrequests on write (look for "copy_subcommand" in
> multi-thread-copying.c).  So this doesn't really solve the problem.

I think the theory is right - but this should be solved in 2 steps. The first
step is to adapt the extent map to the minimum block size. This generates valid
read requests that are always aligned to the minimum block size.

The read replies are sparsified using --sparse (4k default), but this value
can be adjusted to the destination block size automatically - we can treat
this as a hint, or document that the value will be aligned to the destination
minimum block size.

> So I think in my second version I will look at adjusting the NBD
> destination driver (nbd-ops.c) directly so that it turns unaligned
> writes into buffered read/modify/write operations (which can be
> optimized quite a lot because we understand the write pattern and know
> that the main program doesn't go backwards within blocks).

Doing read-modify-write in nbdcopy sounds like a bad idea. This is best done
closer to the storage, avoiding reading blocks from qemu-nbd to nbdcopy just
to write them back.

Nir

___
Libguestfs mailing list
Libguestfs@redhat.com
https://listman.redhat.com/mailman/listinfo/libguestfs



Re: [Libguestfs] [PATCH libnbd] copy: Implement destination preferred block size

2022-01-29 Thread Nir Soffer
On Fri, Jan 28, 2022 at 10:37 PM Richard W.M. Jones  wrote:
>
> For destinations, especially NBD, which have a preferred block size,
> adjust our write strategy so we always write whole blocks / zeroes of
> the preferred size.
> ---
>  copy/file-ops.c |  8 
>  copy/main.c | 82 -
>  copy/multi-thread-copying.c |  2 +
>  copy/nbd-ops.c  | 21 ++
>  copy/nbdcopy.h  |  5 +++
>  copy/null-ops.c |  7 
>  copy/pipe-ops.c |  7 
>  copy/synch-copying.c|  2 +
>  8 files changed, 132 insertions(+), 2 deletions(-)
>
> diff --git a/copy/file-ops.c b/copy/file-ops.c
> index 847043417..3daf55484 100644
> --- a/copy/file-ops.c
> +++ b/copy/file-ops.c
> @@ -369,6 +369,13 @@ file_is_read_only (struct rw *rw)
>return false;
>  }
>
> +static uint64_t
> +file_get_preferred_block_size (struct rw *rw)
> +{
> +  /* XXX Could return the filesystem block size here. */
> +  return 4096;
> +}
> +
>  static bool
>  file_can_extents (struct rw *rw)
>  {
> @@ -726,6 +733,7 @@ static struct rw_ops file_ops = {
>.ops_name = "file_ops",
>.close = file_close,
>.is_read_only = file_is_read_only,
> +  .get_preferred_block_size = file_get_preferred_block_size,
>.can_extents = file_can_extents,
>.can_multi_conn = file_can_multi_conn,
>.start_multi_conn = file_start_multi_conn,
> diff --git a/copy/main.c b/copy/main.c
> index 67788b5d9..82537bafc 100644
> --- a/copy/main.c
> +++ b/copy/main.c
> @@ -38,12 +38,15 @@
>
>  #include 
>
> +#include "isaligned.h"
>  #include "ispowerof2.h"
>  #include "human-size.h"
> +#include "rounding.h"
>  #include "version.h"
>  #include "nbdcopy.h"
>
>  bool allocated; /* --allocated flag */
> +uint64_t blocksize; /* destination block size */
>  unsigned connections = 4;   /* --connections */
>  bool destination_is_zero;   /* --destination-is-zero flag */
>  bool extents = true;/* ! --no-extents flag */
> @@ -367,6 +370,29 @@ main (int argc, char *argv[])
>if (threads < connections)
>  connections = threads;
>
> +  /* Check the destination preferred block size. */
> +  blocksize = dst->ops->get_preferred_block_size (dst);
> +  if (!is_power_of_2 (blocksize)) {
> +fprintf (stderr, "%s: error: "
> + "destination block size is not a power of 2: %" PRIu64 "\n",
> + prog, blocksize);
> +exit (EXIT_FAILURE);
> +  }
> +  if (request_size < blocksize) {
> +fprintf (stderr, "%s: error: "
> + "request size (%u) < destination block size (%" PRIu64 "), "
> + "use the --request-size option to pick a larger request size"
> + "\n",
> + prog, request_size, blocksize);
> +exit (EXIT_FAILURE);
> +  }
> +  if (blocksize > THREAD_WORK_SIZE) {
> +fprintf (stderr, "%s: error: "
> + "destination block size is too large for nbdcopy: %" PRIu64 
> "\n",
> + prog, blocksize);
> +exit (EXIT_FAILURE);
> +  }
> +
>/* Truncate the destination to the same size as the source.  Only
> * has an effect on regular files.
> */
> @@ -392,9 +418,10 @@ main (int argc, char *argv[])
>  print_rw (src, "nbdcopy: src", stderr);
>  print_rw (dst, "nbdcopy: dst", stderr);
>  fprintf (stderr, "nbdcopy: connections=%u requests=%u threads=%u "
> - "synchronous=%s\n",
> + "synchronous=%s blocksize=%" PRIu64 "\n",
>   connections, max_requests, threads,
> - synchronous ? "true" : "false");
> + synchronous ? "true" : "false",
> + blocksize);
>}
>
>/* If multi-conn is enabled on either side, then at this point we
> @@ -537,6 +564,57 @@ default_get_extents (struct rw *rw, uintptr_t index,
>}
>  }
>
> +/* Reading extents from the source drives writes and zeroes to the
> + * destination.  We need to adjust the extents list so that any
> + * extents smaller than the destination block size are eliminated.
> + */
> +void
> +adjust_extents_for_blocksize (extent_list *exts)
> +{
> +  size_t i, j;
> +  uint64_t end;
> +
> +  if (blocksize <= 1)
> +return;
> +
> +  /* Note we iterate [0..len-2] because the very last extent is
> +   * permitted to be shorter than a whole block.
> +   */
> +  for (i = 0; i < exts->len - 1; ++i) {
> +assert (IS_ALIGNED (exts->ptr[i].offset, blocksize));
> +if (IS_ALIGNED (exts->ptr[i].length, blocksize))
> +  continue;
> +
> +if (!exts->ptr[i].zero) /* data */ {
> +  /* Data extent: round the length up to the next block, delete
> +   * any completely overlapped extents, and if still needed move
> +   * up the start of the next extent.
> +   */
> +  exts->ptr[i].length = ROUND_UP (exts->ptr[i].length, blocksize);
> +  end = exts->ptr[i].offset + exts->ptr[i].length;
> +
> +  for (j = i+1; j < exts->len && exts->ptr[j].offset < end; ++j) {
> +if (ext

[Libguestfs] [PATCH libnbd 0/9] golang: Safer, easier to use, and faster AioBuffer

2022-01-29 Thread Nir Soffer
Improve AioBuffer to make it safer, easier to use, and faster when integrating
with other Go APIs.

New Go specific APIs:

- MakeAioBufferZero() - creates a new buffer using calloc(), to make it easy
  and efficient to use a zeroed buffer.

- AioBuffer.Slice() - create a slice backed by the underlying buffer without
  copying the contents of the buffer.

Performance improvements:

- FromBytes() is 3 times faster
- Code using Bytes() should use Slice() now. aio_copy example shows up to 260%
  speedup.

Improve testing:

- New AioBuffer tests
- New AioBuffer benchmarks

Documentation:

- AioBuffer is fully documented now.

Nir Soffer (9):
  golang: tests: Add test for AioBuffer
  golang: aio_buffer.go: Make it safer to use
  golang: aio_buffer.go: Add missing documentation
  golang: aio_buffer.go: Add MakeAioBufferZero()
  golang: aio_buffer.go: Add Slice()
  golang: tests: Use AioBuffer.Slice()
  golang: aio_buffer.go: Speed up FromBytes()
  golang: aio_buffer.go: Benchmark copy flows
  golang: examples: aio_copy: Simplify using AioBuffer

 golang/Makefile.am   |   1 +
 golang/aio_buffer.go |  39 -
 golang/examples/aio_copy/aio_copy.go |  29 +---
 golang/libnbd_500_aio_pread_test.go  |   2 +-
 golang/libnbd_510_aio_pwrite_test.go |   8 +-
 golang/libnbd_620_aio_buffer_test.go | 236 +++
 6 files changed, 281 insertions(+), 34 deletions(-)
 create mode 100644 golang/libnbd_620_aio_buffer_test.go

-- 
2.34.1


___
Libguestfs mailing list
Libguestfs@redhat.com
https://listman.redhat.com/mailman/listinfo/libguestfs



[Libguestfs] [PATCH libnbd 1/9] golang: tests: Add test for AioBuffer

2022-01-29 Thread Nir Soffer
Add unit tests and benchmarks for AioBuffer. The tests are trivial but
they serve as running documentation, and they point out important
details about the type.

The benchmarks show how efficient it is to allocate a new buffer, zero
it, and interface with Go code.

These tests will also ensure that we don't break anything with the next
changes.

To run the benchmarks use:

$ go test -run=xxx -bench=.
goos: linux
goarch: amd64
pkg: libguestfs.org/libnbd
cpu: Intel(R) Core(TM) i7-10850H CPU @ 2.70GHz
BenchmarkMakeAioBuffer-126871759   157.2 ns/op
BenchmarkAioBufferZero-12  17551 69552 ns/op
BenchmarkFromBytes-12   9632139112 ns/op
BenchmarkAioBufferBytes-12 69375 16410 ns/op
PASS
ok  libguestfs.org/libnbd   5.843s

Signed-off-by: Nir Soffer 
---
 golang/Makefile.am   |   1 +
 golang/libnbd_620_aio_buffer_test.go | 104 +++
 2 files changed, 105 insertions(+)
 create mode 100644 golang/libnbd_620_aio_buffer_test.go

diff --git a/golang/Makefile.am b/golang/Makefile.am
index 10fb8934..ae0486dd 100644
--- a/golang/Makefile.am
+++ b/golang/Makefile.am
@@ -37,20 +37,21 @@ source_files = \
libnbd_300_get_size_test.go \
libnbd_400_pread_test.go \
libnbd_405_pread_structured_test.go \
libnbd_410_pwrite_test.go \
libnbd_460_block_status_test.go \
libnbd_500_aio_pread_test.go \
libnbd_510_aio_pwrite_test.go \
libnbd_590_aio_copy_test.go \
libnbd_600_debug_callback_test.go \
libnbd_610_error_test.go \
+   libnbd_620_aio_buffer_test.go \
$(NULL)
 
 generator_built = \
bindings.go \
closures.go \
wrappers.go \
wrappers.h \
$(NULL)
 
 EXTRA_DIST = \
diff --git a/golang/libnbd_620_aio_buffer_test.go 
b/golang/libnbd_620_aio_buffer_test.go
new file mode 100644
index ..2632f87f
--- /dev/null
+++ b/golang/libnbd_620_aio_buffer_test.go
@@ -0,0 +1,104 @@
+/* libnbd golang tests
+ * Copyright (C) 2013-2021 Red Hat Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+package libnbd
+
+import (
+   "bytes"
+   "testing"
+)
+
+func TestAioBuffer(t *testing.T) {
+   /* Create a buffer with unitinialized backing array. */
+   buf := MakeAioBuffer(uint(32))
+   defer buf.Free()
+
+   /* Initialize backing array contents. */
+   for i := uint(0); i < buf.Size; i++ {
+   *buf.Get(i) = 0
+   }
+
+   /* Create a slice by copying the backing arrary contents into Go 
memory. */
+   b := buf.Bytes()
+
+   zeroes := make([]byte, 32)
+   if !bytes.Equal(b, zeroes) {
+   t.Fatalf("Expected %v, got %v", zeroes, buf.Bytes())
+   }
+
+   /* Modifying returned slice does not modify the buffer. */
+   for i := 0; i < len(b); i++ {
+   b[i] = 42
+   }
+
+   /* Bytes() still returns zeroes. */
+   if !bytes.Equal(buf.Bytes(), zeroes) {
+   t.Fatalf("Expected %v, got %v", zeroes, buf.Bytes())
+   }
+
+   /* Create a nother buffer from Go slice. */
+   buf2 := FromBytes(zeroes)
+   defer buf2.Free()
+
+   if !bytes.Equal(buf2.Bytes(), zeroes) {
+   t.Fatalf("Expected %v, got %v", zeroes, buf2.Bytes())
+   }
+}
+
+// Typical buffer size.
+const bufferSize uint = 256 * 1024
+
+// Benchmark creating uninitilized buffer.
+func BenchmarkMakeAioBuffer(b *testing.B) {
+   for i := 0; i < b.N; i++ {
+   buf := MakeAioBuffer(bufferSize)
+   buf.Free()
+   }
+}
+
+// Benchmark zeroing a buffer.
+func BenchmarkAioBufferZero(b *testing.B) {
+   for i := 0; i < b.N; i++ {
+   buf := MakeAioBuffer(bufferSize)
+   for i := uint(0); i < bufferSize; i++ {
+   *buf.Get(i) = 0
+   }
+   buf.Free()
+   }
+}
+
+// Benchmark creating a buffer by coping a Go slice.
+func BenchmarkFromBytes(b *testing.B) {
+   for i := 0; i < b.N; i++ {
+   zeroes := make([]byte, bufferSize)
+   buf := FromBytes(zeroes)
+   buf.Free()
+   }
+}
+
+// Benchmark creating a slice by copying the contents.
+func BenchmarkAioBufferBytes(b *te

[Libguestfs] [PATCH libnbd 2/9] golang: aio_buffer.go: Make it safer to use

2022-01-29 Thread Nir Soffer
If a Go program tries to use AioBuffer after calling AioBuffer.Free(),
the program may silently corrupt data, accessing memory that does not
belong to the buffer any more, or segfault if the address is not mapped.
In the worst case, it can corrupt memory silently. Calling Free() twice
may silently free unrelated memory.

Make the buffer safer to use by freeing the memory only on the first
call and setting the pointer to nil. This makes multiple calls to Free()
harmless, just like the underlying C.free().

Trying to access Bytes() and Get() after calling Free() will always
panic now, revealing the bug in the program.

Trying to use AioBuffer with libnbd API will likely segfault and panic.
I did not try to test this.

Signed-off-by: Nir Soffer 
---
 golang/aio_buffer.go |  5 +++-
 golang/libnbd_620_aio_buffer_test.go | 41 
 2 files changed, 45 insertions(+), 1 deletion(-)

diff --git a/golang/aio_buffer.go b/golang/aio_buffer.go
index 2bc69a01..2b77d6ee 100644
--- a/golang/aio_buffer.go
+++ b/golang/aio_buffer.go
@@ -46,20 +46,23 @@ func MakeAioBuffer(size uint) AioBuffer {
 func FromBytes(buf []byte) AioBuffer {
size := len(buf)
ret := MakeAioBuffer(uint(size))
for i := 0; i < len(buf); i++ {
*ret.Get(uint(i)) = buf[i]
}
return ret
 }
 
 func (b *AioBuffer) Free() {
-   C.free(b.P)
+   if b.P != nil {
+   C.free(b.P)
+   b.P = nil
+   }
 }
 
 func (b *AioBuffer) Bytes() []byte {
return C.GoBytes(b.P, C.int(b.Size))
 }
 
 func (b *AioBuffer) Get(i uint) *byte {
return (*byte)(unsafe.Pointer(uintptr(b.P) + uintptr(i)))
 }
diff --git a/golang/libnbd_620_aio_buffer_test.go 
b/golang/libnbd_620_aio_buffer_test.go
index 2632f87f..89badfc6 100644
--- a/golang/libnbd_620_aio_buffer_test.go
+++ b/golang/libnbd_620_aio_buffer_test.go
@@ -53,20 +53,61 @@ func TestAioBuffer(t *testing.T) {
 
/* Create a nother buffer from Go slice. */
buf2 := FromBytes(zeroes)
defer buf2.Free()
 
if !bytes.Equal(buf2.Bytes(), zeroes) {
t.Fatalf("Expected %v, got %v", zeroes, buf2.Bytes())
}
 }
 
+func TestAioBufferFree(t *testing.T) {
+   buf := MakeAioBuffer(uint(32))
+
+   /* Free the underlying C array. */
+   buf.Free()
+
+   /* And clear the pointer. */
+   if buf.P != nil {
+   t.Fatal("Dangling pointer after Free()")
+   }
+
+   /* Additional Free does nothing. */
+   buf.Free()
+}
+
+func TestAioBufferBytesAfterFree(t *testing.T) {
+   buf := MakeAioBuffer(uint(32))
+   buf.Free()
+
+   defer func() {
+   if r := recover(); r == nil {
+   t.Fatal("Did not recover from panic calling Bytes() 
after Free()")
+   }
+   }()
+
+   buf.Bytes()
+}
+
+func TestAioBufferGetAfterFree(t *testing.T) {
+   buf := MakeAioBuffer(uint(32))
+   buf.Free()
+
+   defer func() {
+   if r := recover(); r == nil {
+   t.Fatal("Did not recover from panic calling Get() after 
Free()")
+   }
+   }()
+
+   *buf.Get(0) = 42
+}
+
 // Typical buffer size.
 const bufferSize uint = 256 * 1024
 
 // Benchmark creating uninitilized buffer.
 func BenchmarkMakeAioBuffer(b *testing.B) {
for i := 0; i < b.N; i++ {
buf := MakeAioBuffer(bufferSize)
buf.Free()
}
 }
-- 
2.34.1

___
Libguestfs mailing list
Libguestfs@redhat.com
https://listman.redhat.com/mailman/listinfo/libguestfs



[Libguestfs] [PATCH libnbd 3/9] golang: aio_buffer.go: Add missing documentation

2022-01-29 Thread Nir Soffer
Add standard function documentation comments.

The documentation should be available here:
https://pkg.go.dev/libguestfs.org/libnbd#AioBuffer

Signed-off-by: Nir Soffer 
---
 golang/aio_buffer.go | 12 
 1 file changed, 12 insertions(+)

diff --git a/golang/aio_buffer.go b/golang/aio_buffer.go
index 2b77d6ee..8f33f500 100644
--- a/golang/aio_buffer.go
+++ b/golang/aio_buffer.go
@@ -32,37 +32,49 @@ package libnbd
 import "C"
 
 import "unsafe"
 
 /* Asynchronous I/O buffer. */
 type AioBuffer struct {
Punsafe.Pointer
Size uint
 }
 
+// MakeAioBuffer makes a new buffer backed by an unitilialized C allocated
+// array.
 func MakeAioBuffer(size uint) AioBuffer {
return AioBuffer{C.malloc(C.ulong(size)), size}
 }
 
+// FromBytes makes a new buffer backed by a C allocated array, initialized by
+// copying the given Go slice.
 func FromBytes(buf []byte) AioBuffer {
size := len(buf)
ret := MakeAioBuffer(uint(size))
for i := 0; i < len(buf); i++ {
*ret.Get(uint(i)) = buf[i]
}
return ret
 }
 
+// Free deallocates the underlying C allocated array. Using the buffer after
+// Free() will panic.
 func (b *AioBuffer) Free() {
if b.P != nil {
C.free(b.P)
b.P = nil
}
 }
 
+// Bytes copies the underlying C array to Go allocated memory and return a
+// slice. Modifying the returned slice does not modify the unerlying buffer
+// backking array.
 func (b *AioBuffer) Bytes() []byte {
return C.GoBytes(b.P, C.int(b.Size))
 }
 
+// Get returns a pointer to a byte in the underlying C array. The pointer can
+// be used to modify the underlying array. The pointer must not be used after
+// caling Free().
 func (b *AioBuffer) Get(i uint) *byte {
return (*byte)(unsafe.Pointer(uintptr(b.P) + uintptr(i)))
 }
-- 
2.34.1

___
Libguestfs mailing list
Libguestfs@redhat.com
https://listman.redhat.com/mailman/listinfo/libguestfs



[Libguestfs] [PATCH libnbd 4/9] golang: aio_buffer.go: Add MakeAioBufferZero()

2022-01-29 Thread Nir Soffer
Make it easy to create a zeroed buffer via calloc(), preventing leaking
sensitive info from the heap.

Benchmarking shows that creating a zeroed buffer is much slower compared
with an uninitialized buffer, but much faster compared with manually
initializing the buffer with a loop.

BenchmarkMakeAioBuffer-127252674   148.1 ns/op
BenchmarkMakeAioBufferZero-12 262107  4181 ns/op
BenchmarkAioBufferZero-12  17581 68759 ns/op

It is interesting that creating a zeroed buffer is 3 times faster
compared with making a new []byte slice:

BenchmarkMakeAioBufferZero-12 247710  4440 ns/op
BenchmarkMakeByteSlice-12  84117 13733 ns/op

Signed-off-by: Nir Soffer 
---
 golang/aio_buffer.go |  6 ++
 golang/libnbd_620_aio_buffer_test.go | 16 
 2 files changed, 22 insertions(+)

diff --git a/golang/aio_buffer.go b/golang/aio_buffer.go
index 8f33f500..dcb036a5 100644
--- a/golang/aio_buffer.go
+++ b/golang/aio_buffer.go
@@ -38,20 +38,26 @@ type AioBuffer struct {
Punsafe.Pointer
Size uint
 }
 
 // MakeAioBuffer makes a new buffer backed by an unitilialized C allocated
 // array.
 func MakeAioBuffer(size uint) AioBuffer {
return AioBuffer{C.malloc(C.ulong(size)), size}
 }
 
+// MakeAioBuffer makes a new buffer backed by a C allocated array. The
+// underlying buffer is set to zero.
+func MakeAioBufferZero(size uint) AioBuffer {
+   return AioBuffer{C.calloc(C.ulong(1), C.ulong(size)), size}
+}
+
 // FromBytes makes a new buffer backed by a C allocated array, initialized by
 // copying the given Go slice.
 func FromBytes(buf []byte) AioBuffer {
size := len(buf)
ret := MakeAioBuffer(uint(size))
for i := 0; i < len(buf); i++ {
*ret.Get(uint(i)) = buf[i]
}
return ret
 }
diff --git a/golang/libnbd_620_aio_buffer_test.go 
b/golang/libnbd_620_aio_buffer_test.go
index 89badfc6..53d6233b 100644
--- a/golang/libnbd_620_aio_buffer_test.go
+++ b/golang/libnbd_620_aio_buffer_test.go
@@ -51,20 +51,28 @@ func TestAioBuffer(t *testing.T) {
t.Fatalf("Expected %v, got %v", zeroes, buf.Bytes())
}
 
/* Create a nother buffer from Go slice. */
buf2 := FromBytes(zeroes)
defer buf2.Free()
 
if !bytes.Equal(buf2.Bytes(), zeroes) {
t.Fatalf("Expected %v, got %v", zeroes, buf2.Bytes())
}
+
+   /* Crated a zeroed buffer. */
+   buf3 := MakeAioBufferZero(uint(32))
+   defer buf.Free()
+
+   if !bytes.Equal(buf3.Bytes(), zeroes) {
+   t.Fatalf("Expected %v, got %v", zeroes, buf2.Bytes())
+   }
 }
 
 func TestAioBufferFree(t *testing.T) {
buf := MakeAioBuffer(uint(32))
 
/* Free the underlying C array. */
buf.Free()
 
/* And clear the pointer. */
if buf.P != nil {
@@ -105,20 +113,28 @@ func TestAioBufferGetAfterFree(t *testing.T) {
 const bufferSize uint = 256 * 1024
 
 // Benchmark creating uninitilized buffer.
 func BenchmarkMakeAioBuffer(b *testing.B) {
for i := 0; i < b.N; i++ {
buf := MakeAioBuffer(bufferSize)
buf.Free()
}
 }
 
+// Benchmark creating zeroed buffer.
+func BenchmarkMakeAioBufferZero(b *testing.B) {
+   for i := 0; i < b.N; i++ {
+   buf := MakeAioBufferZero(bufferSize)
+   buf.Free()
+   }
+}
+
 // Benchmark zeroing a buffer.
 func BenchmarkAioBufferZero(b *testing.B) {
for i := 0; i < b.N; i++ {
buf := MakeAioBuffer(bufferSize)
for i := uint(0); i < bufferSize; i++ {
*buf.Get(i) = 0
}
buf.Free()
}
 }
-- 
2.34.1

___
Libguestfs mailing list
Libguestfs@redhat.com
https://listman.redhat.com/mailman/listinfo/libguestfs



[Libguestfs] [PATCH libnbd 5/9] golang: aio_buffer.go: Add Slice()

2022-01-29 Thread Nir Soffer
AioBuffer.Bytes() cannot be used for copying images from NBD to other
APIs because it copies the entire image. Add a new Slice() function,
creating a slice backed by the underlying buffer.

Using Slice() is efficient, but less safe, like Get(). The returned
slice must be used only before calling Free(). This should not be an
issue with typical code.

Testing shows that Slice() is much faster than Bytes() for a typical 256k
buffer:

BenchmarkAioBufferBytes-12 86616 16529 ns/op
BenchmarkAioBufferSlice-12  10   0.4630 ns/op

I modified the aio_copy example to use AioBuffer and compiled 2
versions, one using Bytes() and one using Slice(). When copying a 6g fully
allocated image, the version using Slice() is 1.6 times faster.

$ hyperfine -r3 "./aio_copy-bytes $SRC >/dev/null" "./aio_copy-slice $SRC 
>/dev/null"

Benchmark 1: ./aio_copy-bytes nbd+unix:///?socket=/tmp/src.sock >/dev/null
  Time (mean ± σ):  3.357 s ±  0.039 s[User: 2.656 s, System: 1.162 s]
  Range (min … max):3.313 s …  3.387 s3 runs

Benchmark 2: ./aio_copy-slice nbd+unix:///?socket=/tmp/src.sock >/dev/null
  Time (mean ± σ):  2.046 s ±  0.009 s[User: 0.423 s, System: 0.892 s]
  Range (min … max):2.037 s …  2.055 s3 runs

Summary
  './aio_copy-slice nbd+unix:///?socket=/tmp/src.sock >/dev/null' ran
1.64 ± 0.02 times faster than './aio_copy-bytes 
nbd+unix:///?socket=/tmp/src.sock >/dev/null'

When copying a 6g empty image (qemu-nbd sends one hole chunk for every
read), the version using Slice() is 2.6 times faster.

$ hyperfine -r3 "./aio_copy-bytes $SRC >/dev/null" "./aio_copy-slice $SRC 
>/dev/null"
Benchmark 1: ./aio_copy-bytes nbd+unix:///?socket=/tmp/src.sock >/dev/null
  Time (mean ± σ):  1.210 s ±  0.023 s[User: 1.428 s, System: 0.345 s]
  Range (min … max):1.191 s …  1.235 s3 runs

Benchmark 2: ./aio_copy-slice nbd+unix:///?socket=/tmp/src.sock >/dev/null
  Time (mean ± σ): 461.4 ms ±  13.1 ms[User: 394.2 ms, System: 76.6 ms]
  Range (min … max):   450.6 ms … 476.0 ms3 runs

Summary
  './aio_copy-slice nbd+unix:///?socket=/tmp/src.sock >/dev/null' ran
2.62 ± 0.09 times faster than './aio_copy-bytes 
nbd+unix:///?socket=/tmp/src.sock >/dev/null'

Signed-off-by: Nir Soffer 
---
 golang/aio_buffer.go |  9 ++
 golang/libnbd_620_aio_buffer_test.go | 41 
 2 files changed, 50 insertions(+)

diff --git a/golang/aio_buffer.go b/golang/aio_buffer.go
index dcb036a5..788fe1a8 100644
--- a/golang/aio_buffer.go
+++ b/golang/aio_buffer.go
@@ -71,16 +71,25 @@ func (b *AioBuffer) Free() {
}
 }
 
 // Bytes copies the underlying C array to Go allocated memory and return a
 // slice. Modifying the returned slice does not modify the unerlying buffer
 // backking array.
 func (b *AioBuffer) Bytes() []byte {
return C.GoBytes(b.P, C.int(b.Size))
 }
 
+// Slice creates a slice backed by the underlying C array. The slice can be
+// used to access or modify the contents of the underlying array. The slice
+// must not be used after caling Free().
+func (b *AioBuffer) Slice() []byte {
+   // See 
https://github.com/golang/go/wiki/cgo#turning-c-arrays-into-go-slices
+   // TODO: Use unsafe.Slice() when we require Go 1.17.
+   return (*[1<<30]byte)(b.P)[:b.Size:b.Size]
+}
+
 // Get returns a pointer to a byte in the underlying C array. The pointer can
 // be used to modify the underlying array. The pointer must not be used after
 // caling Free().
 func (b *AioBuffer) Get(i uint) *byte {
return (*byte)(unsafe.Pointer(uintptr(b.P) + uintptr(i)))
 }
diff --git a/golang/libnbd_620_aio_buffer_test.go 
b/golang/libnbd_620_aio_buffer_test.go
index 53d6233b..38c6c74d 100644
--- a/golang/libnbd_620_aio_buffer_test.go
+++ b/golang/libnbd_620_aio_buffer_test.go
@@ -44,20 +44,35 @@ func TestAioBuffer(t *testing.T) {
/* Modifying returned slice does not modify the buffer. */
for i := 0; i < len(b); i++ {
b[i] = 42
}
 
/* Bytes() still returns zeroes. */
if !bytes.Equal(buf.Bytes(), zeroes) {
t.Fatalf("Expected %v, got %v", zeroes, buf.Bytes())
}
 
+   /* Creating a slice without copying the underlhing buffer. */
+   s := buf.Slice()
+   if !bytes.Equal(s, zeroes) {
+   t.Fatalf("Expected %v, got %v", zeroes, s)
+   }
+
+   /* Modifing the slice modifies the underlying buffer. */
+   for i := 0; i < len(s); i++ {
+   s[i] = 42
+   }
+
+   if !bytes.Equal(buf.Slice(), s) {
+   t.Fatalf("Expected %v, got %v", s, buf.Slice())
+   }
+
/* Create a nother buffer from Go slice. */
buf2 := FromBytes(zeroes)
defer buf2.Free()
 
if !bytes.Equal(buf2.Bytes(), zeroes) {
t.Fatalf("Expected %v, got %v", zeroes, buf2.Bytes())
}
 
/* Crated a zeroed buffer. */
buf3 := MakeA

[Libguestfs] [PATCH libnbd 6/9] golang: tests: Use AioBuffer.Slice()

2022-01-29 Thread Nir Soffer
Slice() is easier to use and faster than Get() or Bytes(). Let's use the
new way.

Signed-off-by: Nir Soffer 
---
 golang/libnbd_500_aio_pread_test.go  | 2 +-
 golang/libnbd_510_aio_pwrite_test.go | 8 +---
 golang/libnbd_620_aio_buffer_test.go | 8 +---
 3 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/golang/libnbd_500_aio_pread_test.go 
b/golang/libnbd_500_aio_pread_test.go
index 0811378c..bd0208ef 100644
--- a/golang/libnbd_500_aio_pread_test.go
+++ b/golang/libnbd_500_aio_pread_test.go
@@ -55,14 +55,14 @@ func Test500AioPRead(t *testing.T) {
}
h.Poll(-1)
}
 
// Expected data.
expected := make([]byte, 512)
for i := 0; i < 512; i += 8 {
binary.BigEndian.PutUint64(expected[i:i+8], uint64(i))
}
 
-   if !bytes.Equal(buf.Bytes(), expected) {
+   if !bytes.Equal(buf.Slice(), expected) {
t.Fatalf("did not read expected data")
}
 }
diff --git a/golang/libnbd_510_aio_pwrite_test.go 
b/golang/libnbd_510_aio_pwrite_test.go
index 56cdcb05..493159f2 100644
--- a/golang/libnbd_510_aio_pwrite_test.go
+++ b/golang/libnbd_510_aio_pwrite_test.go
@@ -32,23 +32,25 @@ func Test510AioPWrite(t *testing.T) {
"nbdkit", "-s", "--exit-with-parent", "-v",
"memory", "size=512",
})
if err != nil {
t.Fatalf("could not connect: %s", err)
}
 
/* Write a pattern and read it back. */
buf := MakeAioBuffer(512)
defer buf.Free()
+
+   s := buf.Slice()
for i := 0; i < 512; i += 2 {
-   *buf.Get(uint(i)) = 0x55
-   *buf.Get(uint(i + 1)) = 0xAA
+   s[i] = 0x55
+   s[i+1] = 0xAA
}
 
var cookie uint64
cookie, err = h.AioPwrite(buf, 0, nil)
if err != nil {
t.Fatalf("%s", err)
}
for {
var b bool
b, err = h.AioCommandCompleted(cookie)
@@ -62,14 +64,14 @@ func Test510AioPWrite(t *testing.T) {
}
 
/* We already tested aio_pread, let's just read the data
back in the regular synchronous way. */
buf2 := make([]byte, 512)
err = h.Pread(buf2, 0, nil)
if err != nil {
t.Fatalf("%s", err)
}
 
-   if !bytes.Equal(buf.Bytes(), buf2) {
+   if !bytes.Equal(buf.Slice(), buf2) {
t.Fatalf("did not read back same data as written")
}
 }
diff --git a/golang/libnbd_620_aio_buffer_test.go 
b/golang/libnbd_620_aio_buffer_test.go
index 38c6c74d..8f65b558 100644
--- a/golang/libnbd_620_aio_buffer_test.go
+++ b/golang/libnbd_620_aio_buffer_test.go
@@ -22,22 +22,23 @@ import (
"bytes"
"testing"
 )
 
 func TestAioBuffer(t *testing.T) {
/* Create a buffer with unitinialized backing array. */
buf := MakeAioBuffer(uint(32))
defer buf.Free()
 
/* Initialize backing array contents. */
+   s := buf.Slice()
for i := uint(0); i < buf.Size; i++ {
-   *buf.Get(i) = 0
+   s[i] = 0
}
 
/* Create a slice by copying the backing arrary contents into Go 
memory. */
b := buf.Bytes()
 
zeroes := make([]byte, 32)
if !bytes.Equal(b, zeroes) {
t.Fatalf("Expected %v, got %v", zeroes, buf.Bytes())
}
 
@@ -45,21 +46,21 @@ func TestAioBuffer(t *testing.T) {
for i := 0; i < len(b); i++ {
b[i] = 42
}
 
/* Bytes() still returns zeroes. */
if !bytes.Equal(buf.Bytes(), zeroes) {
t.Fatalf("Expected %v, got %v", zeroes, buf.Bytes())
}
 
/* Creating a slice without copying the underlhing buffer. */
-   s := buf.Slice()
+   s = buf.Slice()
if !bytes.Equal(s, zeroes) {
t.Fatalf("Expected %v, got %v", zeroes, s)
}
 
/* Modifing the slice modifies the underlying buffer. */
for i := 0; i < len(s); i++ {
s[i] = 42
}
 
if !bytes.Equal(buf.Slice(), s) {
@@ -154,22 +155,23 @@ func BenchmarkMakeAioBufferZero(b *testing.B) {
for i := 0; i < b.N; i++ {
buf := MakeAioBufferZero(bufferSize)
buf.Free()
}
 }
 
 // Benchmark zeroing a buffer.
 func BenchmarkAioBufferZero(b *testing.B) {
for i := 0; i < b.N; i++ {
buf := MakeAioBuffer(bufferSize)
+   s := buf.Slice()
for i := uint(0); i < bufferSize; i++ {
-   *buf.Get(i) = 0
+   s[i] = 0
}
buf.Free()
}
 }
 
 // Benchmark creating a buffer by coping a Go slice.
 func BenchmarkFromBytes(b *testing.B) {
for i := 0; i < b.N; i++ {
zeroes := make([]byte, bufferSize)
buf := FromBytes(zeroes)
-- 
2.34.1

___
Libguestfs maili

[Libguestfs] [PATCH libnbd 7/9] golang: aio_buffer.go: Speed up FromBytes()

2022-01-29 Thread Nir Soffer
Using Slice() we can use builtin copy() instead of a manual loop, which
is 4.6 times faster with a typical 256k buffer:

Before:
BenchmarkFromBytes-12   9806111474 ns/op

After:
BenchmarkFromBytes-12  48193 24106 ns/op

Signed-off-by: Nir Soffer 
---
 golang/aio_buffer.go | 7 ++-
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/golang/aio_buffer.go b/golang/aio_buffer.go
index 788fe1a8..7960e554 100644
--- a/golang/aio_buffer.go
+++ b/golang/aio_buffer.go
@@ -47,25 +47,22 @@ func MakeAioBuffer(size uint) AioBuffer {
 
 // MakeAioBufferZero makes a new buffer backed by a C allocated array. The
 // underlying buffer is set to zero.
 func MakeAioBufferZero(size uint) AioBuffer {
return AioBuffer{C.calloc(C.ulong(1), C.ulong(size)), size}
 }
 
 // FromBytes makes a new buffer backed by a C allocated array, initialized by
 // copying the given Go slice.
 func FromBytes(buf []byte) AioBuffer {
-   size := len(buf)
-   ret := MakeAioBuffer(uint(size))
-   for i := 0; i < len(buf); i++ {
-   *ret.Get(uint(i)) = buf[i]
-   }
+   ret := MakeAioBuffer(uint(len(buf)))
+   copy(ret.Slice(), buf)
return ret
 }
 
 // Free deallocates the underlying C allocated array. Using the buffer after
 // Free() will panic.
 func (b *AioBuffer) Free() {
if b.P != nil {
C.free(b.P)
b.P = nil
}
-- 
2.34.1

___
Libguestfs mailing list
Libguestfs@redhat.com
https://listman.redhat.com/mailman/listinfo/libguestfs



[Libguestfs] [PATCH libnbd 8/9] golang: aio_buffer.go: Benchmark copy flows

2022-01-29 Thread Nir Soffer
Add benchmark for copying a buffer using 3 strategies - reusing the same
buffer, making a new uninitialized buffer per copy, and using a zeroed
buffer per copy. This benchmark is the worst possible case, copying a
buffer to memory. Any real I/O will be much slower, hiding the overhead
of allocating a buffer or zeroing a buffer.

$ go test -run=AioBuffer -bench=Copy -benchtime=5s
goos: linux
goarch: amd64
pkg: libguestfs.org/libnbd
cpu: Intel(R) Core(TM) i7-10850H CPU @ 2.70GHz
BenchmarkAioBufferCopyBaseline-121142508  4523 ns/op
BenchmarkAioBufferCopyMake-12100  5320 ns/op
BenchmarkAioBufferCopyMakeZero-12 728940  8218 ns/op

Signed-off-by: Nir Soffer 
---
 golang/libnbd_620_aio_buffer_test.go | 32 
 1 file changed, 32 insertions(+)

diff --git a/golang/libnbd_620_aio_buffer_test.go 
b/golang/libnbd_620_aio_buffer_test.go
index 8f65b558..9ab411b3 100644
--- a/golang/libnbd_620_aio_buffer_test.go
+++ b/golang/libnbd_620_aio_buffer_test.go
@@ -195,10 +195,42 @@ func BenchmarkAioBufferBytes(b *testing.B) {
 func BenchmarkAioBufferSlice(b *testing.B) {
buf := MakeAioBuffer(bufferSize)
defer buf.Free()
var r int
 
b.ResetTimer()
for i := 0; i < b.N; i++ {
r += len(buf.Slice())
}
 }
+
+var data = make([]byte, bufferSize)
+
+// Benchmark copying into same buffer, used as baseline for CopyMake and
+// CopyMakeZero benchmarks.
+func BenchmarkAioBufferCopyBaseline(b *testing.B) {
+   buf := MakeAioBufferZero(bufferSize)
+   defer buf.Free()
+
+   b.ResetTimer()
+   for i := 0; i < b.N; i++ {
+   copy(buf.Slice(), data)
+   }
+}
+
+// Benchmark overhead of making a new buffer per read.
+func BenchmarkAioBufferCopyMake(b *testing.B) {
+   for i := 0; i < b.N; i++ {
+   buf := MakeAioBuffer(bufferSize)
+   copy(buf.Slice(), data)
+   buf.Free()
+   }
+}
+
+// Benchmark overhead of making a new zero buffer per read.
+func BenchmarkAioBufferCopyMakeZero(b *testing.B) {
+   for i := 0; i < b.N; i++ {
+   buf := MakeAioBufferZero(bufferSize)
+   copy(buf.Slice(), data)
+   buf.Free()
+   }
+}
-- 
2.34.1

___
Libguestfs mailing list
Libguestfs@redhat.com
https://listman.redhat.com/mailman/listinfo/libguestfs



[Libguestfs] [PATCH libnbd 9/9] golang: examples: aio_copy: Simplify using AioBuffer

2022-01-29 Thread Nir Soffer
Now that we have an efficient way to use AioBuffer, we don't need the
hacks to create AioBuffer from Go slice.

Benchmarking AioBuffer show that allocating a 256k buffer is practically
free, so there is no need for the buffer pool. Now we allocate a new
buffer per request, keep it in the command, and free it when the request
is finished.

Signed-off-by: Nir Soffer 
---
 golang/examples/aio_copy/aio_copy.go | 29 +---
 1 file changed, 5 insertions(+), 24 deletions(-)

diff --git a/golang/examples/aio_copy/aio_copy.go 
b/golang/examples/aio_copy/aio_copy.go
index b6f5def1..bb20b478 100644
--- a/golang/examples/aio_copy/aio_copy.go
+++ b/golang/examples/aio_copy/aio_copy.go
@@ -37,53 +37,43 @@
 // Example:
 //
 //   ./aio_copy nbd+unix:///?socket=/tmp.nbd >/dev/null
 //
 package main
 
 import (
"container/list"
"flag"
"os"
-   "sync"
"syscall"
-   "unsafe"
 
"libguestfs.org/libnbd"
 )
 
 var (
// These options give best performance with fast NVMe drive.
requestSize = flag.Uint("request-size", 256*1024, "maximum request size 
in bytes")
requests= flag.Uint("requests", 4, "maximum number of requests in 
flight")
 
h *libnbd.Libnbd
 
// Keeping commands in a queue ensures commands are written in the right
// order, even if they complete out of order. This allows parallel reads
// with non-seekable output.
queue list.List
-
-   // Buffer pool allocating buffers as needed and reusing them.
-   bufPool = sync.Pool{
-   New: func() interface{} {
-   return make([]byte, *requestSize)
-   },
-   }
 )
 
 // command keeps state of a single AioPread call while the read is handled by
 // libnbd, until the command reaches the front of the queue and can be written
 // to the output.
 type command struct {
-   buf[]byte
-   length uint
+   buflibnbd.AioBuffer
ready  bool
 }
 
 func main() {
flag.Parse()
 
var err error
 
h, err = libnbd.Create()
if err != nil {
@@ -139,60 +129,51 @@ func waitForCompletion() {
panic(err)
}
 
if inflightRequests() < start {
break // A read completed.
}
}
 }
 
 func startRead(offset uint64, length uint) {
-   buf := bufPool.Get().([]byte)
-
-   // Keep buffer in command so we can put it back into the pool when the
-   // command completes.
-   cmd := &command{buf: buf, length: length}
-
-   // Create aio buffer from pool buffer to avoid unneeded allocation for
-   // every read, and unneeded copy when completing the read.
-   abuf := libnbd.AioBuffer{P: unsafe.Pointer(&buf[0]), Size: length}
+   cmd := &command{buf: libnbd.MakeAioBuffer(length)}
 
args := libnbd.AioPreadOptargs{
CompletionCallbackSet: true,
CompletionCallback: func(error *int) int {
if *error != 0 {
// This is not documented, but *error is errno 
value translated
// from the NBD server error.
err := syscall.Errno(*error).Error()
panic(err)
}
cmd.ready = true
return 1
},
}
 
-   _, err := h.AioPread(abuf, offset, &args)
+   _, err := h.AioPread(cmd.buf, offset, &args)
if err != nil {
panic(err)
}
 
queue.PushBack(cmd)
 }
 
 func readReady() bool {
return queue.Len() > 0 && queue.Front().Value.(*command).ready
 }
 
 func finishRead() {
e := queue.Front()
queue.Remove(e)
 
cmd := e.Value.(*command)
-   b := cmd.buf[:cmd.length]
 
-   _, err := os.Stdout.Write(b)
+   _, err := os.Stdout.Write(cmd.buf.Slice())
if err != nil {
panic(err)
}
 
-   bufPool.Put(cmd.buf)
+   cmd.buf.Free()
 }
-- 
2.34.1

___
Libguestfs mailing list
Libguestfs@redhat.com
https://listman.redhat.com/mailman/listinfo/libguestfs