Oops, please ignore this; it was already sent and reviewed here: https://www.redhat.com/archives/libguestfs/2018-July/msg00084.html
The patch was hiding in my tree and selected by a careless glob :-) On Thu, Aug 2, 2018 at 10:06 PM Nir Soffer <[email protected]> wrote: > If we may not trim, we tried ZERO_RANGE, but this is not well supported > yet, for example it is not available on NFS 4.2. ZERO_RANGE and > PUNCH_HOLE are supported now on block devices, but not on RHEL 7, so we > fall back to slow manual zeroing there. > > Change the logic to support block devices on RHEL 7, and file systems > that do not support ZERO_RANGE. > > The new logic: > - If we may trim, try PUNCH_HOLE > - If we can zero range, try ZERO_RANGE > - If we can punch hole and fallocate, try fallocate(PUNCH_HOLE) followed > by fallocate(0). > - If underlying file is a block device, try ioctl(BLKZEROOUT) > - Otherwise fall back to manual zeroing > > The handle now keeps the underlying file capabilities, so once we > discover that an operation is not supported, we never try it again. > > Here are example runs on a server based on Intel(R) Xeon(R) CPU E5-2630 > v4 @ 2.20GHz, using XtremIO storage via 4G FC HBA and 4 paths to > storage. > > $ export SOCK=/tmp/nbd.sock > $ export > BLOCK=/dev/e30bfac2-8e13-479d-8cd6-c6da5e306c4e/c9864222-bc52-4359-80d7-76e47d619b15 > > $ src/nbdkit -f plugins/file/.libs/nbdkit-file-plugin.so file=$BLOCK -U > $SOCK > > $ time qemu-img convert -n -f raw -O raw /var/tmp/fedora-27.img > nbd:unix:$SOCK > > real 0m2.741s > user 0m0.224s > sys 0m0.634s > > $ time qemu-img convert -n -f raw -O raw -W /var/tmp/fedora-27.img > nbd:unix:$SOCK > > real 0m1.920s > user 0m0.163s > sys 0m0.735s > > Issues: > - ioctl(BLKZEROOUT) will fail if offset or count are not aligned to > logical sector size. I'm not sure if nbdkit or qemu-img ensure this. 
> - Need testing with NFS > --- > plugins/file/file.c | 126 ++++++++++++++++++++++++++++++++++++-------- > 1 file changed, 103 insertions(+), 23 deletions(-) > > diff --git a/plugins/file/file.c b/plugins/file/file.c > index fb20622..bce2ed1 100644 > --- a/plugins/file/file.c > +++ b/plugins/file/file.c > @@ -33,6 +33,7 @@ > > #include <config.h> > > +#include <stdbool.h> > #include <stdio.h> > #include <stdlib.h> > #include <string.h> > @@ -42,6 +43,8 @@ > #include <sys/stat.h> > #include <errno.h> > #include <linux/falloc.h> /* For FALLOC_FL_* on RHEL, glibc < 2.18 */ > +#include <sys/ioctl.h> > +#include <linux/fs.h> > > #include <nbdkit-plugin.h> > > @@ -116,6 +119,10 @@ file_config_complete (void) > /* The per-connection handle. */ > struct handle { > int fd; > + bool is_block_device; > + bool can_punch_hole; > + bool can_zero_range; > + bool can_fallocate; > }; > > /* Create the per-connection handle. */ > @@ -123,6 +130,7 @@ static void * > file_open (int readonly) > { > struct handle *h; > + struct stat statbuf; > int flags; > > h = malloc (sizeof *h); > @@ -144,6 +152,23 @@ file_open (int readonly) > return NULL; > } > > + if (fstat (h->fd, &statbuf) == -1) { > + nbdkit_error ("fstat: %s: %m", filename); > + free (h); > + return NULL; > + } > + > + h->is_block_device = S_ISBLK(statbuf.st_mode); > + > + /* These flags will disabled if an operation is not supported. */ > +#ifdef FALLOC_FL_PUNCH_HOLE > + h->can_punch_hole = true; > +#endif > +#ifdef FALLOC_FL_ZERO_RANGE > + h->can_zero_range = true; > +#endif > + h->can_fallocate = true; > + > return h; > } > > @@ -164,27 +189,29 @@ static int64_t > file_get_size (void *handle) > { > struct handle *h = handle; > - struct stat statbuf; > > - if (fstat (h->fd, &statbuf) == -1) { > - nbdkit_error ("stat: %m"); > - return -1; > - } > - > - if (S_ISBLK (statbuf.st_mode)) { > + if (h->is_block_device) { > + /* Block device, so st_size will not be the true size. 
*/ > off_t size; > > - /* Block device, so st_size will not be the true size. */ > size = lseek (h->fd, 0, SEEK_END); > if (size == -1) { > nbdkit_error ("lseek (to find device size): %m"); > return -1; > } > + > return size; > - } > + } else { > + /* Regular file. */ > + struct stat statbuf; > + > + if (fstat (h->fd, &statbuf) == -1) { > + nbdkit_error ("fstat: %m"); > + return -1; > + } > > - /* Else regular file. */ > - return statbuf.st_size; > + return statbuf.st_size; > + } > } > > static int > @@ -250,33 +277,86 @@ file_pwrite (void *handle, const void *buf, uint32_t > count, uint64_t offset) > static int > file_zero (void *handle, uint32_t count, uint64_t offset, int may_trim) > { > -#if defined(FALLOC_FL_PUNCH_HOLE) || defined(FALLOC_FL_ZERO_RANGE) > struct handle *h = handle; > -#endif > int r = -1; > > #ifdef FALLOC_FL_PUNCH_HOLE > - if (may_trim) { > + /* If we can and may trim, punching hole is our best option. */ > + if (h->can_punch_hole && may_trim) { > r = do_fallocate (h->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, > offset, count); > - if (r == -1 && errno != EOPNOTSUPP) { > + if (r == 0) > + return 0; > + > + if (errno != EOPNOTSUPP) { > nbdkit_error ("zero: %m"); > + return r; > } > - /* PUNCH_HOLE is older; if it is not supported, it is likely that > - ZERO_RANGE will not work either, so fall back to write. */ > - return r; > + > + h->can_punch_hole = false; > } > #endif > > #ifdef FALLOC_FL_ZERO_RANGE > - r = do_fallocate (h->fd, FALLOC_FL_ZERO_RANGE, offset, count); > - if (r == -1 && errno != EOPNOTSUPP) { > - nbdkit_error ("zero: %m"); > + /* ZERO_RANGE is not well supported yet, but it the next best option. 
*/ > + if (h->can_zero_range) { > + r = do_fallocate (h->fd, FALLOC_FL_ZERO_RANGE, offset, count); > + if (r == 0) > + return 0; > + > + if (errno != EOPNOTSUPP) { > + nbdkit_error ("zero: %m"); > + return r; > + } > + > + h->can_zero_range = false; > } > -#else > +#endif > + > +#ifdef FALLOC_FL_PUNCH_HOLE > + /* If we can punch hole but may not trim, we can combine punching hole > and > + fallocate to zero a range. This is much more efficient than writing > zeros > + manually. */ > + if (h->can_punch_hole && h->can_fallocate) { > + r = do_fallocate (h->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, > + offset, count); > + if (r == 0) { > + r = do_fallocate(h->fd, 0, offset, count); > + if (r == 0) > + return 0; > + > + if (errno != EOPNOTSUPP) { > + nbdkit_error ("zero: %m"); > + return r; > + } > + > + h->can_fallocate = false; > + } else { > + if (errno != EOPNOTSUPP) { > + nbdkit_error ("zero: %m"); > + return r; > + } > + > + h->can_punch_hole = false; > + } > + } > +#endif > + > + /* For block devices, we can use BLKZEROOUT. > + NOTE: count and offset must be aligned to logical block size. */ > + if (h->is_block_device) { > + uint64_t range[2] = {offset, count}; > + > + r = ioctl(h->fd, BLKZEROOUT, &range); > + if (r == 0) > + return 0; > + > + nbdkit_error("zero: %m"); > + return r; > + } > + > /* Trigger a fall back to writing */ > errno = EOPNOTSUPP; > -#endif > > return r; > } > -- > 2.17.1 > >
_______________________________________________ Libguestfs mailing list [email protected] https://www.redhat.com/mailman/listinfo/libguestfs
