[libvirt] [PATCH v2 2/2] storage: btrfs subvolumes for directory pools
This commit adds support for btrfs subvolumes as directory volumes. The directory storage pool can be used to manage btrfs subvolumes on an existing btrfs filesystem. The driver can create new blank subvolumes and snapshots of existing subvolumes as well as delete existing subvolumes. The subvolumes created are automatically made visible on the host side and can be attached to domains using the filesystem tags as defined in 'format domain' documentation. Subvolumes do not implement quotas at the moment because the current (btrfs-progs-0.20.rc1.20130501git7854c8b-4.fc20.x86_64) support for quota management in btrfs-progs is lacking the necessary features, for example it's not possible to see the quota assigned to a certain subvolume and usage information is only updated on syncfs(2). Quota support will be implemented once the tools gain the necessary features. Signed-off-by: Oskari Saarenmaa o...@ohmu.fi --- I did not add a new virDirIsExecutable function as it wasn't really needed by this patch set, it's sufficient to just verify that the backing volume is a directory before running btrfs subvolume snapshot. Part 1/2 still applies cleanly on today's git master so not resending it. configure.ac | 25 ++- docs/schemas/storagevol.rng | 1 + docs/storage.html.in | 30 ++- libvirt.spec.in | 4 + src/conf/storage_conf.c | 3 + src/conf/storage_conf.h | 1 + src/storage/storage_backend.c | 15 +- src/storage/storage_backend_fs.c | 219 -- src/util/virstoragefile.c | 4 +- src/util/virstoragefile.h | 1 + tests/storagevolxml2xmlin/vol-btrfs-snapshot.xml | 13 ++ tests/storagevolxml2xmlin/vol-btrfs.xml | 9 + tests/storagevolxml2xmlout/vol-btrfs-snapshot.xml | 26 +++ tests/storagevolxml2xmlout/vol-btrfs.xml | 17 ++ tests/storagevolxml2xmltest.c | 2 + 15 files changed, 340 insertions(+), 30 deletions(-) create mode 100644 tests/storagevolxml2xmlin/vol-btrfs-snapshot.xml create mode 100644 tests/storagevolxml2xmlin/vol-btrfs.xml create mode 100644 tests/storagevolxml2xmlout/vol-btrfs-snapshot.xml create mode 100644 tests/storagevolxml2xmlout/vol-btrfs.xml diff --git a/configure.ac b/configure.ac index 1993fab..a82640c 100644 --- a/configure.ac +++ b/configure.ac @@ -1646,6 +1646,10 @@ AC_ARG_WITH([storage-sheepdog], [AS_HELP_STRING([--with-storage-sheepdog], [with Sheepdog backend for the storage driver @:@default=check@:@])], [],[with_storage_sheepdog=check]) +AC_ARG_WITH([storage-btrfs], + [AS_HELP_STRING([--with-storage-btrfs], +[with Btrfs backend for the storage driver @:@default=check@:@])], + [],[with_storage_btrfs=check]) if test $with_libvirtd = no; then with_storage_dir=no @@ -1858,6 +1862,24 @@ fi AM_CONDITIONAL([WITH_STORAGE_SHEEPDOG], [test $with_storage_sheepdog = yes]) +if test $with_storage_btrfs = yes || test $with_storage_btrfs = check; then + AC_PATH_PROG([BTRFS], [btrfs], [], [$PATH:/sbin:/usr/sbin]) + + if test $with_storage_btrfs = yes ; then +if test -z $BTRFS ; then AC_MSG_ERROR([We need btrfs -command for btrfs storage driver]) ; fi + else +if test -z $BTRFS ; then with_storage_btrfs=no ; fi +if test $with_storage_btrfs = check ; then with_storage_btrfs=yes ; fi + fi + + if test $with_storage_btrfs = yes ; then +AC_DEFINE_UNQUOTED([WITH_STORAGE_BTRFS], 1, [whether Btrfs backend for storage driver is enabled]) +AC_DEFINE_UNQUOTED([BTRFS],[$BTRFS],[Location of btrfs program]) + fi +fi +AM_CONDITIONAL([WITH_STORAGE_BTRFS], [test $with_storage_btrfs = yes]) + + LIBPARTED_CFLAGS= LIBPARTED_LIBS= @@ -1945,7 +1967,7 @@ AC_SUBST([DEVMAPPER_CFLAGS]) AC_SUBST([DEVMAPPER_LIBS]) with_storage=no -for backend in dir fs lvm iscsi scsi mpath rbd disk; do +for backend in dir fs lvm iscsi scsi mpath rbd disk btrfs; do if eval test \$with_storage_$backend = yes; then with_storage=yes break @@ -2670,6 +2692,7 @@ AC_MSG_NOTICE([ mpath: $with_storage_mpath]) AC_MSG_NOTICE([Disk: $with_storage_disk]) AC_MSG_NOTICE([ RBD: $with_storage_rbd]) AC_MSG_NOTICE([Sheepdog: $with_storage_sheepdog]) +AC_MSG_NOTICE([ Btrfs: $with_storage_btrfs]) AC_MSG_NOTICE([]) AC_MSG_NOTICE([Security Drivers]) AC_MSG_NOTICE([]) diff --git a/docs/schemas/storagevol.rng b/docs/schemas/storagevol.rng index 8bc5907..8814973 100644 --- a/docs/schemas/storagevol.rng +++ b/docs/schemas/storagevol.rng @@ -201,6 +201,7 @@ valueqed/value valuevmdk/value valuevpc/value + valuevolume/value /choice /define diff --git a/docs/storage.html.in b/docs/storage.html.in index 1181444..03018ab 100644 --- a/docs/storage.html.in +++ b/docs/storage.html.in @@ -119,12
Re: [libvirt] [PATCH v2 2/2] storage: btrfs subvolumes for directory pools
Thanks for the review, 10.10.2013 18:57, Daniel P. Berrange kirjoitti: On Thu, Oct 10, 2013 at 05:52:22PM +0300, Oskari Saarenmaa wrote: +if test $with_storage_btrfs = yes || test $with_storage_btrfs = check; then + AC_PATH_PROG([BTRFS], [btrfs], [], [$PATH:/sbin:/usr/sbin]) [...] +AM_CONDITIONAL([WITH_STORAGE_BTRFS], [test $with_storage_btrfs = yes]) Just use 'WITH_BTRFS' here - the WITH_STORAGE_ is for actual storage backends. Do we actually need a flag for enabling / disabling btrfs usage as it's not a separate storage pool? I'm thinking about just calling AC_PATH_PROG(btrfs) if FS pool is enabled, and using #ifdef BTRFS in the fs code to use btrfs if available. --- a/src/conf/storage_conf.h +++ b/src/conf/storage_conf.h @@ -90,6 +90,7 @@ struct _virStorageVolTarget { virStorageEncryptionPtr encryption; virBitmapPtr features; char *compat; +char *uuid; }; This struct represents the public XML data format - adding random fields to it for individual driver use is not allowed. Ok, I'll put this thing somewhere else. +if (volv-backingStore.path == NULL) { +/* backing store not in the pool, ignore it */ Backing stores for volumes are not required to be in the same pool as the source. For example it is valid to have a qcow2 file backed by a lvm volume. In btrfs's case the note about backing store in an existing subvolume is informational only; all subvolumes, snapshots or not, are independent and in case we can't find a symbolic name for the parent volume I think it's safe to just ignore it. I'll add this note to the comment. I'm thinking it would be nice to have a dedicate file with APIs for btrfs eg src/util/virbtrfs.{h,c} and have it contain things like virBtrFSCreateVolume() virBtrFSCreateVolume() virBtrFS...() and so on, so we don't have all these virCommand calls littering this file. I'm not sure it makes sense to do this until there's another user for btrfs commands in libvirt, but I can do it if you like. I think it'd be more useful to have a module which implements copy-on-write operations for different filesystems (btrfs, zfs, something else?) with a common api. diff --git a/src/util/virstoragefile.h b/src/util/virstoragefile.h index d5effa4..4dc6c81 100644 --- a/src/util/virstoragefile.h +++ b/src/util/virstoragefile.h @@ -46,6 +46,7 @@ enum virStorageFileFormat { VIR_STORAGE_FILE_FAT, VIR_STORAGE_FILE_VHD, VIR_STORAGE_FILE_VDI, +VIR_STORAGE_FILE_VOLUME, This isn't right - volumes already have a type={file,block,dir,volume} and we shouldn't duplicate this in the format attribute too. Hmm.. I don't see such a field in virStorageVolType - am I looking at a wrong enum? diff --git a/tests/storagevolxml2xmlin/vol-btrfs-snapshot.xml b/tests/storagevolxml2xmlin/vol-btrfs-snapshot.xml new file mode 100644 index 000..5a58b56 --- /dev/null +++ b/tests/storagevolxml2xmlin/vol-btrfs-snapshot.xml @@ -0,0 +1,13 @@ +volume + nameclone/name + capacity unit=bytes0/capacity + allocation unit=bytes0/allocation + target +path/lxc/clone/path +format type='volume'/ + /target + backingStore +path/lxc/vanilla/path +format type='volume'/ + /backingStore +/volume Using 'format' in this way is wrong. What we should be doing is exposing the volume 'type' as an attribute eg volume type='volume' ... /volume I suppose that makes sense, but it would make this incompatible with existing volume creation. Right now (with the current patch) I can use existing virsh tooling to run, for example 'virsh vol-create-as default guest-1 0 --format volume --backing-store guest-base' diff --git a/tests/storagevolxml2xmlout/vol-btrfs-snapshot.xml b/tests/storagevolxml2xmlout/vol-btrfs-snapshot.xml new file mode 100644 index 000..75830d3 --- /dev/null +++ b/tests/storagevolxml2xmlout/vol-btrfs-snapshot.xml @@ -0,0 +1,26 @@ +volume + nameclone/name + key(null)/key If you're getting (null) here, then something has gone wrong I'll look into this. Thanks, Oskari -- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
Re: [libvirt] [PATCH 0/2] btrfs subvolume management
On Tue, Sep 10, 2013 at 09:56:48PM +0300, Oskari Saarenmaa wrote: Date: Tue, 10 Sep 2013 21:56:48 +0300 From: Oskari Saarenmaa o...@ohmu.fi To: libvir-list@redhat.com Subject: [libvirt] [PATCH 0/2] btrfs subvolume management Moved btrfs subvolume management to storage_backend_fs.c instead of implementing it as a separate pool as suggested by Daniel P. Berrange in https://www.redhat.com/archives/libvir-list/2013-September/msg00316.html Ping, anyone had a chance to review this yet? Thanks, Oskari -- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
[libvirt] [PATCH] virfile: safezero: fix buffer allocation max size
My previous commit 7dc1d4ab was supposed to change safezero to allocate 1 megabyte at maximum, but had the logic reversed and will allocate 1 megabyte at minimum (and a lot more at maximum.) Signed-off-by: Oskari Saarenmaa o...@ohmu.fi --- src/util/virfile.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/util/virfile.c b/src/util/virfile.c index f662127..e10de5a 100644 --- a/src/util/virfile.c +++ b/src/util/virfile.c @@ -1075,7 +1075,7 @@ safezero(int fd, off_t offset, off_t len) /* Split up the write in small chunks so as not to allocate lots of RAM */ remain = len; -bytes = MAX(1024 * 1024, len); +bytes = MIN(1024 * 1024, len); r = VIR_ALLOC_N(buf, bytes); if (r 0) { -- 1.8.3.1 -- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
[libvirt] [PATCH] virfile: safezero: fall back to writing block by block if mmap fails
mmap can fail on 32-bit systems if we're trying to zero out a lot of data. Fall back to using block-by-block writing in that case. While we could map smaller blocks it's unlikely that this code is used a lot and its easier to just fall back to one of the existing methods. Also modified the block-by-block zeroing to not allocate a megabyte of zeroes if we're writing less than that. Signed-off-by: Oskari Saarenmaa o...@ohmu.fi --- src/util/virfile.c | 34 ++ 1 file changed, 14 insertions(+), 20 deletions(-) diff --git a/src/util/virfile.c b/src/util/virfile.c index 16f8101..f662127 100644 --- a/src/util/virfile.c +++ b/src/util/virfile.c @@ -1032,16 +1032,18 @@ safezero(int fd, off_t offset, off_t len) errno = ret; return -1; } + #else -# ifdef HAVE_MMAP int safezero(int fd, off_t offset, off_t len) { -static long pagemask = 0; -off_t map_skip; int r; char *buf; +unsigned long long remain, bytes; +# ifdef HAVE_MMAP +static long pagemask = 0; +off_t map_skip; /* align offset and length, rounding offset down and length up */ if (pagemask == 0) @@ -1057,30 +1059,23 @@ safezero(int fd, off_t offset, off_t len) buf = mmap(NULL, len + map_skip, PROT_READ | PROT_WRITE, MAP_SHARED, fd, offset - map_skip); -if (buf == MAP_FAILED) -return -1; +if (buf != MAP_FAILED) { +memset(buf + map_skip, 0, len); +munmap(buf, len + map_skip); -memset(buf + map_skip, 0, len); -munmap(buf, len + map_skip); - -return 0; -} - -# else /* HAVE_MMAP */ +return 0; +} -int -safezero(int fd, off_t offset, off_t len) -{ -int r; -char *buf; -unsigned long long remain, bytes; +/* fall back to writing zeroes using safewrite if mmap fails (for + * example because of virtual memory limits) */ +# endif /* HAVE_MMAP */ if (lseek(fd, offset, SEEK_SET) 0) return -1; /* Split up the write in small chunks so as not to allocate lots of RAM */ remain = len; -bytes = 1024 * 1024; +bytes = MAX(1024 * 1024, len); r = VIR_ALLOC_N(buf, bytes); if (r 0) { @@ -1104,7 +1099,6 @@ safezero(int fd, off_t offset, off_t len) VIR_FREE(buf); return 0; } -# endif /* HAVE_MMAP */ #endif /* HAVE_POSIX_FALLOCATE */ -- 1.8.3.1 -- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
[libvirt] [PATCHv3] lxc veth interfaces: fix interface name collisions
The previous veth interface naming scheme tried to find the lowest unused index for both the parent and container veth interfaces. That's susceptible to race conditions when multiple containers are started at the same time. Try to pick a random unused interface id for the parent if one wasn't given by the caller and use that as a template for the container interface name. This should prevent races to create two uniquely named interfaces for each container. The caller can still assign the parent interface name manually and that name is used for in container (before the interface is moved to the container namespace and renamed.) Signed-off-by: Oskari Saarenmaa o...@ohmu.fi --- My previous two patches for this issue were rejected because of concerns with the naming scheme (in v1) or leaving fixing the race condition to the caller (v2) and I mostly forgot about this issue after implementing a workaround in my application, but yesterday someone else on #virt ran into the same issue and I took another look at my patches. The third iteration of this patch uses random identifiers and makes sure they're not already in use, but still does not retry interface creation on failure. I believe this is good enough as the likelihood of two containers starting up at the same time and coming up with the same random 32-bit identifier should be rather low. This does change the interface names from nice low integers to random larger integers, but I don't see that an issue. And the caller can select any other name they like if that's not acceptable. src/util/virnetdevveth.c | 95 ++-- 1 file changed, 27 insertions(+), 68 deletions(-) diff --git a/src/util/virnetdevveth.c b/src/util/virnetdevveth.c index 039767f..9a5bc63 100644 --- a/src/util/virnetdevveth.c +++ b/src/util/virnetdevveth.c @@ -23,6 +23,7 @@ #include config.h +#include net/if.h #include sys/wait.h #include virnetdevveth.h @@ -33,119 +34,77 @@ #include virfile.h #include virstring.h #include virutil.h +#include virrandom.h #define VIR_FROM_THIS VIR_FROM_NONE /* Functions */ /** - * virNetDevVethGetFreeName: - * @veth: pointer to store returned name for veth device - * @startDev: device number to start at (x in vethx) - * - * Looks in /sys/class/net/ to find the first available veth device - * name. - * - * Returns non-negative device number on success or -1 in case of error - */ -static int virNetDevVethGetFreeName(char **veth, int startDev) -{ -int devNum = startDev-1; -char *path = NULL; - -VIR_DEBUG(Find free from veth%d, startDev); -do { -VIR_FREE(path); -++devNum; -if (virAsprintf(path, /sys/class/net/veth%d/, devNum) 0) -return -1; -VIR_DEBUG(Probe %s, path); -} while (virFileExists(path)); -VIR_FREE(path); - -if (virAsprintf(veth, veth%d, devNum) 0) -return -1; - -return devNum; -} - -/** * virNetDevVethCreate: * @veth1: pointer to name for parent end of veth pair - * @veth2: pointer to return name for container end of veth pair + * @veth2: pointer to name for container end of veth pair * * Creates a veth device pair using the ip command: * ip link add veth1 type veth peer name veth2 - * If veth1 points to NULL on entry, it will be a valid interface on - * return. veth2 should point to NULL on entry. * - * NOTE: If veth1 and veth2 names are not specified, ip will auto assign - * names. There seems to be two problems here - - * 1) There doesn't seem to be a way to determine the names of the - * devices that it creates. They show up in ip link show and - * under /sys/class/net/ however there is no guarantee that they - * are the devices that this process just created. - * 2) Once one of the veth devices is moved to another namespace, it - * is no longer visible in the parent namespace. This seems to - * confuse the name assignment causing it to fail with File exists. - * Because of these issues, this function currently allocates names - * prior to using the ip command, and returns any allocated names - * to the caller. + * If veth1 or veth2 points to NULL on entry, they will be + * a valid interface on return. * * Returns 0 on success or -1 in case of error */ int virNetDevVethCreate(char** veth1, char** veth2) { -int rc = -1; const char *argv[] = { ip, link, add, NULL, type, veth, peer, name, NULL, NULL }; -int vethDev = 0; bool veth1_alloc = false; bool veth2_alloc = false; VIR_DEBUG(Host: %s guest: %s, NULLSTR(*veth1), NULLSTR(*veth2)); if (*veth1 == NULL) { -if ((vethDev = virNetDevVethGetFreeName(veth1, vethDev)) 0) -goto cleanup; +size_t veth_path_max = sizeof(/sys/class/net//) + IF_NAMESIZE; +char *veth1_path; + +if (VIR_ALLOC_N(*veth1, IF_NAMESIZE) 0 || +VIR_ALLOC_N(veth1_path
Re: [libvirt] [PATCHv3] lxc veth interfaces: fix interface name collisions
On 02/10/13 14:30, Daniel P. Berrange wrote: On Wed, Oct 02, 2013 at 11:45:27AM +0300, Oskari Saarenmaa wrote: This does change the interface names from nice low integers to random larger integers, but I don't see that an issue. And the caller can select any other name they like if that's not acceptable. I think having 20 digit NICs names is pretty fugly. It is possible to address the race condition by re-trying creation with new names. I will post patches todo this. I doubt a lot people look at the interface names and care what they are (especially on servers with dozens of virtual interfaces). Also, with the new 'consistent network device naming' my desktop's interface name changed from eth0 to enp0s25 which was a bit annoying at first but makes a lot of sense with multiple devices. I think it would've also made sense to make the host interface names for containers and vms consistent, but with the limited (16 byte) size of the interface name we can't stuff the whole vm name there; a MAC address seemed like a good compromise to me and made the code shorter and simpler. Anyway, enough bikeshedding, I'm happy that the conflicts are getting fixed. Thanks, Oskari -- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
[libvirt] [PATCH] virfile: safezero: align mmap offset to page size
mmap's offset must be aligned to page size or mapping will fail. mmap-based safezero is only used if posix_fallocate isn't available. Signed-off-by: Oskari Saarenmaa o...@ohmu.fi --- src/util/virfile.c | 14 +++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/util/virfile.c b/src/util/virfile.c index 20ca89f..16f8101 100644 --- a/src/util/virfile.c +++ b/src/util/virfile.c @@ -1038,9 +1038,16 @@ safezero(int fd, off_t offset, off_t len) int safezero(int fd, off_t offset, off_t len) { +static long pagemask = 0; +off_t map_skip; int r; char *buf; +/* align offset and length, rounding offset down and length up */ +if (pagemask == 0) +pagemask = ~(sysconf(_SC_PAGESIZE) - 1); +map_skip = offset - (offset pagemask); + /* memset wants the mmap'ed file to be present on disk so create a * sparse file */ @@ -1048,12 +1055,13 @@ safezero(int fd, off_t offset, off_t len) if (r 0) return -1; -buf = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, offset); +buf = mmap(NULL, len + map_skip, PROT_READ | PROT_WRITE, MAP_SHARED, + fd, offset - map_skip); if (buf == MAP_FAILED) return -1; -memset(buf, 0, len); -munmap(buf, len); +memset(buf + map_skip, 0, len); +munmap(buf, len + map_skip); return 0; } -- 1.8.3.1 -- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
[libvirt] [PATCH] storage: fix file allocation behavior in file cloning
Fixed the safezero call for allocating the rest of the file after cloning an existing volume; it used to always use a zero offset, causing it to only allocate the beginning of the file. Also modified file creation to try to use fallocate(2) to pre-allocate disk space before copying any data to make sure it fails early on if disk is full and makes sure we can skip zero blocks when copying file contents. If fallocate isn't available we will zero out the rest of the file after cloning and only use sparse cloning if client requested a lower allocation than the input volume's capacity. Signed-off-by: Oskari Saarenmaa o...@ohmu.fi --- the safezero mmap issue fixed in my previous patch never showed up because all safezero calls previously had 0 offset (or maybe everyone's using posix_fallocate) configure.ac | 8 src/storage/storage_backend.c | 35 ++- 2 files changed, 34 insertions(+), 9 deletions(-) diff --git a/configure.ac b/configure.ac index 553015a..220d95b 100644 --- a/configure.ac +++ b/configure.ac @@ -253,10 +253,10 @@ AC_CHECK_SIZEOF([long]) dnl Availability of various common functions (non-fatal if missing), dnl and various less common threadsafe functions -AC_CHECK_FUNCS_ONCE([cfmakeraw geteuid getgid getgrnam_r getmntent_r \ - getpwuid_r getuid kill mmap newlocale posix_fallocate posix_memalign \ - prlimit regexec sched_getaffinity setgroups setns setrlimit symlink \ - sysctlbyname]) +AC_CHECK_FUNCS_ONCE([cfmakeraw fallocate geteuid getgid getgrnam_r \ + getmntent_r getpwuid_r getuid kill mmap newlocale posix_fallocate \ + posix_memalign prlimit regexec sched_getaffinity setgroups setns \ + setrlimit symlink sysctlbyname]) dnl Availability of pthread functions (if missing, win32 threading is dnl assumed). Because of $LIB_PTHREAD, we cannot use AC_CHECK_FUNCS_ONCE. diff --git a/src/storage/storage_backend.c b/src/storage/storage_backend.c index b7edf85..5f1bc66 100644 --- a/src/storage/storage_backend.c +++ b/src/storage/storage_backend.c @@ -129,7 +129,7 @@ virStorageBackendCopyToFD(virStorageVolDefPtr vol, virStorageVolDefPtr inputvol, int fd, unsigned long long *total, - int is_dest_file) + int want_sparse) { int inputfd = -1; int amtread = -1; @@ -191,7 +191,7 @@ virStorageBackendCopyToFD(virStorageVolDefPtr vol, interval = ((wbytes amtleft) ? amtleft : wbytes); int offset = amtread - amtleft; -if (is_dest_file memcmp(buf+offset, zerobuf, interval) == 0) { +if (want_sparse memcmp(buf+offset, zerobuf, interval) == 0) { if (lseek(fd, interval, SEEK_CUR) 0) { ret = -errno; virReportSystemError(errno, @@ -315,6 +315,7 @@ static int createRawFile(int fd, virStorageVolDefPtr vol, virStorageVolDefPtr inputvol) { +int need_alloc = 1; int ret = 0; unsigned long long remain; @@ -328,17 +329,41 @@ createRawFile(int fd, virStorageVolDefPtr vol, goto cleanup; } +#ifdef HAVE_FALLOCATE +/* Try to preallocate all requested disk space, but fall back to + * other methods if this fails with ENOSYS or EOPNOTSUPP. + * NOTE: do not use posix_fallocate; posix_fallocate falls back + * to writing zeroes block by block in case fallocate isn't + * available, and since we're going to copy data from another + * file it doesn't make sense to write the file twice. */ +if (fallocate(fd, 0, 0, vol-allocation) == 0) { +need_alloc = 0; +} else if (errno != ENOSYS errno != EOPNOTSUPP) { +ret = -errno; +virReportSystemError(errno, + _(cannot allocate %llu bytes in file '%s'), + vol-allocation, vol-target.path); +goto cleanup; +} +#endif + remain = vol-allocation; if (inputvol) { -ret = virStorageBackendCopyToFD(vol, inputvol, fd, remain, 1); +/* allow zero blocks to be skipped if we've requested sparse + * allocation (allocation capacity) or we have already + * been able to allocate the required space. */ +int want_sparse = (need_alloc == 0) || + (vol-allocation inputvol-capacity); + +ret = virStorageBackendCopyToFD(vol, inputvol, fd, remain, want_sparse); if (ret 0) { goto cleanup; } } -if (remain) { -if (safezero(fd, 0, remain) 0) { +if (remain need_alloc) { +if (safezero(fd, vol-allocation - remain, remain) 0) { ret = -errno; virReportSystemError(errno, _(cannot fill file '%s'), vol-target.path); -- 1.8.3.1 -- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
Re: [libvirt] [PATCH] storage: use btrfs file clone ioctl when possible
On Fri, Sep 27, 2013 at 03:19:06PM +0100, Daniel P. Berrange wrote: On Fri, Sep 27, 2013 at 05:02:53PM +0300, Oskari Saarenmaa wrote: Btrfs provides a copy-on-write clone ioctl so let's try to use it instead of copying files block by block. The ioctl is executed unconditionally if it's available and we fall back to block copying if it fails, similarly to cp --reflink=auto. Currently the virStorageVolCreateXMLFrom method does a full allocation of storage when cloning volumes. This means applications can rely on the image having enough space when clone completes and won't get ENOSPC in the VM. AFAICT, this change to do copy-on-write changes the API to do thin provisioning of the storage during clone, so any future write on either the new or old volume may generate ENOSPC when btrfs finally copies the sector. I don't think this is a good thing. I think applications should have to explicitly request copy-on-write behaviour for the clone so they know the implications. That's a good point. However, it looks like this change would only change the behavior for the old volumes; new volumes are always created sparsely and they may already get ENOSPC on write if they contained zero blocks. This should probably be fixed by calling fallocate instead of lseek when noticing empty blocks (safezero should probably be used instead, but it's currently rather unsafe if posix_fallocate isn't available.) I was wondering if we could reuse the allocation and capacity fields to decide whether or not to try to do a cow-clone (or sparse allocation of the cloned bits)? Currently a cloned volume's allocation is always set to at least the original volume's capacity and the original client-requested allocation value is not passed on to the code doing the cloning, but we could pass it on and allow copy-on-write clones if allocation is set to zero (no space is guaranteed to be available for writing) and also change sparse cloning to only happen if allocation is lower than capacity. / Oskari -- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
[libvirt] [PATCH] storage: use btrfs file clone ioctl when possible
Btrfs provides a copy-on-write clone ioctl so let's try to use it instead of copying files block by block. The ioctl is executed unconditionally if it's available and we fall back to block copying if it fails, similarly to cp --reflink=auto. Signed-off-by: Oskari Saarenmaa o...@ohmu.fi --- configure.ac | 5 + src/storage/storage_backend.c | 11 +++ 2 files changed, 16 insertions(+) diff --git a/configure.ac b/configure.ac index 553015a..acae92e 100644 --- a/configure.ac +++ b/configure.ac @@ -1984,6 +1984,11 @@ fi AM_CONDITIONAL([WITH_STORAGE], [test $with_storage = yes]) dnl +dnl check for headers for filesystem specific operations +dnl +AC_CHECK_HEADERS([linux/btrfs.h]) + +dnl dnl check for (ESX) dnl diff --git a/src/storage/storage_backend.c b/src/storage/storage_backend.c index b7edf85..40bfb73 100644 --- a/src/storage/storage_backend.c +++ b/src/storage/storage_backend.c @@ -38,6 +38,9 @@ # include sys/ioctl.h # include linux/fs.h #endif +#ifdef HAVE_LINUX_BTRFS_H +# include linux/btrfs.h +#endif #if WITH_SELINUX # include selinux/selinux.h @@ -149,6 +152,13 @@ virStorageBackendCopyToFD(virStorageVolDefPtr vol, goto cleanup; } +#ifdef HAVE_LINUX_BTRFS_H +/* try to perform a btrfs CoW clone */ +if (ioctl(fd, BTRFS_IOC_CLONE, inputfd) == 0) { +goto done; +} +#endif + #ifdef __linux__ if (ioctl(fd, BLKBSZGET, wbytes) 0) { wbytes = 0; @@ -210,6 +220,7 @@ virStorageBackendCopyToFD(virStorageVolDefPtr vol, } while ((amtleft -= interval) 0); } +done: if (fdatasync(fd) 0) { ret = -errno; virReportSystemError(errno, _(cannot sync data to file '%s'), -- 1.8.3.1 -- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
[libvirt] [PATCH 2/2] storage: btrfs subvolumes for directory pools
This commit adds support for btrfs subvolumes as directory volumes. The directory storage pool can be used to manage btrfs subvolumes on an existing btrfs filesystem. The driver can create new blank subvolumes and snapshots of existing subvolumes as well as delete existing subvolumes. The subvolumes created are automatically made visible on the host side and can be attached to domains using the filesystem tags as defined in 'format domain' documentation. Subvolumes do not implement quotas at the moment because the current (btrfs-progs-0.20.rc1.20130501git7854c8b-4.fc20.x86_64) support for quota management in btrfs-progs is lacking the necessary features, for example it's not possible to see the quota assigned to a certain subvolume and usage information is only updated on syncfs(2). Quota support will be implemented once the tools gain the necessary features. Signed-off-by: Oskari Saarenmaa o...@ohmu.fi --- configure.ac | 25 ++- docs/schemas/storagevol.rng | 1 + docs/storage.html.in | 30 ++- libvirt.spec.in | 4 + src/conf/storage_conf.c | 3 + src/conf/storage_conf.h | 1 + src/storage/storage_backend.c | 15 +- src/storage/storage_backend_fs.c | 222 -- src/util/virstoragefile.c | 4 +- src/util/virstoragefile.h | 1 + tests/storagevolxml2xmlin/vol-btrfs-snapshot.xml | 13 ++ tests/storagevolxml2xmlin/vol-btrfs.xml | 9 + tests/storagevolxml2xmlout/vol-btrfs-snapshot.xml | 26 +++ tests/storagevolxml2xmlout/vol-btrfs.xml | 17 ++ tests/storagevolxml2xmltest.c | 2 + 15 files changed, 343 insertions(+), 30 deletions(-) create mode 100644 tests/storagevolxml2xmlin/vol-btrfs-snapshot.xml create mode 100644 tests/storagevolxml2xmlin/vol-btrfs.xml create mode 100644 tests/storagevolxml2xmlout/vol-btrfs-snapshot.xml create mode 100644 tests/storagevolxml2xmlout/vol-btrfs.xml diff --git a/configure.ac b/configure.ac index cdbe6cc..91b0a8d 100644 --- a/configure.ac +++ b/configure.ac @@ -1642,6 +1642,10 @@ AC_ARG_WITH([storage-sheepdog], [AS_HELP_STRING([--with-storage-sheepdog], [with Sheepdog backend for the storage driver @:@default=check@:@])], [],[with_storage_sheepdog=check]) +AC_ARG_WITH([storage-btrfs], + [AS_HELP_STRING([--with-storage-btrfs], +[with Btrfs backend for the storage driver @:@default=check@:@])], + [],[with_storage_btrfs=check]) if test $with_libvirtd = no; then with_storage_dir=no @@ -1854,6 +1858,24 @@ fi AM_CONDITIONAL([WITH_STORAGE_SHEEPDOG], [test $with_storage_sheepdog = yes]) +if test $with_storage_btrfs = yes || test $with_storage_btrfs = check; then + AC_PATH_PROG([BTRFS], [btrfs], [], [$PATH:/sbin:/usr/sbin]) + + if test $with_storage_btrfs = yes ; then +if test -z $BTRFS ; then AC_MSG_ERROR([We need btrfs -command for btrfs storage driver]) ; fi + else +if test -z $BTRFS ; then with_storage_btrfs=no ; fi +if test $with_storage_btrfs = check ; then with_storage_btrfs=yes ; fi + fi + + if test $with_storage_btrfs = yes ; then +AC_DEFINE_UNQUOTED([WITH_STORAGE_BTRFS], 1, [whether Btrfs backend for storage driver is enabled]) +AC_DEFINE_UNQUOTED([BTRFS],[$BTRFS],[Location of btrfs program]) + fi +fi +AM_CONDITIONAL([WITH_STORAGE_BTRFS], [test $with_storage_btrfs = yes]) + + LIBPARTED_CFLAGS= LIBPARTED_LIBS= @@ -1941,7 +1963,7 @@ AC_SUBST([DEVMAPPER_CFLAGS]) AC_SUBST([DEVMAPPER_LIBS]) with_storage=no -for backend in dir fs lvm iscsi scsi mpath rbd disk; do +for backend in dir fs lvm iscsi scsi mpath rbd disk btrfs; do if eval test \$with_storage_$backend = yes; then with_storage=yes break @@ -2663,6 +2685,7 @@ AC_MSG_NOTICE([ mpath: $with_storage_mpath]) AC_MSG_NOTICE([Disk: $with_storage_disk]) AC_MSG_NOTICE([ RBD: $with_storage_rbd]) AC_MSG_NOTICE([Sheepdog: $with_storage_sheepdog]) +AC_MSG_NOTICE([ Btrfs: $with_storage_btrfs]) AC_MSG_NOTICE([]) AC_MSG_NOTICE([Security Drivers]) AC_MSG_NOTICE([]) diff --git a/docs/schemas/storagevol.rng b/docs/schemas/storagevol.rng index 8bc5907..8814973 100644 --- a/docs/schemas/storagevol.rng +++ b/docs/schemas/storagevol.rng @@ -201,6 +201,7 @@ valueqed/value valuevmdk/value valuevpc/value + valuevolume/value /choice /define diff --git a/docs/storage.html.in b/docs/storage.html.in index 1181444..03018ab 100644 --- a/docs/storage.html.in +++ b/docs/storage.html.in @@ -119,12 +119,15 @@ h2a name=StorageBackendDirDirectory pool/a/h2 p A pool with a type of codedir/code provides the means to manage - files within a directory. The files can be fully allocated raw files, - sparsely allocated raw files, or one of the special disk
[libvirt] [PATCH 1/2] virFileFsType: get filesystem type of a given path
This can be used by storage pools to figure out which actions are available on various paths (for example subvolumes when running on btrfs.) Signed-off-by: Oskari Saarenmaa o...@ohmu.fi --- src/libvirt_private.syms | 1 + src/util/virfile.c | 47 +++ src/util/virfile.h | 1 + 3 files changed, 49 insertions(+) diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 35f0f1b..d0238cf 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -1370,6 +1370,7 @@ virFileExists; virFileFclose; virFileFdopen; virFileFindMountPoint; +virFileFsType; virFileHasSuffix; virFileIsAbsPath; virFileIsDir; diff --git a/src/util/virfile.c b/src/util/virfile.c index feac3c9..44871d6 100644 --- a/src/util/virfile.c +++ b/src/util/virfile.c @@ -1133,6 +1133,45 @@ cleanup: return ret; } +/* search /proc/mounts for the filesystem type of the given path; + * return pointer to malloc'ed string of type if found, otherwise + * return NULL. + */ +char * +virFileFsType(const char *path) +{ +FILE *f; +struct mntent mb; +char mntbuf[1024]; +char *real = NULL, *ret = NULL; +size_t lookup_len, longest = 0; + +if ((real = realpath(path, NULL)) == NULL) +return NULL; +lookup_len = strlen(real); + +f = setmntent(/proc/mounts, r); +if (!f) { +VIR_FREE(real); +return NULL; +} + +while (getmntent_r(f, mb, mntbuf, sizeof(mntbuf))) { +size_t mnt_dir_len = strlen(mb.mnt_dir); +if (lookup_len = mnt_dir_len mnt_dir_len = longest) { +if (memcmp(mb.mnt_dir, real, mnt_dir_len) == 0) { +longest = mnt_dir_len; +VIR_FREE(ret); +ignore_value(VIR_STRDUP_QUIET(ret, mb.mnt_type)); +} +} +} +endmntent(f); +VIR_FREE(real); + +return ret; +} + #else /* defined HAVE_MNTENT_H defined HAVE_GETMNTENT_R */ char * @@ -1143,6 +1182,14 @@ virFileFindMountPoint(const char *type ATTRIBUTE_UNUSED) return NULL; } +char * +virFileFsType(const char *path) +{ +errno = ENOSYS; + +return NULL; +} + #endif /* defined HAVE_MNTENT_H defined HAVE_GETMNTENT_R */ int diff --git a/src/util/virfile.h b/src/util/virfile.h index 72d35ce..3c01247 100644 --- a/src/util/virfile.h +++ b/src/util/virfile.h @@ -221,6 +221,7 @@ int virFileOpenTty(int *ttymaster, int rawmode); char *virFileFindMountPoint(const char *type); +char *virFileFsType(const char *path); void virFileWaitForDevices(void); -- 1.8.3.1 -- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
[libvirt] [PATCH 0/2] btrfs subvolume management
Moved btrfs subvolume management to storage_backend_fs.c instead of implementing it as a separate pool as suggested by Daniel P. Berrange in https://www.redhat.com/archives/libvir-list/2013-September/msg00316.html Oskari Saarenmaa (2): virFileFsType: get filesystem type of a given path storage: btrfs subvolumes for directory pools configure.ac | 25 ++- docs/schemas/storagevol.rng | 1 + docs/storage.html.in | 30 ++- libvirt.spec.in | 4 + src/conf/storage_conf.c | 3 + src/conf/storage_conf.h | 1 + src/libvirt_private.syms | 1 + src/storage/storage_backend.c | 15 +- src/storage/storage_backend_fs.c | 222 -- src/util/virfile.c| 47 + src/util/virfile.h| 1 + src/util/virstoragefile.c | 4 +- src/util/virstoragefile.h | 1 + tests/storagevolxml2xmlin/vol-btrfs-snapshot.xml | 13 ++ tests/storagevolxml2xmlin/vol-btrfs.xml | 9 + tests/storagevolxml2xmlout/vol-btrfs-snapshot.xml | 26 +++ tests/storagevolxml2xmlout/vol-btrfs.xml | 17 ++ tests/storagevolxml2xmltest.c | 2 + 18 files changed, 392 insertions(+), 30 deletions(-) -- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
[libvirt] [PATCH] docs, comments: minor typo fixes
Signed-off-by: Oskari Saarenmaa o...@ohmu.fi --- 84 files changed, 97 insertions(+), 97 deletions(-) diff --git a/ChangeLog-old b/ChangeLog-old index b5d44d5..e07b11c 100644 --- a/ChangeLog-old +++ b/ChangeLog-old @@ -5504,7 +5504,7 @@ Tue Nov 11 15:51:42 GMT 2008 Daniel P. Berrange berra...@redhat.com Mon Nov 10 12:05:42 GMT 2008 Daniel P. Berrange berra...@redhat.com - * src/openvz_conf.c: Read filesytem template name from config + * src/openvz_conf.c: Read filesystem template name from config files. Increase buffer size when parsing vzctl version number Thu Nov 6 20:45:42 CET 2008 Jim Meyering meyer...@redhat.com diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index f8bfe0b..d656836 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -254,7 +254,7 @@ to the (optional) ramdisk image in the host OS./dd dtcodecmdline/code/dt ddThe contents of this element specify arguments to be passed to -the kernel (or installer) at boottime. This is often used to +the kernel (or installer) at boot time. This is often used to specify an alternate primary console (eg serial port), or the installation media source / kickstart file/dd dtcodedtb/code/dt @@ -426,7 +426,7 @@ process and virtual CPUs can be specified separately by codecputune/code. If attribute codeemulatorpin/code of codecputune/code is specified, codecpuset/code -specified by codevcpu/code here will be ingored; Similarly, +specified by codevcpu/code here will be ignored; Similarly, For virtual CPUs which has codevcpupin/code specified, codecpuset/code specified by codecpuset/code here will be ignored; For virtual CPUs which doesn't have @@ -1235,7 +1235,7 @@ /tr tr tdrelaxed/td - tdRelax contstraints on timers/td + tdRelax constraints on timers/td td on, off/td tdspan class=since1.0.0 (QEMU only)/span/td /tr @@ -1358,7 +1358,7 @@ dd p The codetickpolicy/code attribute determines what -happens whens QEMU misses a deadline for injecting a +happens when QEMU misses a deadline for injecting a tick to the guest: /p dl @@ -2268,7 +2268,7 @@ dtcodereadonly/code/dt dd -Enables exporting filesytem as a readonly mount for guest, by +Enables exporting filesystem as a readonly mount for guest, by default read-write access is given (currently only works for QEMU/KVM driver). /dd @@ -2385,7 +2385,7 @@ h4a name=elementsControllersControllers/a/h4 p - Depending on the guest architecture, some device busses can + Depending on the guest architecture, some device buses can appear more than once, with a group of virtual devices tied to a virtual controller. Normally, libvirt can automatically infer such controllers without requiring explicit XML markup, but sometimes @@ -3773,7 +3773,7 @@ qemu-kvm -net nic,model=? /dev/null span class=sinceSince 1.1.0./span This uses the optional codenativeMode/code attribute on the codelt;taggt;/code element: codenativeMode/code may be set to 'tagged' or - 'untagged'. The id atribute of the element sets the native vlan. + 'untagged'. The id attribute of the element sets the native vlan. /p h5a name=elementLinkModifying virtual link state/a/h5 @@ -4134,7 +4134,7 @@ qemu-kvm -net nic,model=? /dev/null 9216, and codeheads/code with value 1. By default, the first video device in domain xml is the primary one, but the optional attribute codeprimary/code (span class=sincesince 1.0.2/span) -with value 'yes' can be used to mark the primary in cases of mutiple +with value 'yes' can be used to mark the primary in cases of multiple video device. The non-primary must be type of qxl. The optional attribute coderam/code (span class=sincesince 1.0.2/span) is allowed for qxl type only and specifies diff --git a/docs/formatnetwork.html.in b/docs/formatnetwork.html.in index 36c381a..63600b3 100644 --- a/docs/formatnetwork.html.in +++ b/docs/formatnetwork.html.in @@ -478,7 +478,7 @@ span class=sinceSince 1.1.0/span. This uses the optional codenativeMode/code attribute on the codelt;taggt;/code element: codenativeMode/code may be set to 'tagged' or - 'untagged'. The id atribute of the element sets the native vlan. + 'untagged'. The id attribute of the element sets the native vlan. /p p codelt;vlangt;/code elements can also be specified in @@ -591,7 +591,7 @@ This particular route would *not* be preferred if there was another existing rout on the system with the same address and prefix but with a lower value
[libvirt] [PATCH] storage: new backend: btrfs subvolumes
This commit adds a new storage pool driver, btrfs, which can be used to manage btrfs subvolumes on an existing btrfs filesystem. The driver can create new blank subvolumes and snapshots of existing subvolumes as well as delete existing subvolumes. The subvolumes created are automatically made visible on the host side and can be attached to domains using the filesystem tags as defined in 'format domain' documentation. Libvirt volumes do not implement quotas at the moment because the current (btrfs-progs-0.20.rc1.20130501git7854c8b-4.fc20.x86_64) support for quota management in btrfs-progs is lacking the necessary features, for example it's not possible to see the quota assigned to a certain subvolume and usage information is only updated on syncfs(2). Quota support will be implemented once the tools gain the necessary features. Signed-off-by: Oskari Saarenmaa o...@ohmu.fi --- configure.ac | 23 +- docs/drivers.html.in | 1 + docs/schemas/storagepool.rng | 11 + docs/storage.html.in | 60 + include/libvirt/libvirt.h.in | 1 + libvirt.spec.in | 2 + src/Makefile.am | 8 + src/conf/storage_conf.c | 12 +- src/conf/storage_conf.h | 1 + src/storage/storage_backend.c | 6 + src/storage/storage_backend_btrfs.c | 284 ++ src/storage/storage_backend_btrfs.h | 30 +++ tests/storagepoolxml2xmlin/pool-btrfs.xml | 7 + tests/storagepoolxml2xmlout/pool-btrfs.xml| 17 ++ tests/storagepoolxml2xmltest.c| 1 + tests/storagevolxml2xmlin/vol-btrfs-snapshot.xml | 11 + tests/storagevolxml2xmlin/vol-btrfs.xml | 8 + tests/storagevolxml2xmlout/vol-btrfs-snapshot.xml | 26 ++ tests/storagevolxml2xmlout/vol-btrfs.xml | 17 ++ tests/storagevolxml2xmltest.c | 2 + 20 files changed, 525 insertions(+), 3 deletions(-) create mode 100644 src/storage/storage_backend_btrfs.c create mode 100644 src/storage/storage_backend_btrfs.h create mode 100644 tests/storagepoolxml2xmlin/pool-btrfs.xml create mode 100644 tests/storagepoolxml2xmlout/pool-btrfs.xml create mode 100644 tests/storagevolxml2xmlin/vol-btrfs-snapshot.xml create mode 100644 tests/storagevolxml2xmlin/vol-btrfs.xml create mode 100644 tests/storagevolxml2xmlout/vol-btrfs-snapshot.xml create mode 100644 tests/storagevolxml2xmlout/vol-btrfs.xml diff --git a/configure.ac b/configure.ac index f853e03..100c87d 100644 --- a/configure.ac +++ b/configure.ac @@ -1562,6 +1562,8 @@ AC_ARG_WITH([storage-rbd], AC_HELP_STRING([--with-storage-rbd], [with RADOS Block Device backend for the storage driver @:@default=check@:@]),[],[with_storage_rbd=check]) AC_ARG_WITH([storage-sheepdog], AC_HELP_STRING([--with-storage-sheepdog], [with Sheepdog backend for the storage driver @:@default=check@:@]),[],[with_storage_sheepdog=check]) +AC_ARG_WITH([storage-btrfs], + AC_HELP_STRING([--with-storage-btrfs], [with Btrfs backend for the storage driver @:@default=check@:@]),[],[with_storage_btrfs=check]) if test $with_libvirtd = no; then with_storage_dir=no @@ -1774,6 +1776,24 @@ fi AM_CONDITIONAL([WITH_STORAGE_SHEEPDOG], [test $with_storage_sheepdog = yes]) +if test $with_storage_btrfs = yes || test $with_storage_btrfs = check; then + AC_PATH_PROG([BTRFS], [btrfs], [], [$PATH:/sbin:/usr/sbin]) + + if test $with_storage_btrfs = yes ; then +if test -z $BTRFS ; then AC_MSG_ERROR([We need btrfs -command for btrfs storage driver]) ; fi + else +if test -z $BTRFS ; then with_storage_btrfs=no ; fi +if test $with_storage_btrfs = check ; then with_storage_btrfs=yes ; fi + fi + + if test $with_storage_btrfs = yes ; then +AC_DEFINE_UNQUOTED([WITH_STORAGE_BTRFS], 1, [whether Btrfs backend for storage driver is enabled]) +AC_DEFINE_UNQUOTED([BTRFS],[$BTRFS],[Location of btrfs program]) + fi +fi +AM_CONDITIONAL([WITH_STORAGE_BTRFS], [test $with_storage_btrfs = yes]) + + LIBPARTED_CFLAGS= LIBPARTED_LIBS= @@ -1861,7 +1881,7 @@ AC_SUBST([DEVMAPPER_CFLAGS]) AC_SUBST([DEVMAPPER_LIBS]) with_storage=no -for backend in dir fs lvm iscsi scsi mpath rbd disk; do +for backend in dir fs lvm iscsi scsi mpath rbd disk btrfs; do if eval test \$with_storage_$backend = yes; then with_storage=yes break @@ -2573,6 +2593,7 @@ AC_MSG_NOTICE([ mpath: $with_storage_mpath]) AC_MSG_NOTICE([Disk: $with_storage_disk]) AC_MSG_NOTICE([ RBD: $with_storage_rbd]) AC_MSG_NOTICE([Sheepdog: $with_storage_sheepdog]) +AC_MSG_NOTICE([ Btrfs: $with_storage_btrfs]) AC_MSG_NOTICE([]) AC_MSG_NOTICE([Security Drivers]) AC_MSG_NOTICE([]) diff --git a/docs/drivers.html.in b/docs/drivers.html.in index 7aa44f3..d9eb6d7 100644
Re: [libvirt] [PATCH] virNetDevVethCreate: assign names based on mac address by default
On Thu, Aug 29, 2013 at 10:10:36AM +0200, Michal Privoznik wrote: On 28.08.2013 23:05, Oskari Saarenmaa wrote: Interface names do not have to be numerical (or veth + number) and trying to assign them to that format is susceptible to race conditions. Instead, assign the parent interface name according to the mac address (the last three bytes) if no name was given by the caller and use the parent interface name plus 'p' for the container name. I don't think it is a good idea. What it user defines MAC addresses in a way that the last three bytes are the same? E.g. 00:11:22:33:44:55 00:22:11:33:44:55 (there are plenty of possibilities). With your code we would create veth334455 for the first domain and there's nothing left for the other one but eyes to cry. Sure, it's possible for the user to assign addresses like this, but I believe most mac addresses are assigned randomly, that's what libvirt does by default, no? Also, it's possible for the user to override the interface name if they want to. Currently it's only possible to set the first interface name, but there's no way to set the name of the second interface which causes failures because of races. In any case it makes sense to set the second interface's name based on the first interface's name so we don't have to try to come up with two different unique names. Moreover, there's no race now, as we use global lock to mutually exclude call to virNetDevVethCreate. Although, I must admit it's only within a single driver, I think. So if there are two domains starting (one in qemu driver the other one in lxc, for instance) there can be race. But this should be solved in a different way then. The virNetDevVethCreate should be turned into virObjectLockable and a veth should be allocated by calling a method. I ran into this issue repeatedly when I tried to start multiple LXC domains simultaneously from my application (https://github.com/melor/poni) which runs libvirt operations in their own threads. / Oskari -- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
Re: [libvirt] [PATCH] virNetDevVethCreate: assign names based on mac address by default
29.08.2013 11:36, Gao feng kirjoitti: On 08/29/2013 04:20 PM, Oskari Saarenmaa wrote: On Thu, Aug 29, 2013 at 10:10:36AM +0200, Michal Privoznik wrote: On 28.08.2013 23:05, Oskari Saarenmaa wrote: Interface names do not have to be numerical (or veth + number) and trying to assign them to that format is susceptible to race conditions. Instead, assign the parent interface name according to the mac address (the last three bytes) if no name was given by the caller and use the parent interface name plus 'p' for the container name. I don't think it is a good idea. What it user defines MAC addresses in a way that the last three bytes are the same? E.g. 00:11:22:33:44:55 00:22:11:33:44:55 (there are plenty of possibilities). With your code we would create veth334455 for the first domain and there's nothing left for the other one but eyes to cry. Sure, it's possible for the user to assign addresses like this, but I believe most mac addresses are assigned randomly, that's what libvirt does by default, no? Also, it's possible for the user to override the interface name if they want to. Currently it's only possible to set the first interface name, but there's no way to set the name of the second interface which causes failures because of races. In any case it makes sense to set the second interface's name based on the first interface's name so we don't have to try to come up with two different unique names. Moreover, there's no race now, as we use global lock to mutually exclude call to virNetDevVethCreate. Although, I must admit it's only within a single driver, I think. So if there are two domains starting (one in qemu driver the other one in lxc, for instance) there can be race. But this should be solved in a different way then. The virNetDevVethCreate should be turned into virObjectLockable and a veth should be allocated by calling a method. I ran into this issue repeatedly when I tried to start multiple LXC domains simultaneously from my application (https://github.com/melor/poni) which runs libvirt operations in their own threads. It's easy to be resolved. give the parentVeth and containerVeth a name by default. At the moment virLXCProcessSetupInterfaceBridged always passes a NULL for the container interface name and while it would be possible to make it configurable like the parent interface name, I believe it makes more sense to just do the right thing by default. The suggested patch also greatly simplifies name selection by removing the loops trying to find a supposedly unused interface name. If you don't like using mac address in the interface name we could just replace it with a random string with a loop checking that the selected name wasn't in use when it was selected. The current approach of trying to use a name with a low number just doesn't work in an environment where another process does the same thing. / Oskari -- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
Re: [libvirt] [PATCH] virNetDevVethCreate: assign names based on mac address by default
29.08.2013 12:11, Gao feng kirjoitti: The suggested patch also greatly simplifies name selection by removing the loops trying to find a supposedly unused interface name. If you don't like using mac address in the interface name we could just replace it with a random string with a loop checking that the selected name wasn't in use when it was selected. The current approach of trying to use a name with a low number just doesn't work in an environment where another process does the same thing. Use mac address to create an unique name is not a good idea, I suggest you pass an index to virLXCProcessSetupInterfaceBridged. use this index and domain_name to create an unique name. Such as, for the first net interface for domain fedora19 parentVeth's name is fedora19-host-1, containerVeth is fedora19-container-1 for the second interface, parentVeth's name is fedora19-host-2, containerVeth is fedora19-container-2 ... find out a proper name for these veth device :) Interface names are limited to 16 characters, so we can't really use the full domain name there and I suppose it makes sense to prefix them with veth. That leaves 12 characters for the which is enough for a random name that will effectively never collide with anything else, but I'd still prefer naming them according to the mac address. The randomly generated address with 3 random bytes is unique enough to be used as a mac address in a network - it should be unique enough to be used as an interface name on a single host (and you can still override it in domain configuration if you want to.) / Oskari -- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
[libvirt] [PATCHv2] virNetDevVethCreate: assign container if name based on parent if name
Replace the loop trying to find a free veth interface name for the container by assigning the container if name to parent name + 'p' by default. Interface name selection logic is susceptible to race conditions, so try to select just one name by default and use that as a template for the second name. The parent name can also be overriden in domain configuration. Signed-off-by: Oskari Saarenmaa o...@ohmu.fi --- v2: generate first name as before (if it wasn't given by the caller), and use the parent if name as a template for the container if name so that the caller doesn't have to select two names (which is not possible at the moment.) src/util/virnetdevveth.c | 12 +++- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/src/util/virnetdevveth.c b/src/util/virnetdevveth.c index 039767f..91e2829 100644 --- a/src/util/virnetdevveth.c +++ b/src/util/virnetdevveth.c @@ -114,20 +114,14 @@ int virNetDevVethCreate(char** veth1, char** veth2) } argv[3] = *veth1; -while (*veth2 == NULL) { -if ((vethDev = virNetDevVethGetFreeName(veth2, vethDev)) 0) { +if (*veth2 == NULL) { +/* Append a 'p' to veth1 if name */ +if (virAsprintf(veth2, %sp, *veth1) 0) { if (veth1_alloc) VIR_FREE(*veth1); goto cleanup; } -/* Just make sure they didn't accidentally get same name */ -if (STREQ(*veth1, *veth2)) { -vethDev++; -VIR_FREE(*veth2); -continue; -} - VIR_DEBUG(Assigned guest: %s, *veth2); veth2_alloc = true; } -- 1.8.3.1 -- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
Re: [libvirt] [PATCHv2] virNetDevVethCreate: assign container if name based on parent if name
On Thu, Aug 29, 2013 at 11:28:43AM +0100, Daniel P. Berrange wrote: On Thu, Aug 29, 2013 at 01:00:15PM +0300, Oskari Saarenmaa wrote: Replace the loop trying to find a free veth interface name for the container by assigning the container if name to parent name + 'p' by default. Interface name selection logic is susceptible to race conditions, so try to select just one name by default and use that as a template for the second name. The parent name can also be overriden in domain configuration. This doesn't do anything to solve the race condition AFAICT. You still have the window between finding a free name, and calling the ip command to allocate it. That's true, but I think this change makes sense as a standalone change for now; it makes sure libvirt uses the caller assigned names (if any) which let's the caller work around this by selecting the name they like. What we need here is a change that will catch the failure from the ip command, and then go back to the start and find free names and retry the ip command. I don't like this approach. It would require us to parse ip's stderr which would be quite a bit of fragile code to handle a case that shouldn't ever happen. We should just select a unique interface name in the first place, I don't see any benefits in trying to force interface names to use low integers in their names. / Oskari -- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
[libvirt] RFC: btrfs storage pool using subvolumes snapshots
I wrote an experimental btrfs storage pool which uses subvolumes (and optionally snapshots) as storage volumes in LXC domains. The code is available at https://github.com/saaros/libvirt/compare/btrfs-storage but it's still missing some features like quotas for the subvolumes (currently the capacity definition for volumes is ignored) and doesn't have any documentation so far. Sample usage: mkdir /virtual; mkfs.btrfs /dev/vdb; mount -t btrfs /dev/vdb /virtual virsh pool-create-as testpool btrfs --target /virtual virsh vol-create-as testpool vanilla 0 echo vanilla /virtual/vanilla/test virsh vol-create-as testpool test 0 --backing-vol vanilla cat /virtual/test/test btrfs subvolume list /virtual Does this look like a useful feature and does it make sense to implement it as a new storage pool type, or should it be merged into an existing one? I looked at the existing ones and couldn't really figure out how to make it fit nicely in any of them. As far as I could tell none of the existing storage pools or volumes offered a way to create a new copy-on-write volume for easy use in LXC domains using the libvirt API. With the new btrfs pool I was able to replace KVM domains using qcow2 volumes with LXC + btrfs with very little changes to the application code. Cheers, Oskari -- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list
[libvirt] [PATCH] virNetDevVethCreate: assign names based on mac address by default
Interface names do not have to be numerical (or veth + number) and trying to assign them to that format is susceptible to race conditions. Instead, assign the parent interface name according to the mac address (the last three bytes) if no name was given by the caller and use the parent interface name plus 'p' for the container name. Signed-off-by: Oskari Saarenmaa o...@ohmu.fi --- src/lxc/lxc_process.c| 2 +- src/util/virnetdevveth.c | 81 +--- src/util/virnetdevveth.h | 6 ++-- 3 files changed, 19 insertions(+), 70 deletions(-) diff --git a/src/lxc/lxc_process.c b/src/lxc/lxc_process.c index 4835bd5..956bbdb 100644 --- a/src/lxc/lxc_process.c +++ b/src/lxc/lxc_process.c @@ -243,7 +243,7 @@ char *virLXCProcessSetupInterfaceBridged(virConnectPtr conn, VIR_DEBUG(calling vethCreate()); parentVeth = net-ifname; -if (virNetDevVethCreate(parentVeth, containerVeth) 0) +if (virNetDevVethCreate(parentVeth, containerVeth, net-mac) 0) goto cleanup; VIR_DEBUG(parentVeth: %s, containerVeth: %s, parentVeth, containerVeth); diff --git a/src/util/virnetdevveth.c b/src/util/virnetdevveth.c index 039767f..c0e3e93 100644 --- a/src/util/virnetdevveth.c +++ b/src/util/virnetdevveth.c @@ -38,96 +38,46 @@ /* Functions */ /** - * virNetDevVethGetFreeName: - * @veth: pointer to store returned name for veth device - * @startDev: device number to start at (x in vethx) - * - * Looks in /sys/class/net/ to find the first available veth device - * name. - * - * Returns non-negative device number on success or -1 in case of error - */ -static int virNetDevVethGetFreeName(char **veth, int startDev) -{ -int devNum = startDev-1; -char *path = NULL; - -VIR_DEBUG(Find free from veth%d, startDev); -do { -VIR_FREE(path); -++devNum; -if (virAsprintf(path, /sys/class/net/veth%d/, devNum) 0) -return -1; -VIR_DEBUG(Probe %s, path); -} while (virFileExists(path)); -VIR_FREE(path); - -if (virAsprintf(veth, veth%d, devNum) 0) -return -1; - -return devNum; -} - -/** * virNetDevVethCreate: * @veth1: pointer to name for parent end of veth pair * @veth2: pointer to return name for container end of veth pair + * @mac: mac address of the device * * Creates a veth device pair using the ip command: * ip link add veth1 type veth peer name veth2 - * If veth1 points to NULL on entry, it will be a valid interface on - * return. veth2 should point to NULL on entry. * - * NOTE: If veth1 and veth2 names are not specified, ip will auto assign - * names. There seems to be two problems here - - * 1) There doesn't seem to be a way to determine the names of the - * devices that it creates. They show up in ip link show and - * under /sys/class/net/ however there is no guarantee that they - * are the devices that this process just created. - * 2) Once one of the veth devices is moved to another namespace, it - * is no longer visible in the parent namespace. This seems to - * confuse the name assignment causing it to fail with File exists. - * Because of these issues, this function currently allocates names - * prior to using the ip command, and returns any allocated names - * to the caller. + * If veth1 or veth2 points to NULL on entry, they will be a valid interface + * on return. The name is generated based on the mac address given. * * Returns 0 on success or -1 in case of error */ -int virNetDevVethCreate(char** veth1, char** veth2) +int virNetDevVethCreate(char** veth1, char** veth2, const virMacAddrPtr mac) { -int rc = -1; const char *argv[] = { ip, link, add, NULL, type, veth, peer, name, NULL, NULL }; -int vethDev = 0; bool veth1_alloc = false; bool veth2_alloc = false; VIR_DEBUG(Host: %s guest: %s, NULLSTR(*veth1), NULLSTR(*veth2)); if (*veth1 == NULL) { -if ((vethDev = virNetDevVethGetFreeName(veth1, vethDev)) 0) -goto cleanup; +/* Use the last three bytes of the mac address as our if name */ +if (virAsprintf(veth1, veth%02x%02x%02x, +mac-addr[3], mac-addr[4], mac-addr[5]) 0) +return -1; VIR_DEBUG(Assigned host: %s, *veth1); veth1_alloc = true; -vethDev++; } argv[3] = *veth1; -while (*veth2 == NULL) { -if ((vethDev = virNetDevVethGetFreeName(veth2, vethDev)) 0) { +if (*veth2 == NULL) { +/* Append a 'p' to veth1 if name */ +if (virAsprintf(veth2, %sp, *veth1) 0) { if (veth1_alloc) VIR_FREE(*veth1); -goto cleanup; -} - -/* Just make sure they didn't accidentally get same name */ -if (STREQ(*veth1, *veth2)) { -vethDev++; -VIR_FREE(*veth2); -continue; +return -1
[libvirt] [PATCHv3] Add unsafe cache mode support for disk driver
QEMU 0.13 introduced cache=unsafe for -drive, this patch exposes it in the libvirt layer. * Introduced a new QEMU capability flag ($prefix_CACHE_UNSAFE), as even if $prefix_CACHE_V2 is set, we can't known if unsafe is supported. * Improved the reliability of qemu cache type detection. --- Updated patch based on Eric Blake's comments and rebased it to c4111bd0 docs/formatdomain.html.in | 13 +-- docs/schemas/domaincommon.rng |1 + src/conf/domain_conf.c |3 +- src/conf/domain_conf.h |1 + src/qemu/qemu_capabilities.c | 14 ++-- src/qemu/qemu_capabilities.h |2 + src/qemu/qemu_command.c| 14 +++- tests/qemuargv2xmltest.c |1 + tests/qemuhelptest.c |3 ++ .../qemuxml2argv-disk-drive-cache-unsafe.args |5 +++ .../qemuxml2argv-disk-drive-cache-unsafe.xml | 33 tests/qemuxml2argvtest.c |3 ++ tools/virsh.pod|4 +- 13 files changed, 84 insertions(+), 13 deletions(-) create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-disk-drive-cache-unsafe.args create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-disk-drive-cache-unsafe.xml diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index 0a7abaf..0628d1c 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -996,10 +996,15 @@ li The optional codecache/code attribute controls the cache mechanism, possible values are default, none, -writethrough, writeback, and directsync. directsync -is like writethrough, but it bypasses the host page -cache. -span class=sinceSince 0.6.0/span +writethrough, writeback, directsync (like +writethrough, but it bypasses the host page cache) and +unsafe (host may cache all disk io and sync requests from +guest are ignored). +span class=since + Since 0.6.0, + directsync since 0.9.5, + unsafe since 0.9.7 +/span /li li The optional codeerror_policy/code attribute controls diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng index d0da41c..be98be0 100644 --- a/docs/schemas/domaincommon.rng +++ b/docs/schemas/domaincommon.rng @@ -848,6 +848,7 @@ valuewriteback/value valuewritethrough/value valuedirectsync/value +valueunsafe/value /choice /attribute /define diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 7463d7c..a918679 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -164,7 +164,8 @@ VIR_ENUM_IMPL(virDomainDiskCache, VIR_DOMAIN_DISK_CACHE_LAST, none, writethrough, writeback, - directsync) + directsync, + unsafe) VIR_ENUM_IMPL(virDomainDiskErrorPolicy, VIR_DOMAIN_DISK_ERROR_POLICY_LAST, default, diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index 371f270..86b4c79 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -192,6 +192,7 @@ enum virDomainDiskCache { VIR_DOMAIN_DISK_CACHE_WRITETHRU, VIR_DOMAIN_DISK_CACHE_WRITEBACK, VIR_DOMAIN_DISK_CACHE_DIRECTSYNC, +VIR_DOMAIN_DISK_CACHE_UNSAFE, VIR_DOMAIN_DISK_CACHE_LAST }; diff --git a/src/qemu/qemu_capabilities.c b/src/qemu/qemu_capabilities.c index 850d46e..8e20e3f 100644 --- a/src/qemu/qemu_capabilities.c +++ b/src/qemu/qemu_capabilities.c @@ -137,6 +137,8 @@ VIR_ENUM_IMPL(qemuCaps, QEMU_CAPS_LAST, usb-redir, usb-hub, no-shutdown, + + cache-unsafe, /* 75 */ ); struct qemu_feature_flags { @@ -912,12 +914,16 @@ qemuCapsComputeCmdFlags(const char *help, else if (strstr(help, -domid)) qemuCapsSet(flags, QEMU_CAPS_DOMID); if (strstr(help, -drive)) { +const char *cache = strstr(help, cache=); + qemuCapsSet(flags, QEMU_CAPS_DRIVE); -if (strstr(help, cache=) -!strstr(help, cache=on|off)) { -qemuCapsSet(flags, QEMU_CAPS_DRIVE_CACHE_V2); -if (strstr(help, directsync)) +if (cache (p = strchr(cache, ']'))) { +if (memmem(cache, p - cache, on|off, sizeof(on|off) - 1) == NULL) +qemuCapsSet(flags, QEMU_CAPS_DRIVE_CACHE_V2); +if (memmem(cache, p - cache, directsync, sizeof(directsync) - 1)) qemuCapsSet(flags, QEMU_CAPS_DRIVE_CACHE_DIRECTSYNC); +if (memmem(cache, p - cache, unsafe, sizeof(unsafe) - 1)) +qemuCapsSet(flags, QEMU_CAPS_DRIVE_CACHE_UNSAFE);
[libvirt] [PATCHv2] Add unsafe cache mode support for disk driver
QEMU 0.13 introduced cache=unsafe for -drive, this patch exposes it in the libvirt layer. * Introduced a new QEMU capability flag ($prefix_CACHE_UNSAFE), as even if $prefix_CACHE_V2 is set, we can't known if unsafe is supported. * qemuhelptest prints test case name on failure. --- Updated patch based on Osier Yang's comments and rebased it to 3abadf82 docs/formatdomain.html.in |7 ++-- docs/schemas/domaincommon.rng |1 + src/conf/domain_conf.c |3 +- src/conf/domain_conf.h |1 + src/qemu/qemu_capabilities.c |4 ++ src/qemu/qemu_capabilities.h |2 + src/qemu/qemu_command.c| 14 +++- tests/qemuargv2xmltest.c |1 + tests/qemuhelptest.c | 19 ++- .../qemuxml2argv-disk-drive-cache-unsafe.args |5 +++ .../qemuxml2argv-disk-drive-cache-unsafe.xml | 33 tests/qemuxml2argvtest.c |3 ++ tools/virsh.pod|4 +- 13 files changed, 81 insertions(+), 16 deletions(-) create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-disk-drive-cache-unsafe.args create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-disk-drive-cache-unsafe.xml diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index 0a7abaf..8087327 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -996,9 +996,10 @@ li The optional codecache/code attribute controls the cache mechanism, possible values are default, none, -writethrough, writeback, and directsync. directsync -is like writethrough, but it bypasses the host page -cache. +writethrough, writeback, directsync (like +writethrough, but it bypasses the host page cache) and +unsafe (host may cache all disk io and sync requests from +guest are ignored). span class=sinceSince 0.6.0/span /li li diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng index d0da41c..be98be0 100644 --- a/docs/schemas/domaincommon.rng +++ b/docs/schemas/domaincommon.rng @@ -848,6 +848,7 @@ valuewriteback/value valuewritethrough/value valuedirectsync/value +valueunsafe/value /choice /attribute /define diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index eebcba0..f38c1d8 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -164,7 +164,8 @@ VIR_ENUM_IMPL(virDomainDiskCache, VIR_DOMAIN_DISK_CACHE_LAST, none, writethrough, writeback, - directsync) + directsync, + unsafe) VIR_ENUM_IMPL(virDomainDiskErrorPolicy, VIR_DOMAIN_DISK_ERROR_POLICY_LAST, default, diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index 371f270..86b4c79 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -192,6 +192,7 @@ enum virDomainDiskCache { VIR_DOMAIN_DISK_CACHE_WRITETHRU, VIR_DOMAIN_DISK_CACHE_WRITEBACK, VIR_DOMAIN_DISK_CACHE_DIRECTSYNC, +VIR_DOMAIN_DISK_CACHE_UNSAFE, VIR_DOMAIN_DISK_CACHE_LAST }; diff --git a/src/qemu/qemu_capabilities.c b/src/qemu/qemu_capabilities.c index 850d46e..fcb9c8b 100644 --- a/src/qemu/qemu_capabilities.c +++ b/src/qemu/qemu_capabilities.c @@ -137,6 +137,8 @@ VIR_ENUM_IMPL(qemuCaps, QEMU_CAPS_LAST, usb-redir, usb-hub, no-shutdown, + + cache-unsafe, /* 75 */ ); struct qemu_feature_flags { @@ -918,6 +920,8 @@ qemuCapsComputeCmdFlags(const char *help, qemuCapsSet(flags, QEMU_CAPS_DRIVE_CACHE_V2); if (strstr(help, directsync)) qemuCapsSet(flags, QEMU_CAPS_DRIVE_CACHE_DIRECTSYNC); +if (strstr(help, |unsafe)) +qemuCapsSet(flags, QEMU_CAPS_DRIVE_CACHE_UNSAFE); } if (strstr(help, format=)) qemuCapsSet(flags, QEMU_CAPS_DRIVE_FORMAT); diff --git a/src/qemu/qemu_capabilities.h b/src/qemu/qemu_capabilities.h index 74d3ab2..ae3de90 100644 --- a/src/qemu/qemu_capabilities.h +++ b/src/qemu/qemu_capabilities.h @@ -112,6 +112,8 @@ enum qemuCapsFlags { QEMU_CAPS_USB_HUB = 73, /* -device usb-hub */ QEMU_CAPS_NO_SHUTDOWN = 74, /* usable -no-shutdown */ +QEMU_CAPS_DRIVE_CACHE_UNSAFE = 75, /* Is cache=unsafe supported? */ + QEMU_CAPS_LAST, /* this must always be the last item */ }; diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index 0adc56a..9174a5f 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -66,14 +66,16 @@ VIR_ENUM_IMPL(qemuDiskCacheV1,
[libvirt] [PATCH] Add unsafe cache mode support for disk driver
QEMU 0.13 introduced cache=unsafe for -drive, this patch exposes it in the libvirt layer. * Introduced a new QEMU capability flag ($prefix_CACHE_UNSAFE), as even if $prefix_CACHE_V2 is set, we can't known if unsafe is supported. * qemuhelptest prints test case name on failure. --- docs/formatdomain.html.in |7 ++-- docs/schemas/domaincommon.rng |1 + src/conf/domain_conf.c |3 +- src/conf/domain_conf.h |1 + src/qemu/qemu_capabilities.c |3 ++ src/qemu/qemu_capabilities.h |1 + src/qemu/qemu_command.c| 15 - tests/qemuargv2xmltest.c |1 + tests/qemuhelptest.c | 19 ++- .../qemuxml2argv-disk-drive-cache-unsafe.args |5 +++ .../qemuxml2argv-disk-drive-cache-unsafe.xml | 33 tests/qemuxml2argvtest.c |3 ++ tools/virsh.pod|4 +- 13 files changed, 80 insertions(+), 16 deletions(-) create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-disk-drive-cache-unsafe.args create mode 100644 tests/qemuxml2argvdata/qemuxml2argv-disk-drive-cache-unsafe.xml diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index 0a7abaf..8087327 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -996,9 +996,10 @@ li The optional codecache/code attribute controls the cache mechanism, possible values are default, none, -writethrough, writeback, and directsync. directsync -is like writethrough, but it bypasses the host page -cache. +writethrough, writeback, directsync (like +writethrough, but it bypasses the host page cache) and +unsafe (host may cache all disk io and sync requests from +guest are ignored). span class=sinceSince 0.6.0/span /li li diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng index d0da41c..be98be0 100644 --- a/docs/schemas/domaincommon.rng +++ b/docs/schemas/domaincommon.rng @@ -848,6 +848,7 @@ valuewriteback/value valuewritethrough/value valuedirectsync/value +valueunsafe/value /choice /attribute /define diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 7476447..21f4c6b 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -164,7 +164,8 @@ VIR_ENUM_IMPL(virDomainDiskCache, VIR_DOMAIN_DISK_CACHE_LAST, none, writethrough, writeback, - directsync) + directsync, + unsafe) VIR_ENUM_IMPL(virDomainDiskErrorPolicy, VIR_DOMAIN_DISK_ERROR_POLICY_LAST, default, diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index 371f270..86b4c79 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -192,6 +192,7 @@ enum virDomainDiskCache { VIR_DOMAIN_DISK_CACHE_WRITETHRU, VIR_DOMAIN_DISK_CACHE_WRITEBACK, VIR_DOMAIN_DISK_CACHE_DIRECTSYNC, +VIR_DOMAIN_DISK_CACHE_UNSAFE, VIR_DOMAIN_DISK_CACHE_LAST }; diff --git a/src/qemu/qemu_capabilities.c b/src/qemu/qemu_capabilities.c index 36f47a9..13f1aea 100644 --- a/src/qemu/qemu_capabilities.c +++ b/src/qemu/qemu_capabilities.c @@ -136,6 +136,7 @@ VIR_ENUM_IMPL(qemuCaps, QEMU_CAPS_LAST, pci-ohci, usb-redir, usb-hub, + cache-unsafe, ); struct qemu_feature_flags { @@ -918,6 +919,8 @@ qemuCapsComputeCmdFlags(const char *help, if (strstr(help, directsync)) qemuCapsSet(flags, QEMU_CAPS_DRIVE_CACHE_DIRECTSYNC); } +if (strstr(help, |unsafe)) +qemuCapsSet(flags, QEMU_CAPS_DRIVE_CACHE_UNSAFE); if (strstr(help, format=)) qemuCapsSet(flags, QEMU_CAPS_DRIVE_FORMAT); if (strstr(help, readonly=)) diff --git a/src/qemu/qemu_capabilities.h b/src/qemu/qemu_capabilities.h index 96b7a3b..97fda0c 100644 --- a/src/qemu/qemu_capabilities.h +++ b/src/qemu/qemu_capabilities.h @@ -110,6 +110,7 @@ enum qemuCapsFlags { QEMU_CAPS_PCI_OHCI = 71, /* -device pci-ohci */ QEMU_CAPS_USB_REDIR = 72, /* -device usb-redir */ QEMU_CAPS_USB_HUB = 73, /* -device usb-hub */ +QEMU_CAPS_DRIVE_CACHE_UNSAFE = 74, /* Is cache=unsafe supported? */ QEMU_CAPS_LAST, /* this must always be the last item */ }; diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c index e8b1157..372f9fb 100644 --- a/src/qemu/qemu_command.c +++ b/src/qemu/qemu_command.c @@ -66,14 +66,16 @@ VIR_ENUM_IMPL(qemuDiskCacheV1, VIR_DOMAIN_DISK_CACHE_LAST, off, off, /*
[libvirt] [PATCH] remote/ssh: optional keyfile parameter.
New optional parameter keyfile for ssh transport allows the user to select the private key to be used to authenticate to the remote host. --- docs/remote.html.in| 16 src/remote/remote_driver.c |9 - src/rpc/virnetclient.c |4 +++- src/rpc/virnetclient.h |1 + src/rpc/virnetsocket.c |3 +++ src/rpc/virnetsocket.h |1 + tests/virnetsockettest.c | 12 7 files changed, 44 insertions(+), 2 deletions(-) diff --git a/docs/remote.html.in b/docs/remote.html.in index 39d65aa..b554950 100644 --- a/docs/remote.html.in +++ b/docs/remote.html.in @@ -275,6 +275,22 @@ Note that parameter values must be td colspan=2/ td Example: codenetcat=/opt/netcat/bin/nc/code /td /tr + + tr +td + codekeyfile/code +/td +td ssh /td +td + The name of the private key file to use to authentication to the remote + machine. If this option is not used the default keys are used. +/td + /tr + tr +td colspan=2/ +td Example: codekeyfile=/root/.ssh/example_key/code /td + /tr + tr td codeno_verify/code diff --git a/src/remote/remote_driver.c b/src/remote/remote_driver.c index c2f8bbd..3878fc9 100644 --- a/src/remote/remote_driver.c +++ b/src/remote/remote_driver.c @@ -343,7 +343,7 @@ doRemoteOpen (virConnectPtr conn, char *name = NULL, *command = NULL, *sockname = NULL, *netcat = NULL; char *port = NULL, *authtype = NULL, *username = NULL; int no_verify = 0, no_tty = 0; -char *pkipath = NULL; +char *pkipath = NULL, *keyfile = NULL; /* Return code from this function, and the private data. */ int retcode = VIR_DRV_OPEN_ERROR; @@ -416,6 +416,11 @@ doRemoteOpen (virConnectPtr conn, netcat = strdup (var-value); if (!netcat) goto out_of_memory; var-ignore = 1; +} else if (STRCASEEQ (var-name, keyfile)) { +VIR_FREE(keyfile); +keyfile = strdup (var-value); +if (!keyfile) goto out_of_memory; +var-ignore = 1; } else if (STRCASEEQ (var-name, no_verify)) { no_verify = atoi (var-value); var-ignore = 1; @@ -573,6 +578,7 @@ doRemoteOpen (virConnectPtr conn, no_tty, no_verify, netcat ? netcat : nc, +keyfile, sockname))) goto failed; @@ -672,6 +678,7 @@ doRemoteOpen (virConnectPtr conn, VIR_FREE(sockname); VIR_FREE(authtype); VIR_FREE(netcat); +VIR_FREE(keyfile); VIR_FREE(username); VIR_FREE(port); VIR_FREE(pkipath); diff --git a/src/rpc/virnetclient.c b/src/rpc/virnetclient.c index d3965c6..1bda763 100644 --- a/src/rpc/virnetclient.c +++ b/src/rpc/virnetclient.c @@ -189,11 +189,13 @@ virNetClientPtr virNetClientNewSSH(const char *nodename, bool noTTY, bool noVerify, const char *netcat, + const char *keyfile, const char *path) { virNetSocketPtr sock; -if (virNetSocketNewConnectSSH(nodename, service, binary, username, noTTY, noVerify, netcat, path, sock) 0) +if (virNetSocketNewConnectSSH(nodename, service, binary, username, noTTY, + noVerify, netcat, keyfile, path, sock) 0) return NULL; return virNetClientNew(sock, NULL); diff --git a/src/rpc/virnetclient.h b/src/rpc/virnetclient.h index 6acdf50..3e5659c 100644 --- a/src/rpc/virnetclient.h +++ b/src/rpc/virnetclient.h @@ -46,6 +46,7 @@ virNetClientPtr virNetClientNewSSH(const char *nodename, bool noTTY, bool noVerify, const char *netcat, + const char *keyfile, const char *path); virNetClientPtr virNetClientNewExternal(const char **cmdargv); diff --git a/src/rpc/virnetsocket.c b/src/rpc/virnetsocket.c index 7ea1ab7..57373a0 100644 --- a/src/rpc/virnetsocket.c +++ b/src/rpc/virnetsocket.c @@ -578,6 +578,7 @@ int virNetSocketNewConnectSSH(const char *nodename, bool noTTY, bool noVerify, const char *netcat, + const char *keyfile, const char *path, virNetSocketPtr *retsock) { @@ -594,6 +595,8 @@ int virNetSocketNewConnectSSH(const char *nodename, virCommandAddArgList(cmd, -p, service, NULL);
[libvirt] [PATCHv2] remote/ssh: support for no_verify.
Set StrictHostKeyChecking=no to auto-accept new ssh host keys if the no_verify extra parameter was specified. This won't disable host key checking for already known hosts. Includes a test and documentation. --- Thanks for the review, here's an updated patch. docs/remote.html.in|9 +++-- src/remote/remote_driver.c |1 + src/rpc/virnetclient.c |3 ++- src/rpc/virnetclient.h |1 + src/rpc/virnetsocket.c |3 +++ src/rpc/virnetsocket.h |1 + tests/virnetsockettest.c | 22 +++--- 7 files changed, 34 insertions(+), 6 deletions(-) diff --git a/docs/remote.html.in b/docs/remote.html.in index f6a0683..39d65aa 100644 --- a/docs/remote.html.in +++ b/docs/remote.html.in @@ -279,9 +279,14 @@ Note that parameter values must be td codeno_verify/code /td -td tls /td -td - If set to a non-zero value, this disables client checks of the +td ssh, tls /td +td + SSH: If set to a non-zero value, this disables client's strict host key + checking making it auto-accept new host keys. Existing host keys will + still be validated. + br/ + br/ + TLS: If set to a non-zero value, this disables client checks of the server's certificate. Note that to disable server checks of the client's certificate or IP address you must a href=#Remote_libvirtd_configurationchange the libvirtd diff --git a/src/remote/remote_driver.c b/src/remote/remote_driver.c index 5c0457e..6921c15 100644 --- a/src/remote/remote_driver.c +++ b/src/remote/remote_driver.c @@ -571,6 +571,7 @@ doRemoteOpen (virConnectPtr conn, command, username, no_tty, +no_verify, netcat ? netcat : nc, sockname))) goto failed; diff --git a/src/rpc/virnetclient.c b/src/rpc/virnetclient.c index 6a112ee..b9f0fc8 100644 --- a/src/rpc/virnetclient.c +++ b/src/rpc/virnetclient.c @@ -187,12 +187,13 @@ virNetClientPtr virNetClientNewSSH(const char *nodename, const char *binary, const char *username, bool noTTY, + bool noVerify, const char *netcat, const char *path) { virNetSocketPtr sock; -if (virNetSocketNewConnectSSH(nodename, service, binary, username, noTTY, netcat, path, sock) 0) +if (virNetSocketNewConnectSSH(nodename, service, binary, username, noTTY, noVerify, netcat, path, sock) 0) return NULL; return virNetClientNew(sock, NULL); diff --git a/src/rpc/virnetclient.h b/src/rpc/virnetclient.h index de0782c..6acdf50 100644 --- a/src/rpc/virnetclient.h +++ b/src/rpc/virnetclient.h @@ -44,6 +44,7 @@ virNetClientPtr virNetClientNewSSH(const char *nodename, const char *binary, const char *username, bool noTTY, + bool noVerify, const char *netcat, const char *path); diff --git a/src/rpc/virnetsocket.c b/src/rpc/virnetsocket.c index 3392047..41d9954 100644 --- a/src/rpc/virnetsocket.c +++ b/src/rpc/virnetsocket.c @@ -576,6 +576,7 @@ int virNetSocketNewConnectSSH(const char *nodename, const char *binary, const char *username, bool noTTY, + bool noVerify, const char *netcat, const char *path, virNetSocketPtr *retsock) @@ -596,6 +597,8 @@ int virNetSocketNewConnectSSH(const char *nodename, if (noTTY) virCommandAddArgList(cmd, -T, -o, BatchMode=yes, -e, none, NULL); +if (noVerify) +virCommandAddArgList(cmd, -o, StrictHostKeyChecking=no, NULL); virCommandAddArgList(cmd, nodename, netcat ? netcat : nc, -U, path, NULL); diff --git a/src/rpc/virnetsocket.h b/src/rpc/virnetsocket.h index 356d6c6..5f882ac 100644 --- a/src/rpc/virnetsocket.h +++ b/src/rpc/virnetsocket.h @@ -67,6 +67,7 @@ int virNetSocketNewConnectSSH(const char *nodename, const char *binary, const char *username, bool noTTY, + bool noVerify, const char *netcat, const char *path, virNetSocketPtr
[libvirt] [PATCH] remote/ssh: support for no_verify.
Set StrictHostKeyChecking=no to auto-accept new ssh host keys if the no_verify extra parameter was specified. This won't disable host key checking for already known hosts. --- src/remote/remote_driver.c |1 + src/rpc/virnetclient.c |3 ++- src/rpc/virnetclient.h |1 + src/rpc/virnetsocket.c |3 +++ src/rpc/virnetsocket.h |1 + tests/virnetsockettest.c |2 ++ 6 files changed, 10 insertions(+), 1 deletions(-) diff --git a/src/remote/remote_driver.c b/src/remote/remote_driver.c index f318740..a2f54c8 100644 --- a/src/remote/remote_driver.c +++ b/src/remote/remote_driver.c @@ -571,6 +571,7 @@ doRemoteOpen (virConnectPtr conn, command, username, no_tty, +no_verify, netcat ? netcat : nc, sockname))) goto failed; diff --git a/src/rpc/virnetclient.c b/src/rpc/virnetclient.c index b551b99..fc0fef8 100644 --- a/src/rpc/virnetclient.c +++ b/src/rpc/virnetclient.c @@ -187,12 +187,13 @@ virNetClientPtr virNetClientNewSSH(const char *nodename, const char *binary, const char *username, bool noTTY, + bool noVerify, const char *netcat, const char *path) { virNetSocketPtr sock; -if (virNetSocketNewConnectSSH(nodename, service, binary, username, noTTY, netcat, path, sock) 0) +if (virNetSocketNewConnectSSH(nodename, service, binary, username, noTTY, noVerify, netcat, path, sock) 0) return NULL; return virNetClientNew(sock, NULL); diff --git a/src/rpc/virnetclient.h b/src/rpc/virnetclient.h index de0782c..6acdf50 100644 --- a/src/rpc/virnetclient.h +++ b/src/rpc/virnetclient.h @@ -44,6 +44,7 @@ virNetClientPtr virNetClientNewSSH(const char *nodename, const char *binary, const char *username, bool noTTY, + bool noVerify, const char *netcat, const char *path); diff --git a/src/rpc/virnetsocket.c b/src/rpc/virnetsocket.c index 4b0c2ee..e827b4f 100644 --- a/src/rpc/virnetsocket.c +++ b/src/rpc/virnetsocket.c @@ -576,6 +576,7 @@ int virNetSocketNewConnectSSH(const char *nodename, const char *binary, const char *username, bool noTTY, + bool noVerify, const char *netcat, const char *path, virNetSocketPtr *retsock) @@ -596,6 +597,8 @@ int virNetSocketNewConnectSSH(const char *nodename, if (noTTY) virCommandAddArgList(cmd, -T, -o, BatchMode=yes, -e, none, NULL); +if (noVerify) +virCommandAddArgList(cmd, -oStrictHostKeyChecking=no, NULL); virCommandAddArgList(cmd, nodename, netcat ? netcat : nc, -U, path, NULL); diff --git a/src/rpc/virnetsocket.h b/src/rpc/virnetsocket.h index 356d6c6..5f882ac 100644 --- a/src/rpc/virnetsocket.h +++ b/src/rpc/virnetsocket.h @@ -67,6 +67,7 @@ int virNetSocketNewConnectSSH(const char *nodename, const char *binary, const char *username, bool noTTY, + bool noVerify, const char *netcat, const char *path, virNetSocketPtr *addr); diff --git a/tests/virnetsockettest.c b/tests/virnetsockettest.c index f6c7274..87f3dfa 100644 --- a/tests/virnetsockettest.c +++ b/tests/virnetsockettest.c @@ -377,6 +377,7 @@ struct testSSHData { const char *binary; const char *username; bool noTTY; +bool noVerify; const char *netcat; const char *path; @@ -397,6 +398,7 @@ static int testSocketSSH(const void *opaque) data-binary, data-username, data-noTTY, + data-noVerify, data-netcat, data-path, csock) 0) -- 1.7.5.4 -- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list