I report a bug [Bug 1227257] . In the environment libvirt-1.2.16.tar.gz + qemu-img version 2.1.2 + ceph version 0.94.1. libvirt pool will become inactive after one client does vol.delete and the other does pool.refresh in the same pool simultaneously.
The reason is that rbd_list and rbd_open are not wrapped in an atomic operation, but two seperate operations. For example, two clients are operating in the same pool at the same time. One client does rbd_list, and got 10 rbd images, meanwhile, the other client deletes one of the rbd image. In this situation, when the first client does next operation, such as rbd_open , the command may fail, because the rbd image has been removed. I write a testcase in python to reproduce this problem as follow(also have put it in the attachment): ################################## import libvirt import sys import time import sys #coding:utf-8 QEMU_URL = 'qemu:///system' VOL_TEMPLATE=''' <volume> <name>{vol}</name> <key>{pool}/{vol}</key> <source> </source> <capacity unit='MB'>{cap_size}</capacity> <allocation unit='MB'>{alloc_size}</allocation> <target> <path>rbd:{pool}/{vol}</path> <format type='unknown'/> <permissions> <mode>00</mode> <owner>0</owner> <group>0</group> </permissions> </target> </volume>''' def create_vol(pool_name, vol_name, cap_size, alloc_size): conn = libvirt.open(QEMU_URL) if conn == None: print 'Failed to open connection to the hypervisor' sys.exit(1) try: pool = conn.storagePoolLookupByName(pool_name) pool.refresh() template = VOL_TEMPLATE.format(pool=pool_name, vol=vol_name, cap_size=cap_size, alloc_size=alloc_size) pool.createXML(template, 0) except: print 'Failed to open pool' sys.exit(1) finally: if conn is not None: conn.close() def destroy_vol(pool_name, vol_name): conn = libvirt.open(QEMU_URL) if conn == None: sys.exit(1) pool = conn.storagePoolLookupByName(pool_name) pool.refresh(0) vol = pool.storageVolLookupByName(vol_name) if vol is not None: vol.delete(0) if conn is not None: conn.close() if sys.argv[2] == 'create': for i in range(1, 20): volname = 'pool-down-test-%s-%d' % (sys.argv[1], i) print 'create %s' % volname create_vol('capacity', volname, '500', '500') elif sys.argv[2] == 'destroy': for i in range(1, 20): volname = 'pool-down-test-%s-%d' % (sys.argv[1], i) print 'destroy %s' % volname destroy_vol('capacity', volname) else: print 'Usage: python vol-test.py clientId OPER' print ' ' print 'where clientId : a num/string used to distinguish different client' print ' OPER : create/destroy' Patch for this is as fllow: diff --git a/src/storage/storage_backend_rbd.c b/src/storage/storage_backend_rbd.c index ae4bcb3..24fbc84 100644 --- a/src/storage/storage_backend_rbd.c +++ b/src/storage/storage_backend_rbd.c @@ -266,6 +266,46 @@ static int virStorageBackendRBDCloseRADOSConn(virStorageBackendRBDStatePtr ptr) return ret; } +static int volStorageBackendRBDVolIsExist(char *volname, virStorageBackendRBDStatePtr ptr) +{ + int ret = -1; + char *name, *names = NULL; + size_t max_size = 1024; + int len = -1; + + while (true) { + if (VIR_ALLOC_N(names, max_size) < 0) + goto cleanup; + + len = rbd_list(ptr->ioctx, names, &max_size); + if (len >= 0) + break; + if (len != -ERANGE) { + VIR_WARN("%s", _("A problem occurred while listing RBD images")); + goto cleanup; + } + VIR_FREE(names); + } + + for (name = names; name < names + max_size;) { + + if (STREQ(name, "")) + break; + + name += strlen(name) + 1; + if (STREQ(volname, name)) { + VIR_ERROR("RBD images '%s' is exist, but cannot open it", volname); + ret = -2; + break; + } + } + ret = 0; + +cleanup: + VIR_FREE(names); + return ret; +} + static int volStorageBackendRBDRefreshVolInfo(virStorageVolDefPtr vol, virStoragePoolObjPtr pool, virStorageBackendRBDStatePtr ptr) @@ -276,8 +316,15 @@ static int volStorageBackendRBDRefreshVolInfo(virStorageVolDefPtr vol, r = rbd_open(ptr->ioctx, vol->name, &image, NULL); if (r < 0) { - virReportSystemError(-r, _("failed to open the RBD image '%s'"), - vol->name); + VIR_DEBUG("failed to open RBD image '%s', check if it was still exist in its pool",\ + vol->name); + if (volStorageBackendRBDVolIsExist(vol->name, ptr) == 0) { + VIR_DEBUG("vol '%s' may be removed by the other rbd client", vol->name); + ret = 0; + } else { + virReportSystemError(-r, _("failed to open the RBD image '%s'"), + vol->name); + } return ret; }
-- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list