Combined patch list:
 diff-cbt-add-changed-block-trace-infrastructure
 diff-cbt-fixup-use-after-free-inside-__blk_cbt_set
 diff-cbt-use-propper-mem-allocation-context
 diff-cbt-support-blockdevice-size-update-v2
 diff-cbt-blk_cbt_update_size-add-block_dev-sanity-check
 diff-cbt-ignore-device-shrink
 diff-block-cbt-fix-mistype-statement
 diff-cbt-add-get_once-feature
 diff-cbt-fix-bytes-to-block-conversion-bug
 diff-cbt-add-missed-mutex_unlock
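For reference, the new ioctl interface is expected to be driven from
userspace roughly as in the sketch below. This is a hypothetical example,
not part of the patch: the device path, UUID and block size are made up,
and error handling is trimmed. It assumes the patched <linux/fs.h> uapi
header is installed.

	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>
	#include <fcntl.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <linux/fs.h>	/* patched uapi header */

	#define NR_EX 64	/* arbitrary extent buffer size */

	int main(void)
	{
		struct blk_user_cbt_info *ci;
		unsigned int i;
		int fd;

		ci = calloc(1, sizeof(*ci) +
			    NR_EX * sizeof(struct blk_user_cbt_extent));
		fd = open("/dev/sdb", O_RDONLY);	/* hypothetical device */
		if (!ci || fd < 0)
			return 1;

		/* BLKCBTSTART: allocate the bitmap (needs CAP_SYS_ADMIN) */
		memcpy(ci->ci_uuid, "0123456789abcdef", sizeof(ci->ci_uuid));
		ci->ci_blksize = 64 * 1024;	/* must be a power of two */
		if (ioctl(fd, BLKCBTSTART, ci))
			return 1;

		/* ... writes to the device get recorded here ... */

		/* BLKCBTGET: fetch dirty extents; CI_FLAG_ONCE clears
		 * each extent's bits as it is reported */
		memset(ci, 0, sizeof(*ci));
		ci->ci_length = ~0ULL;		/* whole device */
		ci->ci_extent_count = NR_EX;
		ci->ci_flags = CI_FLAG_ONCE;
		if (ioctl(fd, BLKCBTGET, ci))
			return 1;

		for (i = 0; i < ci->ci_mapped_extents; i++)
			printf("dirty: off=%llu len=%llu\n",
			       (unsigned long long)ci->ci_extents[i].ce_physical,
			       (unsigned long long)ci->ci_extents[i].ce_length);

		/* BLKCBTSTOP: drop the bitmap */
		ioctl(fd, BLKCBTSTOP);
		close(fd);
		return 0;
	}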
Only minor context fixes from the original patches.

https://jira.sw.ru/browse/PSBM-34156

Signed-off-by: Dmitry Monakhov <dmonak...@openvz.org>
---
 block/Kconfig           |    8 +
 block/Makefile          |    1 +
 block/blk-cbt.c         |  605 +++++++++++++++++++++++++++++++++++++++++++++++
 block/blk-core.c        |    1 +
 block/blk-sysfs.c       |    1 +
 block/ioctl.c           |    9 +-
 drivers/md/dm.c         |    2 +-
 fs/block_dev.c          |    9 +-
 include/linux/blkdev.h  |   14 +
 include/linux/fs.h      |    1 +
 include/uapi/linux/fs.h |   34 +++
 11 files changed, 682 insertions(+), 3 deletions(-)
 create mode 100644 block/blk-cbt.c

diff --git a/block/Kconfig b/block/Kconfig
index a7e40a7..3d11f0c 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -105,6 +105,14 @@ source "block/partitions/Kconfig"
 
 endmenu
 
+config BLK_DEV_CBT
+	bool "Block layer changed block tracking support"
+	---help---
+	  Block layer changed block tracking support. It can be used to
+	  optimize device backup and copy.
+
+	  If unsure, say N.
+
 endif # BLOCK
 
 config BLOCK_COMPAT
diff --git a/block/Makefile b/block/Makefile
index 21f4618..44f9426 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -19,3 +19,4 @@
 obj-$(CONFIG_IOSCHED_CFQ)	+= cfq-iosched.o
 obj-$(CONFIG_BLOCK_COMPAT)	+= compat_ioctl.o
 obj-$(CONFIG_BLK_DEV_INTEGRITY)	+= blk-integrity.o
+obj-$(CONFIG_BLK_DEV_CBT)	+= blk-cbt.o
diff --git a/block/blk-cbt.c b/block/blk-cbt.c
new file mode 100644
index 0000000..99d4a76
--- /dev/null
+++ b/block/blk-cbt.c
@@ -0,0 +1,605 @@
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/bio.h>
+#include <linux/blkdev.h>
+#include <linux/cpu.h>
+#include <linux/cpuset.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/pagemap.h>
+#include <linux/vmalloc.h>
+#include <asm/atomic.h>
+#include <asm/uaccess.h>
+
+#define CBT_MAX_EXTENTS	(UINT_MAX / sizeof(struct blk_user_cbt_extent))
+#define NR_PAGES(bits)	(((bits) + PAGE_SIZE*8 - 1) / (PAGE_SIZE*8))
+#define BITS_PER_PAGE	(1UL << (PAGE_SHIFT + 3))
+
+static __cacheline_aligned_in_smp DEFINE_MUTEX(cbt_mutex);
+
+struct cbt_extent {
+	blkcnt_t start;
+	blkcnt_t len;
+};
+
+struct cbt_info {
+	__u8 uuid[16];
+	struct request_queue *queue;
+	blkcnt_t block_max;
+	blkcnt_t block_bits;
+	unsigned long flags;
+
+	struct rcu_head rcu;
+	unsigned int count;
+	struct cbt_extent __percpu *cache;
+	struct page **map;
+	spinlock_t lock;
+};
+
+enum CBT_FLAGS
+{
+	CBT_ERROR = 0,
+	CBT_DEAD = 1,
+	CBT_NOCACHE = 2,
+};
+
+static void cbt_release_callback(struct rcu_head *head);
+static void cbt_flush_cache(struct cbt_info *cbt);
+
+static inline void spin_lock_page(struct page *page)
+{
+	while (!trylock_page(page))
+		cpu_relax();
+}
+
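+/*
+ * Set or clear @len bits in bitmap @bm starting at bit @cur.  Aligned
+ * whole 32-bit words are written with a single store on the fast path;
+ * the unaligned head and tail fall back to set_bit()/clear_bit().
+ */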
+static void set_bits(void *bm, int cur, int len, bool is_set)
+{
+	__u32 *addr;
+	__u32 pattern = is_set ? 0xffffffff : 0;
+
+	len = cur + len;
+	while (cur < len) {
+		if ((cur & 31) == 0 && (len - cur) >= 32) {
+			/* fast path: set the whole word at once */
+			addr = bm + (cur >> 3);
+			*addr = pattern;
+			cur += 32;
+			continue;
+		}
+		if (is_set)
+			set_bit(cur, bm);
+		else
+			clear_bit(cur, bm);
+		cur++;
+	}
+}
+
+static int __blk_cbt_set(struct cbt_info *cbt, blkcnt_t block,
+			 blkcnt_t count, bool in_rcu, bool set)
+{
+	struct page *page;
+
+	if (unlikely(block + count > cbt->block_max)) {
+		printk("WARN: %s eof access block:%lld, len: %lld, max:%lld\n",
+		       __FUNCTION__, (unsigned long long)block,
+		       (unsigned long long)count,
+		       (unsigned long long)cbt->block_max);
+		set_bit(CBT_ERROR, &cbt->flags);
+		return -EINVAL;
+	}
+
+	while (count) {
+		unsigned long idx = block >> (PAGE_SHIFT + 3);
+		unsigned long off = block & (BITS_PER_PAGE - 1);
+		unsigned long len = count & (BITS_PER_PAGE - 1);
+
+		if (off + len > BITS_PER_PAGE)
+			len = BITS_PER_PAGE - off;
+		page = rcu_dereference(cbt->map[idx]);
+		if (page) {
+			spin_lock_page(page);
+			set_bits(page_address(page), off, len, set);
+			unlock_page(page);
+			count -= len;
+			block += len;
+			continue;
+		} else {
+			if (!set) {
+				len = count & (BITS_PER_PAGE - 1);
+				count -= len;
+				block += len;
+				continue;
+			}
+		}
+		/* Page not allocated yet. Synchronization required */
+		spin_lock_irq(&cbt->lock);
+		if (likely(!test_bit(CBT_DEAD, &cbt->flags))) {
+			cbt->count++;
+		} else {
+			struct cbt_info *new = rcu_dereference(cbt->queue->cbt);
+
+			spin_unlock_irq(&cbt->lock);
+			/* was cbt updated ? */
+			if (new != cbt) {
+				cbt = new;
+				continue;
+			} else {
+				break;
+			}
+		}
+		spin_unlock_irq(&cbt->lock);
+		if (in_rcu)
+			rcu_read_unlock();
+		page = alloc_page(GFP_NOIO|__GFP_ZERO);
+		if (in_rcu)
+			rcu_read_lock();
+		spin_lock_irq(&cbt->lock);
+		if (unlikely(!cbt->count-- && test_bit(CBT_DEAD, &cbt->flags))) {
+			spin_unlock_irq(&cbt->lock);
+			call_rcu(&cbt->rcu, &cbt_release_callback);
+			if (page)
+				__free_page(page);
+			break;
+		}
+		if (unlikely(!page)) {
+			set_bit(CBT_ERROR, &cbt->flags);
+			spin_unlock_irq(&cbt->lock);
+			return -ENOMEM;
+		}
+		cbt->map[idx] = page;
+		page = NULL;
+		spin_unlock_irq(&cbt->lock);
+	}
+	return 0;
+}
+
+static void blk_cbt_add(struct request_queue *q, blkcnt_t start, blkcnt_t len)
+{
+	struct cbt_info *cbt;
+	struct cbt_extent *ex;
+	struct cbt_extent old;
+	blkcnt_t end;
+
+	/* Check per-cpu cache */
+	rcu_read_lock();
+	cbt = rcu_dereference(q->cbt);
+	if (unlikely(!cbt))
+		goto out_rcu;
+
+	if (unlikely(test_bit(CBT_ERROR, &cbt->flags)))
+		goto out_rcu;
+	end = (start + len + (1 << cbt->block_bits) - 1) >> cbt->block_bits;
+	start >>= cbt->block_bits;
+	len = end - start;
+	if (unlikely(test_bit(CBT_NOCACHE, &cbt->flags))) {
+		__blk_cbt_set(cbt, start, len, 1, 1);
+		goto out_rcu;
+	}
+	ex = this_cpu_ptr(cbt->cache);
+	if (ex->start + ex->len == start) {
+		ex->len += len;
+		goto out_rcu;
+	}
+	old = *ex;
+	ex->start = start;
+	ex->len = len;
+
+	if (likely(old.len))
+		__blk_cbt_set(cbt, old.start, old.len, 1, 1);
+out_rcu:
+	rcu_read_unlock();
+}
+
+inline void blk_cbt_bio_queue(struct request_queue *q, struct bio *bio)
+{
+	if (!q->cbt || bio_data_dir(bio) == READ || !bio->bi_size)
+		return;
+
+	blk_cbt_add(q, bio->bi_sector << 9, bio->bi_size);
+}
+
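+/*
+ * Allocate a cbt_info for @q covering @size bytes at @blocksize
+ * granularity: a zeroed per-cpu extent cache plus a vmalloc'ed array of
+ * page pointers.  The bitmap pages themselves are allocated lazily in
+ * __blk_cbt_set().
+ */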
+static struct cbt_info *do_cbt_alloc(struct request_queue *q, __u8 *uuid,
+				     loff_t size, loff_t blocksize)
+{
+	struct cbt_info *cbt;
+	struct cbt_extent *ex;
+	int i;
+
+	cbt = kzalloc(sizeof(*cbt), GFP_KERNEL);
+	if (!cbt)
+		return ERR_PTR(-ENOMEM);
+
+	cbt->block_bits = ilog2(blocksize);
+	cbt->block_max = (size + blocksize) >> cbt->block_bits;
+	spin_lock_init(&cbt->lock);
+	memcpy(cbt->uuid, uuid, sizeof(cbt->uuid));
+	cbt->cache = alloc_percpu(struct cbt_extent);
+	if (!cbt->cache)
+		goto err_cbt;
+
+	for_each_possible_cpu(i) {
+		ex = per_cpu_ptr(cbt->cache, i);
+		memset(ex, 0, sizeof(*ex));
+	}
+
+	cbt->map = vmalloc(NR_PAGES(cbt->block_max) * sizeof(void *));
+	if (!cbt->map)
+		goto err_pcpu;
+
+	memset(cbt->map, 0, NR_PAGES(cbt->block_max) * sizeof(void *));
+	cbt->queue = q;
+	return cbt;
+err_pcpu:
+	free_percpu(cbt->cache);
+err_cbt:
+	kfree(cbt);
+	return ERR_PTR(-ENOMEM);
+}
+
+void blk_cbt_update_size(struct block_device *bdev)
+{
+	struct request_queue *q;
+	struct cbt_info *new, *cbt;
+	unsigned long to_cpy, idx;
+	unsigned bsz;
+	loff_t new_sz = i_size_read(bdev->bd_inode);
+	int in_use = 0;
+
+	if (!bdev->bd_disk || !bdev_get_queue(bdev))
+		return;
+
+	q = bdev_get_queue(bdev);
+	mutex_lock(&cbt_mutex);
+	cbt = q->cbt;
+	if (!cbt) {
+		mutex_unlock(&cbt_mutex);
+		return;
+	}
+	bsz = 1 << cbt->block_bits;
+	if ((new_sz + bsz) >> cbt->block_bits <= cbt->block_max)
+		goto err_mtx;
+
+	new = do_cbt_alloc(q, cbt->uuid, new_sz, bsz);
+	if (IS_ERR(new)) {
+		set_bit(CBT_ERROR, &cbt->flags);
+		goto err_mtx;
+	}
+	/* only grow is possible here, so the old map bounds the copy */
+	to_cpy = NR_PAGES(cbt->block_max);
+	set_bit(CBT_NOCACHE, &cbt->flags);
+	cbt_flush_cache(cbt);
+	spin_lock_irq(&cbt->lock);
+	set_bit(CBT_DEAD, &cbt->flags);
+	for (idx = 0; idx < to_cpy; idx++) {
+		new->map[idx] = cbt->map[idx];
+		if (new->map[idx])
+			get_page(new->map[idx]);
+	}
+	rcu_assign_pointer(q->cbt, new);
+	in_use = cbt->count;
+	spin_unlock_irq(&cbt->lock);
+	if (!in_use)
+		call_rcu(&cbt->rcu, &cbt_release_callback);
+err_mtx:
+	mutex_unlock(&cbt_mutex);
+}
+
+static int cbt_ioc_init(struct block_device *bdev,
+			struct blk_user_cbt_info __user *ucbt_ioc)
+{
+	struct request_queue *q;
+	struct blk_user_cbt_info ci;
+	struct cbt_info *cbt;
+	int ret = 0;
+
+	if (copy_from_user(&ci, ucbt_ioc, sizeof(ci)))
+		return -EFAULT;
+
+	if ((ci.ci_blksize - 1) & ci.ci_blksize)
+		return -EINVAL;
+
+	q = bdev_get_queue(bdev);
+	mutex_lock(&cbt_mutex);
+	if (q->cbt) {
+		ret = -EBUSY;
+		goto err_mtx;
+	}
+	cbt = do_cbt_alloc(q, ci.ci_uuid, i_size_read(bdev->bd_inode),
+			   ci.ci_blksize);
+	if (IS_ERR(cbt))
+		ret = PTR_ERR(cbt);
+	else
+		rcu_assign_pointer(q->cbt, cbt);
+err_mtx:
+	mutex_unlock(&cbt_mutex);
+	return ret;
+}
+
+static void cbt_release_callback(struct rcu_head *head)
+{
+	struct cbt_info *cbt;
+	int nr_pages, i;
+
+	cbt = container_of(head, struct cbt_info, rcu);
+	nr_pages = NR_PAGES(cbt->block_max);
+	for (i = 0; i < nr_pages; i++)
+		if (cbt->map[i])
+			__free_page(cbt->map[i]);
+
+	vfree(cbt->map);
+	free_percpu(cbt->cache);
+	kfree(cbt);
+}
+
+void blk_cbt_release(struct request_queue *q)
+{
+	struct cbt_info *cbt;
+	int in_use = 0;
+
+	cbt = q->cbt;
+	if (!cbt)
+		return;
+	spin_lock(&cbt->lock);
+	set_bit(CBT_DEAD, &cbt->flags);
+	rcu_assign_pointer(q->cbt, NULL);
+	in_use = cbt->count;
+	spin_unlock(&cbt->lock);
+	if (!in_use)
+		call_rcu(&cbt->rcu, &cbt_release_callback);
+}
+
+static int cbt_ioc_stop(struct block_device *bdev)
+{
+	struct request_queue *q;
+
+	mutex_lock(&cbt_mutex);
+	q = bdev_get_queue(bdev);
+	if (!q->cbt) {
+		mutex_unlock(&cbt_mutex);
+		return -EINVAL;
+	}
+	blk_cbt_release(q);
+	mutex_unlock(&cbt_mutex);
+	return 0;
+}
+
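+/*
+ * Fold the calling CPU's cached extent into the bitmap.  Invoked on
+ * every CPU via on_each_cpu() from cbt_flush_cache() so that the bitmap
+ * is complete before it is copied, reported or cleared.
+ */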
+static inline void __cbt_flush_cpu_cache(void *ptr)
+{
+	struct cbt_info *cbt = (struct cbt_info *)ptr;
+	struct cbt_extent *ex = this_cpu_ptr(cbt->cache);
+
+	if (ex->len) {
+		__blk_cbt_set(cbt, ex->start, ex->len, 0, 1);
+		ex->start += ex->len;
+		ex->len = 0;
+	}
+}
+
+static void cbt_flush_cache(struct cbt_info *cbt)
+{
+	on_each_cpu(__cbt_flush_cpu_cache, cbt, 1);
+}
+
+static void cbt_find_next_extent(struct cbt_info *cbt, blkcnt_t block,
+				 struct cbt_extent *ex)
+{
+	unsigned long off, off2, idx;
+	struct page *page;
+	bool found = 0;
+
+	ex->start = cbt->block_max;
+	ex->len = 0;
+
+	idx = block >> (PAGE_SHIFT + 3);
+	while (block < cbt->block_max) {
+		off = block & (BITS_PER_PAGE - 1);
+		page = rcu_dereference(cbt->map[idx]);
+		if (!page) {
+			if (found)
+				break;
+			goto next;
+		}
+		spin_lock_page(page);
+		/* Find extent start */
+		if (!found) {
+			ex->start = find_next_bit(page_address(page),
+						  BITS_PER_PAGE, off);
+			if (ex->start != BITS_PER_PAGE) {
+				off = ex->start;
+				ex->start += idx << (PAGE_SHIFT + 3);
+				found = 1;
+			} else {
+				unlock_page(page);
+				goto next;
+			}
+		}
+		if (found) {
+			off2 = find_next_zero_bit(page_address(page),
+						  BITS_PER_PAGE, off);
+			ex->len += off2 - off;
+			if (off2 != BITS_PER_PAGE) {
+				unlock_page(page);
+				break;
+			}
+		}
+		unlock_page(page);
+next:
+		idx++;
+		block = idx << (PAGE_SHIFT + 3);
+	}
+}
+
+static int cbt_ioc_get(struct block_device *bdev,
+		       struct blk_user_cbt_info __user *ucbt_ioc)
+{
+	struct request_queue *q;
+	struct blk_user_cbt_info ci;
+	struct blk_user_cbt_extent __user *cur_u_ex;
+	struct blk_user_cbt_extent u_ex;
+	struct cbt_info *cbt;
+	struct cbt_extent ex;
+	blkcnt_t block, end;
+	int ret = 0;
+
+	if (copy_from_user(&ci, ucbt_ioc, sizeof(ci)))
+		return -EFAULT;
+	if (ci.ci_flags & ~CI_FLAG_ONCE)
+		return -EINVAL;
+	if (ci.ci_extent_count > CBT_MAX_EXTENTS)
+		return -EINVAL;
+
+	cur_u_ex = (struct blk_user_cbt_extent __user *)
+		((char *)ucbt_ioc + sizeof(struct blk_user_cbt_info));
+
+	if (ci.ci_extent_count != 0 &&
+	    !access_ok(VERIFY_WRITE, cur_u_ex,
+		       ci.ci_extent_count * sizeof(struct blk_user_cbt_extent))) {
+		return -EFAULT;
+	}
+	q = bdev_get_queue(bdev);
+	mutex_lock(&cbt_mutex);
+	cbt = q->cbt;
+	if (!cbt) {
+		mutex_unlock(&cbt_mutex);
+		return -EINVAL;
+	}
+	if ((ci.ci_start >> cbt->block_bits) > cbt->block_max) {
+		mutex_unlock(&cbt_mutex);
+		return -EINVAL;
+	}
+	if (test_bit(CBT_ERROR, &cbt->flags)) {
+		mutex_unlock(&cbt_mutex);
+		return -EIO;
+	}
+	cbt_flush_cache(cbt);
+
+	memcpy(&ci.ci_uuid, cbt->uuid, sizeof(cbt->uuid));
+	ci.ci_blksize = 1UL << cbt->block_bits;
+	block = ci.ci_start >> cbt->block_bits;
+	end = (ci.ci_start + ci.ci_length) >> cbt->block_bits;
+	if (end > cbt->block_max)
+		end = cbt->block_max;
+
+	while (ci.ci_mapped_extents < ci.ci_extent_count) {
+		cbt_find_next_extent(cbt, block, &ex);
+		if (!ex.len || ex.start > end) {
+			ret = 0;
+			break;
+		}
+		u_ex.ce_physical = ex.start << cbt->block_bits;
+		u_ex.ce_length = ex.len << cbt->block_bits;
+		u_ex.ce_reserved64[0] = 0;	/* do not leak kernel stack */
+		if (copy_to_user(cur_u_ex, &u_ex, sizeof(u_ex))) {
+			ret = -EFAULT;
+			break;
+		}
+		if (ci.ci_flags & CI_FLAG_ONCE)
+			__blk_cbt_set(cbt, ex.start, ex.len, 0, 0);
+		cur_u_ex++;
+		ci.ci_mapped_extents++;
+		block = ex.start + ex.len;
+	}
+	mutex_unlock(&cbt_mutex);
+	if (!ret && copy_to_user(ucbt_ioc, &ci, sizeof(ci)))
+		ret = -EFAULT;
+
+	return ret;
+}
+
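+/*
+ * Common worker for BLKCBTSET and BLKCBTCLR: validates the user-supplied
+ * extents and sets (@set != 0) or clears the corresponding bitmap bits.
+ * The per-cpu caches are flushed only for clear, so that cached extents
+ * cannot re-dirty ranges that were just cleared.
+ */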
+static int cbt_ioc_set(struct block_device *bdev,
+		       struct blk_user_cbt_info __user *ucbt_ioc, bool set)
+{
+	struct request_queue *q = bdev_get_queue(bdev);
+	struct cbt_info *cbt;
+	struct blk_user_cbt_info ci;
+	struct blk_user_cbt_extent u_ex;
+	struct blk_user_cbt_extent __user *cur_u_ex, *end;
+	int ret = 0;
+
+	if (copy_from_user(&ci, ucbt_ioc, sizeof(ci)))
+		return -EFAULT;
+	if (ci.ci_extent_count > CBT_MAX_EXTENTS)
+		return -EINVAL;
+	if (ci.ci_extent_count < ci.ci_mapped_extents)
+		return -EINVAL;
+
+	cur_u_ex = (struct blk_user_cbt_extent __user *)
+		((char *)ucbt_ioc + sizeof(struct blk_user_cbt_info));
+	end = cur_u_ex + ci.ci_mapped_extents;
+	if (!access_ok(VERIFY_READ, cur_u_ex,
+		       ci.ci_mapped_extents * sizeof(struct blk_user_cbt_extent)))
+		return -EFAULT;
+
+	mutex_lock(&cbt_mutex);
+	cbt = q->cbt;
+	if (!cbt) {
+		mutex_unlock(&cbt_mutex);
+		return -EINVAL;
+	}
+	if (ci.ci_flags & CI_FLAG_NEW_UUID)
+		memcpy(cbt->uuid, &ci.ci_uuid, sizeof(ci.ci_uuid));
+	else if (memcmp(cbt->uuid, &ci.ci_uuid, sizeof(ci.ci_uuid))) {
+		mutex_unlock(&cbt_mutex);
+		return -EINVAL;
+	}
+	if (test_bit(CBT_ERROR, &cbt->flags)) {
+		mutex_unlock(&cbt_mutex);
+		return -EIO;
+	}
+
+	/* Do not care about pcpu caches on set, only in case of clear */
+	if (!set)
+		cbt_flush_cache(cbt);
+
+	while (cur_u_ex < end) {
+		struct cbt_extent ex;
+
+		if (copy_from_user(&u_ex, cur_u_ex, sizeof(u_ex))) {
+			ret = -EFAULT;
+			break;
+		}
+		ex.start = u_ex.ce_physical >> cbt->block_bits;
+		ex.len = (u_ex.ce_length + (1 << cbt->block_bits) - 1) >>
+			cbt->block_bits;
+		if (ex.start > q->cbt->block_max ||
+		    ex.start + ex.len > q->cbt->block_max ||
+		    ex.len == 0) {
+			ret = -EINVAL;
+			break;
+		}
+		ret = __blk_cbt_set(cbt, ex.start, ex.len, 0, set);
+		if (ret)
+			break;
+		cur_u_ex++;
+	}
+	mutex_unlock(&cbt_mutex);
+	return ret;
+}
+
+int blk_cbt_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
+{
+	struct blk_user_cbt_info __user *ucbt_ioc =
+		(struct blk_user_cbt_info __user *)arg;
+
+	switch (cmd) {
+	case BLKCBTSTART:
+		if (!capable(CAP_SYS_ADMIN))
+			return -EACCES;
+		return cbt_ioc_init(bdev, ucbt_ioc);
+	case BLKCBTSTOP:
+		if (!capable(CAP_SYS_ADMIN))
+			return -EACCES;
+		return cbt_ioc_stop(bdev);
+	case BLKCBTGET:
+		return cbt_ioc_get(bdev, ucbt_ioc);
+	case BLKCBTSET:
+		if (!capable(CAP_SYS_ADMIN))
+			return -EACCES;
+		return cbt_ioc_set(bdev, ucbt_ioc, 1);
+	case BLKCBTCLR:
+		if (!capable(CAP_SYS_ADMIN))
+			return -EACCES;
+		return cbt_ioc_set(bdev, ucbt_ioc, 0);
+	default:
+		BUG();
+	}
+	return -ENOTTY;
+}
diff --git a/block/blk-core.c b/block/blk-core.c
index 66f7be3..08ac4d3 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1845,6 +1845,7 @@ generic_make_request_checks(struct bio *bio)
 		return false;	/* throttled, will be resubmitted later */
 
 	trace_block_bio_queue(q, bio);
+	blk_cbt_bio_queue(q, bio);
 	return true;
 
 end_io:
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 10d2058..21bc0c7 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -551,6 +551,7 @@ static void blk_release_queue(struct kobject *kobj)
 	kfree(q->flush_rq);
 
 	blk_trace_shutdown(q);
+	blk_cbt_release(q);
 
 	bdi_destroy(&q->backing_dev_info);
diff --git a/block/ioctl.c b/block/ioctl.c
index 93a9fdc..c56168b 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -139,7 +139,7 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user
 			}
 			disk_part_iter_exit(&piter);
 			part_nr_sects_write(part, (sector_t)length);
-			i_size_write(bdevp->bd_inode, p.length);
+			bd_write_size(bdevp, p.length);
 			mutex_unlock(&bdevp->bd_mutex);
 			mutex_unlock(&bdev->bd_mutex);
 			bdput(bdevp);
@@ -437,6 +437,13 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
 	case BLKTRACETEARDOWN:
 		ret = blk_trace_ioctl(bdev, cmd, (char __user *) arg);
 		break;
+	case BLKCBTSTART:
+	case BLKCBTSTOP:
+	case BLKCBTGET:
+	case BLKCBTSET:
+	case BLKCBTCLR:
+		ret = blk_cbt_ioctl(bdev, cmd, (char __user *)arg);
+		break;
 	default:
 		ret = __blkdev_driver_ioctl(bdev, mode, cmd, arg);
 	}
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 6bf6815..aca38b5 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -2338,7 +2338,7 @@ static void __set_size(struct mapped_device *md, sector_t size)
 {
 	set_capacity(md->disk, size);
 
-	i_size_write(md->bdev->bd_inode, (loff_t)size << SECTOR_SHIFT);
+	bd_write_size(md->bdev, (loff_t)size << SECTOR_SHIFT);
 }
 
 /*
diff --git a/fs/block_dev.c b/fs/block_dev.c
index e6a2837..a22f439 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1037,12 +1037,19 @@ int check_disk_change(struct block_device *bdev)
 
 EXPORT_SYMBOL(check_disk_change);
 
+void bd_write_size(struct block_device *bdev, loff_t size)
+{
+	i_size_write(bdev->bd_inode, size);
+	blk_cbt_update_size(bdev);
+}
+EXPORT_SYMBOL(bd_write_size);
+
 void bd_set_size(struct block_device *bdev, loff_t size)
 {
 	unsigned bsize = bdev_logical_block_size(bdev);
 
 	mutex_lock(&bdev->bd_inode->i_mutex);
-	i_size_write(bdev->bd_inode, size);
+	bd_write_size(bdev, size);
 	mutex_unlock(&bdev->bd_inode->i_mutex);
 	while (bsize < PAGE_CACHE_SIZE) {
 		if (size & bsize)
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index bea378b..2a05818 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -501,6 +501,9 @@ struct request_queue {
 	/* Throttle data */
 	struct throtl_data *td;
 #endif
+#ifdef CONFIG_BLK_DEV_CBT
+	struct cbt_info *cbt;
+#endif
 	struct rcu_head		rcu_head;
 	wait_queue_head_t	mq_freeze_wq;
 	struct percpu_counter	mq_usage_counter;
@@ -1637,6 +1640,17 @@ static inline bool blk_integrity_is_initialized(struct gendisk *g)
 
 #endif /* CONFIG_BLK_DEV_INTEGRITY */
 
+#if defined(CONFIG_BLK_DEV_CBT)
+extern void blk_cbt_update_size(struct block_device *bdev);
+extern void blk_cbt_release(struct request_queue *q);
+extern void blk_cbt_bio_queue(struct request_queue *q, struct bio *bio);
+extern int blk_cbt_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg);
+#else /* CONFIG_BLK_DEV_CBT */
+#define blk_cbt_update_size(b)		(0)
+#define blk_cbt_release(q)		(0)
+#define blk_cbt_bio_queue(q, bio)	(0)
+#define blk_cbt_ioctl(b, c, a)		(-ENOTTY)
+#endif /* CONFIG_BLK_DEV_CBT */
 struct block_device_operations {
 	int (*open) (struct block_device *, fmode_t);
 	void (*release) (struct gendisk *, fmode_t);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 553bca3..7e7bd3f 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2370,6 +2370,7 @@ extern int register_blkdev(unsigned int, const char *);
 extern void unregister_blkdev(unsigned int, const char *);
 extern struct block_device *bdget(dev_t);
 extern struct block_device *bdgrab(struct block_device *bdev);
+extern void bd_write_size(struct block_device *, loff_t size);
 extern void bd_set_size(struct block_device *, loff_t size);
 extern void bd_forget(struct inode *inode);
 extern void bdput(struct block_device *);
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index 9b964a5..359bf02 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -153,6 +153,40 @@ struct inodes_stat_t {
 #define BLKROTATIONAL _IO(0x12,126)
 #define BLKZEROOUT _IO(0x12,127)
 
+/* Hole from 127..199 */
+struct blk_user_cbt_extent {
+	__u64 ce_physical;	/* physical offset in bytes for the start
+				 * of the extent from the beginning of the disk */
+	__u64 ce_length;	/* length in bytes for this extent */
+	__u64 ce_reserved64[1];
+};
+
+struct blk_user_cbt_info {
+	__u8 ci_uuid[16];	/* Bitmap UUID */
+	__u64 ci_start;		/* start of physical range of mapping which
+				 * userspace wants (in) */
+	__u64 ci_length;	/* physical length of mapping which
+				 * userspace wants (in) */
+	__u32 ci_blksize;	/* cbt logical block size */
+	__u32 ci_flags;		/* CI_FLAG_* flags for request (in/out) */
+	__u32 ci_mapped_extents;/* number of extents that were mapped (out) */
+	__u32 ci_extent_count;	/* size of ci_extents array (in) */
+	__u32 ci_reserved;
+	struct blk_user_cbt_extent ci_extents[0]; /* array of mapped extents (out) */
+};
+
+enum CI_FLAGS
+{
+	CI_FLAG_ONCE = 1,	/* BLKCBTGET will clear bits */
+	CI_FLAG_NEW_UUID = 2	/* BLKCBTSET updates uuid */
+};
+
+#define BLKCBTSTART	_IOR(0x12, 200, struct blk_user_cbt_info)
+#define BLKCBTSTOP	_IO(0x12, 201)
+#define BLKCBTGET	_IOWR(0x12, 202, struct blk_user_cbt_info)
+#define BLKCBTSET	_IOR(0x12, 203, struct blk_user_cbt_info)
+#define BLKCBTCLR	_IOR(0x12, 204, struct blk_user_cbt_info)
+
 #define BMAP_IOCTL 1		/* obsolete - kept for compatibility */
 #define FIBMAP	   _IO(0x00,1)	/* bmap access */
 #define FIGETBSZ   _IO(0x00,2)	/* get the block size used for bmap */
-- 
1.7.1

_______________________________________________
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel