On 5/1/2024 1:05, Denis V. Lunev via wrote: > Preallocate filter allows to implement really interesting setups. > > Assume that we have > * shared block device, f.e. iSCSI LUN, implemented with some HW device > * clustered LVM on top of it > * QCOW2 image stored inside LVM volume > > This allows very cheap clustered setups with all QCOW2 features intact. > Currently supported setups using QCOW2 with data_file option are not > so cool as snapshots are not allowed, QCOW2 should be placed into some > additional distributed storage and so on. > > Though QCOW2 inside LVM volume has a drawback. The image is growing and > in order to accommodate that image LVM volume is to be resized. This > could be done externally using ENOSPC event/condition but this is > cumbersome. > > This patch introduces native implementation for such a setup. We should > just put prealloc filter in between QCOW2 format and file nodes. In that > case LVM will be resized at proper moment and that is done effectively > as resizing is done in chunks. > > The patch adds allocation mode for this purpose in order to distinguish > 'fallocate' for ordinary file system and 'truncate'. > > Signed-off-by: Denis V. 
Lunev <d...@openvz.org> > CC: Alexander Ivanov <alexander.iva...@virtuozzo.com> > CC: Kevin Wolf <kw...@redhat.com> > CC: Hanna Reitz <hre...@redhat.com> > CC: Vladimir Sementsov-Ogievskiy <vsement...@yandex-team.ru> > --- > block/preallocate.c | 50 +++++++++++++++++++++++++++++++++++++++++++-- > 1 file changed, 48 insertions(+), 2 deletions(-) > > diff --git a/block/preallocate.c b/block/preallocate.c > index 4d82125036..6d31627325 100644 > --- a/block/preallocate.c > +++ b/block/preallocate.c > @@ -33,10 +33,24 @@ > #include "block/block-io.h" > #include "block/block_int.h" > > +typedef enum PreallocateMode { > + PREALLOCATE_MODE_FALLOCATE = 0, > + PREALLOCATE_MODE_TRUNCATE = 1, > + PREALLOCATE_MODE__MAX = 2, > +} PreallocateMode; > + > +static QEnumLookup prealloc_mode_lookup = { > + .array = (const char *const[]) { > + "falloc", > + "truncate", > + }, > + .size = PREALLOCATE_MODE__MAX, > +}; > > typedef struct PreallocateOpts { > int64_t prealloc_size; > int64_t prealloc_align; > + PreallocateMode prealloc_mode; > } PreallocateOpts; > > typedef struct BDRVPreallocateState { > @@ -79,6 +93,7 @@ typedef struct BDRVPreallocateState { > > #define PREALLOCATE_OPT_PREALLOC_ALIGN "prealloc-align" > #define PREALLOCATE_OPT_PREALLOC_SIZE "prealloc-size" > +#define PREALLOCATE_OPT_MODE "mode"
Why not keep the names consistent? I mean: #define PREALLOCATE_OPT_PREALLOC_MODE "prealloc-mode" > static QemuOptsList runtime_opts = { > .name = "preallocate", > .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head), > @@ -94,7 +109,14 @@ static QemuOptsList runtime_opts = { > .type = QEMU_OPT_SIZE, > .help = "how much to preallocate, default 128M", > }, > - { /* end of list */ } > + { > + .name = PREALLOCATE_OPT_MODE, > + .type = QEMU_OPT_STRING, > + .help = "Preallocation mode on image expansion " > + "(allowed values: falloc, truncate)", > + .def_value_str = "falloc", > + }, > + { /* end of list */ }, > }, > }; > > @@ -102,6 +124,8 @@ static bool preallocate_absorb_opts(PreallocateOpts > *dest, QDict *options, > BlockDriverState *child_bs, Error **errp) > { > QemuOpts *opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort); > + Error *local_err = NULL; > + char *buf; > > if (!qemu_opts_absorb_qdict(opts, options, errp)) { > return false; > @@ -112,6 +136,17 @@ static bool preallocate_absorb_opts(PreallocateOpts > *dest, QDict *options, > dest->prealloc_size = > qemu_opt_get_size(opts, PREALLOCATE_OPT_PREALLOC_SIZE, 128 * MiB); > > + buf = qemu_opt_get_del(opts, PREALLOCATE_OPT_MODE); > + /* prealloc_mode can be downgraded later during allocate_clusters */ > + dest->prealloc_mode = qapi_enum_parse(&prealloc_mode_lookup, buf, > + PREALLOCATE_MODE_FALLOCATE, > + &local_err); > + g_free(buf); > + if (local_err != NULL) { > + error_propagate(errp, local_err); > + return false; > + } > + > qemu_opts_del(opts); > > if (!QEMU_IS_ALIGNED(dest->prealloc_align, BDRV_SECTOR_SIZE)) { > @@ -335,9 +370,20 @@ handle_write(BlockDriverState *bs, int64_t offset, > int64_t bytes, > > want_merge_zero = want_merge_zero && (prealloc_start <= offset); > > - ret = bdrv_co_pwrite_zeroes( > + switch (s->opts.prealloc_mode) { > + case PREALLOCATE_MODE_FALLOCATE: > + ret = bdrv_co_pwrite_zeroes( > bs->file, prealloc_start, prealloc_end - prealloc_start, > BDRV_REQ_NO_FALLBACK | 
BDRV_REQ_SERIALISING | BDRV_REQ_NO_WAIT); > + break; > + case PREALLOCATE_MODE_TRUNCATE: > + ret = bdrv_co_truncate(bs->file, prealloc_end, false, > + PREALLOC_MODE_OFF, 0, NULL); > + break; > + default: > + return false; > + } > + > if (ret < 0) { > s->file_end = ret; > return false;