On Sat, Jun 20, 2020 at 01:51:33AM +0800, Li Guifu via Linux-erofs wrote: > Support segment compression which seperates files in several logic > units (segments) and each segment is compressed independently. > > Advantages: > - more friendly for data differencing; > - it can also be used for parallel compression in the same file later. > > Signed-off-by: Li Guifu <bluce....@aliyun.com> > --- > include/erofs/config.h | 1 + > lib/compress.c | 16 ++++++++++++++-- > lib/config.c | 1 + > man/mkfs.erofs.1 | 4 ++++ > mkfs/main.c | 16 +++++++++++++++- > 5 files changed, 35 insertions(+), 3 deletions(-) > > diff --git a/include/erofs/config.h b/include/erofs/config.h > index 2f09749..995664d 100644 > --- a/include/erofs/config.h > +++ b/include/erofs/config.h > @@ -36,6 +36,7 @@ struct erofs_configure { > char *c_src_path; > char *c_compr_alg_master; > int c_compr_level_master; > + u64 c_compr_seg_size;
Could you please move this variable up a bit? Thanks. char *c_compr_alg_master; u64 c_compr_seg_size; int c_compr_level_master; > int c_force_inodeversion; > /* < 0, xattr disabled and INT_MAX, always use inline xattrs */ > int c_inline_xattr_tolerance; > diff --git a/lib/compress.c b/lib/compress.c > index 6cc68ed..383ee00 100644 > --- a/lib/compress.c > +++ b/lib/compress.c > @@ -32,6 +32,7 @@ struct z_erofs_vle_compress_ctx { > > erofs_blk_t blkaddr; /* pointing to the next blkaddr */ > u16 clusterofs; > + u64 segavail; > }; > > #define Z_EROFS_LEGACY_MAP_HEADER_SIZE \ > @@ -158,13 +159,19 @@ static int vle_compress_one(struct erofs_inode *inode, > while (len) { > bool raw; unsigned int limit; > > + count = min_t(u64, len, ctx->segavail); kill this line. > + if (ctx->segavail <= EROFS_BLKSIZ) { > + if (len < ctx->segavail && !final) > + break; limit = ctx->segavail; > + goto nocompression; > + } > + > if (len <= EROFS_BLKSIZ) { > if (final) > goto nocompression; > break; > } > > - count = len; count = min_t(u64, len, ctx->segavail); > ret = erofs_compress_destsize(h, compressionlevel, > ctx->queue + ctx->head, > &count, dst, EROFS_BLKSIZ); > @@ -174,8 +181,9 @@ static int vle_compress_one(struct erofs_inode *inode, > inode->i_srcpath, > erofs_strerror(ret)); > } > + count = len; kill this line and add limit = EROFS_BLKSIZ; > nocompression: > - ret = write_uncompressed_block(ctx, &len, dst); > + ret = write_uncompressed_block(ctx, &count, dst); ret = write_uncompressed_block(ctx, &count, limit, dst); and update write_uncompressed_block as well. > if (ret < 0) > return ret; > count = ret; > @@ -202,6 +210,9 @@ nocompression: > > ++ctx->blkaddr; > len -= count; > + ctx->segavail -= count; > + if (!ctx->segavail) > + ctx->segavail = cfg.c_compr_seg_size; > > if (!final && ctx->head >= EROFS_CONFIG_COMPR_MAX_SZ) { > const unsigned int qh_aligned = > @@ -422,6 +433,7 @@ int erofs_write_compressed_file(struct erofs_inode *inode) > ctx.head = ctx.tail = 0; > ctx.clusterofs = 0; > remaining = inode->i_size; > + ctx.segavail = cfg.c_compr_seg_size; > > while (remaining) { > const u64 readcount = min_t(u64, remaining, > diff --git a/lib/config.c b/lib/config.c > index da0c260..de982e1 100644 > --- a/lib/config.c > +++ b/lib/config.c > @@ -23,6 +23,7 @@ void erofs_init_configure(void) > cfg.c_force_inodeversion = 0; > cfg.c_inline_xattr_tolerance = 2; > cfg.c_unix_timestamp = -1; > + cfg.c_compr_seg_size = UINT64_MAX; cfg.c_compr_seg_size = -1; since it is a very simple way to assign UINT_MAX by implicit sign extension without taking care for the specific data type. > } > > void erofs_show_config(void) > diff --git a/man/mkfs.erofs.1 b/man/mkfs.erofs.1 > index 891c5a8..b12cb22 100644 > --- a/man/mkfs.erofs.1 > +++ b/man/mkfs.erofs.1 > @@ -52,6 +52,10 @@ Forcely generate extended inodes (64-byte inodes) to > output. > Set all files to the given UNIX timestamp. Reproducible builds requires > setting > all to a specific one. > .TP > +.BI "\-S " # > +Set the max input stream size at one compression. The default is unsigned > 64bit MAX. > +It must be algin to EROFS block size(4096). it's hard for end users to type "max 64-bit unsigned value"... I'd suggest "Set max input stream size for each individual segment (disabled if 0). The default value is 0. It should be aligned with blocksize." > +.TP > .BI "\-\-exclude-path=" path > Ignore file that matches the exact literal path. > You may give multiple `--exclude-path' options. > diff --git a/mkfs/main.c b/mkfs/main.c > index 94bf1e6..96cc053 100644 > --- a/mkfs/main.c > +++ b/mkfs/main.c > @@ -61,6 +61,7 @@ static void usage(void) > " -x# set xattr tolerance to # (< 0, disable > xattrs; default 2)\n" > " -EX[,...] X=extended options\n" > " -T# set a fixed UNIX timestamp # to all files\n" > + " -S# set the max input stream size # at one > compression\n" -S# Set max input stream size # for each individual segment\n > " --exclude-path=X avoid including file X (X = exact literal > path)\n" > " --exclude-regex=X avoid including files that match X (X = > regular expression)\n" > #ifdef HAVE_LIBSELINUX > @@ -138,7 +139,7 @@ static int mkfs_parse_options_cfg(int argc, char *argv[]) > char *endptr; > int opt, i; > > - while((opt = getopt_long(argc, argv, "d:x:z:E:T:", > + while((opt = getopt_long(argc, argv, "d:x:z:E:T:S:", > long_options, NULL)) != -1) { > switch (opt) { > case 'z': > @@ -188,6 +189,19 @@ static int mkfs_parse_options_cfg(int argc, char *argv[]) > return -EINVAL; > } > break; > + case 'S': > + cfg.c_compr_seg_size = strtoll(optarg, &endptr, 0); > + if (*endptr != '\0' || !cfg.c_compr_seg_size) { Disable this if cfg.c_compr_seg_size == 0 > + erofs_err("invalid compress segment size %s", > + optarg); > + return -EINVAL; > + } > + if (cfg.c_compr_seg_size % EROFS_BLKSIZ) { > + erofs_err("segment size:%"PRIu64" should be > align to %u", Could you follow my advice in the previous reply? Although I'm not good at English, but I don't think the above message is _reasonable_. Thanks, Gao Xiang > + cfg.c_compr_seg_size, EROFS_BLKSIZ); > + return -EINVAL; > + } > + break; > case 2: > opt = erofs_parse_exclude_path(optarg, false); > if (opt) { > -- > 2.17.1 >