On Sat, Jun 20, 2020 at 01:51:33AM +0800, Li Guifu via Linux-erofs wrote:
> Support segment compression which seperates files in several logic
> units (segments) and each segment is compressed independently.
>
> Advantages:
>  - more friendly for data differencing;
>  - it can also be used for parallel compression in the same file later.
>
> Signed-off-by: Li Guifu <bluce....@aliyun.com>
> ---
>  include/erofs/config.h |  1 +
>  lib/compress.c         | 16 ++++++++++++++--
>  lib/config.c           |  1 +
>  man/mkfs.erofs.1       |  4 ++++
>  mkfs/main.c            | 16 +++++++++++++++-
>  5 files changed, 35 insertions(+), 3 deletions(-)
>
> diff --git a/include/erofs/config.h b/include/erofs/config.h
> index 2f09749..995664d 100644
> --- a/include/erofs/config.h
> +++ b/include/erofs/config.h
> @@ -36,6 +36,7 @@ struct erofs_configure {
>       char *c_src_path;
>       char *c_compr_alg_master;
>       int c_compr_level_master;
> +     u64 c_compr_seg_size;

Could you please move this variable up a bit? Thanks.

char *c_compr_alg_master;
u64 c_compr_seg_size;
int c_compr_level_master;

>       int c_force_inodeversion;
>       /* < 0, xattr disabled and INT_MAX, always use inline xattrs */
>       int c_inline_xattr_tolerance;
> diff --git a/lib/compress.c b/lib/compress.c
> index 6cc68ed..383ee00 100644
> --- a/lib/compress.c
> +++ b/lib/compress.c
> @@ -32,6 +32,7 @@ struct z_erofs_vle_compress_ctx {
>
>       erofs_blk_t blkaddr;    /* pointing to the next blkaddr */
>       u16 clusterofs;
> +     u64 segavail;
>  };
>
>  #define Z_EROFS_LEGACY_MAP_HEADER_SIZE       \
> @@ -158,13 +159,19 @@ static int vle_compress_one(struct erofs_inode *inode,
>       while (len) {
>               bool raw;

unsigned int limit;

>
> +             count = min_t(u64, len, ctx->segavail);

kill this line.

> +             if (ctx->segavail <= EROFS_BLKSIZ) {
> +                     if (len < ctx->segavail && !final)
> +                             break;

limit = ctx->segavail;

> +                     goto nocompression;
> +             }
> +
>               if (len <= EROFS_BLKSIZ) {
>                       if (final)
>                               goto nocompression;
>                       break;
>               }
>
> -             count = len;

count = min_t(u64, len, ctx->segavail);

>               ret = erofs_compress_destsize(h, compressionlevel,
>                                             ctx->queue + ctx->head,
>                                             &count, dst, EROFS_BLKSIZ);
> @@ -174,8 +181,9 @@ static int vle_compress_one(struct erofs_inode *inode,
>                                         inode->i_srcpath,
>                                         erofs_strerror(ret));
>                       }
> +                     count = len;

kill this line and add limit = EROFS_BLKSIZ;

>  nocompression:
> -                     ret = write_uncompressed_block(ctx, &len, dst);
> +                     ret = write_uncompressed_block(ctx, &count, dst);

ret = write_uncompressed_block(ctx, &count, limit, dst);

and update write_uncompressed_block as well.

>                       if (ret < 0)
>                               return ret;
>                       count = ret;
> @@ -202,6 +210,9 @@ nocompression:
>
>               ++ctx->blkaddr;
>               len -= count;
> +             ctx->segavail -= count;
> +             if (!ctx->segavail)
> +                     ctx->segavail = cfg.c_compr_seg_size;
>
>               if (!final && ctx->head >= EROFS_CONFIG_COMPR_MAX_SZ) {
>                       const unsigned int qh_aligned =
> @@ -422,6 +433,7 @@ int erofs_write_compressed_file(struct erofs_inode *inode)
>       ctx.head = ctx.tail = 0;
>       ctx.clusterofs = 0;
>       remaining = inode->i_size;
> +     ctx.segavail = cfg.c_compr_seg_size;
>
>       while (remaining) {
>               const u64 readcount = min_t(u64, remaining,
> diff --git a/lib/config.c b/lib/config.c
> index da0c260..de982e1 100644
> --- a/lib/config.c
> +++ b/lib/config.c
> @@ -23,6 +23,7 @@ void erofs_init_configure(void)
>       cfg.c_force_inodeversion = 0;
>       cfg.c_inline_xattr_tolerance = 2;
>       cfg.c_unix_timestamp = -1;
> +     cfg.c_compr_seg_size = UINT64_MAX;

cfg.c_compr_seg_size = -1;

since it is a very simple way to assign UINT_MAX by implicit sign
extension without taking care for the specific data type.

>  }
>
>  void erofs_show_config(void)
> diff --git a/man/mkfs.erofs.1 b/man/mkfs.erofs.1
> index 891c5a8..b12cb22 100644
> --- a/man/mkfs.erofs.1
> +++ b/man/mkfs.erofs.1
> @@ -52,6 +52,10 @@ Forcely generate extended inodes (64-byte inodes) to 
> output.
>  Set all files to the given UNIX timestamp. Reproducible builds requires 
> setting
>  all to a specific one.
>  .TP
> +.BI "\-S " #
> +Set the max input stream size at one compression. The default is unsigned 
> 64bit MAX.
> +It must be algin to EROFS block size(4096).

it's hard for end users to type "max 64-bit unsigned value"...

I'd suggest "Set max input stream size for each individual segment (disabled if 
0).
The default value is 0. It should be aligned with blocksize."

> +.TP
>  .BI "\-\-exclude-path=" path
>  Ignore file that matches the exact literal path.
>  You may give multiple `--exclude-path' options.
> diff --git a/mkfs/main.c b/mkfs/main.c
> index 94bf1e6..96cc053 100644
> --- a/mkfs/main.c
> +++ b/mkfs/main.c
> @@ -61,6 +61,7 @@ static void usage(void)
>             " -x#               set xattr tolerance to # (< 0, disable 
> xattrs; default 2)\n"
>             " -EX[,...]         X=extended options\n"
>             " -T#               set a fixed UNIX timestamp # to all files\n"
> +           " -S#               set the max input stream size # at one 
> compression\n"

-S#               Set max input stream size # for each individual segment\n

>             " --exclude-path=X  avoid including file X (X = exact literal 
> path)\n"
>             " --exclude-regex=X avoid including files that match X (X = 
> regular expression)\n"
>  #ifdef HAVE_LIBSELINUX
> @@ -138,7 +139,7 @@ static int mkfs_parse_options_cfg(int argc, char *argv[])
>       char *endptr;
>       int opt, i;
>
> -     while((opt = getopt_long(argc, argv, "d:x:z:E:T:",
> +     while((opt = getopt_long(argc, argv, "d:x:z:E:T:S:",
>                                long_options, NULL)) != -1) {
>               switch (opt) {
>               case 'z':
> @@ -188,6 +189,19 @@ static int mkfs_parse_options_cfg(int argc, char *argv[])
>                               return -EINVAL;
>                       }
>                       break;
> +             case 'S':
> +                     cfg.c_compr_seg_size = strtoll(optarg, &endptr, 0);
> +                     if (*endptr != '\0' || !cfg.c_compr_seg_size) {

Disable this if cfg.c_compr_seg_size == 0

> +                             erofs_err("invalid compress segment size %s",
> +                                       optarg);
> +                             return -EINVAL;
> +                     }
> +                     if (cfg.c_compr_seg_size % EROFS_BLKSIZ) {
> +                             erofs_err("segment size:%"PRIu64" should be 
> align to %u",


Could you follow my advice in the previous reply?
Although I'm not good at English, but I don't think the above message is 
_reasonable_.

Thanks,
Gao Xiang

> +                                       cfg.c_compr_seg_size, EROFS_BLKSIZ);
> +                             return -EINVAL;
> +                     }
> +                     break;
>               case 2:
>                       opt = erofs_parse_exclude_path(optarg, false);
>                       if (opt) {
> --
> 2.17.1
>

Reply via email to