Hi Guifu,

On Sun, Jul 05, 2020 at 04:32:30PM +0800, Li Guifu via Linux-erofs wrote:
> Support segment compression which separates files into several logical
> units (segments) and each segment is compressed independently.
> 
> Advantages:
>  - more friendly for data differencing;
>  - it can also be used for parallel compression in the same file later.
> 
> Signed-off-by: Li Guifu <bluce....@aliyun.com>
> ---
> Changes from v10
> - change variable uncomprofs to clusterofs, which is only used
>   when writing an uncompressed block

Could you please test the following patch if you're available?
Does it work?

From 0436ed04717853351e13d68db6f170f60e25fc12 Mon Sep 17 00:00:00 2001
From: Li Guifu <bluce....@aliyun.com>
Date: Sun, 5 Jul 2020 16:32:30 +0800
Subject: [PATCH v12] erofs-utils: introduce segment compression

Support segment compression which separates files into several logical
units (segments) and each segment is compressed independently.

Advantages:
 - more friendly for data differencing;
 - it can also be used for parallel compression in the same file later.

Signed-off-by: Li Guifu <bluce....@aliyun.com>
Signed-off-by: Gao Xiang <hsiang...@redhat.com>
---
 include/erofs/config.h |  2 ++
 lib/compress.c         | 38 ++++++++++++++++++++++++++++++--------
 lib/config.c           |  1 +
 man/mkfs.erofs.1       |  4 ++++
 mkfs/main.c            | 12 +++++++++++-
 5 files changed, 48 insertions(+), 9 deletions(-)

diff --git a/include/erofs/config.h b/include/erofs/config.h
index 2f09749..b149633 100644
--- a/include/erofs/config.h
+++ b/include/erofs/config.h
@@ -35,6 +35,8 @@ struct erofs_configure {
        char *c_img_path;
        char *c_src_path;
        char *c_compr_alg_master;
+       u64 c_compr_segsize;
+
        int c_compr_level_master;
        int c_force_inodeversion;
        /* < 0, xattr disabled and INT_MAX, always use inline xattrs */
diff --git a/lib/compress.c b/lib/compress.c
index 6cc68ed..4216fa7 100644
--- a/lib/compress.c
+++ b/lib/compress.c
@@ -32,6 +32,7 @@ struct z_erofs_vle_compress_ctx {
 
        erofs_blk_t blkaddr;    /* pointing to the next blkaddr */
        u16 clusterofs;
+       u64 segavail;
 };
 
 #define Z_EROFS_LEGACY_MAP_HEADER_SIZE \
@@ -124,24 +125,33 @@ static int write_uncompressed_block(struct z_erofs_vle_compress_ctx *ctx,
 
        /* reset clusterofs to 0 if permitted */
        if (!erofs_sb_has_lz4_0padding() &&
-           ctx->head >= ctx->clusterofs) {
+           ctx->clusterofs && ctx->head >= ctx->clusterofs) {
                ctx->head -= ctx->clusterofs;
                *len += ctx->clusterofs;
+
+               ctx->segavail += ctx->clusterofs;
+               DBG_BUGON(ctx->segavail > cfg.c_compr_segsize);
+
+               DBG_BUGON(ctx->segavail < EROFS_BLKSIZ);
+               /* so only *len will be the candidate instead of segavail */
+               count = *len;
+
                ctx->clusterofs = 0;
+       } else {
+               count = min_t(u64, ctx->segavail, *len);
        }
 
-       /* write uncompressed data */
-       count = min(EROFS_BLKSIZ, *len);
+       if (count > EROFS_BLKSIZ)
+               count = EROFS_BLKSIZ;
 
+       /* fill zero if the uncompressed block isn't full */
        memcpy(dst, ctx->queue + ctx->head, count);
        memset(dst + count, 0, EROFS_BLKSIZ - count);
 
        erofs_dbg("Writing %u uncompressed data to block %u",
                  count, ctx->blkaddr);
        ret = blk_write(dst, ctx->blkaddr, 1);
-       if (ret)
-               return ret;
-       return count;
+       return ret ? ret : count;
 }
 
 static int vle_compress_one(struct erofs_inode *inode,
@@ -158,13 +168,20 @@ static int vle_compress_one(struct erofs_inode *inode,
        while (len) {
                bool raw;
 
+               if (ctx->segavail <= EROFS_BLKSIZ) {
+                       if (len < ctx->segavail && !final)
+                               break;
+
+                       goto nocompression;
+               }
+
                if (len <= EROFS_BLKSIZ) {
                        if (final)
                                goto nocompression;
                        break;
                }
 
-               count = len;
+               count = min_t(u64, len, ctx->segavail);
                ret = erofs_compress_destsize(h, compressionlevel,
                                              ctx->queue + ctx->head,
                                              &count, dst, EROFS_BLKSIZ);
@@ -195,8 +212,12 @@ nocompression:
                                return ret;
                        raw = false;
                }
-
                ctx->head += count;
+               DBG_BUGON(ctx->segavail < count);
+               ctx->segavail -= count;
+               if (!ctx->segavail)
+                       ctx->segavail = cfg.c_compr_segsize;
+
                /* write compression indexes for this blkaddr */
                vle_write_indexes(ctx, count, raw);
 
@@ -421,6 +442,7 @@ int erofs_write_compressed_file(struct erofs_inode *inode)
        ctx.metacur = compressmeta + Z_EROFS_LEGACY_MAP_HEADER_SIZE;
        ctx.head = ctx.tail = 0;
        ctx.clusterofs = 0;
+       ctx.segavail = cfg.c_compr_segsize;
        remaining = inode->i_size;
 
        while (remaining) {
diff --git a/lib/config.c b/lib/config.c
index da0c260..9d4bea1 100644
--- a/lib/config.c
+++ b/lib/config.c
@@ -23,6 +23,7 @@ void erofs_init_configure(void)
        cfg.c_force_inodeversion = 0;
        cfg.c_inline_xattr_tolerance = 2;
        cfg.c_unix_timestamp = -1;
+       cfg.c_compr_segsize = -1;
 }
 
 void erofs_show_config(void)
diff --git a/man/mkfs.erofs.1 b/man/mkfs.erofs.1
index 891c5a8..8d0fc10 100644
--- a/man/mkfs.erofs.1
+++ b/man/mkfs.erofs.1
@@ -48,6 +48,10 @@ Forcely generate compact inodes (32-byte inodes) to output.
 Forcely generate extended inodes (64-byte inodes) to output.
 .RE
 .TP
+.BI "\-S " #
+Set maximum blocks for each individual compress segment.
+The default is 0 (disabled).
+.TP
 .BI "\-T " #
 Set all files to the given UNIX timestamp. Reproducible builds requires setting
 all to a specific one.
diff --git a/mkfs/main.c b/mkfs/main.c
index 94bf1e6..0265ae9 100644
--- a/mkfs/main.c
+++ b/mkfs/main.c
@@ -61,6 +61,7 @@ static void usage(void)
              " -x#               set xattr tolerance to # (< 0, disable xattrs; default 2)\n"
              " -EX[,...]         X=extended options\n"
              " -T#               set a fixed UNIX timestamp # to all files\n"
+             " -S#               Set maximum blocks for each individual compress segment\n"
              " --exclude-path=X  avoid including file X (X = exact literal path)\n"
              " --exclude-regex=X avoid including files that match X (X = regular expression)\n"
 #ifdef HAVE_LIBSELINUX
@@ -138,7 +139,7 @@ static int mkfs_parse_options_cfg(int argc, char *argv[])
        char *endptr;
        int opt, i;
 
-       while((opt = getopt_long(argc, argv, "d:x:z:E:T:",
+       while((opt = getopt_long(argc, argv, "d:x:z:E:T:S:",
                                 long_options, NULL)) != -1) {
                switch (opt) {
                case 'z':
@@ -188,6 +189,15 @@ static int mkfs_parse_options_cfg(int argc, char *argv[])
                                return -EINVAL;
                        }
                        break;
+               case 'S':
+                       i = strtoll(optarg, &endptr, 0);
+                       if (*endptr != '\0') {
+                               erofs_err("invalid blocks per compress segment %s",
+                                         optarg);
+                               return -EINVAL;
+                       }
+                       cfg.c_compr_segsize = i ? blknr_to_addr(i) : -1;
+                       break;
                case 2:
                        opt = erofs_parse_exclude_path(optarg, false);
                        if (opt) {
-- 
2.24.0



Reply via email to