Many use cases benefit from keeping inode metadata concentrated in one on-disk region, for example image filesystems that are primarily accessed over a network (e.g., EROFS native full container images). Otherwise, scattered on-disk inodes increase network access overhead and make metadata prefetching difficult to implement; such prefetching is what keeps a system from stalling on metadata I/O when, for example, the network fails.
Usage: `--ZI` or `--ZI=1` Enable inode meta zone; `--ZI=0` Disable inode meta zone (default). Closes: https://lore.kernel.org/r/[email protected] Cc: Hongbo Li <[email protected]> Signed-off-by: Gao Xiang <[email protected]> --- include/erofs/internal.h | 12 ++-- lib/importer.c | 10 ++- lib/inode.c | 46 ++++++++------ lib/liberofs_metabox.h | 14 ++++- lib/metabox.c | 127 ++++++++++++++++++++++++++++++--------- lib/super.c | 10 ++- lib/xattr.c | 2 +- mkfs/main.c | 12 +++- 8 files changed, 168 insertions(+), 65 deletions(-) diff --git a/include/erofs/internal.h b/include/erofs/internal.h index 610650138bee..62594b877151 100644 --- a/include/erofs/internal.h +++ b/include/erofs/internal.h @@ -97,7 +97,7 @@ struct erofs_sb_info { u64 total_blocks; u64 primarydevice_blocks; - u32 meta_blkaddr; + s32 meta_blkaddr; u32 xattr_blkaddr; u32 feature_compat; @@ -150,7 +150,7 @@ struct erofs_sb_info { struct erofs_bufmgr *bmgr; struct erofs_xattrmgr *xamgr; struct z_erofs_mgr *zmgr; - struct erofs_metaboxmgr *m2gr; + struct erofs_metamgr *m2gr, *mxgr; struct erofs_packed_inode *packedinode; struct erofs_buffer_head *bh_sb; struct erofs_buffer_head *bh_devt; @@ -309,8 +309,8 @@ static inline bool erofs_inode_in_metabox(struct erofs_inode *inode) static inline erofs_off_t erofs_iloc(struct erofs_inode *inode) { struct erofs_sb_info *sbi = inode->sbi; - erofs_off_t base = erofs_inode_in_metabox(inode) ? 0 : - erofs_pos(sbi, sbi->meta_blkaddr); + s64 base = erofs_inode_in_metabox(inode) ? 
0 : + (s64)erofs_pos(sbi, sbi->meta_blkaddr); return base + ((inode->nid & EROFS_DIRENT_NID_MASK) << EROFS_ISLOTBITS); } @@ -434,8 +434,8 @@ int erofs_mkfs_init_devices(struct erofs_sb_info *sbi, unsigned int devices); int erofs_write_device_table(struct erofs_sb_info *sbi); int erofs_enable_sb_chksum(struct erofs_sb_info *sbi, u32 *crc); int erofs_superblock_csum_verify(struct erofs_sb_info *sbi); -int erofs_mkfs_format_fs(struct erofs_sb_info *sbi, - unsigned int blkszbits, unsigned int dsunit); +int erofs_mkfs_format_fs(struct erofs_sb_info *sbi, unsigned int blkszbits, + unsigned int dsunit, bool metazone); int erofs_mkfs_load_fs(struct erofs_sb_info *sbi, unsigned int dsunit); /* namei.c */ diff --git a/lib/importer.c b/lib/importer.c index c73dde2529b7..958a433b9eaa 100644 --- a/lib/importer.c +++ b/lib/importer.c @@ -69,8 +69,8 @@ int erofs_importer_init(struct erofs_importer *im) goto out_err; } - subsys = "metabox"; - err = erofs_metabox_init(sbi); + subsys = "metadata"; + err = erofs_metadata_init(sbi); if (err) goto out_err; @@ -107,6 +107,10 @@ int erofs_importer_flush_all(struct erofs_importer *im) if (err) return err; + err = erofs_metazone_flush(sbi); + if (err) + return err; + fsalignblks = im->params->fsalignblks ? 
roundup_pow_of_two(im->params->fsalignblks) : 1; sbi->primarydevice_blocks = roundup(erofs_mapbh(sbi->bmgr, NULL), @@ -128,6 +132,6 @@ void erofs_importer_exit(struct erofs_importer *im) struct erofs_sb_info *sbi = im->sbi; z_erofs_dedupe_ext_exit(); - erofs_metabox_exit(sbi); + erofs_metadata_exit(sbi); erofs_packedfile_exit(sbi); } diff --git a/lib/inode.c b/lib/inode.c index 09b2e507c609..64f6bc34610f 100644 --- a/lib/inode.c +++ b/lib/inode.c @@ -376,18 +376,19 @@ erofs_nid_t erofs_lookupnid(struct erofs_inode *inode) { struct erofs_buffer_head *const bh = inode->bh; struct erofs_sb_info *sbi = inode->sbi; - erofs_off_t off, meta_offset; + erofs_off_t off; + s64 meta_offset; erofs_nid_t nid; if (bh && inode->nid == EROFS_NID_UNALLOCATED) { erofs_mapbh(NULL, bh->block); off = erofs_btell(bh, false); - if (!inode->in_metabox) { - meta_offset = erofs_pos(sbi, sbi->meta_blkaddr); - DBG_BUGON(off < meta_offset); - } else { + if (inode->in_metabox) { meta_offset = 0; + } else { + meta_offset = (s64)erofs_pos(sbi, sbi->meta_blkaddr); + DBG_BUGON(off < meta_offset && !sbi->m2gr); } nid = (off - meta_offset) >> EROFS_ISLOTBITS; @@ -718,8 +719,8 @@ int erofs_iflush(struct erofs_inode *inode) struct erofs_sb_info *sbi = inode->sbi; struct erofs_buffer_head *bh = inode->bh; erofs_off_t off = erofs_iloc(inode); - struct erofs_bufmgr *ibmgr = inode->in_metabox ? 
- erofs_metabox_bmgr(sbi) : sbi->bmgr; + struct erofs_bufmgr *ibmgr = + erofs_metadata_bmgr(sbi, inode->in_metabox) ?: sbi->bmgr; union { struct erofs_inode_compact dic; struct erofs_inode_extended die; @@ -921,12 +922,9 @@ static int erofs_prepare_inode_buffer(struct erofs_importer *im, if (inode->extent_isize) inodesize = roundup(inodesize, 8) + inode->extent_isize; - if (!erofs_is_special_identifier(inode->i_srcpath) && - erofs_metabox_bmgr(sbi)) + if (!erofs_is_special_identifier(inode->i_srcpath) && sbi->mxgr) inode->in_metabox = true; - - if (inode->in_metabox) - ibmgr = erofs_metabox_bmgr(sbi) ?: sbi->bmgr; + ibmgr = erofs_metadata_bmgr(sbi, inode->in_metabox) ?: sbi->bmgr; if (inode->datalayout == EROFS_INODE_FLAT_PLAIN) goto noinline; @@ -1000,8 +998,8 @@ static int erofs_bh_flush_write_inline(struct erofs_buffer_head *bh) { struct erofs_inode *const inode = bh->fsprivate; struct erofs_sb_info *sbi = inode->sbi; - struct erofs_bufmgr *ibmgr = inode->in_metabox ? - erofs_metabox_bmgr(sbi) : sbi->bmgr; + struct erofs_bufmgr *ibmgr = + erofs_metadata_bmgr(sbi, inode->in_metabox) ?: sbi->bmgr; const erofs_off_t off = erofs_btell(bh, false); int ret; @@ -1360,21 +1358,29 @@ static void erofs_fixup_meta_blkaddr(struct erofs_inode *root) const erofs_off_t rootnid_maxoffset = 0xffff << EROFS_ISLOTBITS; struct erofs_buffer_head *const bh = root->bh; struct erofs_sb_info *sbi = root->sbi; - erofs_off_t meta_offset = 0; + int bsz = erofs_blksiz(sbi); + int meta_offset; erofs_off_t off; erofs_mapbh(NULL, bh->block); off = erofs_btell(bh, false); - if (!root->in_metabox && off > rootnid_maxoffset) - meta_offset = round_up(off - rootnid_maxoffset, - erofs_blksiz(sbi)); - else if (root->in_metabox && !erofs_sb_has_48bit(sbi)) { + if (!root->in_metabox) { + if (!off) { + DBG_BUGON(!sbi->m2gr); + DBG_BUGON(sbi->meta_blkaddr != -1); + meta_offset = -bsz; /* avoid NID 0 */ + } else if (off > rootnid_maxoffset) { + meta_offset = round_up(off - rootnid_maxoffset, bsz); + 
sbi->meta_blkaddr = erofs_blknr(sbi, meta_offset); + } else { + meta_offset = 0; + } + } else if (!erofs_sb_has_48bit(sbi)) { sbi->build_time = sbi->epoch; sbi->epoch = max_t(s64, 0, (s64)sbi->build_time - UINT32_MAX); sbi->build_time -= sbi->epoch; erofs_sb_set_48bit(sbi); } - sbi->meta_blkaddr = erofs_blknr(sbi, meta_offset); root->nid = ((off - meta_offset) >> EROFS_ISLOTBITS) | ((u64)root->in_metabox << EROFS_DIRENT_NID_METABOX_BIT); } diff --git a/lib/liberofs_metabox.h b/lib/liberofs_metabox.h index d8896c01c298..bf4051cf18e2 100644 --- a/lib/liberofs_metabox.h +++ b/lib/liberofs_metabox.h @@ -4,6 +4,8 @@ #include "erofs/internal.h" +#define EROFS_META_NEW_ADDR ((u32)-1ULL) + extern const char *erofs_metabox_identifier; #define EROFS_METABOX_INODE erofs_metabox_identifier @@ -12,11 +14,17 @@ static inline bool erofs_is_metabox_inode(struct erofs_inode *inode) return inode->i_srcpath == EROFS_METABOX_INODE; } +static inline bool erofs_has_meta_zone(struct erofs_sb_info *sbi) +{ + return sbi->m2gr || sbi->meta_blkaddr == EROFS_META_NEW_ADDR; +} + struct erofs_importer; -void erofs_metabox_exit(struct erofs_sb_info *sbi); -int erofs_metabox_init(struct erofs_sb_info *sbi); -struct erofs_bufmgr *erofs_metabox_bmgr(struct erofs_sb_info *sbi); +void erofs_metadata_exit(struct erofs_sb_info *sbi); +int erofs_metadata_init(struct erofs_sb_info *sbi); +struct erofs_bufmgr *erofs_metadata_bmgr(struct erofs_sb_info *sbi, bool mbox); int erofs_metabox_iflush(struct erofs_importer *im); +int erofs_metazone_flush(struct erofs_sb_info *sbi); #endif diff --git a/lib/metabox.c b/lib/metabox.c index bf188f6db0f5..37267ddb73cf 100644 --- a/lib/metabox.c +++ b/lib/metabox.c @@ -2,81 +2,152 @@ #include <stdlib.h> #include "erofs/inode.h" #include "erofs/importer.h" +#include "erofs/print.h" #include "liberofs_cache.h" #include "liberofs_private.h" #include "liberofs_metabox.h" const char *erofs_metabox_identifier = "metabox"; -struct erofs_metaboxmgr { +struct erofs_metamgr { 
struct erofs_vfile vf; struct erofs_bufmgr *bmgr; }; -void erofs_metabox_exit(struct erofs_sb_info *sbi) +static void erofs_metamgr_exit(struct erofs_metamgr *m2gr) { - struct erofs_metaboxmgr *m2gr = sbi->m2gr; - - if (!m2gr) - return; DBG_BUGON(!m2gr->bmgr); erofs_buffer_exit(m2gr->bmgr); erofs_io_close(&m2gr->vf); free(m2gr); } - -int erofs_metabox_init(struct erofs_sb_info *sbi) +static int erofs_metamgr_init(struct erofs_sb_info *sbi, + struct erofs_metamgr *m2gr) { - struct erofs_metaboxmgr *m2gr; int ret; - if (!erofs_sb_has_metabox(sbi)) - return 0; - - m2gr = malloc(sizeof(*m2gr)); - if (!m2gr) - return -ENOMEM; - ret = erofs_tmpfile(); if (ret < 0) - goto out_err; + return ret; m2gr->vf = (struct erofs_vfile){ .fd = ret }; m2gr->bmgr = erofs_buffer_init(sbi, 0, &m2gr->vf); - if (m2gr->bmgr) { + if (!m2gr->bmgr) + return -ENOMEM; + return 0; +} + +void erofs_metadata_exit(struct erofs_sb_info *sbi) +{ + if (sbi->m2gr) { + erofs_metamgr_exit(sbi->m2gr); + sbi->m2gr = NULL; + } + if (sbi->mxgr) { + erofs_metamgr_exit(sbi->mxgr); + sbi->mxgr = NULL; + } +} + +int erofs_metadata_init(struct erofs_sb_info *sbi) +{ + struct erofs_metamgr *m2gr; + int ret; + + if (!sbi->m2gr && sbi->meta_blkaddr == EROFS_META_NEW_ADDR) { + m2gr = malloc(sizeof(*m2gr)); + if (!m2gr) + return -ENOMEM; + ret = erofs_metamgr_init(sbi, m2gr); + if (ret) + goto err_free; sbi->m2gr = m2gr; - return 0; } - ret = -ENOMEM; -out_err: + + if (!sbi->mxgr && erofs_sb_has_metabox(sbi)) { + m2gr = malloc(sizeof(*m2gr)); + if (!m2gr) + return -ENOMEM; + ret = erofs_metamgr_init(sbi, m2gr); + if (ret) + goto err_free; + sbi->mxgr = m2gr; + } + return 0; +err_free: free(m2gr); return ret; } -struct erofs_bufmgr *erofs_metabox_bmgr(struct erofs_sb_info *sbi) +struct erofs_bufmgr *erofs_metadata_bmgr(struct erofs_sb_info *sbi, bool mbox) { - return sbi->m2gr ? 
sbi->m2gr->bmgr : NULL; + if (mbox) { + if (sbi->mxgr) + return sbi->mxgr->bmgr; + } else if (sbi->m2gr) { + return sbi->m2gr->bmgr; + } + return NULL; } int erofs_metabox_iflush(struct erofs_importer *im) { struct erofs_sb_info *sbi = im->sbi; - struct erofs_metaboxmgr *m2gr = sbi->m2gr; + struct erofs_metamgr *mxgr = sbi->mxgr; struct erofs_inode *inode; int err; - if (!m2gr || !erofs_sb_has_metabox(sbi)) + if (!mxgr || !erofs_sb_has_metabox(sbi)) return -EINVAL; - err = erofs_bflush(m2gr->bmgr, NULL); + err = erofs_bflush(mxgr->bmgr, NULL); if (err) return err; - if (erofs_io_lseek(&m2gr->vf, 0, SEEK_END) <= 0) + if (erofs_io_lseek(&mxgr->vf, 0, SEEK_END) <= 0) return 0; - inode = erofs_mkfs_build_special_from_fd(im, m2gr->vf.fd, + inode = erofs_mkfs_build_special_from_fd(im, mxgr->vf.fd, EROFS_METABOX_INODE); sbi->metabox_nid = erofs_lookupnid(inode); erofs_iput(inode); return 0; } + +int erofs_metazone_flush(struct erofs_sb_info *sbi) +{ + struct erofs_metamgr *m2gr = sbi->m2gr; + struct erofs_buffer_head *bh; + struct erofs_bufmgr *m2bgr; + erofs_blk_t meta_blkaddr; + u64 length, pos_out; + int ret, count; + + if (!m2gr) + return 0; + m2bgr = m2gr->bmgr; + + ret = erofs_bflush(m2bgr, NULL); + if (ret) + return ret; + + length = erofs_mapbh(m2bgr, NULL) << sbi->blkszbits; + bh = erofs_balloc(sbi->bmgr, DATA, length, 0); + if (IS_ERR(bh)) /* failure is an error pointer; PTR_ERR(NULL) would be 0 */ + return PTR_ERR(bh); + + erofs_mapbh(NULL, bh->block); + pos_out = erofs_btell(bh, false); + meta_blkaddr = pos_out >> sbi->blkszbits; + do { + count = min_t(erofs_off_t, length, INT_MAX); + ret = erofs_io_xcopy(sbi->bmgr->vf, pos_out, + &m2gr->vf, count, false); + if (ret < 0) + break; + pos_out += count; + } while (length -= count); + bh->op = &erofs_drop_directly_bhops; + erofs_bdrop(bh, false); + sbi->meta_blkaddr += meta_blkaddr; + return ret < 0 ? ret : 0; /* propagate a failed copy instead of reporting success */ +} diff --git a/lib/super.c b/lib/super.c index 9760265aa754..d626c7cdc76f 100644 --- a/lib/super.c +++ b/lib/super.c @@ -8,6 +8,7 @@ #include "erofs/xattr.h" #include "liberofs_cache.h" 
#include "liberofs_compress.h" +#include "liberofs_metabox.h" static bool check_layout_compatibility(struct erofs_sb_info *sbi, struct erofs_super_block *dsb) @@ -418,8 +419,8 @@ out: return 0; } -int erofs_mkfs_format_fs(struct erofs_sb_info *sbi, - unsigned int blkszbits, unsigned int dsunit) +int erofs_mkfs_format_fs(struct erofs_sb_info *sbi, unsigned int blkszbits, + unsigned int dsunit, bool metazone) { struct erofs_buffer_head *bh; struct erofs_bufmgr *bmgr; @@ -430,7 +431,10 @@ int erofs_mkfs_format_fs(struct erofs_sb_info *sbi, return -ENOMEM; sbi->bmgr = bmgr; bmgr->dsunit = dsunit; - + if (metazone) + sbi->meta_blkaddr = EROFS_META_NEW_ADDR; + else + sbi->meta_blkaddr = 0; bh = erofs_reserve_sb(bmgr); if (IS_ERR(bh)) return PTR_ERR(bh); diff --git a/lib/xattr.c b/lib/xattr.c index fc22c817f136..8f0332b44a02 100644 --- a/lib/xattr.c +++ b/lib/xattr.c @@ -828,7 +828,7 @@ int erofs_xattr_flush_name_prefixes(struct erofs_importer *im, bool plain) if (!plain) { if (erofs_sb_has_metabox(sbi)) { - bmgr = erofs_metabox_bmgr(sbi); + bmgr = erofs_metadata_bmgr(sbi, true); vf = bmgr->vf; } else if (may_fragments) { erofs_sb_set_fragments(sbi); diff --git a/mkfs/main.c b/mkfs/main.c index 4de298b6dedd..76bf84348364 100644 --- a/mkfs/main.c +++ b/mkfs/main.c @@ -101,6 +101,7 @@ static struct option long_options[] = { {"oci", optional_argument, NULL, 534}, #endif {"zD", optional_argument, NULL, 536}, + {"ZI", optional_argument, NULL, 537}, {0, 0, 0, 0}, }; @@ -176,6 +177,7 @@ static void usage(int argc, char **argv) " --mkfs-time the timestamp is applied as build time only\n" " -UX use a given filesystem UUID\n" " --zD[=<0|1>] specify directory compression: 0=disable [default], 1=enable\n" + " --ZI[=<0|1>] specify the separate inode metadata zone availability: 0=disable [default], 1=enable\n" " --all-root make all files owned by root\n" #ifdef EROFS_MT_ENABLED " --async-queue-limit=# specify the maximum number of entries in the multi-threaded job queue\n" @@ -269,6 
+271,7 @@ static void version(void) static struct erofsmkfs_cfg { /* < 0, xattr disabled and >= INT_MAX, always use inline xattrs */ long inlinexattr_tolerance; + bool inode_metazone; } mkfscfg = { .inlinexattr_tolerance = 2, }; @@ -1412,6 +1415,12 @@ static int mkfs_parse_options_cfg(struct erofs_importer_params *params, else params->compress_dir = false; break; + case 537: + if (!optarg || !strcmp(optarg, "1")) /* bare --ZI or --ZI=1 enables the zone; any other value disables it */ + mkfscfg.inode_metazone = true; + else + mkfscfg.inode_metazone = false; + break; case 'V': version(); exit(0); @@ -1787,7 +1796,8 @@ int main(int argc, char **argv) } if (!incremental_mode) - err = erofs_mkfs_format_fs(&g_sbi, mkfs_blkszbits, dsunit); + err = erofs_mkfs_format_fs(&g_sbi, mkfs_blkszbits, dsunit, + mkfscfg.inode_metazone); else err = erofs_mkfs_load_fs(&g_sbi, dsunit); if (err) -- 2.43.5
