If a cluster has only a small number of used blocks, it is reasonable to relocate those blocks regardless of the inode's quality and free the whole cluster.
https://jira.sw.ru/browse/PSBM-46563 Signed-off-by: Dmitry Monakhov <dmonak...@openvz.org> --- misc/e4defrag2.c | 54 +++++++++++++++++++++++++++++++++++++++++++++--------- 1 files changed, 45 insertions(+), 9 deletions(-) diff --git a/misc/e4defrag2.c b/misc/e4defrag2.c index 797a342..9206c89 100644 --- a/misc/e4defrag2.c +++ b/misc/e4defrag2.c @@ -279,6 +279,7 @@ enum spext_flags SP_FL_DIRLOCAL = 0x20, SP_FL_CSUM = 0x40, SP_FL_FMAP = 0x80, + SP_FL_TP_RELOC = 0x100, }; struct rb_fhandle @@ -383,6 +384,7 @@ struct defrag_context unsigned cluster_size; unsigned ief_reloc_cluster; unsigned weight_scale; + unsigned tp_weight_scale; unsigned extents_quality; }; @@ -1098,6 +1100,7 @@ static int scan_inode_pass3(struct defrag_context *dfx, int fd, int is_old = 0; int is_rdonly = 0; __u64 ief_blocks = 0; + __u64 tp_blocks = 0; __u32 ino_flags = 0; __u64 size_blk = dfx_sz2b(dfx, stat->st_size); __u64 used_blk = dfx_sz2b(dfx, stat->st_blocks << 9); @@ -1158,13 +1161,16 @@ static int scan_inode_pass3(struct defrag_context *dfx, int fd, } if (se->flags & SP_FL_IEF_RELOC) ief_blocks += fec->fec_map[i].len; + if (se->flags & SP_FL_TP_RELOC) + tp_blocks += fec->fec_map[i].len; + fmap_csum_ext(fec->fec_map + i, &csum); } if (fest.local_ex == fec->fec_extents) ino_flags |= SP_FL_LOCAL; - if (ief_blocks) { + if (ief_blocks || tp_blocks) { /* * Even if some extents belong to IEF cluster, it is not a good * idea to relocate the whole file. 
From other point of view, @@ -1182,6 +1188,13 @@ static int scan_inode_pass3(struct defrag_context *dfx, int fd, "size_blk:%lld used_blk:%lld\n", __func__, stat->st_ino, ief_blocks, size_blk, used_blk); + } else if (tp_blocks * 4 > size_blk) { + ino_flags |= SP_FL_IEF_RELOC | SP_FL_TP_RELOC; + if (debug_flag & DBG_SCAN && ief_blocks != size_blk) + printf("%s Force add %lu to IEF/TP set ief:%lld " + "size_blk:%lld used_blk:%lld\n", + __func__, stat->st_ino, ief_blocks, + size_blk, used_blk); } else if (debug_flag & DBG_SCAN) { printf("%s Reject %lu from IEF set ief:%lld " "size_blk:%lld used_blk:%lld\n", @@ -1592,6 +1605,7 @@ static void pass3_prep(struct defrag_context *dfx) unsigned good = 0; unsigned count = 0; unsigned ief_ok = 0; + unsigned force_reloc = 0; if (verbose) printf("Pass3_prep: Scan and rate cached extents\n"); @@ -1610,18 +1624,29 @@ static void pass3_prep(struct defrag_context *dfx) print_spex("\t\t\t", ex); if (prev_cluster != cluster) { - ief_ok = 0; + force_reloc = ief_ok = 0; + /* Is cluster has enough RO(good) data blocks ?*/ if (dfx->cluster_size >= used * dfx->weight_scale && - good * 1000 >= count * dfx->extents_quality && - cluster_node) { + good * 1000 >= count * dfx->extents_quality) + ief_ok = 1; + + /* Thin provision corner case: If cluster has low number + * of data blocks it should be relocated regardless to + * block's quality in order to improve space efficency */ + if (dfx->cluster_size >= used * dfx->tp_weight_scale) { + ief_ok = 1; + force_reloc = 1; + } + + if (ief_ok && cluster_node) { while (cluster_node != node) { struct spextent *se = node_to_spextent(cluster_node); - ief_ok = 1; se->flags |= SP_FL_IEF_RELOC; + if (force_reloc) + se->flags |= SP_FL_TP_RELOC; if (debug_flag & DBG_TREE) print_spex("\t\t\t->IEF", se); - ext_to_move++; blocks_to_move += se->count; cluster_node = @@ -2010,7 +2035,7 @@ static int do_iaf_defrag_one(struct defrag_context *dfx, int dirfd, const char * } if (st2.st_ino != stat->st_ino) { - if 
(debug_flag & DBG_RT) + if (debug_flag & DBG_RT) fprintf(stderr, "%s: Race while reopen\n", __func__); goto out_fd; } @@ -2183,6 +2208,7 @@ static int ief_defrag_group(struct defrag_context *dfx, dgrp_t idx) struct donor_info donor; struct group_info * group = dfx->group[idx]; __u64 blocks; + int force_local = 0; /* * Prepare stage * Walk inodes in block order in order to warm up the page cache @@ -2278,6 +2304,7 @@ static int ief_defrag_group(struct defrag_context *dfx, dgrp_t idx) donor.offset = 0; next_cluster: blocks = 0; + force_local = dfx->ief_force_local; /* Divide inodes in to reallocation clusters */ for (rfh = group->next; rfh != NULL; rfh = rfh->next) { assert(!(rfh->flags & SP_FL_IGNORE)); @@ -2286,6 +2313,8 @@ next_cluster: break; blocks += rfh->fec->fec_map[rfh->fec->fec_extents -1].lblk + rfh->fec->fec_map[rfh->fec->fec_extents -1].len; + if (rfh->flags & SP_FL_TP_RELOC) + force_local = 0; } prev = rfh; @@ -2293,7 +2322,7 @@ next_cluster: if (!blocks) return 0; - ret = prepare_donor(dfx, idx, &donor, blocks, dfx->ief_force_local, 2); + ret = prepare_donor(dfx, idx, &donor, blocks, force_local, 2); if (ret) { if (debug_flag & DBG_SCAN) fprintf(stderr, "%s group:%u Can not allocate donor" @@ -2415,6 +2444,7 @@ static void usage(void) fprintf(stderr, "\t-m: dump fs and memory statistics at the end\n"); fprintf(stderr, "\t-n: dry run\n"); fprintf(stderr, "\t-s: scale factor\n"); + fprintf(stderr, "\t-S: thin provision scale factor\n"); fprintf(stderr, "\t-q: defragmentation quality factor\n"); fprintf(stderr, "\t-t: interpret inodes modified earlier than N seconds ago as RO files\n"); fprintf(stderr, "\t-T: same as '-t' but use an absolute value\n"); @@ -2432,6 +2462,7 @@ int main(int argc, char *argv[]) int cluster_size = 1 << 20; int reloc_cluster_size = 0; int scale = 2; + int tp_scale = 32; /* 1/32 ==> 3% */ int quality = 700; dgrp_t nr_grp; int flex_bg = 0; @@ -2440,7 +2471,7 @@ int main(int argc, char *argv[]) 
add_error_table(&et_ext2_error_table); gettimeofday(&time_start, 0); - while ((c = getopt(argc, argv, "a:C:c:d:fF:hlmnt:s:T:vq:")) != EOF) { + while ((c = getopt(argc, argv, "a:C:c:d:fF:hlmnt:s:S:T:vq:")) != EOF) { switch (c) { case 'a': min_frag_size = strtoul(optarg, &end, 0); @@ -2486,6 +2517,10 @@ int main(int argc, char *argv[]) scale = strtoul(optarg, &end, 0); break; + case 'S': + tp_scale = strtoul(optarg, &end, 0); + break; + case 'q': quality = strtoul(optarg, &end, 0); if (quality > 1000) @@ -2557,6 +2592,7 @@ int main(int argc, char *argv[]) dfx.iaf_cluster_size = min_frag_size >> dfx.blocksize_bits; dfx.weight_scale = scale; + dfx.tp_weight_scale = tp_scale; dfx.extents_quality = quality; dfx.ro_fs = dry_run; dfx.sp_root = RB_ROOT; -- 1.7.1 _______________________________________________ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel