Re: [PATCH 9/9] nilfs2: prevent starvation of segments protected by snapshots

2015-03-13 Thread Ryusuke Konishi
On Tue, 24 Feb 2015 20:01:44 +0100, Andreas Rohner wrote:
 It doesn't really matter if the number of reclaimable blocks for a
 segment is inaccurate, as long as the overall performance is better than
 the simple timestamp algorithm and starvation is prevented.
 
 The following steps will lead to starvation of a segment:
 
 1. The segment is written
 2. A snapshot is created
 3. The files in the segment are deleted and the number of live
blocks for the segment is decremented to a very low value
 4. The GC tries to free the segment, but there are no reclaimable
blocks, because they are all protected by the snapshot. To prevent an
infinite loop the GC has to adjust the number of live blocks to the
correct value.
 5. The snapshot is converted to a checkpoint and the blocks in the
segment are now reclaimable.
 6. The GC will never attempt to clean the segment again, because it
incorrectly shows up as having a high number of live blocks.
 
 To prevent this, the already existing padding field of the SUFILE entry
 is used to track the number of snapshot blocks in the segment. This
 number is only set by the GC, since it collects the necessary
 information anyway. So there is no need to track which block belongs to
 which segment. In step 4 of the list above the GC will set the new field
 su_nsnapshot_blks. In step 5 all entries in the SUFILE are checked and
 entries with a big su_nsnapshot_blks field get their su_nlive_blks field
 reduced.
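 
 To make step 5 concrete, here is a rough sketch of the per-entry check it
 amounts to. This is not the actual code of nilfs_sufile_fix_starving_segs()
 (the corresponding sufile.c hunk is not quoted in this excerpt), and the
 one-half threshold is only a placeholder:
 
 	/*
 	 * Sketch only: if most of the blocks still counted as live were in
 	 * fact just protected by the now-deleted snapshot, shrink
 	 * su_nlive_blks so the GC considers the segment again.
 	 */
 	static void maybe_unstarve_segment(struct nilfs_segment_usage *su,
 					   unsigned long blocks_per_segment)
 	{
 		__u32 nlive = le32_to_cpu(su->su_nlive_blks);
 		__u32 nsnap = le32_to_cpu(su->su_nsnapshot_blks);
 
 		if (nsnap >= blocks_per_segment / 2 && nsnap <= nlive) {
 			/* count the formerly protected blocks as reclaimable */
 			su->su_nlive_blks = cpu_to_le32(nlive - nsnap);
 			su->su_nsnapshot_blks = cpu_to_le32(0);
 		}
 	}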
 
 Signed-off-by: Andreas Rohner <andreas.roh...@gmx.net>
 ---
  fs/nilfs2/cpfile.c|   5 ++
  fs/nilfs2/segbuf.c|   1 +
  fs/nilfs2/segbuf.h|   1 +
  fs/nilfs2/segment.c   |   7 ++-
  fs/nilfs2/sufile.c| 114 ++
  fs/nilfs2/sufile.h|   4 +-
  fs/nilfs2/the_nilfs.h |   7 +++
  include/linux/nilfs2_fs.h |  12 +++--
  8 files changed, 136 insertions(+), 15 deletions(-)
 
 diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c
 index 0d58075..6b61fd7 100644
 --- a/fs/nilfs2/cpfile.c
 +++ b/fs/nilfs2/cpfile.c
 @@ -28,6 +28,7 @@
  #include <linux/nilfs2_fs.h>
  #include "mdt.h"
  #include "cpfile.h"
 +#include "sufile.h"
  
  
  static inline unsigned long
 @@ -703,6 +704,7 @@ static int nilfs_cpfile_clear_snapshot(struct inode *cpfile, __u64 cno)
   struct nilfs_cpfile_header *header;
   struct nilfs_checkpoint *cp;
   struct nilfs_snapshot_list *list;
 + struct the_nilfs *nilfs = cpfile->i_sb->s_fs_info;
   __u64 next, prev;
   void *kaddr;
   int ret;
 @@ -784,6 +786,9 @@ static int nilfs_cpfile_clear_snapshot(struct inode *cpfile, __u64 cno)
   mark_buffer_dirty(header_bh);
   nilfs_mdt_mark_dirty(cpfile);
  
 + if (nilfs_feature_track_snapshots(nilfs))
 + nilfs_sufile_fix_starving_segs(nilfs->ns_sufile);
 +
   brelse(prev_bh);
  
   out_next:
 diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c
 index bbd807b..a98c576 100644
 --- a/fs/nilfs2/segbuf.c
 +++ b/fs/nilfs2/segbuf.c
 @@ -59,6 +59,7 @@ struct nilfs_segment_buffer *nilfs_segbuf_new(struct super_block *sb)
   segbuf->sb_super_root = NULL;
   segbuf->sb_nlive_blks_added = 0;
   segbuf->sb_nlive_blks_diff = 0;
 + segbuf->sb_nsnapshot_blks = 0;
  
   init_completion(&segbuf->sb_bio_event);
   atomic_set(&segbuf->sb_err, 0);
 diff --git a/fs/nilfs2/segbuf.h b/fs/nilfs2/segbuf.h
 index 4e994f7..7a462c4 100644
 --- a/fs/nilfs2/segbuf.h
 +++ b/fs/nilfs2/segbuf.h
 @@ -85,6 +85,7 @@ struct nilfs_segment_buffer {
   unsigned		sb_rest_blocks;
   __u32   sb_nlive_blks_added;
   __s64   sb_nlive_blks_diff;
 + __u32   sb_nsnapshot_blks;
  
   /* Buffers */
   struct list_head	sb_segsum_buffers;
 diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
 index 16c7c36..b976198 100644
 --- a/fs/nilfs2/segment.c
 +++ b/fs/nilfs2/segment.c
 @@ -1381,6 +1381,7 @@ static void nilfs_segctor_update_segusage(struct nilfs_sc_info *sci,
   (segbuf->sb_pseg_start - segbuf->sb_fseg_start);
   ret = nilfs_sufile_set_segment_usage(sufile, segbuf->sb_segnum,
 live_blocks,
 +  segbuf->sb_nsnapshot_blks,
 sci->sc_seg_ctime);
   WARN_ON(ret); /* always succeed because the segusage is dirty */
  
 @@ -1405,7 +1406,7 @@ static void nilfs_cancel_segusage(struct list_head *logs,
   segbuf = NILFS_FIRST_SEGBUF(logs);
   ret = nilfs_sufile_set_segment_usage(sufile, segbuf->sb_segnum,
 segbuf->sb_pseg_start -
 -  segbuf->sb_fseg_start, 0);
 +  segbuf->sb_fseg_start, 0, 0);
   WARN_ON(ret); /* always succeed because the segusage is dirty */
  
   if 

Re: [PATCH 5/9] nilfs2: add simple tracking of block deletions and updates

2015-03-13 Thread Ryusuke Konishi
On Tue, 24 Feb 2015 20:01:40 +0100, Andreas Rohner wrote:
 This patch adds simple tracking of block deletions and updates for
 all files except the DAT and SUFILE metadata files. It uses the
 fact that for every block, NILFS2 keeps an entry in the DAT file
 and stores the checkpoint where it was created and deleted or
 overwritten. So whenever a block is deleted or overwritten,
 nilfs_dat_commit_end() is called to update the DAT entry. At this
 point this patch simply decrements the su_nlive_blks field of the
 corresponding segment. The value of su_nlive_blks is set at segment
 creation time.
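 
 The dat.c hunk itself is not quoted in this mail; conceptually, the hook
 described above boils down to something like the following sketch, where
 nilfs_sufile_dec_nlive_blks() is only an illustrative name for the SUFILE
 update and count_blocks is the opt-out parameter explained below:
 
 	static void dec_live_block_of(struct the_nilfs *nilfs,
 				      sector_t old_blocknr, int count_blocks)
 	{
 		__u64 segnum;
 
 		/* the SUFILE itself passes count_blocks == 0, see below */
 		if (!count_blocks || old_blocknr == 0)
 			return;
 
 		segnum = nilfs_get_segnum_of_block(nilfs, old_blocknr);
 		nilfs_sufile_dec_nlive_blks(nilfs->ns_sufile, segnum);
 	}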
 
 The blocks of the DAT file cannot be counted this way, because the
 DAT does not contain any entries about itself, so the function
 nilfs_dat_commit_end() is not called when its blocks are deleted or
 overwritten.
 
 The SUFILE cannot be counted this way, because it would lead to a
 deadlock. When nilfs_dat_commit_end() is called, the bmap->b_sem is
 held by code further up the call chain. To decrement the SUFILE entry,
 the same semaphore has to be acquired. So if the DAT entry belongs to
 the SUFILE, both semaphores are the same and a deadlock will occur.
 But it works for any other file. So by excluding the SUFILE from
 being counted, via the extra parameter count_blocks, a deadlock can be
 avoided.
 
 With the above changes the code does not pass the lock dependency
 checks of the kernel, because all the locks have the same class and
 the order in which the locks are taken is different. Usually it is:
 
 1. down_write(&NILFS_MDT(sufile)->mi_sem);
 2. down_write(&bmap->b_sem);
 
 Now it can also be reversed, which leads to failed checks:
 
 1. down_write(&bmap->b_sem); /* lock of a file other than SUFILE */
 2. down_write(&NILFS_MDT(sufile)->mi_sem);
 
 But this is safe as long as the first lock down_write(&bmap->b_sem)
 doesn't belong to the SUFILE.
 
 It is also possible that two bmap->b_sem locks have to be taken at
 the same time:
 
 1. down_write(&bmap->b_sem); /* lock of a file other than SUFILE */
 2. down_write(&bmap->b_sem); /* lock of SUFILE */
 
 Since bmap->b_sem of normal files and the bmap->b_sem of the
 SUFILE have the same lock class, the above behavior would also lead
 to a warning.
 
 Because of this, it is necessary to introduce two new lock classes
 for the SUFILE. So the bmap->b_sem of the SUFILE gets its own lock
 class and the NILFS_MDT(sufile)->mi_sem as well.
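 
 The bmap side of this is visible in the hunk below; the mi_sem side (in
 mdt.c) is not quoted here, but amounts to something like the following
 sketch, where the key and function names are only illustrative:
 
 	static struct lock_class_key nilfs_mdt_sufile_mi_sem_key;
 
 	static void nilfs_sufile_set_lock_class(struct inode *sufile)
 	{
 		lockdep_set_class(&NILFS_MDT(sufile)->mi_sem,
 				  &nilfs_mdt_sufile_mi_sem_key);
 	}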
 
 A new feature compatibility flag
 NILFS_FEATURE_COMPAT_TRACK_LIVE_BLKS was added, so that the new
 features introduced by this patch can be enabled or disabled at any
 time.
 
 Signed-off-by: Andreas Rohner <andreas.roh...@gmx.net>
 ---
  fs/nilfs2/bmap.c  |  8 +++-
  fs/nilfs2/bmap.h  |  5 +++--
  fs/nilfs2/btree.c |  4 +++-
  fs/nilfs2/dat.c   | 25 -
  fs/nilfs2/dat.h   |  7 +--
  fs/nilfs2/direct.c|  4 +++-
  fs/nilfs2/mdt.c   |  5 -
  fs/nilfs2/segbuf.c|  1 +
  fs/nilfs2/segbuf.h|  1 +
  fs/nilfs2/segment.c   | 25 +
  fs/nilfs2/the_nilfs.c |  4 
  fs/nilfs2/the_nilfs.h | 16 
  include/linux/nilfs2_fs.h |  4 +++-
  13 files changed, 91 insertions(+), 18 deletions(-)
 
 diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c
 index aadbd0b..ecd62ba 100644
 --- a/fs/nilfs2/bmap.c
 +++ b/fs/nilfs2/bmap.c
 @@ -467,6 +467,7 @@ __u64 nilfs_bmap_find_target_in_group(const struct nilfs_bmap *bmap)
  
  static struct lock_class_key nilfs_bmap_dat_lock_key;
  static struct lock_class_key nilfs_bmap_mdt_lock_key;
 +static struct lock_class_key nilfs_bmap_sufile_lock_key;
  
  /**
   * nilfs_bmap_read - read a bmap from an inode
 @@ -498,12 +499,17 @@ int nilfs_bmap_read(struct nilfs_bmap *bmap, struct nilfs_inode *raw_inode)
   lockdep_set_class(&bmap->b_sem, &nilfs_bmap_dat_lock_key);
   break;
   case NILFS_CPFILE_INO:
 - case NILFS_SUFILE_INO:
   bmap->b_ptr_type = NILFS_BMAP_PTR_VS;
   bmap->b_last_allocated_key = 0;
   bmap->b_last_allocated_ptr = NILFS_BMAP_INVALID_PTR;
   lockdep_set_class(&bmap->b_sem, &nilfs_bmap_mdt_lock_key);
   break;
 + case NILFS_SUFILE_INO:
 + bmap->b_ptr_type = NILFS_BMAP_PTR_VS;
 + bmap->b_last_allocated_key = 0;
 + bmap->b_last_allocated_ptr = NILFS_BMAP_INVALID_PTR;
 + lockdep_set_class(&bmap->b_sem, &nilfs_bmap_sufile_lock_key);
 + break;
   case NILFS_IFILE_INO:
   lockdep_set_class(&bmap->b_sem, &nilfs_bmap_mdt_lock_key);
   /* Fall through */
 diff --git a/fs/nilfs2/bmap.h b/fs/nilfs2/bmap.h
 index b89e680..718c814 100644
 --- a/fs/nilfs2/bmap.h
 +++ b/fs/nilfs2/bmap.h
 @@ -222,8 +222,9 @@ static inline void nilfs_bmap_commit_end_ptr(struct nilfs_bmap *bmap,
struct inode *dat)
  {
   if (dat)
 - nilfs_dat_commit_end(dat, &req->bpr_req,
 -

Re: [PATCH 3/9] nilfs2: extend SUFILE on-disk format to enable counting of live blocks

2015-03-13 Thread Ryusuke Konishi
On Tue, 24 Feb 2015 20:01:38 +0100, Andreas Rohner wrote:
 *buf,
   int cleansi, cleansu, dirtysi, dirtysu;
   long ncleaned = 0, ndirtied = 0;
   int ret = 0;
 + bool sup_ext = (supsz >= NILFS_EXT_SUINFO_UPDATE_SIZE);
 + bool su_ext = nilfs_sufile_ext_supported(sufile);
  
   if (unlikely(nsup == 0))
   return ret;
 @@ -926,6 +949,9 @@ ssize_t nilfs_sufile_set_suinfo(struct inode *sufile, void *buf,
   (~0UL << __NR_NILFS_SUINFO_UPDATE_FIELDS))
   || (nilfs_suinfo_update_nblocks(sup) &&
   sup->sup_sui.sui_nblocks >
 + nilfs->ns_blocks_per_segment)
 + || (nilfs_suinfo_update_nlive_blks(sup) && sup_ext &&
 + sup->sup_sui.sui_nlive_blks >
   nilfs->ns_blocks_per_segment))
   return -EINVAL;
   }
 @@ -953,6 +979,14 @@ ssize_t nilfs_sufile_set_suinfo(struct inode *sufile, void *buf,
   if (nilfs_suinfo_update_nblocks(sup))
   su->su_nblocks = cpu_to_le32(sup->sup_sui.sui_nblocks);
  
 + if (nilfs_suinfo_update_nlive_blks(sup) && sup_ext && su_ext)
 + su->su_nlive_blks =
 + cpu_to_le32(sup->sup_sui.sui_nlive_blks);
 +
 + if (nilfs_suinfo_update_nlive_lastmod(sup) && sup_ext && su_ext)
 + su->su_nlive_lastmod =
 + cpu_to_le64(sup->sup_sui.sui_nlive_lastmod);
 +
   if (nilfs_suinfo_update_flags(sup)) {
   /*
* Active flag is a virtual flag projected by running
 diff --git a/fs/nilfs2/sufile.h b/fs/nilfs2/sufile.h
 index c446325..d56498b 100644
 --- a/fs/nilfs2/sufile.h
 +++ b/fs/nilfs2/sufile.h
 @@ -28,6 +28,11 @@
  #include <linux/nilfs2_fs.h>
  #include "mdt.h"
  
 +static inline int
 +nilfs_sufile_ext_supported(const struct inode *sufile)
 +{
 + return NILFS_MDT(sufile)->mi_entry_size >= NILFS_EXT_SEGMENT_USAGE_SIZE;
 +}
  
  static inline unsigned long nilfs_sufile_get_nsegments(struct inode *sufile)
  {
 diff --git a/include/linux/nilfs2_fs.h b/include/linux/nilfs2_fs.h
 index ff3fea3..5d83c55 100644
 --- a/include/linux/nilfs2_fs.h
 +++ b/include/linux/nilfs2_fs.h
 @@ -220,9 +220,11 @@ struct nilfs_super_block {
   * If there is a bit set in the incompatible feature set that the kernel
   * doesn't know about, it should refuse to mount the filesystem.
   */
 -#define NILFS_FEATURE_COMPAT_RO_BLOCK_COUNT  0x0001ULL
 +#define NILFS_FEATURE_COMPAT_SUFILE_EXTENSION	(1ULL << 0)

This feature name is not good.  The sufile can be extended further in the
future.  You should name it based on the meaning of this particular extension.

As I mentioned in another patch, I think this could be unified with the
TRACK_LIVE_BLKS feature that a later patch adds, since the live block
counting of this patchset inherently depends on this extension of the
sufile.

  
 -#define NILFS_FEATURE_COMPAT_SUPP	0ULL
 +#define NILFS_FEATURE_COMPAT_RO_BLOCK_COUNT  (1ULL << 0)
 +

Regards,
Ryusuke Konishi


Re: [PATCH 6/9] nilfs2: use modification cache to improve performance

2015-03-13 Thread Ryusuke Konishi
On Tue, 24 Feb 2015 20:01:41 +0100, Andreas Rohner wrote:
 This patch adds a small cache to accumulate the small decrements of
 the number of live blocks in a segment usage entry. If for example a
 large file is deleted, the segment usage entry has to be updated for
 every single block. But for every decrement, an MDT write lock has to
 be acquired, which blocks the entire SUFILE and effectively turns
 this lock into a global lock for the whole file system.
 
 The cache tries to ameliorate this situation by adding up the
 decrements and increments for a given number of segments and
 applying the changes all at once. Because the changes are
 accumulated in memory and not immediately written to the SUFILE, the
 aforementioned lock only needs to be acquired if the cache is full
 or at the end of the respective operation.
 
 To effectively get the pointer to the modification cache from the
 high level operations down to the update of the individual blocks in
 nilfs_dat_commit_end(), a new pointer b_private was added to struct
 nilfs_bmap.
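 
 In other words, the cache owner sets bmap->b_private for the duration of
 one bmap operation (under bmap->b_sem, as the helpers below show), and
 the code that finally calls nilfs_dat_commit_end() reads it back. A
 minimal sketch of that consumer side, which is not quoted in this mail
 (b_private is assumed to be a plain void pointer):
 
 	static inline struct nilfs_sufile_mod_cache *
 	nilfs_bmap_mod_cache(const struct nilfs_bmap *bmap)
 	{
 		/* only set and cleared while bmap->b_sem is held */
 		return bmap->b_private;	/* NULL if no cache is in use */
 	}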
 
 Signed-off-by: Andreas Rohner <andreas.roh...@gmx.net>
 ---
  fs/nilfs2/bmap.c| 76 +
  fs/nilfs2/bmap.h| 11 +++-
  fs/nilfs2/btree.c   |  2 +-
  fs/nilfs2/direct.c  |  2 +-
  fs/nilfs2/inode.c   | 22 +---
  fs/nilfs2/segment.c | 26 +++---
  fs/nilfs2/segment.h |  3 +++
  7 files changed, 132 insertions(+), 10 deletions(-)
 
 diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c
 index ecd62ba..927acb7 100644
 --- a/fs/nilfs2/bmap.c
 +++ b/fs/nilfs2/bmap.c
 @@ -288,6 +288,43 @@ int nilfs_bmap_truncate(struct nilfs_bmap *bmap, unsigned long key)
  }
  
  /**
 + * nilfs_bmap_truncate_with_mc - truncate a bmap to a specified key
 + * @bmap: bmap
 + * @mc: modification cache
 + * @key: key
 + *
 + * Description: nilfs_bmap_truncate_with_mc() removes key-record pairs whose
 + * keys are greater than or equal to @key from @bmap. It has the same
 + * functionality as nilfs_bmap_truncate(), but allows the passing
 + * of a modification cache to update segment usage information.
 + *
 + * Return Value: On success, 0 is returned. On error, one of the following
 + * negative error codes is returned.
 + *
 + * %-EIO - I/O error.
 + *
 + * %-ENOMEM - Insufficient amount of memory available.
 + */
 +int nilfs_bmap_truncate_with_mc(struct nilfs_bmap *bmap,
 + struct nilfs_sufile_mod_cache *mc,
 + unsigned long key)
 +{
 + int ret;
 +
 + down_write(&bmap->b_sem);
 +
 + bmap->b_private = mc;
 +
 + ret = nilfs_bmap_do_truncate(bmap, key);
 +
 + bmap->b_private = NULL;
 +
 + up_write(&bmap->b_sem);
 +
 + return nilfs_bmap_convert_error(bmap, __func__, ret);
 +}
 +
 +/**
   * nilfs_bmap_clear - free resources a bmap holds
   * @bmap: bmap
   *
 @@ -328,6 +365,43 @@ int nilfs_bmap_propagate(struct nilfs_bmap *bmap, struct buffer_head *bh)
  }
  
  /**
 + * nilfs_bmap_propagate_with_mc - propagate dirty state
 + * @bmap: bmap
 + * @mc: modification cache
 + * @bh: buffer head
 + *
 + * Description: nilfs_bmap_propagate_with_mc() marks the buffers that directly
 + * or indirectly refer to the block specified by @bh dirty. It has
 + * the same functionality as nilfs_bmap_propagate(), but allows the passing
 + * of a modification cache to update segment usage information.
 + *
 + * Return Value: On success, 0 is returned. On error, one of the following
 + * negative error codes is returned.
 + *
 + * %-EIO - I/O error.
 + *
 + * %-ENOMEM - Insufficient amount of memory available.
 + */
 +int nilfs_bmap_propagate_with_mc(struct nilfs_bmap *bmap,
 +  struct nilfs_sufile_mod_cache *mc,
 +  struct buffer_head *bh)
 +{
 + int ret;
 +
 + down_write(&bmap->b_sem);
 +
 + bmap->b_private = mc;
 +
 + ret = bmap->b_ops->bop_propagate(bmap, bh);
 +
 + bmap->b_private = NULL;
 +
 + up_write(&bmap->b_sem);
 +
 + return nilfs_bmap_convert_error(bmap, __func__, ret);
 +}

These bmap functions are really bad.  The mod cache argument has no
meaning with regard to the block mapping operation.  I really hope we can
avoid adding these variants, for instance by hiding the cache in sufile.

 +
 +/**
   * nilfs_bmap_lookup_dirty_buffers -
   * @bmap: bmap
   * @listp: pointer to buffer head list
 @@ -490,6 +564,7 @@ int nilfs_bmap_read(struct nilfs_bmap *bmap, struct nilfs_inode *raw_inode)
  
   init_rwsem(&bmap->b_sem);
   bmap->b_state = 0;
 + bmap->b_private = NULL;
   bmap->b_inode = &NILFS_BMAP_I(bmap)->vfs_inode;
   switch (bmap-b_inode-i_ino) {
   case NILFS_DAT_INO:
 @@ -551,6 +626,7 @@ void nilfs_bmap_init_gc(struct nilfs_bmap *bmap)
   bmap->b_last_allocated_key = 0;
   bmap->b_last_allocated_ptr = NILFS_BMAP_INVALID_PTR;
   bmap->b_state = 0;
 + bmap->b_private = NULL;
   nilfs_btree_init_gc(bmap);
  }
  
 diff --git a/fs/nilfs2/bmap.h b/fs/nilfs2/bmap.h

Re: [PATCH 2/9] nilfs2: add simple cache for modifications to SUFILE

2015-03-13 Thread Ryusuke Konishi
On Tue, 24 Feb 2015 20:01:37 +0100, Andreas Rohner wrote:
 This patch adds a simple, small cache that can be used to accumulate
 modifications to SUFILE entries. This is for example useful for
 keeping track of reclaimable blocks, because most of the
 modifications consist of small increments or decrements. By adding
 these up and temporarily storing them in a small cache, the
 performance can be improved. Additionally, lock contention is
 reduced.
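 
 A usage sketch of the intended pattern follows. Note that the flush path
 and its dofunc argument are only partially quoted below, so
 apply_mods_to_sufile() is merely a stand-in for it, the capacity of 32 is
 an arbitrary example, and a real caller would go through a sufile-level
 wrapper rather than the internal mc_add/mc_clear helpers:
 
 	static int count_dead_blocks(struct inode *sufile,
 				     const __u64 *segnums,
 				     const __s64 *deltas, size_t n)
 	{
 		struct nilfs_sufile_mod_cache mc;
 		size_t i;
 		int err;
 
 		err = nilfs_sufile_mc_init(&mc, 32);
 		if (err)
 			return err;
 
 		for (i = 0; i < n; i++) {
 			if (nilfs_sufile_mc_add(&mc, segnums[i],
 						deltas[i]) == -ENOSPC) {
 				/* cache full: apply and start over */
 				apply_mods_to_sufile(sufile, &mc);
 				nilfs_sufile_mc_clear(&mc);
 				/* cannot fail, the cache is now empty */
 				nilfs_sufile_mc_add(&mc, segnums[i], deltas[i]);
 			}
 		}
 		apply_mods_to_sufile(sufile, &mc);	/* final flush */
 		nilfs_sufile_mc_destroy(&mc);
 		return 0;
 	}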
 
 Signed-off-by: Andreas Rohner <andreas.roh...@gmx.net>
 ---
  fs/nilfs2/sufile.c | 178 +
  fs/nilfs2/sufile.h |  44 +
  2 files changed, 222 insertions(+)
 
 diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c
 index 1e8cac6..a369c30 100644
 --- a/fs/nilfs2/sufile.c
 +++ b/fs/nilfs2/sufile.c
 @@ -1168,6 +1168,184 @@ out_sem:
  }
  
  /**
 + * nilfs_sufile_mc_init - inits segusg modification cache
 + * @mc: modification cache
 + * @capacity: maximum capacity of the mod cache
 + *
 + * Description: Allocates memory for an array of nilfs_sufile_mod structures
 + * according to @capacity. This memory must be freed with
 + * nilfs_sufile_mc_destroy().
 + *
 + * Return Value: On success, 0 is returned. On error, one of the following
 + * negative error codes is returned.
 + *
 + * %-ENOMEM - Insufficient amount of memory available.
 + *
 + * %-EINVAL - Invalid capacity.
 + */
 +int nilfs_sufile_mc_init(struct nilfs_sufile_mod_cache *mc, size_t capacity)
 +{
 + mc->mc_capacity = capacity;
 + if (!capacity)
 + return -EINVAL;
 +
 + mc->mc_mods = kmalloc(capacity * sizeof(struct nilfs_sufile_mod),
 +   GFP_KERNEL);

GFP_NOFS must be used instead of GFP_KERNEL to avoid initiating other
filesystem operations.
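
I.e., the allocation above would become something like:

	mc->mc_mods = kmalloc(capacity * sizeof(struct nilfs_sufile_mod),
			      GFP_NOFS);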

The abbreviation "mc" is not good; it is already used as the
abbreviation of "minimum clean" in userland.

 + if (!mc->mc_mods)
 + return -ENOMEM;
 +
 + mc->mc_size = 0;
 +
 + return 0;
 +}
 +
 +/**
 + * nilfs_sufile_mc_add - add signed value to segusg modification cache
 + * @mc: modification cache
 + * @segnum: segment number
 + * @value: signed value (can be positive or negative)
 + *
 + * Description: nilfs_sufile_mc_add() tries to add a pair of @segnum and
 + * @value to the modification cache. If the cache already contains a
 + * segment number equal to @segnum, then @value is simply added to the
 + * existing value. This way thousands of small modifications can be
 + * accumulated into one value. If @segnum cannot be found and the
 + * capacity allows it, a new element is added to the cache. If the
 + * capacity is reached an error value is returned.
 + *
 + * Return Value: On success, 0 is returned. On error, one of the following
 + * negative error codes is returned.
 + *
 + * %-ENOSPC - The mod cache has reached its capacity and must be flushed.
 + */
 +static inline int nilfs_sufile_mc_add(struct nilfs_sufile_mod_cache *mc,
 +   __u64 segnum, __s64 value)
 +{
 + struct nilfs_sufile_mod *mods = mc->mc_mods;
 + int i;
 +
 + for (i = 0; i < mc->mc_size; ++i, ++mods) {
 + if (mods->m_segnum == segnum) {
 + mods->m_value += value;
 + return 0;
 + }
 + }
 +
 + if (mc->mc_size < mc->mc_capacity) {
 + mods->m_segnum = segnum;
 + mods->m_value = value;
 + mc->mc_size++;
 + return 0;
 + }
 +
 + return -ENOSPC;
 +}
 +
 +/**
 + * nilfs_sufile_mc_clear - set mc_size to 0
 + * @mc: modification cache
 + *
 + * Description: nilfs_sufile_mc_clear() sets mc_size to 0, which enables
 + * nilfs_sufile_mc_add() to overwrite the elements in @mc.
 + */
 +static inline void nilfs_sufile_mc_clear(struct nilfs_sufile_mod_cache *mc)
 +{
 + mc->mc_size = 0;
 +}
 +
 +/**
 + * nilfs_sufile_mc_reset - clear cache and add one element
 + * @mc: modification cache
 + * @segnum: segment number
 + * @value: signed value (can be positive or negative)
 + *
 + * Description: Clears the modification cache in @mc and adds a new pair of
 + * @segnum and @value to it at the same time.
 + */
 +static inline void nilfs_sufile_mc_reset(struct nilfs_sufile_mod_cache *mc,
 +  __u64 segnum, __s64 value)
 +{
 + struct nilfs_sufile_mod *mods = mc->mc_mods;
 +
 + mods->m_segnum = segnum;
 + mods->m_value = value;
 + mc->mc_size = 1;
 +}

The name of this function is confusing.  What it actually does is a
"clear and add", which can be expressed with mc_clear and mc_add.
Remove this function to simplify the interface.
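
I.e., a call site of mc_reset would then simply do:

	nilfs_sufile_mc_clear(mc);
	nilfs_sufile_mc_add(mc, segnum, value);	/* cannot fail: cache is empty */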

Regards,
Ryusuke Konishi

 +/**
 + * nilfs_sufile_mc_flush - flush modification cache
 + * @sufile: inode of segment usage file
 + * @mc: modification cache
 + * @dofunc: primitive operation for the update
 + *
 + * Description: nilfs_sufile_mc_flush() flushes the cached modifications
 + * and applies them to the segment usages on disk. It