[PATCH 15/16] f2fs: add recovery routines for roll-forward

2012-10-05 Thread 김재극
This adds roll-forward routines to recover fsynced data.

- F2FS basically uses a roll-back model with checkpointing.

- In order to implement fsync(), there are two approaches as follows.

1. A roll-back model with checkpointing at every fsync()
 : This is a naive method, but suffers from very low performance.

2. A roll-forward model
 : F2FS adopts this model, in which all fsynced data written after the last
   checkpoint must be recovered. To identify that data, F2FS keeps an "fsync"
   mark in direct node blocks. In addition, each direct node block records the
   location of the next node block, so that the chain of node blocks can be
   reconstructed during recovery.

- In order to enhance the performance, F2FS keeps a "dentry" mark also in direct
  node blocks. If this is set during the recovery, F2FS replays adding a dentry.

Signed-off-by: Jaegeuk Kim 
---
 fs/f2fs/recovery.c |  372 
 1 file changed, 372 insertions(+)
 create mode 100644 fs/f2fs/recovery.c

diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
new file mode 100644
index 000..95455b1
--- /dev/null
+++ b/fs/f2fs/recovery.c
@@ -0,0 +1,372 @@
+/**
+ * fs/f2fs/recovery.c
+ *
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ * http://www.samsung.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include 
+#include 
+#include "f2fs.h"
+#include "node.h"
+#include "segment.h"
+
+static struct kmem_cache *fsync_entry_slab;
+
+/*
+ * Tell whether the block budget still allows roll-forward recovery.
+ *
+ * Returns true as long as last_valid_block_count + alloc_valid_block_count
+ * does not exceed user_block_count, false otherwise.
+ */
+bool space_for_roll_forward(struct f2fs_sb_info *sbi)
+{
+	return sbi->last_valid_block_count + sbi->alloc_valid_block_count <=
+						sbi->user_block_count;
+}
+
+/*
+ * Search the list @head for the fsync inode entry that belongs to @ino.
+ *
+ * Returns the entry whose inode->i_ino equals @ino, or NULL when the list
+ * holds no such entry.
+ */
+static struct fsync_inode_entry *get_fsync_inode(struct list_head *head,
+							nid_t ino)
+{
+	struct fsync_inode_entry *cur;
+
+	list_for_each_entry(cur, head, list) {
+		if (cur->inode->i_ino == ino)
+			return cur;
+	}
+	return NULL;
+}
+
+/*
+ * Replay the directory entry of @inode from its on-disk inode block @ipage.
+ *
+ * Nothing is done unless the node footer carries the "dentry" mark.  The
+ * parent directory is loaded through the recorded i_pino; if it does not yet
+ * contain an entry named i_name, a link to @inode is added.
+ *
+ * Returns 0 on success (including the "no dentry mark" case) and -EINVAL when
+ * the parent directory inode cannot be obtained.
+ */
+static int recover_dentry(struct page *ipage, struct inode *inode)
+{
+	struct f2fs_node *raw_node = (struct f2fs_node *)kmap(ipage);
+	struct f2fs_inode *raw_inode = &(raw_node->i);
+	struct dentry dent, parent;
+	struct f2fs_dir_entry *de;
+	struct page *page;
+	struct inode *dir;
+	int err = 0;
+
+	if (!raw_node->footer.dentry)
+		goto out;
+
+	dir = f2fs_iget(inode->i_sb, le32_to_cpu(raw_inode->i_pino));
+	if (IS_ERR(dir)) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	/*
+	 * Throw-away dentries on the stack: only d_inode, d_parent and d_name
+	 * are filled in before they are handed to the directory helpers.
+	 */
+	parent.d_inode = dir;
+	dent.d_parent = &parent;
+	dent.d_name.len = le32_to_cpu(raw_inode->i_namelen);
+	dent.d_name.name = raw_inode->i_name;
+
+	de = f2fs_find_entry(dir, &dent.d_name, &page);
+	if (de) {
+		/* the entry already exists: just drop the page we were given */
+		kunmap(page);
+		f2fs_put_page(page, 0);
+	} else {
+		/* NOTE(review): the result of f2fs_add_link() is ignored - confirm */
+		f2fs_add_link(&dent, inode);
+	}
+	iput(dir);
+out:
+	kunmap(ipage);
+	return err;
+}
+
+/*
+ * Copy mode, size and the second-granularity timestamps from the on-disk
+ * inode stored in @node_page into @inode, then replay its directory entry.
+ *
+ * Returns whatever recover_dentry() returns.
+ */
+static int recover_inode(struct inode *inode, struct page *node_page)
+{
+	struct f2fs_node *rn = (struct f2fs_node *)page_address(node_page);
+	struct f2fs_inode *ri = &(rn->i);
+
+	inode->i_mode = le32_to_cpu(ri->i_mode);
+	i_size_write(inode, le64_to_cpu(ri->i_size));
+
+	inode->i_atime.tv_sec = le32_to_cpu(ri->i_atime);
+	inode->i_ctime.tv_sec = le32_to_cpu(ri->i_ctime);
+	inode->i_mtime.tv_sec = le32_to_cpu(ri->i_mtime);
+
+	return recover_dentry(node_page, inode);
+}
+
+static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
+{
+   unsigned long long cp_ver = le64_to_cpu(sbi->ckpt->checkpoint_ver);
+   struct curseg_info *curseg;
+   struct page *page;
+   block_t blkaddr;
+   int err = 0;
+
+   /* get node pages in the current segment */
+   curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
+   blkaddr = START_BLOCK(sbi, curseg->segno) + curseg->next_blkoff;
+
+   /* read node page */
+   page = alloc_page(GFP_NOFS | __GFP_ZERO);
+   if (IS_ERR(page))
+   return PTR_ERR(page);
+   lock_page(page);
+
+   while (1) {
+   struct fsync_inode_entry *entry;
+
+   if (f2fs_readpage(sbi, page, blkaddr, READ_SYNC))
+   goto out;
+
+   if (cp_ver != cpver_of_node(page))
+   goto out;
+
+   if (!is_fsync_dnode(page))
+   goto next;
+
+   entry = get_fsync_inode(head, 

[PATCH 16/16] f2fs: update Kconfig and Makefile

2012-10-05 Thread 김재극
This adds Makefile and Kconfig for f2fs, and updates Makefile and Kconfig files
in the fs directory.

Signed-off-by: Jaegeuk Kim 
---
 fs/Kconfig   |1 +
 fs/Makefile  |1 +
 fs/f2fs/Kconfig  |   55 ++
 fs/f2fs/Makefile |6 ++
 4 files changed, 63 insertions(+)
 create mode 100644 fs/f2fs/Kconfig
 create mode 100644 fs/f2fs/Makefile

diff --git a/fs/Kconfig b/fs/Kconfig
index f95ae3a..e352b37 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -220,6 +220,7 @@ source "fs/pstore/Kconfig"
 source "fs/sysv/Kconfig"
 source "fs/ufs/Kconfig"
 source "fs/exofs/Kconfig"
+source "fs/f2fs/Kconfig"
 
 endif # MISC_FILESYSTEMS
 
diff --git a/fs/Makefile b/fs/Makefile
index 2fb9779..e09edb5 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -122,6 +122,7 @@ obj-$(CONFIG_DEBUG_FS)  += debugfs/
 obj-$(CONFIG_OCFS2_FS) += ocfs2/
 obj-$(CONFIG_BTRFS_FS) += btrfs/
 obj-$(CONFIG_GFS2_FS)   += gfs2/
+obj-$(CONFIG_F2FS_FS)  += f2fs/
 obj-y  += exofs/ # Multiple modules
 obj-$(CONFIG_CEPH_FS)  += ceph/
 obj-$(CONFIG_PSTORE)   += pstore/
diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig
new file mode 100644
index 000..8821c6a
--- /dev/null
+++ b/fs/f2fs/Kconfig
@@ -0,0 +1,55 @@
+config F2FS_FS
+   tristate "F2FS filesystem support (EXPERIMENTAL)"
+   depends on EXPERIMENTAL
+   help
+ F2FS is based on Log-structured File System (LFS), which supports
+ versatile "flash-friendly" features. The design has been focused on
+ addressing the fundamental issues in LFS, which are snowball effect
+ of wandering tree and high cleaning overhead.
+
+ Since flash-based storages show different characteristics according to
+ the internal geometry or flash memory management schemes aka FTL, F2FS
+ and tools support various parameters not only for configuring on-disk
+ layout, but also for selecting allocation and cleaning algorithms.
+
+ If unsure, say N.
+
+config F2FS_STAT_FS
+   bool "F2FS Status Information"
+   depends on F2FS_FS
+   default y
+   help
+ /proc/fs/f2fs/ contains information about partitions mounted as f2fs.
+ For each partition, a corresponding directory, named as its device
+ name, is provided with the following proc entries.
+
+ f2fs_stat major file system information managed by f2fs currently
+ f2fs_sit_stat average SIT information about whole segments
+ f2fs_mem_stat current memory footprint consumed by f2fs
+
+ e.g., in /proc/fs/f2fs/sdb1/
+
+config F2FS_FS_XATTR
+   bool "F2FS extended attributes"
+   depends on F2FS_FS
+   default y
+   help
+ Extended attributes are name:value pairs associated with inodes by
+ the kernel or by users (see the attr(5) manual page, or visit
+ <http://acl.bestbits.at/> for details).
+
+ If unsure, say N.
+
+config F2FS_FS_POSIX_ACL
+   bool "F2FS Access Control Lists"
+   depends on F2FS_FS_XATTR
+   select FS_POSIX_ACL
+   default y
+   help
+ Posix Access Control Lists (ACLs) support permissions for users and
+ groups beyond the owner/group/world scheme.
+
+ To learn more about Access Control Lists, visit the POSIX ACLs for
+ Linux website <http://acl.bestbits.at/>.
+
+ If you don't know what Access Control Lists are, say N.
diff --git a/fs/f2fs/Makefile b/fs/f2fs/Makefile
new file mode 100644
index 000..72fcf9a
--- /dev/null
+++ b/fs/f2fs/Makefile
@@ -0,0 +1,6 @@
+obj-$(CONFIG_F2FS_FS) += f2fs.o
+
+f2fs-y := dir.o file.o inode.o namei.o hash.o super.o
+f2fs-y += checkpoint.o gc.o data.o node.o segment.o recovery.o
+f2fs-$(CONFIG_F2FS_FS_XATTR) += xattr.o
+f2fs-$(CONFIG_F2FS_FS_POSIX_ACL) += acl.o
-- 
1.7.9.5




---
Jaegeuk Kim
Samsung



--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 14/16] f2fs: add garbage collection functions

2012-10-05 Thread 김재극
This adds on-demand and background cleaning functions.

- The basic background cleaning policy is trying to do cleaning jobs as much as
  possible whenever the system is idle. Once the background cleaning is done,
  the cleaner sleeps an amount of time not to interfere with VFS calls. The time
  is dynamically adjusted according to the status of whole segments, which is
  decreased when the following conditions are satisfied.

  . GC is not conducted currently, and
  . IO subsystem is idle by checking the number of requests in bdev's request
 list, and
  . There are enough dirty segments.

  Otherwise, the time is increased incrementally up to the maximum time.
  Note that, min and max times are 10 secs and 30 secs by default.

- F2FS adopts a default victim selection policy where background cleaning uses
  a cost-benefit algorithm, while on-demand cleaning uses a greedy algorithm.

- The method of moving data during the cleaning is slightly different between
  background and on-demand cleaning schemes. In the case of background cleaning,
  F2FS loads the data, and marks them as dirty. Then, F2FS expects that the data
  will be moved by flusher or VM. In the case of on-demand cleaning, F2FS should
  move the data right away.

- In order to identify valid blocks in a victim segment, F2FS scans the bitmap
  of the segment managed as an SIT entry.

Signed-off-by: Jaegeuk Kim 
---
 fs/f2fs/gc.c | 1140 ++
 fs/f2fs/gc.h |  203 +++
 2 files changed, 1343 insertions(+)
 create mode 100644 fs/f2fs/gc.c
 create mode 100644 fs/f2fs/gc.h

diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
new file mode 100644
index 000..05f173c
--- /dev/null
+++ b/fs/f2fs/gc.c
@@ -0,0 +1,1140 @@
+/**
+ * fs/f2fs/gc.c
+ *
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ * http://www.samsung.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "f2fs.h"
+#include "node.h"
+#include "segment.h"
+#include "gc.h"
+
+static LIST_HEAD(f2fs_stat_list);
+static struct kmem_cache *winode_slab;
+
+/*
+ * Body of the background GC kernel thread; @data is the f2fs_sb_info.
+ *
+ * Each round sleeps for wait_ms (or until the thread is asked to stop), calls
+ * f2fs_balance_fs() and, when the BG_GC mount option is set and gc_mutex can
+ * be taken without blocking, runs one f2fs_gc() pass.  wait_ms is raised or
+ * lowered according to is_idle(), has_enough_invalid_blocks() and the result
+ * of f2fs_gc().
+ *
+ * Returns 0 once kthread_should_stop() is observed.
+ */
+static int gc_thread_func(void *data)
+{
+	struct f2fs_sb_info *sbi = data;
+	wait_queue_head_t *wq = &sbi->gc_thread->gc_wait_queue_head;
+	long wait_ms;
+
+	wait_ms = GC_THREAD_MIN_SLEEP_TIME;
+
+	do {
+		if (try_to_freeze())
+			continue;
+		else
+			wait_event_interruptible_timeout(*wq,
+						kthread_should_stop(),
+						msecs_to_jiffies(wait_ms));
+		if (kthread_should_stop())
+			break;
+
+		f2fs_balance_fs(sbi);
+
+		if (!test_opt(sbi, BG_GC))
+			continue;
+
+		/*
+		 * [GC triggering condition]
+		 * 0. GC is not conducted currently.
+		 * 1. There are enough dirty segments.
+		 * 2. IO subsystem is idle by checking the # of writeback pages.
+		 * 3. IO subsystem is idle by checking the # of requests in
+		 *    bdev's request list.
+		 *
+		 * Note) We have to avoid triggering GC too frequently,
+		 * because some segments may be invalidated soon afterwards
+		 * by user updates or deletions.  So we wait for some time
+		 * to let dirty segments accumulate.
+		 */
+		if (!mutex_trylock(&sbi->gc_mutex))
+			continue;
+
+		if (!is_idle(sbi)) {
+			wait_ms = increase_sleep_time(wait_ms);
+			mutex_unlock(&sbi->gc_mutex);
+			continue;
+		}
+
+		if (has_enough_invalid_blocks(sbi))
+			wait_ms = decrease_sleep_time(wait_ms);
+		else
+			wait_ms = increase_sleep_time(wait_ms);
+
+		sbi->bg_gc++;
+
+		/*
+		 * NOTE(review): gc_mutex is not released on this path;
+		 * presumably f2fs_gc() drops it - confirm.
+		 */
+		if (f2fs_gc(sbi, 1) == GC_NONE)
+			wait_ms = GC_THREAD_NOGC_SLEEP_TIME;
+		else if (wait_ms == GC_THREAD_NOGC_SLEEP_TIME)
+			wait_ms = GC_THREAD_MAX_SLEEP_TIME;
+
+	} while (!kthread_should_stop());
+	return 0;
+}
+
+int start_gc_thread(struct f2fs_sb_info *sbi)
+{
+   struct f2fs_gc_kthread *gc_th = NULL;
+
+   gc_th = kmalloc(sizeof(struct f2fs_gc_kthread), GFP_KERNEL);
+   if (!gc_th)
+   return -ENOMEM;
+
+   sbi->gc_thread = gc_th;
+   init_waitqueue_head(>gc_thread->gc_wait_queue_head);
+   sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi,
+   

[PATCH 13/16] f2fs: add xattr and acl functionalities

2012-10-05 Thread 김재극
This implements xattr and acl functionalities.

- F2FS uses a node page to contain user extended attributes.

Signed-off-by: Changman Lee 
Signed-off-by: Jaegeuk Kim 
---
 fs/f2fs/acl.c   |  402 +++
 fs/f2fs/acl.h   |   57 
 fs/f2fs/xattr.c |  387 
 fs/f2fs/xattr.h |  142 
 4 files changed, 988 insertions(+)
 create mode 100644 fs/f2fs/acl.c
 create mode 100644 fs/f2fs/acl.h
 create mode 100644 fs/f2fs/xattr.c
 create mode 100644 fs/f2fs/xattr.h

diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
new file mode 100644
index 000..f3682a5
--- /dev/null
+++ b/fs/f2fs/acl.c
@@ -0,0 +1,402 @@
+/**
+ * fs/f2fs/acl.c
+ *
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ * http://www.samsung.com/
+ *
+ * Portions of this code from linux/fs/ext2/acl.c
+ *
+ * Copyright (C) 2001-2003 Andreas Gruenbacher, 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include 
+#include "f2fs.h"
+#include "xattr.h"
+#include "acl.h"
+
+/*
+ * Mode to use for inode @i: F2FS_I(i)->i_acl_mode while the FI_ACL_MODE inode
+ * flag is set, otherwise the regular i->i_mode.
+ */
+#define get_inode_mode(i)  ((is_inode_flag_set(F2FS_I(i), FI_ACL_MODE)) ? \
+   (F2FS_I(i)->i_acl_mode) : ((i)->i_mode))
+
+/*
+ * Size in bytes of the on-disk form of an ACL with @count entries: the header,
+ * up to four short entries, and full-size entries for everything beyond the
+ * fourth.
+ */
+static inline size_t f2fs_acl_size(int count)
+{
+	size_t bytes = sizeof(struct f2fs_acl_header);
+
+	if (count > 4)
+		bytes += 4 * sizeof(struct f2fs_acl_entry_short) +
+			(count - 4) * sizeof(struct f2fs_acl_entry);
+	else
+		bytes += count * sizeof(struct f2fs_acl_entry_short);
+
+	return bytes;
+}
+
+/*
+ * Inverse of f2fs_acl_size(): derive the number of ACL entries from the size
+ * in bytes of an on-disk ACL.
+ *
+ * Returns the entry count, or -1 when @size is smaller than the header or
+ * does not correspond to a whole number of entries.
+ */
+static inline int f2fs_acl_count(size_t size)
+{
+	ssize_t s;
+
+	/* size is unsigned: subtracting the header from less would wrap */
+	if (size < sizeof(struct f2fs_acl_header))
+		return -1;
+
+	size -= sizeof(struct f2fs_acl_header);
+	s = size - 4 * sizeof(struct f2fs_acl_entry_short);
+	if (s < 0) {
+		/* at most four entries, all in the short form */
+		if (size % sizeof(struct f2fs_acl_entry_short))
+			return -1;
+		return size / sizeof(struct f2fs_acl_entry_short);
+	} else {
+		/* four short entries followed by full-size ones */
+		if (s % sizeof(struct f2fs_acl_entry))
+			return -1;
+		return s / sizeof(struct f2fs_acl_entry) + 4;
+	}
+}
+
+/*
+ * Convert an on-disk ACL (@value, @size bytes) into a struct posix_acl.
+ *
+ * Returns the newly allocated ACL, NULL when the buffer holds zero entries,
+ * ERR_PTR(-EINVAL) when the buffer is too short, carries the wrong version,
+ * has an unknown tag or does not end exactly after the last entry, and
+ * ERR_PTR(-ENOMEM) when the allocation fails.
+ */
+static struct posix_acl *f2fs_acl_from_disk(const char *value, size_t size)
+{
+	int i, count;
+	struct posix_acl *acl;
+	struct f2fs_acl_header *hdr = (struct f2fs_acl_header *)value;
+	struct f2fs_acl_entry *entry = (struct f2fs_acl_entry *)(hdr + 1);
+	const char *end = value + size;
+
+	/* the header must be fully present before a_version is read */
+	if (size < sizeof(struct f2fs_acl_header))
+		return ERR_PTR(-EINVAL);
+
+	if (hdr->a_version != cpu_to_le32(F2FS_ACL_VERSION))
+		return ERR_PTR(-EINVAL);
+
+	count = f2fs_acl_count(size);
+	if (count < 0)
+		return ERR_PTR(-EINVAL);
+	if (count == 0)
+		return NULL;
+
+	acl = posix_acl_alloc(count, GFP_KERNEL);
+	if (!acl)
+		return ERR_PTR(-ENOMEM);
+
+	for (i = 0; i < count; i++) {
+		/* e_tag and e_perm must lie inside the buffer */
+		if ((size_t)(end - (char *)entry) <
+				sizeof(struct f2fs_acl_entry_short))
+			goto fail;
+
+		acl->a_entries[i].e_tag  = le16_to_cpu(entry->e_tag);
+		acl->a_entries[i].e_perm = le16_to_cpu(entry->e_perm);
+
+		switch (acl->a_entries[i].e_tag) {
+		case ACL_USER:
+		case ACL_GROUP:
+			/* these tags use the full entry, which includes e_id */
+			if ((size_t)(end - (char *)entry) <
+					sizeof(struct f2fs_acl_entry))
+				goto fail;
+			acl->a_entries[i].e_id = le32_to_cpu(entry->e_id);
+			entry = (struct f2fs_acl_entry *)((char *)entry +
+					sizeof(struct f2fs_acl_entry));
+			break;
+		case ACL_USER_OBJ:
+		case ACL_GROUP_OBJ:
+		case ACL_MASK:
+		case ACL_OTHER:
+			acl->a_entries[i].e_id = ACL_UNDEFINED_ID;
+			entry = (struct f2fs_acl_entry *)((char *)entry +
+					sizeof(struct f2fs_acl_entry_short));
+			break;
+		default:
+			goto fail;
+		}
+	}
+	/* the entries must consume the buffer exactly */
+	if ((char *)entry != end)
+		goto fail;
+	return acl;
+fail:
+	posix_acl_release(acl);
+	return ERR_PTR(-EINVAL);
+}
+
+static void *f2fs_acl_to_disk(const struct posix_acl *acl, size_t *size)
+{
+   struct f2fs_acl_header *f2fs_acl;
+   struct f2fs_acl_entry *entry;
+   int i;
+
+   f2fs_acl = kmalloc(sizeof(struct f2fs_acl_header) + acl->a_count *
+   sizeof(struct f2fs_acl_entry), GFP_KERNEL);
+   if (!f2fs_acl)
+   return ERR_PTR(-ENOMEM);
+
+   f2fs_acl->a_version = cpu_to_le32(F2FS_ACL_VERSION);
+   entry = (struct f2fs_acl_entry *)(f2fs_acl + 1);
+
+   for (i = 0; i < acl->a_count; i++) {
+
+   entry->e_tag  = cpu_to_le16(acl->a_entries[i].e_tag);
+   entry->e_perm = cpu_to_le16(acl->a_entries[i].e_perm);
+

[PATCH 12/16] f2fs: add core directory operations

2012-10-05 Thread 김재극
This adds core functions to find, add, delete, and link dentries.

Signed-off-by: Jaegeuk Kim 
---
 fs/f2fs/dir.c  |  657 
 fs/f2fs/hash.c |   98 +
 2 files changed, 755 insertions(+)
 create mode 100644 fs/f2fs/dir.c
 create mode 100644 fs/f2fs/hash.c

diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
new file mode 100644
index 000..edc0610
--- /dev/null
+++ b/fs/f2fs/dir.c
@@ -0,0 +1,657 @@
+/**
+ * fs/f2fs/dir.c
+ *
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ * http://www.samsung.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include 
+#include 
+#include "f2fs.h"
+#include "acl.h"
+
+/*
+ * Number of page-cache-sized blocks needed to hold i_size bytes of @inode,
+ * rounded up.
+ */
+static unsigned long dir_blocks(struct inode *inode)
+{
+	unsigned long long rounded =
+		(unsigned long long) (i_size_read(inode) + PAGE_CACHE_SIZE - 1);
+
+	return rounded >> PAGE_CACHE_SHIFT;
+}
+
+/*
+ * Number of hash buckets at directory level @level: 2^level below half of
+ * MAX_DIR_HASH_DEPTH, and a fixed 2^(MAX_DIR_HASH_DEPTH / 2 - 1) from there on.
+ */
+static unsigned int dir_buckets(unsigned int level)
+{
+	unsigned int shift = (MAX_DIR_HASH_DEPTH / 2) - 1;
+
+	if (level < MAX_DIR_HASH_DEPTH / 2)
+		shift = level;
+
+	return 1 << shift;
+}
+
+/*
+ * Number of blocks per bucket at directory level @level: 2 below half of
+ * MAX_DIR_HASH_DEPTH, 4 from there on.
+ */
+static unsigned int bucket_blocks(unsigned int level)
+{
+	return (level < MAX_DIR_HASH_DEPTH / 2) ? 2 : 4;
+}
+
+/* Maps an F2FS_FT_* file type value to the corresponding DT_* value. */
+static unsigned char f2fs_filetype_table[F2FS_FT_MAX] = {
+   [F2FS_FT_UNKNOWN]   = DT_UNKNOWN,
+   [F2FS_FT_REG_FILE]  = DT_REG,
+   [F2FS_FT_DIR]   = DT_DIR,
+   [F2FS_FT_CHRDEV]= DT_CHR,
+   [F2FS_FT_BLKDEV]= DT_BLK,
+   [F2FS_FT_FIFO]  = DT_FIFO,
+   [F2FS_FT_SOCK]  = DT_SOCK,
+   [F2FS_FT_SYMLINK]   = DT_LNK,
+};
+
+/*
+ * Maps the file-type bits of an inode mode, (mode & S_IFMT) >> S_SHIFT, to
+ * the F2FS_FT_* value stored in a directory entry.
+ */
+#define S_SHIFT 12
+static unsigned char f2fs_type_by_mode[S_IFMT >> S_SHIFT] = {
+   [S_IFREG >> S_SHIFT]= F2FS_FT_REG_FILE,
+   [S_IFDIR >> S_SHIFT]= F2FS_FT_DIR,
+   [S_IFCHR >> S_SHIFT]= F2FS_FT_CHRDEV,
+   [S_IFBLK >> S_SHIFT]= F2FS_FT_BLKDEV,
+   [S_IFIFO >> S_SHIFT]= F2FS_FT_FIFO,
+   [S_IFSOCK >> S_SHIFT]   = F2FS_FT_SOCK,
+   [S_IFLNK >> S_SHIFT]= F2FS_FT_SYMLINK,
+};
+
+/*
+ * Store in @de the F2FS_FT_* file type that matches the file-type bits of
+ * @inode's mode.
+ */
+static void set_de_type(struct f2fs_dir_entry *de, struct inode *inode)
+{
+	de->file_type = f2fs_type_by_mode[(inode->i_mode & S_IFMT) >> S_SHIFT];
+}
+
+/*
+ * Index of the first block of bucket @idx at hash level @level: the blocks of
+ * all lower levels plus @idx buckets of the current level.
+ */
+static unsigned long dir_block_index(unsigned int level, unsigned int idx)
+{
+	unsigned long blocks = 0;
+	unsigned long lvl;
+
+	/* sum up everything that sits below the requested level */
+	for (lvl = 0; lvl < level; lvl++)
+		blocks += dir_buckets(lvl) * bucket_blocks(lvl);
+
+	return blocks + idx * bucket_blocks(level);
+}
+
+/*
+ * Cheap pre-check before comparing names: true only when both the name length
+ * and the hash code recorded in @de equal @namelen and @namehash.  @name
+ * itself is not inspected here.
+ */
+static bool early_match_name(const char *name, int namelen,
+			f2fs_hash_t namehash, struct f2fs_dir_entry *de)
+{
+	return le16_to_cpu(de->name_len) == namelen &&
+		le32_to_cpu(de->hash_code) == namehash;
+}
+
+/*
+ * Search one dentry block for the entry named @name (@namelen bytes, hash
+ * @namehash).
+ *
+ * While scanning, *max_slots is raised to the largest run of free slots seen
+ * between used entries.
+ *
+ * Returns the matching entry and sets *res_page to @dentry_page, which is
+ * left kmapped in that case.  Returns NULL after kunmap()ing the page when
+ * nothing matches.
+ */
+static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
+			const char *name, int namelen, int *max_slots,
+			f2fs_hash_t namehash, struct page **res_page)
+{
+	struct f2fs_dir_entry *de;
+	unsigned long bit_pos, end_pos, next_pos;
+	struct f2fs_dentry_block *dentry_blk = kmap(dentry_page);
+	int slots;
+
+	bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap,
+					NR_DENTRY_IN_BLOCK, 0);
+	while (bit_pos < NR_DENTRY_IN_BLOCK) {
+		de = &dentry_blk->dentry[bit_pos];
+		/* number of F2FS_NAME_LEN-sized slots this name occupies */
+		slots = (le16_to_cpu(de->name_len) + F2FS_NAME_LEN - 1) /
+							F2FS_NAME_LEN;
+
+		if (early_match_name(name, namelen, namehash, de)) {
+			if (!memcmp(dentry_blk->filename[bit_pos],
+							name, namelen)) {
+				*res_page = dentry_page;
+				goto found;
+			}
+		}
+		/* skip this entry's slots and jump to the next used one */
+		next_pos = bit_pos + slots;
+		bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap,
+					NR_DENTRY_IN_BLOCK, next_pos);
+		if (bit_pos >= NR_DENTRY_IN_BLOCK)
+			end_pos = NR_DENTRY_IN_BLOCK;
+		else
+			end_pos = bit_pos;
+		/* [next_pos, end_pos) is a run of free slots */
+		if (*max_slots < end_pos - next_pos)
+			*max_slots = end_pos - next_pos;
+	}
+
+	de = NULL;
+	kunmap(dentry_page);
+found:
+	return de;
+}
+
+static struct f2fs_dir_entry *find_in_level(struct inode *dir,
+   unsigned int level, const char *name, int namelen,
+   f2fs_hash_t namehash, struct page **res_page)
+{
+   int s = (namelen + F2FS_NAME_LEN - 1) / 

[PATCH 09/16] f2fs: add address space operations for data

2012-10-05 Thread 김재극
This adds address space operations for data.

- F2FS supports readpages(), writepages(), and direct_IO().

- Because of out-of-place writes, f2fs_direct_IO() does not write data in place.

Signed-off-by: Jaegeuk Kim 
---
 fs/f2fs/data.c |  700 
 1 file changed, 700 insertions(+)
 create mode 100644 fs/f2fs/data.c

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
new file mode 100644
index 000..97d656b
--- /dev/null
+++ b/fs/f2fs/data.c
@@ -0,0 +1,700 @@
+/**
+ * fs/f2fs/data.c
+ *
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ * http://www.samsung.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "f2fs.h"
+#include "node.h"
+#include "segment.h"
+
+/**
+ * Lock ordering for the change of data block address:
+ * ->data_page
+ *  ->node_page
+ *    update block addresses in the node page
+ *
+ * Store @new_addr (little-endian) in slot dn->ofs_in_node of the address array
+ * of dn->node_page and mark that page dirty.
+ */
+static void __set_data_blkaddr(struct dnode_of_data *dn, block_t new_addr)
+{
+	struct page *npage = dn->node_page;
+	unsigned int slot = dn->ofs_in_node;
+	struct f2fs_node *node;
+	__le32 *blkaddrs;
+
+	/* wait for any writeback of the node page before touching it */
+	wait_on_page_writeback(npage);
+
+	node = (struct f2fs_node *)page_address(npage);
+
+	/* locate the block address array and update the slot */
+	blkaddrs = blkaddr_in_node(node);
+	blkaddrs[slot] = cpu_to_le32(new_addr);
+	set_page_dirty(npage);
+}
+
+/*
+ * Reserve one block for the position described by @dn and record NEW_ADDR as
+ * its address, both in the node page and in dn->data_blkaddr.
+ *
+ * Returns 0 on success, -EPERM when the inode has FI_NO_ALLOC set, and
+ * -ENOSPC when inc_valid_block_count() refuses the block.
+ */
+int reserve_new_block(struct dnode_of_data *dn)
+{
+	struct inode *inode = dn->inode;
+	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+
+	if (is_inode_flag_set(F2FS_I(inode), FI_NO_ALLOC))
+		return -EPERM;
+
+	if (!inc_valid_block_count(sbi, inode, 1))
+		return -ENOSPC;
+
+	__set_data_blkaddr(dn, NEW_ADDR);
+	dn->data_blkaddr = NEW_ADDR;
+	sync_inode_page(dn);
+
+	return 0;
+}
+
+/*
+ * Try to map file page offset @pgofs of @inode from the inode's cached extent.
+ *
+ * On a hit, @bh_result is mapped to the matching block address and b_size is
+ * set to the number of bytes remaining in the extent from @pgofs on, capped
+ * at UINT_MAX.  The extent is read under ext_lock taken for reading.
+ *
+ * Returns 1 on a hit, 0 when the extent is empty or does not cover @pgofs.
+ */
+static int check_extent_cache(struct inode *inode, pgoff_t pgofs,
+					struct buffer_head *bh_result)
+{
+	struct f2fs_inode_info *fi = F2FS_I(inode);
+	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+	pgoff_t start_fofs, end_fofs;
+	block_t start_blkaddr;
+
+	read_lock(&fi->ext.ext_lock);
+	if (fi->ext.len == 0) {
+		read_unlock(&fi->ext.ext_lock);
+		return 0;
+	}
+
+	/* counts every lookup made against a non-empty extent */
+	sbi->total_hit_ext++;
+	start_fofs = fi->ext.fofs;
+	end_fofs = fi->ext.fofs + fi->ext.len - 1;
+	start_blkaddr = fi->ext.blk_addr;
+
+	if (pgofs >= start_fofs && pgofs <= end_fofs) {
+		unsigned int blkbits = inode->i_sb->s_blocksize_bits;
+		size_t count;
+
+		clear_buffer_new(bh_result);
+		map_bh(bh_result, inode->i_sb,
+				start_blkaddr + pgofs - start_fofs);
+		/* blocks left in the extent, starting at pgofs */
+		count = end_fofs - pgofs + 1;
+		if (count < (UINT_MAX >> blkbits))
+			bh_result->b_size = (count << blkbits);
+		else
+			bh_result->b_size = UINT_MAX;
+
+		sbi->read_hit_ext++;
+		read_unlock(&fi->ext.ext_lock);
+		return 1;
+	}
+	read_unlock(&fi->ext.ext_lock);
+	return 0;
+}
+
+void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn)
+{
+   struct f2fs_inode_info *fi = F2FS_I(dn->inode);
+   pgoff_t fofs, start_fofs, end_fofs;
+   block_t start_blkaddr, end_blkaddr;
+
+   BUG_ON(blk_addr == NEW_ADDR);
+   fofs = start_bidx_of_node(ofs_of_node(dn->node_page)) + dn->ofs_in_node;
+
+   /* Update the page address in the parent node */
+   __set_data_blkaddr(dn, blk_addr);
+
+   write_lock(>ext.ext_lock);
+
+   start_fofs = fi->ext.fofs;
+   end_fofs = fi->ext.fofs + fi->ext.len - 1;
+   start_blkaddr = fi->ext.blk_addr;
+   end_blkaddr = fi->ext.blk_addr + fi->ext.len - 1;
+
+   /* Drop and initialize the matched extent */
+   if (fi->ext.len == 1 && fofs == start_fofs)
+   fi->ext.len = 0;
+
+   /* Initial extent */
+   if (fi->ext.len == 0) {
+   if (blk_addr != NULL_ADDR) {
+   fi->ext.fofs = fofs;
+   fi->ext.blk_addr = blk_addr;
+   fi->ext.len = 1;
+   }
+   goto end_update;
+   }
+
+   /* Frone merge */
+   if (fofs == start_fofs - 1 && blk_addr == start_blkaddr - 1) {
+   fi->ext.fofs--;
+   fi->ext.blk_addr--;
+   fi->ext.len++;
+   goto end_update;
+   }
+
+   /* Back merge */
+   if (fofs == end_fofs + 1 && blk_addr == end_blkaddr + 1) {
+   fi->ext.len++;
+   goto end_update;
+   }
+
+   /* Split the 

[PATCH 05/16] f2fs: add checkpoint operations

2012-10-05 Thread 김재극
This adds functions required by the checkpoint operations.

Basically, f2fs adopts a roll-back model with checkpoint blocks written in the
CP area. The checkpoint procedure is as follows.

- write_checkpoint()
1. block_operations() freezes VFS calls.
2. submit cached bios.
3. flush_nat_entries() writes NAT pages updated by dirty NAT entries.
4. flush_sit_entries() writes SIT pages updated by dirty SIT entries.
5. do_checkpoint() writes,
  - checkpoint block (#0)
  - orphan inode blocks
  - summary blocks made by active logs
  - checkpoint block (copy of #0)
6. unblock_operations()

In order to provide an address space for meta pages, f2fs_sb_info has a special
inode, namely meta_inode. This patch also adds the address space operations for
meta_inode.

Signed-off-by: Chul Lee 
Signed-off-by: Jaegeuk Kim 
---
 fs/f2fs/checkpoint.c |  791 ++
 1 file changed, 791 insertions(+)
 create mode 100644 fs/f2fs/checkpoint.c

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
new file mode 100644
index 000..2186b82
--- /dev/null
+++ b/fs/f2fs/checkpoint.c
@@ -0,0 +1,791 @@
+/**
+ * fs/f2fs/checkpoint.c
+ *
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ * http://www.samsung.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "f2fs.h"
+#include "node.h"
+#include "segment.h"
+
+static struct kmem_cache *orphan_entry_slab;
+static struct kmem_cache *inode_entry_slab;
+
+/**
+ * Get the locked meta page at @index from the meta inode's mapping without
+ * reading it from disk, retrying until the page cache returns a page.
+ * We guarantee no failure on the returned page.
+ */
+struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
+{
+	struct address_space *mapping = sbi->meta_inode->i_mapping;
+	struct page *page;
+
+	while (!(page = grab_cache_page(mapping, index)))
+		cond_resched();
+
+	/* We wait writeback only inside grab_meta_page() */
+	wait_on_page_writeback(page);
+	SetPageUptodate(page);
+	return page;
+}
+
+/**
+ * Get the meta page at @index and read it with f2fs_readpage(), retrying for
+ * as long as grabbing or reading the page fails.
+ * We guarantee no failure on the returned page.
+ */
+struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
+{
+	struct address_space *mapping = sbi->meta_inode->i_mapping;
+	struct page *page;
+
+	for (;;) {
+		page = grab_cache_page(mapping, index);
+		if (!page) {
+			cond_resched();
+			continue;
+		}
+		if (!f2fs_readpage(sbi, page, index, READ_SYNC))
+			break;
+		/* the read failed: release the page and start over */
+		f2fs_put_page(page, 1);
+	}
+	mark_page_accessed(page);
+
+	/* We do not allow returning an erroneous page */
+	return page;
+}
+
+/*
+ * Write one meta page through write_meta_page().
+ *
+ * On failure the page is counted in wbc->pages_skipped and marked dirty
+ * again.  The F2FS_DIRTY_META page count is decremented in either case.
+ *
+ * Returns the result of write_meta_page(); the page is unlocked unless that
+ * result is AOP_WRITEPAGE_ACTIVATE.
+ */
+static int f2fs_write_meta_page(struct page *page,
+   struct writeback_control *wbc)
+{
+   struct inode *inode = page->mapping->host;
+   struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+   int err;
+
+   /* wait until any writeback already under way on this page has ended */
+   wait_on_page_writeback(page);
+
+   err = write_meta_page(sbi, page, wbc);
+   if (err) {
+   wbc->pages_skipped++;
+   set_page_dirty(page);
+   }
+
+   dec_page_count(sbi, F2FS_DIRTY_META);
+
+   /* With AOP_WRITEPAGE_ACTIVATE the page must stay locked */
+   if (err != AOP_WRITEPAGE_ACTIVATE)
+   unlock_page(page);
+   return err;
+}
+
+/*
+ * Write back dirty meta pages of @mapping.
+ *
+ * Does nothing for kupdate-style writeback or when no meta page is dirty.
+ * Otherwise up to bio_get_nr_vecs() pages are written via sync_meta_pages()
+ * under cp_mutex, and the number written is subtracted from wbc->nr_to_write.
+ *
+ * Always returns 0.
+ */
+static int f2fs_write_meta_pages(struct address_space *mapping,
+				struct writeback_control *wbc)
+{
+	struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
+	struct block_device *bdev = sbi->sb->s_bdev;
+	long written;
+
+	if (wbc->for_kupdate)
+		return 0;
+
+	if (get_pages(sbi, F2FS_DIRTY_META) == 0)
+		return 0;
+
+	/* flush dirty meta pages while holding cp_mutex */
+	mutex_lock(&sbi->cp_mutex);
+	written = sync_meta_pages(sbi, META, bio_get_nr_vecs(bdev));
+	mutex_unlock(&sbi->cp_mutex);
+	wbc->nr_to_write -= written;
+	return 0;
+}
+
+long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
+   long nr_to_write)
+{
+   struct address_space *mapping = sbi->meta_inode->i_mapping;
+   pgoff_t index = 0, end = LONG_MAX;
+   struct pagevec pvec;
+   long nwritten = 0;
+   struct writeback_control wbc = {
+   .for_reclaim = 0,
+   };
+
+   pagevec_init(, 0);
+
+   while (index <= end) {
+   int i, nr_pages;
+   nr_pages = pagevec_lookup_tag(, mapping, ,
+   PAGECACHE_TAG_DIRTY,
+   min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
+   if (nr_pages == 0)
+   break;
+
+   for (i = 0; i < nr_pages; i++) {
+   

[PATCH 02/16] f2fs: add on-disk layout

2012-10-05 Thread 김재극
This adds a header file describing the on-disk layout of f2fs.

Signed-off-by: Changman Lee 
Signed-off-by: Chul Lee 
Signed-off-by: Jaegeuk Kim 
---
 include/linux/f2fs_fs.h |  359 +++
 1 file changed, 359 insertions(+)
 create mode 100644 include/linux/f2fs_fs.h

diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
new file mode 100644
index 000..b17eeec
--- /dev/null
+++ b/include/linux/f2fs_fs.h
@@ -0,0 +1,359 @@
+/**
+ * include/linux/f2fs_fs.h
+ *
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ * http://www.samsung.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef _LINUX_F2FS_FS_H
+#define _LINUX_F2FS_FS_H
+
+#include 
+#include 
+
+#define F2FS_SUPER_MAGIC	0xF2F52010
+#define F2FS_SUPER_OFFSET	1	/* start sector # for sb */
+#define F2FS_BLKSIZE		4096
+
+/* special block addresses: "no block" and "reserved but not yet written" */
+#define NULL_ADDR		0x0U
+#define NEW_ADDR		(-1U)
+
+/* inode numbers of the root, node and meta inodes kept in the sb info */
+#define F2FS_ROOT_INO(sbi)	((sbi)->root_ino_num)
+#define F2FS_NODE_INO(sbi)	((sbi)->node_ino_num)
+#define F2FS_META_INO(sbi)	((sbi)->meta_ino_num)
+
+#define GFP_F2FS_MOVABLE	(__GFP_WAIT | __GFP_IO | __GFP_ZERO)
+
+/*
+ * For superblock
+ *
+ * On-disk layout; all multi-byte fields are little-endian and the structure
+ * is packed.
+ */
+struct f2fs_super_block {
+   __le32 magic;   /* Magic Number */
+   __le16 major_ver;   /* Major Version */
+   __le16 minor_ver;   /* Minor Version */
+   __le32 log_sectorsize;  /* log2 (Sector size in bytes) */
+   __le32 log_sectors_per_block;   /* log2 (Number of sectors per block) */
+   __le32 log_blocksize;   /* log2 (Block size in bytes) */
+   __le32 log_blocks_per_seg; /* log2 (Number of blocks per segment) */
+   __le32 log_segs_per_sec; /* log2 (Number of segments per section) */
+   __le32 secs_per_zone; /* Number of sections per zone */
+   __le32 checksum_offset; /* Checksum position in this super block */
+   __le64 block_count; /* Total number of blocks */
+   __le32 section_count;   /* Total number of sections */
+   __le32 segment_count;   /* Total number of segments */
+   __le32 segment_count_ckpt; /* Total number of segments
+ in Checkpoint area */
+   __le32 segment_count_sit; /* Total number of segments
+in Segment information table */
+   __le32 segment_count_nat; /* Total number of segments
+in Node address table */
+   /* Total number of segments in Segment summary area */
+   __le32 segment_count_ssa;
+   /* Total number of segments in Main area */
+   __le32 segment_count_main;
+   __le32 failure_safe_block_distance;
+   __le64 segment0_blkaddr;/* Start block address of Segment 0 */
+   __le64 start_segment_checkpoint; /* Start block address of ckpt */
+   __le64 sit_blkaddr; /* Start block address of SIT */
+   __le64 nat_blkaddr; /* Start block address of NAT */
+   __le64 ssa_blkaddr; /* Start block address of SSA */
+   __le64 main_blkaddr;/* Start block address of Main area */
+   __le32 root_ino;/* Root directory inode number */
+   __le32 node_ino;/* node inode number */
+   __le32 meta_ino;/* meta inode number */
+   __le32 volume_serial_number;/* VSN is optional field */
+   __le16 volume_name[8];  /* Volume Name. 8 unicode characters */
+} __packed;
+
+/*
+ * For checkpoint
+ */
+struct f2fs_checkpoint {
+   __le64 checkpoint_ver;  /* Checkpoint block version number */
+   __le64 user_block_count;/* Total number of blocks
+  in Main area excluding the number of
+  reserved blocks */
+   __le64 valid_block_count;   /* Total number of valid blocks
+  in Main area */
+   __le32 rsvd_segment_count;  /* Total number of reserved segments
+  (for garbage collection) */
+   __le32 overprov_segment_count;  /* Total number of overprovision
+  segments */
+   __le32 free_segment_count;  /* Total number of free segments
+  in Main area */
+   __le32 bad_segment_count;   /* Total number of bad segments
+  in Main area */
+   __le32 cur_node_segno[3];   /* Segment number of current node
+  segment */
+   __le16 cur_node_blkoff[3];  /* Current node block offset
+  in the node segment */
+   __le16 nat_upd_blkoff[3];   /* Block offset in current node segment
+ 

[PATCH 01/16] f2fs: add document

2012-10-05 Thread 김재극
This adds a document describing the mount options, proc entries, usage, and
design of Flash-Friendly File System, namely F2FS.

Signed-off-by: Jaegeuk Kim 
---
 Documentation/filesystems/00-INDEX |2 +
 Documentation/filesystems/f2fs.txt |  314 
 2 files changed, 316 insertions(+)
 create mode 100644 Documentation/filesystems/f2fs.txt

diff --git a/Documentation/filesystems/00-INDEX b/Documentation/filesystems/00-INDEX
index 8c624a1..ce5fd46 100644
--- a/Documentation/filesystems/00-INDEX
+++ b/Documentation/filesystems/00-INDEX
@@ -48,6 +48,8 @@ ext4.txt
- info, mount options and specifications for the Ext4 filesystem.
 files.txt
- info on file management in the Linux kernel.
+f2fs.txt
+   - info and mount options for the F2FS filesystem.
 fuse.txt
- info on the Filesystem in User SpacE including mount options.
 gfs2.txt
diff --git a/Documentation/filesystems/f2fs.txt 
b/Documentation/filesystems/f2fs.txt
new file mode 100644
index 000..cd3f846
--- /dev/null
+++ b/Documentation/filesystems/f2fs.txt
@@ -0,0 +1,314 @@
+
+WHAT IS Flash-Friendly File System (F2FS)?
+
+
+NAND flash memory-based storage devices, such as SSD, eMMC, and SD cards, have
+been widely used in systems ranging from mobile to server. Since they are
+known to have different characteristics from the conventional rotational disks,
+a file system, an upper layer to the storage device, should adapt to the changes
+from scratch.
+
+F2FS is a file system exploiting NAND flash memory-based storage devices, which
+is based on Log-structured File System (LFS). The design has been focused on
+addressing the fundamental issues in LFS, which are the snowball effect of the
+wandering tree and high cleaning overhead.
+
+Since a NAND flash memory-based storage device shows different characteristic
+according to its internal geometry or flash memory management scheme aka FTL,
+F2FS and its tools support various parameters not only for configuring on-disk
+layout, but also for selecting allocation and cleaning algorithms.
+
+The file system formatting tool, "mkfs.f2fs", is available from the following
+download page: http://sourceforge.net/projects/f2fs-tools/
+
+
+MOUNT OPTIONS
+
+
+background_gc_off      Turn off the cleaning operation, aka garbage collection,
+                       in background triggered when I/O subsystem is idle.
+disable_roll_forward   Disable the roll-forward recovery routine during SPOR.
+discard                Issue discard/TRIM commands when a segment is cleaned.
+no_heap                Disable heap-style segment allocation, which finds free
+                       segments for data from the beginning of main area, while
+                       for node from the end of main area.
+nouser_xattr           Disable Extended User Attributes. Note: xattr is enabled
+                       by default if CONFIG_F2FS_FS_XATTR is selected.
+noacl                  Disable POSIX Access Control List. Note: acl is enabled
+                       by default if CONFIG_F2FS_FS_POSIX_ACL is selected.
+
+
+PROC ENTRIES
+
+
+/proc/fs/f2fs/ contains information about partitions mounted as f2fs. For each
+partition, a corresponding directory, named as its device name, is provided with
+the following proc entries.
+
+- f2fs_stat        major file system information managed by f2fs currently
+- f2fs_sit_stat    average SIT information about whole segments
+- f2fs_mem_stat    current memory footprint consumed by f2fs
+
+e.g., in /proc/fs/f2fs/sdb1/
+
+
+USAGE
+
+
+1. Download userland tools
+
+2. Insmod f2fs.ko module:
+ # insmod f2fs.ko
+
+3. Check the directory trying to mount
+ # mkdir /mnt/f2fs
+
+4. Format the block device, and then mount as f2fs
+ # mkfs.f2fs -l label /dev/block_device
+ # mount -t f2fs /dev/block_device /mnt/f2fs
+
+
+DESIGN
+
+
+On-disk Layout
+--
+
+F2FS divides the whole volume into a number of segments, each of which is 2MB in
+size by default. A section is composed of consecutive segments, and a zone
+consists of a set of sections.
+
+F2FS maintains logically six log areas. Except SB, all the log areas are 
managed
+in a unit of multiple 

[PATCH 01/16] f2fs: add document

2012-10-05 Thread 김재극
This adds a document describing the mount options, proc entries, usage, and
design of Flash-Friendly File System, namely F2FS.

Signed-off-by: Jaegeuk Kim jaegeuk@samsung.com
---
 Documentation/filesystems/00-INDEX |2 +
 Documentation/filesystems/f2fs.txt |  314 
 2 files changed, 316 insertions(+)
 create mode 100644 Documentation/filesystems/f2fs.txt

diff --git a/Documentation/filesystems/00-INDEX 
b/Documentation/filesystems/00-INDEX
index 8c624a1..ce5fd46 100644
--- a/Documentation/filesystems/00-INDEX
+++ b/Documentation/filesystems/00-INDEX
@@ -48,6 +48,8 @@ ext4.txt
- info, mount options and specifications for the Ext4 filesystem.
 files.txt
- info on file management in the Linux kernel.
+f2fs.txt
+   - info and mount options for the F2FS filesystem.
 fuse.txt
- info on the Filesystem in User SpacE including mount options.
 gfs2.txt
diff --git a/Documentation/filesystems/f2fs.txt 
b/Documentation/filesystems/f2fs.txt
new file mode 100644
index 000..cd3f846
--- /dev/null
+++ b/Documentation/filesystems/f2fs.txt
@@ -0,0 +1,314 @@
+
+WHAT IS Flash-Friendly File System (F2FS)?
+
+
+NAND flash memory-based storage devices, such as SSD, eMMC, and SD cards, have
+been widely used in systems ranging from mobile to server. Since they are
+known to have different characteristics from the conventional rotational disks,
+a file system, an upper layer to the storage device, should adapt to the changes
+from scratch.
+
+F2FS is a file system exploiting NAND flash memory-based storage devices, which
+is based on Log-structured File System (LFS). The design has been focused on
+addressing the fundamental issues in LFS, which are the snowball effect of the
+wandering tree and high cleaning overhead.
+
+Since a NAND flash memory-based storage device shows different characteristic
+according to its internal geometry or flash memory management scheme aka FTL,
+F2FS and its tools support various parameters not only for configuring on-disk
+layout, but also for selecting allocation and cleaning algorithms.
+
+The file system formatting tool, mkfs.f2fs, is available from the following
+download page: http://sourceforge.net/projects/f2fs-tools/
+
+
+MOUNT OPTIONS
+
+
+background_gc_off      Turn off the cleaning operation, aka garbage collection,
+                       in background triggered when I/O subsystem is idle.
+disable_roll_forward   Disable the roll-forward recovery routine during SPOR.
+discard                Issue discard/TRIM commands when a segment is cleaned.
+no_heap                Disable heap-style segment allocation, which finds free
+                       segments for data from the beginning of main area, while
+                       for node from the end of main area.
+nouser_xattr           Disable Extended User Attributes. Note: xattr is enabled
+                       by default if CONFIG_F2FS_FS_XATTR is selected.
+noacl                  Disable POSIX Access Control List. Note: acl is enabled
+                       by default if CONFIG_F2FS_FS_POSIX_ACL is selected.
+
+
+PROC ENTRIES
+
+
+/proc/fs/f2fs/ contains information about partitions mounted as f2fs. For each
+partition, a corresponding directory, named as its device name, is provided with
+the following proc entries.
+
+- f2fs_stat        major file system information managed by f2fs currently
+- f2fs_sit_stat    average SIT information about whole segments
+- f2fs_mem_stat    current memory footprint consumed by f2fs
+
+e.g., in /proc/fs/f2fs/sdb1/
+
+
+USAGE
+
+
+1. Download userland tools
+
+2. Insmod f2fs.ko module:
+ # insmod f2fs.ko
+
+3. Check the directory trying to mount
+ # mkdir /mnt/f2fs
+
+4. Format the block device, and then mount as f2fs
+ # mkfs.f2fs -l label /dev/block_device
+ # mount -t f2fs /dev/block_device /mnt/f2fs
+
+
+DESIGN
+
+
+On-disk Layout
+--
+
+F2FS divides the whole volume into a number of segments, each of which is 2MB in
+size by default. A section is composed of consecutive segments, and a zone
+consists of a set of sections.
+
+F2FS maintains logically six log areas. Except SB, all the log areas are 
managed
+in a 

[PATCH 02/16] f2fs: add on-disk layout

2012-10-05 Thread 김재극
This adds a header file describing the on-disk layout of f2fs.

Signed-off-by: Changman Lee cm224@samsung.com
Signed-off-by: Chul Lee chur@samsung.com
Signed-off-by: Jaegeuk Kim jaegeuk@samsung.com
---
 include/linux/f2fs_fs.h |  359 +++
 1 file changed, 359 insertions(+)
 create mode 100644 include/linux/f2fs_fs.h

diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
new file mode 100644
index 000..b17eeec
--- /dev/null
+++ b/include/linux/f2fs_fs.h
@@ -0,0 +1,359 @@
+/**
+ * include/linux/f2fs_fs.h
+ *
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ * http://www.samsung.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef _LINUX_F2FS_FS_H
+#define _LINUX_F2FS_FS_H
+
+#include linux/pagemap.h
+#include linux/types.h
+
+#define F2FS_SUPER_MAGIC   0xF2F52010
+#define F2FS_SUPER_OFFSET  1   /* start sector # for sb */
+#define F2FS_BLKSIZE   4096
+
+#define NULL_ADDR  0x0U
+#define NEW_ADDR   -1U
+
+/*
+ * Accessors for the root/node/meta inode numbers held in @sbi (a pointer).
+ * The argument is parenthesized so that any pointer expression can be passed.
+ */
+#define F2FS_ROOT_INO(sbi)	((sbi)->root_ino_num)
+#define F2FS_NODE_INO(sbi)	((sbi)->node_ino_num)
+#define F2FS_META_INO(sbi)	((sbi)->meta_ino_num)
+
+#define GFP_F2FS_MOVABLE   (__GFP_WAIT | __GFP_IO | __GFP_ZERO)
+
+/*
+ * For superblock
+ *
+ * On-disk superblock layout. Every multi-byte field is stored little-endian
+ * (__le16/__le32/__le64) and the structure is __packed, so there is no
+ * padding between members.
+ */
+struct f2fs_super_block {
+   __le32 magic;   /* Magic Number */
+   __le16 major_ver;   /* Major Version */
+   __le16 minor_ver;   /* Minor Version */
+   __le32 log_sectorsize;  /* log2 (Sector size in bytes) */
+   __le32 log_sectors_per_block;   /* log2 (Number of sectors per block) */
+   __le32 log_blocksize;   /* log2 (Block size in bytes) */
+   __le32 log_blocks_per_seg; /* log2 (Number of blocks per segment) */
+   __le32 log_segs_per_sec; /* log2 (Number of segments per section) */
+   __le32 secs_per_zone; /* Number of sections per zone */
+   __le32 checksum_offset; /* Checksum position in this super block */
+   __le64 block_count; /* Total number of blocks */
+   __le32 section_count;   /* Total number of sections */
+   __le32 segment_count;   /* Total number of segments */
+   __le32 segment_count_ckpt; /* Total number of segments
+                                 in Checkpoint area */
+   __le32 segment_count_sit; /* Total number of segments
+                                in Segment information table */
+   __le32 segment_count_nat; /* Total number of segments
+                                in Node address table */
+   /* Total number of segments in Segment summary area */
+   __le32 segment_count_ssa;
+   /* Total number of segments in Main area */
+   __le32 segment_count_main;
+   /* NOTE(review): not documented in this patch; meaning to be confirmed */
+   __le32 failure_safe_block_distance;
+   __le64 segment0_blkaddr;/* Start block address of Segment 0 */
+   __le64 start_segment_checkpoint; /* Start block address of ckpt */
+   __le64 sit_blkaddr; /* Start block address of SIT */
+   __le64 nat_blkaddr; /* Start block address of NAT */
+   __le64 ssa_blkaddr; /* Start block address of SSA */
+   __le64 main_blkaddr;/* Start block address of Main area */
+   __le32 root_ino;/* Root directory inode number */
+   __le32 node_ino;/* node inode number */
+   __le32 meta_ino;/* meta inode number */
+   __le32 volume_serial_number;/* VSN is optional field */
+   __le16 volume_name[8];  /* Volume Name. 8 unicode characters */
+} __packed;
+
+/*
+ * For checkpoint
+ */
+struct f2fs_checkpoint {
+   __le64 checkpoint_ver;  /* Checkpoint block version number */
+   __le64 user_block_count;/* Total number of blocks
+  in Main area excluding the number of
+  reserved blocks */
+   __le64 valid_block_count;   /* Total number of valid blocks
+  in Main area */
+   __le32 rsvd_segment_count;  /* Total number of reserved segments
+  (for garbage collection) */
+   __le32 overprov_segment_count;  /* Total number of overprovision
+  segments */
+   __le32 free_segment_count;  /* Total number of free segments
+  in Main area */
+   __le32 bad_segment_count;   /* Total number of bad segments
+  in Main area */
+   __le32 cur_node_segno[3];   /* Segment number of current node
+  segment */
+   __le16 cur_node_blkoff[3];  /* Current node block offset
+  in the node segment */
+   __le16 

[PATCH 05/16] f2fs: add checkpoint operations

2012-10-05 Thread 김재극
This adds functions required by the checkpoint operations.

Basically, f2fs adopts a roll-back model with checkpoint blocks written in the
CP area. The checkpoint procedure is as follows.

- write_checkpoint()
1. block_operations() freezes VFS calls.
2. submit cached bios.
3. flush_nat_entries() writes NAT pages updated by dirty NAT entries.
4. flush_sit_entries() writes SIT pages updated by dirty SIT entries.
5. do_checkpoint() writes,
  - checkpoint block (#0)
  - orphan inode blocks
  - summary blocks made by active logs
  - checkpoint block (copy of #0)
6. unblock_operations()

In order to provide an address space for meta pages, f2fs_sb_info has a special
inode, namely meta_inode. This patch also adds the address space operations for
meta_inode.

Signed-off-by: Chul Lee chur@samsung.com
Signed-off-by: Jaegeuk Kim jaegeuk@samsung.com
---
 fs/f2fs/checkpoint.c |  791 ++
 1 file changed, 791 insertions(+)
 create mode 100644 fs/f2fs/checkpoint.c

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
new file mode 100644
index 000..2186b82
--- /dev/null
+++ b/fs/f2fs/checkpoint.c
@@ -0,0 +1,791 @@
+/**
+ * fs/f2fs/checkpoint.c
+ *
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ * http://www.samsung.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include linux/fs.h
+#include linux/bio.h
+#include linux/mpage.h
+#include linux/writeback.h
+#include linux/blkdev.h
+#include linux/f2fs_fs.h
+#include linux/pagevec.h
+#include linux/swap.h
+
+#include f2fs.h
+#include node.h
+#include segment.h
+
+static struct kmem_cache *orphan_entry_slab;
+static struct kmem_cache *inode_entry_slab;
+
+/**
+ * We guarantee no failure on the returned page.
+ *
+ * Grab the page at @index from the meta inode's mapping, retrying (after
+ * cond_resched()) until grab_cache_page() succeeds. The page is returned
+ * after any writeback on it has finished and it has been marked up to date;
+ * no data is read from disk here.
+ */
+struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
+{
+	struct address_space *mapping = sbi->meta_inode->i_mapping;
+	struct page *page = NULL;
+repeat:
+	page = grab_cache_page(mapping, index);
+	if (!page) {
+		cond_resched();
+		goto repeat;
+	}
+
+	/* We wait writeback only inside grab_meta_page() */
+	wait_on_page_writeback(page);
+	SetPageUptodate(page);
+	return page;
+}
+
+/**
+ * We guarantee no failure on the returned page.
+ *
+ * Grab the page at @index from the meta inode's mapping and read it with
+ * f2fs_readpage(READ_SYNC). Both the page allocation and the read are
+ * retried until they succeed, so the caller never sees an error.
+ */
+struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
+{
+	struct address_space *mapping = sbi->meta_inode->i_mapping;
+	struct page *page;
+repeat:
+	page = grab_cache_page(mapping, index);
+	if (!page) {
+		cond_resched();
+		goto repeat;
+	}
+	if (f2fs_readpage(sbi, page, index, READ_SYNC)) {
+		/* Read failed: drop this page and start over */
+		f2fs_put_page(page, 1);
+		goto repeat;
+	}
+	mark_page_accessed(page);
+
+	/* We do not allow returning an erroneous page */
+	return page;
+}
+
+/*
+ * Write one meta page through write_meta_page().
+ *
+ * On error the page is counted in wbc->pages_skipped and re-dirtied. The
+ * F2FS_DIRTY_META page count is decremented in either case. The page is
+ * unlocked unless the result is AOP_WRITEPAGE_ACTIVATE. Returns the value
+ * from write_meta_page().
+ */
+static int f2fs_write_meta_page(struct page *page,
+				struct writeback_control *wbc)
+{
+	struct inode *inode = page->mapping->host;
+	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+	int err;
+
+	wait_on_page_writeback(page);
+
+	err = write_meta_page(sbi, page, wbc);
+	if (err) {
+		wbc->pages_skipped++;
+		set_page_dirty(page);
+	}
+
+	dec_page_count(sbi, F2FS_DIRTY_META);
+
+	/* In this case, we should not unlock this page */
+	if (err != AOP_WRITEPAGE_ACTIVATE)
+		unlock_page(page);
+	return err;
+}
+
+/*
+ * Write back dirty meta pages of @mapping.
+ *
+ * Nothing is done for kupdate-style writeback or when no meta page is
+ * dirty. Otherwise up to bio_get_nr_vecs(bdev) pages are synced under
+ * cp_mutex, and the number written is deducted from wbc->nr_to_write.
+ * Always returns 0.
+ */
+static int f2fs_write_meta_pages(struct address_space *mapping,
+				struct writeback_control *wbc)
+{
+	struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
+	struct block_device *bdev = sbi->sb->s_bdev;
+	long written;
+
+	if (wbc->for_kupdate)
+		return 0;
+
+	if (get_pages(sbi, F2FS_DIRTY_META) == 0)
+		return 0;
+
+	/* if mounting is failed, skip writing node pages */
+	mutex_lock(&sbi->cp_mutex);
+	written = sync_meta_pages(sbi, META, bio_get_nr_vecs(bdev));
+	mutex_unlock(&sbi->cp_mutex);
+	wbc->nr_to_write -= written;
+	return 0;
+}
+
+long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
+   long nr_to_write)
+{
+   struct address_space *mapping = sbi-meta_inode-i_mapping;
+   pgoff_t index = 0, end = LONG_MAX;
+   struct pagevec pvec;
+   long nwritten = 0;
+   struct writeback_control wbc = {
+   .for_reclaim = 0,
+   };
+
+   pagevec_init(pvec, 0);
+
+   while (index = end) {
+   int i, nr_pages;
+   nr_pages = pagevec_lookup_tag(pvec, mapping, index,
+   PAGECACHE_TAG_DIRTY,
+   min(end - index, 

[PATCH 09/16] f2fs: add address space operations for data

2012-10-05 Thread 김재극
This adds address space operations for data.

- F2FS supports readpages(), writepages(), and direct_IO().

- Because of out-of-place writes, f2fs_direct_IO() does not write data in place.

Signed-off-by: Jaegeuk Kim jaegeuk@samsung.com
---
 fs/f2fs/data.c |  700 
 1 file changed, 700 insertions(+)
 create mode 100644 fs/f2fs/data.c

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
new file mode 100644
index 000..97d656b
--- /dev/null
+++ b/fs/f2fs/data.c
@@ -0,0 +1,700 @@
+/**
+ * fs/f2fs/data.c
+ *
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ * http://www.samsung.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include linux/fs.h
+#include linux/f2fs_fs.h
+#include linux/buffer_head.h
+#include linux/mpage.h
+#include linux/writeback.h
+#include linux/backing-dev.h
+#include linux/blkdev.h
+#include linux/bio.h
+
+#include f2fs.h
+#include node.h
+#include segment.h
+
+/**
+ * Lock ordering for the change of data block address:
+ * ->data_page
+ *  ->node_page
+ *    update block addresses in the node page
+ *
+ * Store @new_addr (converted to little-endian) in slot dn->ofs_in_node of
+ * the block address array of dn->node_page, then mark that page dirty.
+ * Waits for writeback of the node page before modifying it.
+ */
+static void __set_data_blkaddr(struct dnode_of_data *dn, block_t new_addr)
+{
+	struct f2fs_node *rn;
+	__le32 *addr_array;
+	struct page *node_page = dn->node_page;
+	unsigned int ofs_in_node = dn->ofs_in_node;
+
+	wait_on_page_writeback(node_page);
+
+	rn = (struct f2fs_node *)page_address(node_page);
+
+	/* Get physical address of data block */
+	addr_array = blkaddr_in_node(rn);
+	addr_array[ofs_in_node] = cpu_to_le32(new_addr);
+	set_page_dirty(node_page);
+}
+
+/*
+ * Reserve one block for the slot described by @dn.
+ *
+ * Returns -EPERM when the inode has FI_NO_ALLOC set, -ENOSPC when
+ * inc_valid_block_count() refuses the block, and 0 on success. On success
+ * the slot in the node page and dn->data_blkaddr are both set to NEW_ADDR.
+ */
+int reserve_new_block(struct dnode_of_data *dn)
+{
+	struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
+
+	if (is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))
+		return -EPERM;
+	if (!inc_valid_block_count(sbi, dn->inode, 1))
+		return -ENOSPC;
+
+	__set_data_blkaddr(dn, NEW_ADDR);
+	dn->data_blkaddr = NEW_ADDR;
+	sync_inode_page(dn);
+	return 0;
+}
+
+/*
+ * Look up page offset @pgofs in the extent cached in the inode.
+ *
+ * Returns 1 when @pgofs lies inside the cached extent; @bh_result is then
+ * mapped to the matching block and b_size covers the rest of the extent,
+ * clamped to UINT_MAX. Returns 0 when the extent is empty or does not
+ * contain @pgofs. The extent is read under ext_lock (read side).
+ */
+static int check_extent_cache(struct inode *inode, pgoff_t pgofs,
+				struct buffer_head *bh_result)
+{
+	struct f2fs_inode_info *fi = F2FS_I(inode);
+	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
+	pgoff_t start_fofs, end_fofs;
+	block_t start_blkaddr;
+
+	read_lock(&fi->ext.ext_lock);
+	if (fi->ext.len == 0) {
+		read_unlock(&fi->ext.ext_lock);
+		return 0;
+	}
+
+	sbi->total_hit_ext++;
+	start_fofs = fi->ext.fofs;
+	end_fofs = fi->ext.fofs + fi->ext.len - 1;
+	start_blkaddr = fi->ext.blk_addr;
+
+	if (pgofs >= start_fofs && pgofs <= end_fofs) {
+		unsigned int blkbits = inode->i_sb->s_blocksize_bits;
+		size_t count;
+
+		clear_buffer_new(bh_result);
+		map_bh(bh_result, inode->i_sb,
+				start_blkaddr + pgofs - start_fofs);
+		count = end_fofs - pgofs + 1;
+		/* Avoid overflowing b_size when converting blocks to bytes */
+		if (count < (UINT_MAX >> blkbits))
+			bh_result->b_size = (count << blkbits);
+		else
+			bh_result->b_size = UINT_MAX;
+
+		sbi->read_hit_ext++;
+		read_unlock(&fi->ext.ext_lock);
+		return 1;
+	}
+	read_unlock(&fi->ext.ext_lock);
+	return 0;
+}
+
+void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn)
+{
+   struct f2fs_inode_info *fi = F2FS_I(dn-inode);
+   pgoff_t fofs, start_fofs, end_fofs;
+   block_t start_blkaddr, end_blkaddr;
+
+   BUG_ON(blk_addr == NEW_ADDR);
+   fofs = start_bidx_of_node(ofs_of_node(dn-node_page)) + dn-ofs_in_node;
+
+   /* Update the page address in the parent node */
+   __set_data_blkaddr(dn, blk_addr);
+
+   write_lock(fi-ext.ext_lock);
+
+   start_fofs = fi-ext.fofs;
+   end_fofs = fi-ext.fofs + fi-ext.len - 1;
+   start_blkaddr = fi-ext.blk_addr;
+   end_blkaddr = fi-ext.blk_addr + fi-ext.len - 1;
+
+   /* Drop and initialize the matched extent */
+   if (fi-ext.len == 1  fofs == start_fofs)
+   fi-ext.len = 0;
+
+   /* Initial extent */
+   if (fi-ext.len == 0) {
+   if (blk_addr != NULL_ADDR) {
+   fi-ext.fofs = fofs;
+   fi-ext.blk_addr = blk_addr;
+   fi-ext.len = 1;
+   }
+   goto end_update;
+   }
+
+   /* Frone merge */
+   if (fofs == start_fofs - 1  blk_addr == start_blkaddr - 1) {
+   fi-ext.fofs--;
+   fi-ext.blk_addr--;
+   fi-ext.len++;
+   goto end_update;
+   }
+
+   /* Back merge */
+   if (fofs == end_fofs + 1  blk_addr == end_blkaddr + 1) {
+  

[PATCH 12/16] f2fs: add core directory operations

2012-10-05 Thread 김재극
This adds core functions to find, add, delete, and link dentries.

Signed-off-by: Jaegeuk Kim jaegeuk@samsung.com
---
 fs/f2fs/dir.c  |  657 
 fs/f2fs/hash.c |   98 +
 2 files changed, 755 insertions(+)
 create mode 100644 fs/f2fs/dir.c
 create mode 100644 fs/f2fs/hash.c

diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
new file mode 100644
index 000..edc0610
--- /dev/null
+++ b/fs/f2fs/dir.c
@@ -0,0 +1,657 @@
+/**
+ * fs/f2fs/dir.c
+ *
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ * http://www.samsung.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include linux/fs.h
+#include linux/f2fs_fs.h
+#include f2fs.h
+#include acl.h
+
+/* Number of page-sized blocks needed to hold the directory's i_size. */
+static unsigned long dir_blocks(struct inode *inode)
+{
+	return ((unsigned long long) (i_size_read(inode) + PAGE_CACHE_SIZE - 1))
+							>> PAGE_CACHE_SHIFT;
+}
+
+/*
+ * Number of hash buckets at @level: 2^level for levels below
+ * MAX_DIR_HASH_DEPTH / 2, and a fixed 2^(MAX_DIR_HASH_DEPTH / 2 - 1)
+ * from there on.
+ */
+static unsigned int dir_buckets(unsigned int level)
+{
+	if (level < MAX_DIR_HASH_DEPTH / 2)
+		return 1 << level;
+	else
+		return 1 << ((MAX_DIR_HASH_DEPTH / 2) - 1);
+}
+
+/*
+ * Number of blocks per bucket at @level: 2 for levels below
+ * MAX_DIR_HASH_DEPTH / 2, otherwise 4.
+ */
+static unsigned int bucket_blocks(unsigned int level)
+{
+	if (level < MAX_DIR_HASH_DEPTH / 2)
+		return 2;
+	else
+		return 4;
+}
+
+/* Maps an F2FS_FT_* file type to the corresponding DT_* value. */
+static unsigned char f2fs_filetype_table[F2FS_FT_MAX] = {
+   [F2FS_FT_UNKNOWN]   = DT_UNKNOWN,
+   [F2FS_FT_REG_FILE]  = DT_REG,
+   [F2FS_FT_DIR]   = DT_DIR,
+   [F2FS_FT_CHRDEV]= DT_CHR,
+   [F2FS_FT_BLKDEV]= DT_BLK,
+   [F2FS_FT_FIFO]  = DT_FIFO,
+   [F2FS_FT_SOCK]  = DT_SOCK,
+   [F2FS_FT_SYMLINK]   = DT_LNK,
+};
+
+/*
+ * Maps the file-type bits of an inode mode (S_IFMT, shifted right by
+ * S_SHIFT) to the corresponding F2FS_FT_* file type.
+ */
+#define S_SHIFT 12
+static unsigned char f2fs_type_by_mode[S_IFMT >> S_SHIFT] = {
+	[S_IFREG >> S_SHIFT]	= F2FS_FT_REG_FILE,
+	[S_IFDIR >> S_SHIFT]	= F2FS_FT_DIR,
+	[S_IFCHR >> S_SHIFT]	= F2FS_FT_CHRDEV,
+	[S_IFBLK >> S_SHIFT]	= F2FS_FT_BLKDEV,
+	[S_IFIFO >> S_SHIFT]	= F2FS_FT_FIFO,
+	[S_IFSOCK >> S_SHIFT]	= F2FS_FT_SOCK,
+	[S_IFLNK >> S_SHIFT]	= F2FS_FT_SYMLINK,
+};
+
+/* Set de->file_type from the file-type bits of the inode's mode. */
+static void set_de_type(struct f2fs_dir_entry *de, struct inode *inode)
+{
+	mode_t mode = inode->i_mode;
+	de->file_type = f2fs_type_by_mode[(mode & S_IFMT) >> S_SHIFT];
+}
+
+/*
+ * Index of the first block of bucket @idx at hash level @level: the sum
+ * of all blocks in the lower levels plus the blocks of the preceding
+ * buckets on this level.
+ */
+static unsigned long dir_block_index(unsigned int level, unsigned int idx)
+{
+	unsigned long i;
+	unsigned long bidx = 0;
+
+	for (i = 0; i < level; i++)
+		bidx += dir_buckets(i) * bucket_blocks(i);
+	bidx += idx * bucket_blocks(level);
+	return bidx;
+}
+
+/*
+ * Cheap pre-check before comparing names: true only when both the name
+ * length and the hash stored in @de equal @namelen and @namehash.
+ */
+static bool early_match_name(const char *name, int namelen,
+			f2fs_hash_t namehash, struct f2fs_dir_entry *de)
+{
+	if (le16_to_cpu(de->name_len) != namelen)
+		return false;
+
+	if (le32_to_cpu(de->hash_code) != namehash)
+		return false;
+
+	return true;
+}
+
+/*
+ * Search one dentry block for @name.
+ *
+ * On a match, returns the entry and sets *res_page to @dentry_page; the
+ * page is left kmap()ed in that case. Otherwise the page is kunmap()ed and
+ * NULL is returned. While scanning, *max_slots is raised to the largest
+ * run of unused slots seen between occupied entries.
+ */
+static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
+			const char *name, int namelen, int *max_slots,
+			f2fs_hash_t namehash, struct page **res_page)
+{
+	struct f2fs_dir_entry *de;
+	unsigned long bit_pos, end_pos, next_pos;
+	struct f2fs_dentry_block *dentry_blk = kmap(dentry_page);
+	int slots;
+
+	bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap,
+					NR_DENTRY_IN_BLOCK, 0);
+	while (bit_pos < NR_DENTRY_IN_BLOCK) {
+		de = &dentry_blk->dentry[bit_pos];
+		/* Number of consecutive slots this entry's name occupies */
+		slots = (le16_to_cpu(de->name_len) + F2FS_NAME_LEN - 1) /
+							F2FS_NAME_LEN;
+
+		if (early_match_name(name, namelen, namehash, de)) {
+			if (!memcmp(dentry_blk->filename[bit_pos],
+							name, namelen)) {
+				*res_page = dentry_page;
+				goto found;
+			}
+		}
+		next_pos = bit_pos + slots;
+		bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap,
+				NR_DENTRY_IN_BLOCK, next_pos);
+		if (bit_pos >= NR_DENTRY_IN_BLOCK)
+			end_pos = NR_DENTRY_IN_BLOCK;
+		else
+			end_pos = bit_pos;
+		/* Track the widest free gap after this entry */
+		if (*max_slots < end_pos - next_pos)
+			*max_slots = end_pos - next_pos;
+	}
+
+	de = NULL;
+	kunmap(dentry_page);
+found:
+	return de;
+}
+
+static struct f2fs_dir_entry *find_in_level(struct inode *dir,
+   unsigned int level, const char *name, int namelen,
+   f2fs_hash_t namehash, struct page **res_page)
+{
+   int s = (namelen 

[PATCH 13/16] f2fs: add xattr and acl functionalities

2012-10-05 Thread 김재극
This implements xattr and acl functionalities.

- F2FS uses a node page to contain extended attributes.

Signed-off-by: Changman Lee cm224@samsung.com
Signed-off-by: Jaegeuk Kim jaegeuk@samsung.com
---
 fs/f2fs/acl.c   |  402 +++
 fs/f2fs/acl.h   |   57 
 fs/f2fs/xattr.c |  387 
 fs/f2fs/xattr.h |  142 
 4 files changed, 988 insertions(+)
 create mode 100644 fs/f2fs/acl.c
 create mode 100644 fs/f2fs/acl.h
 create mode 100644 fs/f2fs/xattr.c
 create mode 100644 fs/f2fs/xattr.h

diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
new file mode 100644
index 000..f3682a5
--- /dev/null
+++ b/fs/f2fs/acl.c
@@ -0,0 +1,402 @@
+/**
+ * fs/f2fs/acl.c
+ *
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ * http://www.samsung.com/
+ *
+ * Portions of this code from linux/fs/ext2/acl.c
+ *
+ * Copyright (C) 2001-2003 Andreas Gruenbacher, agr...@suse.de
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include linux/f2fs_fs.h
+#include f2fs.h
+#include xattr.h
+#include acl.h
+
+/*
+ * Mode of inode @i: i_acl_mode from the f2fs inode info while FI_ACL_MODE
+ * is set, otherwise the regular i_mode.
+ */
+#define get_inode_mode(i)	((is_inode_flag_set(F2FS_I(i), FI_ACL_MODE)) ? \
+				(F2FS_I(i)->i_acl_mode) : ((i)->i_mode))
+
+/*
+ * On-disk size in bytes of an ACL with @count entries: a header, up to
+ * four short entries, and full-size entries for the remainder.
+ */
+static inline size_t f2fs_acl_size(int count)
+{
+	if (count <= 4) {
+		return sizeof(struct f2fs_acl_header) +
+			count * sizeof(struct f2fs_acl_entry_short);
+	} else {
+		return sizeof(struct f2fs_acl_header) +
+			4 * sizeof(struct f2fs_acl_entry_short) +
+			(count - 4) * sizeof(struct f2fs_acl_entry);
+	}
+}
+
+/*
+ * Inverse of f2fs_acl_size(): number of entries in an on-disk ACL of
+ * @size bytes, or -1 when @size is not a valid encoding length.
+ */
+static inline int f2fs_acl_count(size_t size)
+{
+	ssize_t s;
+	size -= sizeof(struct f2fs_acl_header);
+	s = size - 4 * sizeof(struct f2fs_acl_entry_short);
+	if (s < 0) {
+		/* Fewer than four entries: all of them are short entries */
+		if (size % sizeof(struct f2fs_acl_entry_short))
+			return -1;
+		return size / sizeof(struct f2fs_acl_entry_short);
+	} else {
+		if (s % sizeof(struct f2fs_acl_entry))
+			return -1;
+		return s / sizeof(struct f2fs_acl_entry) + 4;
+	}
+}
+
+/*
+ * Decode the on-disk ACL in @value (@size bytes) into a posix_acl.
+ *
+ * Returns NULL for an ACL with zero entries, ERR_PTR(-EINVAL) for a wrong
+ * version, an invalid size, an unknown tag or entries that do not end
+ * exactly at @value + @size, and ERR_PTR(-ENOMEM) when allocation fails.
+ */
+static struct posix_acl *f2fs_acl_from_disk(const char *value, size_t size)
+{
+	int i, count;
+	struct posix_acl *acl;
+	struct f2fs_acl_header *hdr = (struct f2fs_acl_header *)value;
+	struct f2fs_acl_entry *entry = (struct f2fs_acl_entry *)(hdr + 1);
+	const char *end = value + size;
+
+	if (hdr->a_version != cpu_to_le32(F2FS_ACL_VERSION))
+		return ERR_PTR(-EINVAL);
+
+	count = f2fs_acl_count(size);
+	if (count < 0)
+		return ERR_PTR(-EINVAL);
+	if (count == 0)
+		return NULL;
+
+	acl = posix_acl_alloc(count, GFP_KERNEL);
+	if (!acl)
+		return ERR_PTR(-ENOMEM);
+
+	for (i = 0; i < count; i++) {
+
+		if ((char *)entry > end)
+			goto fail;
+
+		acl->a_entries[i].e_tag  = le16_to_cpu(entry->e_tag);
+		acl->a_entries[i].e_perm = le16_to_cpu(entry->e_perm);
+
+		switch (acl->a_entries[i].e_tag) {
+		case ACL_USER:
+		case ACL_GROUP:
+			/* Full-size entry: carries an id */
+			acl->a_entries[i].e_id = le32_to_cpu(entry->e_id);
+			entry = (struct f2fs_acl_entry *)((char *)entry +
+					sizeof(struct f2fs_acl_entry));
+			break;
+		case ACL_USER_OBJ:
+		case ACL_GROUP_OBJ:
+		case ACL_MASK:
+		case ACL_OTHER:
+			/* Short entry: no id stored on disk */
+			acl->a_entries[i].e_id = ACL_UNDEFINED_ID;
+			entry = (struct f2fs_acl_entry *)((char *)entry +
+					sizeof(struct f2fs_acl_entry_short));
+			break;
+		default:
+			goto fail;
+		}
+	}
+	if ((char *)entry != end)
+		goto fail;
+	return acl;
+fail:
+	posix_acl_release(acl);
+	return ERR_PTR(-EINVAL);
+}
+
+static void *f2fs_acl_to_disk(const struct posix_acl *acl, size_t *size)
+{
+   struct f2fs_acl_header *f2fs_acl;
+   struct f2fs_acl_entry *entry;
+   int i;
+
+   f2fs_acl = kmalloc(sizeof(struct f2fs_acl_header) + acl-a_count *
+   sizeof(struct f2fs_acl_entry), GFP_KERNEL);
+   if (!f2fs_acl)
+   return ERR_PTR(-ENOMEM);
+
+   f2fs_acl-a_version = cpu_to_le32(F2FS_ACL_VERSION);
+   entry = (struct f2fs_acl_entry *)(f2fs_acl + 1);
+
+   for (i = 0; i  acl-a_count; i++) {
+
+   entry-e_tag  = cpu_to_le16(acl-a_entries[i].e_tag);
+   entry-e_perm 

[PATCH 14/16] f2fs: add garbage collection functions

2012-10-05 Thread 김재극
This adds on-demand and background cleaning functions.

- The basic background cleaning policy is trying to do cleaning jobs as much as
  possible whenever the system is idle. Once the background cleaning is done,
  the cleaner sleeps an amount of time not to interfere with VFS calls. The time
  is dynamically adjusted according to the status of whole segments, which is
  decreased when the following conditions are satisfied.

  . GC is not conducted currently, and
  . IO subsystem is idle by checking the number of requests in bdev's request
     list, and
  . There are enough dirty segments.

  Otherwise, the time is increased incrementally up to the maximum time.
  Note that, min and max times are 10 secs and 30 secs by default.

- F2FS adopts a default victim selection policy where background cleaning uses
  a cost-benefit algorithm, while on-demand cleaning uses a greedy algorithm.

- The method of moving data during the cleaning is slightly different between
  background and on-demand cleaning schemes. In the case of background cleaning,
  F2FS loads the data, and marks them as dirty. Then, F2FS expects that the data
  will be moved by flusher or VM. In the case of on-demand cleaning, F2FS should
  move the data right away.

- In order to identify valid blocks in a victim segment, F2FS scans the bitmap
  of the segment managed as an SIT entry.

Signed-off-by: Jaegeuk Kim jaegeuk@samsung.com
---
 fs/f2fs/gc.c | 1140 ++
 fs/f2fs/gc.h |  203 +++
 2 files changed, 1343 insertions(+)
 create mode 100644 fs/f2fs/gc.c
 create mode 100644 fs/f2fs/gc.h

diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
new file mode 100644
index 000..05f173c
--- /dev/null
+++ b/fs/f2fs/gc.c
@@ -0,0 +1,1140 @@
+/**
+ * fs/f2fs/gc.c
+ *
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ * http://www.samsung.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/backing-dev.h>
+#include <linux/proc_fs.h>
+#include <linux/init.h>
+#include <linux/f2fs_fs.h>
+#include <linux/kthread.h>
+#include <linux/delay.h>
+#include <linux/freezer.h>
+#include <linux/blkdev.h>
+
+#include "f2fs.h"
+#include "node.h"
+#include "segment.h"
+#include "gc.h"
+
+static LIST_HEAD(f2fs_stat_list);
+static struct kmem_cache *winode_slab;
+
+/*
+ * Main loop of the background GC kthread.
+ *
+ * @data: the struct f2fs_sb_info this thread works on.
+ *
+ * Each round sleeps for wait_ms milliseconds (the wait ends early once
+ * kthread_should_stop() becomes true), calls f2fs_balance_fs() and then,
+ * provided the BG_GC option is set, gc_mutex can be taken and is_idle()
+ * holds, runs f2fs_gc(sbi, 1).  wait_ms starts at GC_THREAD_MIN_SLEEP_TIME
+ * and is tuned with increase_sleep_time()/decrease_sleep_time() according
+ * to is_idle() and has_enough_invalid_blocks().
+ *
+ * Returns 0 once the thread has been asked to stop.
+ */
+static int gc_thread_func(void *data)
+{
+	struct f2fs_sb_info *sbi = data;
+	wait_queue_head_t *wq = &sbi->gc_thread->gc_wait_queue_head;
+	long wait_ms;
+
+	wait_ms = GC_THREAD_MIN_SLEEP_TIME;
+
+	do {
+		/*
+		 * When try_to_freeze() reports true, skip the sleep and go
+		 * straight to the loop condition to re-check for a stop
+		 * request.
+		 */
+		if (try_to_freeze())
+			continue;
+		else
+			wait_event_interruptible_timeout(*wq,
+						kthread_should_stop(),
+						msecs_to_jiffies(wait_ms));
+		if (kthread_should_stop())
+			break;
+
+		f2fs_balance_fs(sbi);
+
+		if (!test_opt(sbi, BG_GC))
+			continue;
+
+		/*
+		 * [GC triggering condition]
+		 * 0. GC is not conducted currently.
+		 * 1. There are enough dirty segments.
+		 * 2. IO subsystem is idle by checking the # of writeback pages.
+		 * 3. IO subsystem is idle by checking the # of requests in
+		 *    bdev's request list.
+		 *
+		 * Note) We have to avoid triggering GCs too frequently.
+		 * Because it is possible that some segments can be
+		 * invalidated soon after by user update or deletion.
+		 * So, I'd like to wait some time to collect dirty segments.
+		 */
+		if (!mutex_trylock(&sbi->gc_mutex))
+			continue;
+
+		if (!is_idle(sbi)) {
+			wait_ms = increase_sleep_time(wait_ms);
+			mutex_unlock(&sbi->gc_mutex);
+			continue;
+		}
+
+		if (has_enough_invalid_blocks(sbi))
+			wait_ms = decrease_sleep_time(wait_ms);
+		else
+			wait_ms = increase_sleep_time(wait_ms);
+
+		sbi->bg_gc++;
+
+		/*
+		 * NOTE(review): gc_mutex is still held when f2fs_gc() is
+		 * called and is not released in this function afterwards;
+		 * presumably f2fs_gc() unlocks it -- confirm.
+		 */
+		if (f2fs_gc(sbi, 1) == GC_NONE)
+			wait_ms = GC_THREAD_NOGC_SLEEP_TIME;
+		else if (wait_ms == GC_THREAD_NOGC_SLEEP_TIME)
+			wait_ms = GC_THREAD_MAX_SLEEP_TIME;
+
+	} while (!kthread_should_stop());
+	return 0;
+}
+
+int start_gc_thread(struct f2fs_sb_info *sbi)
+{
+   struct f2fs_gc_kthread *gc_th = NULL;
+
+   gc_th = kmalloc(sizeof(struct f2fs_gc_kthread), GFP_KERNEL);
+   if (!gc_th)
+   return -ENOMEM;
+
+   

[PATCH 15/16] f2fs: add recovery routines for roll-forward

2012-10-05 Thread 김재극
This adds roll-forward routines to recover fsynced data.

- F2FS basically uses a roll-back model with checkpointing.

- In order to implement fsync(), there are two approaches as follows.

1. A roll-back model with checkpointing at every fsync()
 : This is a naive method, but suffers from very low performance.

2. A roll-forward model
 : F2FS adopts this model, in which all fsynced data written after the last
   checkpoint must be recovered. In order to identify that data, F2FS keeps
   an "fsync" mark in direct node blocks. In addition, F2FS records the
   location of the next node block in each direct node block, so that the
   chain of node blocks can be reconstructed during recovery.

- In order to enhance performance, F2FS also keeps a "dentry" mark in direct
  node blocks. If this is set during recovery, F2FS replays adding the dentry.

Signed-off-by: Jaegeuk Kim <jaegeuk@samsung.com>
---
 fs/f2fs/recovery.c |  372 
 1 file changed, 372 insertions(+)
 create mode 100644 fs/f2fs/recovery.c

diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
new file mode 100644
index 000..95455b1
--- /dev/null
+++ b/fs/f2fs/recovery.c
@@ -0,0 +1,372 @@
+/**
+ * fs/f2fs/recovery.c
+ *
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ * http://www.samsung.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/fs.h>
+#include <linux/f2fs_fs.h>
+#include "f2fs.h"
+#include "node.h"
+#include "segment.h"
+
+static struct kmem_cache *fsync_entry_slab;
+
+/*
+ * Tell whether the block counters still leave room for roll-forward.
+ *
+ * Returns false once last_valid_block_count + alloc_valid_block_count
+ * exceeds user_block_count, true otherwise.
+ */
+bool space_for_roll_forward(struct f2fs_sb_info *sbi)
+{
+	if (sbi->last_valid_block_count + sbi->alloc_valid_block_count
+			> sbi->user_block_count)
+		return false;
+	return true;
+}
+
+/*
+ * Look up the fsync_inode_entry for inode number @ino.
+ *
+ * @head: list of struct fsync_inode_entry, linked through their ->list.
+ * @ino:  inode number to search for.
+ *
+ * Returns the first entry whose inode->i_ino equals @ino, or NULL when the
+ * list holds no such entry.
+ */
+static struct fsync_inode_entry *get_fsync_inode(struct list_head *head,
+								nid_t ino)
+{
+	struct list_head *this;
+	struct fsync_inode_entry *entry;
+
+	list_for_each(this, head) {
+		entry = list_entry(this, struct fsync_inode_entry, list);
+		if (entry->inode->i_ino == ino)
+			return entry;
+	}
+	return NULL;
+}
+
+/*
+ * Re-create the directory entry of @inode if its node page asks for it.
+ *
+ * @ipage: node page holding the on-disk inode (read as struct f2fs_node).
+ * @inode: in-memory inode being recovered.
+ *
+ * Nothing is done unless footer.dentry is set in the node.  Otherwise the
+ * parent directory is fetched through i_pino and, if it has no entry with
+ * the name recorded in the on-disk inode, one is added with
+ * f2fs_add_link().
+ *
+ * Returns 0, or -EINVAL when the parent directory inode cannot be obtained.
+ */
+static int recover_dentry(struct page *ipage, struct inode *inode)
+{
+	struct f2fs_node *raw_node = (struct f2fs_node *)kmap(ipage);
+	struct f2fs_inode *raw_inode = &(raw_node->i);
+	struct dentry dent, parent;
+	struct f2fs_dir_entry *de;
+	struct page *page;
+	struct inode *dir;
+	int err = 0;
+
+	if (!raw_node->footer.dentry)
+		goto out;
+
+	dir = f2fs_iget(inode->i_sb, le32_to_cpu(raw_inode->i_pino));
+	if (IS_ERR(dir)) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	/*
+	 * Both dentries live on the stack; only the fields assigned here
+	 * are initialised.
+	 */
+	parent.d_inode = dir;
+	dent.d_parent = &parent;
+	dent.d_name.len = le32_to_cpu(raw_inode->i_namelen);
+	dent.d_name.name = raw_inode->i_name;
+
+	de = f2fs_find_entry(dir, &dent.d_name, &page);
+	if (de) {
+		/*
+		 * The entry is already there; just drop the page handed
+		 * back by the lookup (presumably kmapped and referenced by
+		 * f2fs_find_entry() -- confirm).
+		 */
+		kunmap(page);
+		f2fs_put_page(page, 0);
+	} else {
+		/* NOTE(review): the result of f2fs_add_link() is ignored. */
+		f2fs_add_link(&dent, inode);
+	}
+	iput(dir);
+out:
+	kunmap(ipage);
+	return err;
+}
+
+/*
+ * Refresh an in-memory inode from the on-disk inode found in a node page.
+ *
+ * @inode:     in-memory inode to update.
+ * @node_page: page whose contents are read as a struct f2fs_node.
+ *
+ * Copies i_mode, i_size and the seconds part of the access, change and
+ * modification times, then hands over to recover_dentry().
+ *
+ * Returns whatever recover_dentry() returns.
+ */
+static int recover_inode(struct inode *inode, struct page *node_page)
+{
+	void *kaddr = page_address(node_page);
+	struct f2fs_node *raw_node = (struct f2fs_node *)kaddr;
+	struct f2fs_inode *raw_inode = &(raw_node->i);
+
+	inode->i_mode = le32_to_cpu(raw_inode->i_mode);
+	i_size_write(inode, le64_to_cpu(raw_inode->i_size));
+	inode->i_atime.tv_sec = le32_to_cpu(raw_inode->i_atime);
+	inode->i_ctime.tv_sec = le32_to_cpu(raw_inode->i_ctime);
+	inode->i_mtime.tv_sec = le32_to_cpu(raw_inode->i_mtime);
+
+	return recover_dentry(node_page, inode);
+}
+
+static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
+{
+   unsigned long long cp_ver = le64_to_cpu(sbi-ckpt-checkpoint_ver);
+   struct curseg_info *curseg;
+   struct page *page;
+   block_t blkaddr;
+   int err = 0;
+
+   /* get node pages in the current segment */
+   curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
+   blkaddr = START_BLOCK(sbi, curseg-segno) + curseg-next_blkoff;
+
+   /* read node page */
+   page = alloc_page(GFP_NOFS | __GFP_ZERO);
+   if (IS_ERR(page))
+   return PTR_ERR(page);
+   lock_page(page);
+
+   while (1) {
+   struct fsync_inode_entry *entry;
+
+   if (f2fs_readpage(sbi, page, blkaddr, READ_SYNC))
+   goto out;
+
+   if (cp_ver != cpver_of_node(page))
+   goto out;
+
+   if (!is_fsync_dnode(page))
+   goto next;
+
+   entry = 

[PATCH 16/16] f2fs: update Kconfig and Makefile

2012-10-05 Thread 김재극
This adds Makefile and Kconfig for f2fs, and updates Makefile and Kconfig files
in the fs directory.

Signed-off-by: Jaegeuk Kim <jaegeuk@samsung.com>
---
 fs/Kconfig   |1 +
 fs/Makefile  |1 +
 fs/f2fs/Kconfig  |   55 ++
 fs/f2fs/Makefile |6 ++
 4 files changed, 63 insertions(+)
 create mode 100644 fs/f2fs/Kconfig
 create mode 100644 fs/f2fs/Makefile

diff --git a/fs/Kconfig b/fs/Kconfig
index f95ae3a..e352b37 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -220,6 +220,7 @@ source fs/pstore/Kconfig
 source "fs/sysv/Kconfig"
 source "fs/ufs/Kconfig"
 source "fs/exofs/Kconfig"
+source "fs/f2fs/Kconfig"
 
 endif # MISC_FILESYSTEMS
 
diff --git a/fs/Makefile b/fs/Makefile
index 2fb9779..e09edb5 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -122,6 +122,7 @@ obj-$(CONFIG_DEBUG_FS)  += debugfs/
 obj-$(CONFIG_OCFS2_FS) += ocfs2/
 obj-$(CONFIG_BTRFS_FS) += btrfs/
 obj-$(CONFIG_GFS2_FS)   += gfs2/
+obj-$(CONFIG_F2FS_FS)  += f2fs/
 obj-y  += exofs/ # Multiple modules
 obj-$(CONFIG_CEPH_FS)  += ceph/
 obj-$(CONFIG_PSTORE)   += pstore/
diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig
new file mode 100644
index 000..8821c6a
--- /dev/null
+++ b/fs/f2fs/Kconfig
@@ -0,0 +1,55 @@
+config F2FS_FS
+   tristate "F2FS filesystem support (EXPERIMENTAL)"
+   depends on EXPERIMENTAL
+   help
+ F2FS is based on Log-structured File System (LFS), which supports
+ versatile flash-friendly features. The design has been focused on
+ addressing the fundamental issues in LFS, which are snowball effect
+ of wandering tree and high cleaning overhead.
+
+ Since flash-based storages show different characteristics according to
+ the internal geometry or flash memory management schemes aka FTL, F2FS
+ and tools support various parameters not only for configuring on-disk
+ layout, but also for selecting allocation and cleaning algorithms.
+
+ If unsure, say N.
+
+config F2FS_STAT_FS
+   bool "F2FS Status Information"
+   depends on F2FS_FS
+   default y
+   help
+ /proc/fs/f2fs/ contains information about partitions mounted as f2fs.
+ For each partition, a corresponding directory, named as its device
+ name, is provided with the following proc entries.
+
+ f2fs_stat major file system information managed by f2fs currently
+ f2fs_sit_stat average SIT information about whole segments
+ f2fs_mem_stat current memory footprint consumed by f2fs
+
+ e.g., in /proc/fs/f2fs/sdb1/
+
+config F2FS_FS_XATTR
+   bool "F2FS extended attributes"
+   depends on F2FS_FS
+   default y
+   help
+ Extended attributes are name:value pairs associated with inodes by
+ the kernel or by users (see the attr(5) manual page, or visit
+ http://acl.bestbits.at/ for details).
+
+ If unsure, say N.
+
+config F2FS_FS_POSIX_ACL
+   bool "F2FS Access Control Lists"
+   depends on F2FS_FS_XATTR
+   select FS_POSIX_ACL
+   default y
+   help
+ Posix Access Control Lists (ACLs) support permissions for users and
+ groups beyond the owner/group/world scheme.
+
+ To learn more about Access Control Lists, visit the POSIX ACLs for
+ Linux website http://acl.bestbits.at/.
+
+ If you don't know what Access Control Lists are, say N
diff --git a/fs/f2fs/Makefile b/fs/f2fs/Makefile
new file mode 100644
index 000..72fcf9a
--- /dev/null
+++ b/fs/f2fs/Makefile
@@ -0,0 +1,6 @@
+obj-$(CONFIG_F2FS_FS) += f2fs.o
+
+f2fs-y := dir.o file.o inode.o namei.o hash.o super.o
+f2fs-y += checkpoint.o gc.o data.o node.o segment.o recovery.o
+f2fs-$(CONFIG_F2FS_FS_XATTR) += xattr.o
+f2fs-$(CONFIG_F2FS_FS_POSIX_ACL) += acl.o
-- 
1.7.9.5




---
Jaegeuk Kim
Samsung



--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/