[PATCH] sysctl: fix improper indication of integer sysctl parameter

2012-07-31 Thread Mitsuo Hayasaka
Hi,

This patch fixes the improper type casting of integer
sysctl parameters.

When integer sysctl parameters are read, they are always treated
as signed integers and are cast to the unsigned long type
in the current kernel. If we set a value equal to
(the maximum signed integer value + 1), which is a power
of 2 and just causes an overflow, an unexpected value is
output.

This bug can be reproduced as follows.

Example)
 # echo $((1<<31)) > /proc/sys/fs/lease-break-time
 # cat /proc/sys/fs/lease-break-time
 -18446744071562067968
   (It should be -2147483648.)
or
 # echo XXX > /proc/sys/fs/pipe-max-size
   (where XXX is an arbitrary number between (1<<30 + 1) and
(1<<31 - 1) since the pipe-max-size is rounded up to a
power of 2 in kernel.)
 # cat /proc/sys/fs/pipe-max-size
 -18446744071562067968
   (It should be -2147483648.)

To fix this problem, this patch casts the negative integer
into unsigned int type, instead of unsigned long type.

Signed-off-by: Mitsuo Hayasaka 
Cc: "Eric W. Biederman" 
Cc: Andrew Morton 
Cc: David Howells 
Cc: James Morris 
---

 kernel/sysctl.c |   10 +-
 1 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 97186b9..e282b5b 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1789,7 +1789,7 @@ static int do_proc_dointvec_conv(bool *negp, unsigned 
long *lvalp,
int val = *valp;
if (val < 0) {
*negp = true;
-   *lvalp = (unsigned long)-val;
+   *lvalp = (unsigned int)-val;
} else {
*negp = false;
*lvalp = (unsigned long)val;
@@ -1982,7 +1982,7 @@ static int do_proc_dointvec_minmax_conv(bool *negp, 
unsigned long *lvalp,
int val = *valp;
if (val < 0) {
*negp = true;
-   *lvalp = (unsigned long)-val;
+   *lvalp = (unsigned int)-val;
} else {
*negp = false;
*lvalp = (unsigned long)val;
@@ -2197,7 +2197,7 @@ static int do_proc_dointvec_jiffies_conv(bool *negp, 
unsigned long *lvalp,
unsigned long lval;
if (val < 0) {
*negp = true;
-   lval = (unsigned long)-val;
+   lval = (unsigned int)-val;
} else {
*negp = false;
lval = (unsigned long)val;
@@ -2220,7 +2220,7 @@ static int do_proc_dointvec_userhz_jiffies_conv(bool 
*negp, unsigned long *lvalp
unsigned long lval;
if (val < 0) {
*negp = true;
-   lval = (unsigned long)-val;
+   lval = (unsigned int)-val;
} else {
*negp = false;
lval = (unsigned long)val;
@@ -2241,7 +2241,7 @@ static int do_proc_dointvec_ms_jiffies_conv(bool *negp, 
unsigned long *lvalp,
unsigned long lval;
if (val < 0) {
*negp = true;
-   lval = (unsigned long)-val;
+   lval = (unsigned int)-val;
} else {
*negp = false;
lval = (unsigned long)val;

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] sysctl: fix improper indication of integer sysctl parameter

2012-07-31 Thread Mitsuo Hayasaka
Hi,

This patch fixes the improper type casting of integer
sysctl parameters.

When integer sysctl parameters are read, they are always treated
as signed integers and are cast to the unsigned long type
in the current kernel. If we set a value equal to
(the maximum signed integer value + 1), which is a power
of 2 and just causes an overflow, an unexpected value is
output.

This bug can be reproduced as follows.

Example)
 # echo $((1<<31)) > /proc/sys/fs/lease-break-time
 # cat /proc/sys/fs/lease-break-time
 -18446744071562067968
   (It should be -2147483648.)
or
 # echo XXX > /proc/sys/fs/pipe-max-size
   (where XXX is an arbitrary number between (1<<30 + 1) and
(1<<31 - 1) since the pipe-max-size is rounded up to a
power of 2 in kernel.)
 # cat /proc/sys/fs/pipe-max-size
 -18446744071562067968
   (It should be -2147483648.)

To fix this problem, this patch casts the negative integer
into unsigned int type, instead of unsigned long type.

Signed-off-by: Mitsuo Hayasaka mitsuo.hayasaka...@hitachi.com
Cc: Eric W. Biederman ebied...@xmission.com
Cc: Andrew Morton a...@linux-foundation.org
Cc: David Howells dhowe...@redhat.com
Cc: James Morris james.l.mor...@oracle.com
---

 kernel/sysctl.c |   10 +-
 1 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 97186b9..e282b5b 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1789,7 +1789,7 @@ static int do_proc_dointvec_conv(bool *negp, unsigned 
long *lvalp,
int val = *valp;
if (val < 0) {
*negp = true;
-   *lvalp = (unsigned long)-val;
+   *lvalp = (unsigned int)-val;
} else {
*negp = false;
*lvalp = (unsigned long)val;
@@ -1982,7 +1982,7 @@ static int do_proc_dointvec_minmax_conv(bool *negp, 
unsigned long *lvalp,
int val = *valp;
if (val < 0) {
*negp = true;
-   *lvalp = (unsigned long)-val;
+   *lvalp = (unsigned int)-val;
} else {
*negp = false;
*lvalp = (unsigned long)val;
@@ -2197,7 +2197,7 @@ static int do_proc_dointvec_jiffies_conv(bool *negp, 
unsigned long *lvalp,
unsigned long lval;
if (val < 0) {
*negp = true;
-   lval = (unsigned long)-val;
+   lval = (unsigned int)-val;
} else {
*negp = false;
lval = (unsigned long)val;
@@ -2220,7 +2220,7 @@ static int do_proc_dointvec_userhz_jiffies_conv(bool 
*negp, unsigned long *lvalp
unsigned long lval;
if (val < 0) {
*negp = true;
-   lval = (unsigned long)-val;
+   lval = (unsigned int)-val;
} else {
*negp = false;
lval = (unsigned long)val;
@@ -2241,7 +2241,7 @@ static int do_proc_dointvec_ms_jiffies_conv(bool *negp, 
unsigned long *lvalp,
unsigned long lval;
if (val < 0) {
*negp = true;
-   lval = (unsigned long)-val;
+   lval = (unsigned int)-val;
} else {
*negp = false;
lval = (unsigned long)val;

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH -v2 5/6] fuse: set default global limit considering tunable request size

2012-07-19 Thread Mitsuo Hayasaka
Set default global limits for backgrounded requests and congestion
threshold considering the tunable maximum request size.

They are calculated using size of fuse_req structure, which is
variable due to it. This patch sets them according to the current
request size unless they are set via mod_param by the system
administrator.

Signed-off-by: Mitsuo Hayasaka 
Cc: Miklos Szeredi 
Cc: Nikolaus Rath 
Cc: Liu Yuan 
Cc: Has-Wen Nienhuys 
---

 fs/fuse/fuse_i.h |4 +++
 fs/fuse/inode.c  |   62 --
 2 files changed, 55 insertions(+), 11 deletions(-)

diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 46df615..2dda6eb 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -27,6 +27,10 @@
 /** Default number of pages that can be used in a single read/write request */
 #define FUSE_DEFAULT_MAX_PAGES_PER_REQ 32
 
+/** Maximum size of struct fuse_req */
+#define FUSE_CURRENT_REQ_SIZE (sizeof(struct fuse_req) +\
+  sysfs_max_req_pages * sizeof(struct page *))
+
 /** Bias for fi->writectr, meaning new writepages must not be sent */
 #define FUSE_NOWRITE INT_MIN
 
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 5f84a40..dc0302f 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -29,25 +29,36 @@ static struct kmem_cache *fuse_inode_cachep;
 struct list_head fuse_conn_list;
 DEFINE_MUTEX(fuse_mutex);
 
-static int set_global_limit(const char *val, struct kernel_param *kp);
+static int set_global_limit_bgreq(const char *val, struct kernel_param *kp);
+static int set_global_limit_thresh(const char *val, struct kernel_param *kp);
 
 unsigned max_user_bgreq;
-module_param_call(max_user_bgreq, set_global_limit, param_get_uint,
- &max_user_bgreq, 0644);
+module_param_call(max_user_bgreq, set_global_limit_bgreq,
+ param_get_uint, &max_user_bgreq, 0644);
 __MODULE_PARM_TYPE(max_user_bgreq, "uint");
 MODULE_PARM_DESC(max_user_bgreq,
  "Global limit for the maximum number of backgrounded requests an "
  "unprivileged user can set");
 
 unsigned max_user_congthresh;
-module_param_call(max_user_congthresh, set_global_limit, param_get_uint,
- &max_user_congthresh, 0644);
+module_param_call(max_user_congthresh, set_global_limit_thresh,
+ param_get_uint, &max_user_congthresh, 0644);
 __MODULE_PARM_TYPE(max_user_congthresh, "uint");
 MODULE_PARM_DESC(max_user_congthresh,
  "Global limit for the maximum congestion threshold an "
  "unprivileged user can set");
 
 /**
+ * The flags below are used in order to distinguish how to set
+ * max_user_bgreq and max_user_congthresh, respectively. They
+ * should be used if they are set via mod_param. If not, we should
+ * check their current limitation using check_global_limit() any
+ * time due to the tunable read/write request size.
+ */
+static bool mod_param_set_flg_bgreq;
+static bool mod_param_set_flg_thresh;
+
+/**
  * Maximum number of pages allocated for struct fuse_req.
  * It can be changed via sysfs to arbitrary number between
  * FUSE_DEFAULT_MAX_PAGES_PER_REQ and nr_pages equivalent
@@ -766,13 +777,39 @@ static void sanitize_global_limit(unsigned *limit)
 {
if (*limit == 0)
*limit = ((num_physpages << PAGE_SHIFT) >> 13) /
-sizeof(struct fuse_req);
+FUSE_CURRENT_REQ_SIZE;
 
if (*limit >= 1 << 16)
*limit = (1 << 16) - 1;
 }
 
-static int set_global_limit(const char *val, struct kernel_param *kp)
+static void check_global_limit(unsigned *limit, bool mod_param_flg)
+{
+   if (!mod_param_flg) {
+   unsigned cur_global_limit = 0;
+
+   sanitize_global_limit(&cur_global_limit);
+   *limit = cur_global_limit;
+   }
+}
+
+static int set_global_limit_bgreq(const char *val, struct kernel_param *kp)
+{
+   int rv;
+
+   rv = param_set_uint(val, kp);
+   if (rv)
+   return rv;
+
+   sanitize_global_limit((unsigned *)kp->arg);
+
+   /* max_user_bgreq is set via mod_param */
+   mod_param_set_flg_bgreq = true;
+
+   return 0;
+}
+
+static int set_global_limit_thresh(const char *val, struct kernel_param *kp)
 {
int rv;
 
@@ -782,6 +819,9 @@ static int set_global_limit(const char *val, struct 
kernel_param *kp)
 
sanitize_global_limit((unsigned *)kp->arg);
 
+   /* max_user_congthresh is set via mod_param */
+   mod_param_set_flg_thresh = true;
+
return 0;
 }
 
@@ -801,8 +841,8 @@ static void process_init_limits(struct fuse_conn *fc, 
struct fuse_init_out *arg)
if (arg->minor < 13)
return;
 
-   sanitize_global_limit(&max_user_bgreq);
-   sanitize_global_limit(&max_user_congthresh);
+   check_global_limit(&max_user_bgreq, mod_param_set_flg_bgreq);
+   check_global_limit(&max_user_congthresh, mod_param_set_flg_thresh);
 
i

[PATCH -v2 6/6] fuse: add documentation of sysfs parameter to limit maximum fuse request size

2012-07-19 Thread Mitsuo Hayasaka
Add an explanation about the sysfs parameter to limit the
maximum read/write request size.

Signed-off-by: Mitsuo Hayasaka 
Cc: Rob Landley 
Cc: Miklos Szeredi 
Cc: Nikolaus Rath 
Cc: Liu Yuan 
Cc: Has-Wen Nienhuys 
---

 Documentation/filesystems/fuse.txt |   15 ++-
 1 files changed, 14 insertions(+), 1 deletions(-)

diff --git a/Documentation/filesystems/fuse.txt 
b/Documentation/filesystems/fuse.txt
index 13af4a4..4e706ec 100644
--- a/Documentation/filesystems/fuse.txt
+++ b/Documentation/filesystems/fuse.txt
@@ -108,13 +108,26 @@ Mount options
 
   With this option the maximum size of read operations can be set.
   The default is infinite.  Note that the size of read requests is
-  limited anyway to 32 pages (which is 128kbyte on i386).
+  limited by max_pages_per_req sysfs parameter (See below for details.)
 
 'blksize=N'
 
   Set the block size for the filesystem.  The default is 512.  This
   option is only valid for 'fuseblk' type mounts.
 
+Sysfs parameter
+~~~
+
+  '/sys/fs/fuse/max_pages_per_req'
+
+Specify max request size in pages, which limits max_read/max_write
+mount option. The default is 32 pages (which is 128kbyte on i386).
+It can be changed to arbitrary number between 32 and the number of
+pages equivalent to pipe_max_size.
+
+Changing it may improve read/write throughput on systems. Existing
+FUSE mount must be remounted for this change to take effect.
+
 Control filesystem
 ~~
 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH -v2 4/6] fuse: add a sysfs parameter to control the maximum request size

2012-07-19 Thread Mitsuo Hayasaka
Add a max_pages_per_req sysfs parameter to limit the maximum
read/write request size. It can be changed to arbitrary number
between 32 and the nr_pages equivalent to pipe_max_size, and the
32 pages are set by default.

The sysfs parameter control is required, as follows.

* The libfuse should change the current MIN_BUFSIZE limitation
  according to the current maximum request size in FUSE. If not,
  the libfuse must always set MIN_BUFSIZE to the maximum request
  limit (= [nr_pages (equivalent to pipe_max_size) * 4KB + 0x1000]),
  which leads to waste of memory.

* It is easy to find and set the parameter to the optimized value
  in order to improve the read/write throughput, since the
  maximum request limit does not always provide the highest
  throughput.

So, it is necessary to get and set the maximum size from userspace.

Existing FUSE mounts must be remounted for this change to take
effect.

Signed-off-by: Mitsuo Hayasaka 
Cc: Miklos Szeredi 
Cc: Nikolaus Rath 
Cc: Liu Yuan 
Cc: Has-Wen Nienhuys 
---

 fs/fuse/inode.c |   58 +++
 1 files changed, 54 insertions(+), 4 deletions(-)

diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index f7f3c5d..5f84a40 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -47,6 +47,14 @@ MODULE_PARM_DESC(max_user_congthresh,
  "Global limit for the maximum congestion threshold an "
  "unprivileged user can set");
 
+/**
+ * Maximum number of pages allocated for struct fuse_req.
+ * It can be changed via sysfs to arbitrary number between
+ * FUSE_DEFAULT_MAX_PAGES_PER_REQ and nr_pages equivalent
+ * to pipe_max_size.
+ */
+static unsigned sysfs_max_req_pages = FUSE_DEFAULT_MAX_PAGES_PER_REQ;
+
 #define FUSE_SUPER_MAGIC 0x65735546
 
 #define FUSE_DEFAULT_BLKSIZE 512
@@ -779,11 +787,8 @@ static int set_global_limit(const char *val, struct 
kernel_param *kp)
 
 static void set_conn_max_pages(struct fuse_conn *fc, unsigned max_pages)
 {
-   unsigned pipe_max_size = pipe_get_max_size();
-   unsigned pipe_max_pages = DIV_ROUND_UP(pipe_max_size, PAGE_SIZE);
-
if (max_pages > fc->max_pages) {
-   fc->max_pages = min_t(unsigned, pipe_max_pages, max_pages);
+   fc->max_pages = min_t(unsigned, sysfs_max_req_pages, max_pages);
fc->fuse_req_size = sizeof(struct fuse_req) +
fc->max_pages * sizeof(struct page *);
}
@@ -1205,6 +1210,45 @@ static void fuse_fs_cleanup(void)
 static struct kobject *fuse_kobj;
 static struct kobject *connections_kobj;
 
+static ssize_t max_req_pages_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+   return sprintf(buf, "%u\n", sysfs_max_req_pages);
+}
+
+static ssize_t max_req_pages_store(struct kobject *kobj,
+  struct kobj_attribute *attr,
+  const char *buf, size_t count)
+{
+   int err;
+   unsigned long t;
+   unsigned pipe_max_size = pipe_get_max_size();
+   unsigned pipe_max_pages = DIV_ROUND_UP(pipe_max_size, PAGE_SIZE);
+
+   err = kstrtoul(skip_spaces(buf), 0, &t);
+   if (err)
+   return err;
+
+   t = max_t(unsigned long, t, FUSE_DEFAULT_MAX_PAGES_PER_REQ);
+   t = min_t(unsigned long, t, pipe_max_pages);
+
+   sysfs_max_req_pages = t;
+   return count;
+}
+
+static struct kobj_attribute max_req_pages_attr =
+   __ATTR(max_pages_per_req, 0644, max_req_pages_show,
+  max_req_pages_store);
+
+static struct attribute *fuse_attrs[] = {
+   &max_req_pages_attr.attr,
+   NULL,
+};
+
+static struct attribute_group fuse_attr_grp = {
+   .attrs = fuse_attrs,
+};
+
 static int fuse_sysfs_init(void)
 {
int err;
@@ -1221,8 +1265,14 @@ static int fuse_sysfs_init(void)
goto out_fuse_unregister;
}
 
+   err = sysfs_create_group(fuse_kobj, &fuse_attr_grp);
+   if (err)
+   goto out_conn_unregister;
+
return 0;
 
+ out_conn_unregister:
+   kobject_put(connections_kobj);
  out_fuse_unregister:
kobject_put(fuse_kobj);
  out_err:

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH -v2 2/6] fuse: make the maximum read/write request size tunable

2012-07-19 Thread Mitsuo Hayasaka
Make the maximum read/write request size tunable between
32 pages and the number of pages equivalent to pipe_max_size.
The max_read/max_write mount options affect the size. The
32 pages are used by default without these options.

Currently, the maximum read/write request size is limited to
FUSE_MAX_PAGES_PER_REQ which is equal to 32 pages. It is
required to change it in order to maximize the throughput
since the optimized value depends on various factors such as
type and version of local filesystems used and hardware specs,
etc.

In addition, recently FUSE is widely used as a gateway to
connect cloud storage services and distributed filesystems.
Larger data might be stored in them over networking via FUSE
and the overhead might affect the read/write throughput.

So, a tunable functionality of read/write request size is
useful.

Signed-off-by: Mitsuo Hayasaka 
Cc: Miklos Szeredi 
Cc: Nikolaus Rath 
Cc: Liu Yuan 
Cc: Has-Wen Nienhuys 
---

 fs/fuse/dev.c|   27 ++-
 fs/fuse/file.c   |   32 +---
 fs/fuse/fuse_i.h |   27 +--
 fs/fuse/inode.c  |   42 +++---
 4 files changed, 83 insertions(+), 45 deletions(-)

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 7df2b5e..511560b 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -34,35 +34,36 @@ static struct fuse_conn *fuse_get_conn(struct file *file)
return file->private_data;
 }
 
-static void fuse_request_init(struct fuse_req *req)
+static void fuse_request_init(struct fuse_conn *fc, struct fuse_req *req)
 {
-   memset(req, 0, sizeof(*req));
+   memset(req, 0, fc->fuse_req_size);
INIT_LIST_HEAD(&req->list);
INIT_LIST_HEAD(&req->intr_entry);
init_waitqueue_head(&req->waitq);
atomic_set(&req->count, 1);
 }
 
-struct fuse_req *fuse_request_alloc(void)
+struct fuse_req *fuse_request_alloc(struct fuse_conn *fc)
 {
-   struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, GFP_KERNEL);
+   struct fuse_req *req = kmalloc(fc->fuse_req_size, GFP_KERNEL);
+
if (req)
-   fuse_request_init(req);
+   fuse_request_init(fc, req);
return req;
 }
 EXPORT_SYMBOL_GPL(fuse_request_alloc);
 
-struct fuse_req *fuse_request_alloc_nofs(void)
+struct fuse_req *fuse_request_alloc_nofs(struct fuse_conn *fc)
 {
-   struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, GFP_NOFS);
+   struct fuse_req *req = kmalloc(fc->fuse_req_size, GFP_NOFS);
if (req)
-   fuse_request_init(req);
+   fuse_request_init(fc, req);
return req;
 }
 
 void fuse_request_free(struct fuse_req *req)
 {
-   kmem_cache_free(fuse_req_cachep, req);
+   kfree(req);
 }
 
 static void block_sigs(sigset_t *oldset)
@@ -116,7 +117,7 @@ struct fuse_req *fuse_get_req(struct fuse_conn *fc)
if (!fc->connected)
goto out;
 
-   req = fuse_request_alloc();
+   req = fuse_request_alloc(fc);
err = -ENOMEM;
if (!req)
goto out;
@@ -166,7 +167,7 @@ static void put_reserved_req(struct fuse_conn *fc, struct 
fuse_req *req)
struct fuse_file *ff = file->private_data;
 
spin_lock(&fc->lock);
-   fuse_request_init(req);
+   fuse_request_init(fc, req);
BUG_ON(ff->reserved_req);
ff->reserved_req = req;
wake_up_all(&fc->reserved_req_waitq);
@@ -193,7 +194,7 @@ struct fuse_req *fuse_get_req_nofail(struct fuse_conn *fc, 
struct file *file)
 
atomic_inc(&fc->num_waiting);
wait_event(fc->blocked_waitq, !fc->blocked);
-   req = fuse_request_alloc();
+   req = fuse_request_alloc(fc);
if (!req)
req = get_reserved_req(fc, file);
 
@@ -1564,7 +1565,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct 
inode *inode,
else if (outarg->offset + num > file_size)
num = file_size - outarg->offset;
 
-   while (num && req->num_pages < FUSE_MAX_PAGES_PER_REQ) {
+   while (num && req->num_pages < fc->max_pages) {
struct page *page;
unsigned int this_num;
 
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index b321a68..7b96b00 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -57,7 +57,7 @@ struct fuse_file *fuse_file_alloc(struct fuse_conn *fc)
return NULL;
 
ff->fc = fc;
-   ff->reserved_req = fuse_request_alloc();
+   ff->reserved_req = fuse_request_alloc(fc);
if (unlikely(!ff->reserved_req)) {
kfree(ff);
return NULL;
@@ -653,7 +653,7 @@ static int fuse_readpages_fill(void *_data, struct page 
*page)
fuse_wait_on_page_writeback(inode, page->index);
 
if (req->num_pages &&
-   (req->num_pages == FUSE_MAX_PAGES_PER_REQ ||
+   (req->num_pages == fc->max_pages ||
 

[PATCH -v2 3/6] fuse: remove cache for fuse request allocation

2012-07-19 Thread Mitsuo Hayasaka
Remove fuse_req_cachep, which was used for fuse request buffer.
It is no longer used since the buffer is allocated dynamically
due to the tunable maximum read/write request size.

Signed-off-by: Mitsuo Hayasaka 
Cc: Miklos Szeredi 
Cc: Nikolaus Rath 
Cc: Liu Yuan 
Cc: Has-Wen Nienhuys 
---

 fs/fuse/dev.c |   21 +
 1 files changed, 1 insertions(+), 20 deletions(-)

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 511560b..4087ff4 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -23,8 +23,6 @@
 MODULE_ALIAS_MISCDEV(FUSE_MINOR);
 MODULE_ALIAS("devname:fuse");
 
-static struct kmem_cache *fuse_req_cachep;
-
 static struct fuse_conn *fuse_get_conn(struct file *file)
 {
/*
@@ -2075,27 +2073,10 @@ static struct miscdevice fuse_miscdevice = {
 
 int __init fuse_dev_init(void)
 {
-   int err = -ENOMEM;
-   fuse_req_cachep = kmem_cache_create("fuse_request",
-   sizeof(struct fuse_req),
-   0, 0, NULL);
-   if (!fuse_req_cachep)
-   goto out;
-
-   err = misc_register(&fuse_miscdevice);
-   if (err)
-   goto out_cache_clean;
-
-   return 0;
-
- out_cache_clean:
-   kmem_cache_destroy(fuse_req_cachep);
- out:
-   return err;
+   return misc_register(&fuse_miscdevice);
 }
 
 void fuse_dev_cleanup(void)
 {
misc_deregister(&fuse_miscdevice);
-   kmem_cache_destroy(fuse_req_cachep);
 }

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH -v2 1/6] pipe: make the maximum pipe size referable from kernel module

2012-07-19 Thread Mitsuo Hayasaka
Make the maximum pipe size referable from a kernel module.

The /proc/sys/fs/pipe-max-size defines an upper limit for the
capacity of a pipe. It is also used as an upper limit of a
fuse read/write request size in this patch series. So, it
is necessary to make it referable from a kernel module.

Signed-off-by: Mitsuo Hayasaka 
Cc: Alexander Viro 
Cc: Andrew Morton 
Cc: Muthukumar R 
Cc: Miklos Szeredi 
---

 fs/pipe.c |7 +++
 include/linux/pipe_fs_i.h |3 +++
 2 files changed, 10 insertions(+), 0 deletions(-)

diff --git a/fs/pipe.c b/fs/pipe.c
index 49c1065..f0f3768 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -36,6 +36,13 @@ unsigned int pipe_max_size = 1048576;
  */
 unsigned int pipe_min_size = PAGE_SIZE;
 
+/* get pipe_max_size */
+unsigned int pipe_get_max_size(void)
+{
+   return pipe_max_size;
+}
+EXPORT_SYMBOL(pipe_get_max_size);
+
 /*
  * We use a start+len construction, which provides full use of the 
  * allocated memory.
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index e1ac1ce..50a16dd 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -134,6 +134,9 @@ struct pipe_buf_operations {
memory allocation, whereas PIPE_BUF makes atomicity guarantees.  */
 #define PIPE_SIZE  PAGE_SIZE
 
+/* get pipe_max_size */
+unsigned int pipe_get_max_size(void);
+
 /* Pipe lock and unlock operations */
 void pipe_lock(struct pipe_inode_info *);
 void pipe_unlock(struct pipe_inode_info *);

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH -v2 0/6] fuse: make maximum read/write request size tunable

2012-07-19 Thread Mitsuo Hayasaka
Hi,

This patch series makes maximum read/write request size tunable in FUSE.
Currently, it is limited to FUSE_MAX_PAGES_PER_REQ which is equal
to 32 pages. It is required to change it in order to improve the
throughput since optimized value depends on various factors such
as type and version of local filesystems used and HW specs, etc.

In addition, recently FUSE is widely used as a gateway to connect
cloud storage services and distributed filesystems. Larger data
might be stored in them over networking via FUSE and the overhead
might affect the throughput.

It seems there were many requests to increase FUSE_MAX_PAGES_PER_REQ
to improve the throughput, as follows.

http://sourceforge.net/mailarchive/forum.php?thread_name=4FC2F7A1.4010609%40gmail.com&forum_name=fuse-devel

http://old.nabble.com/-Fuse-2.8--big_write-option---%3E-128kb-write-syscall-...-howto-set-higher-value-td22292589.html

http://old.nabble.com/Block-size-%3E128k-td18675772.html

These discussions mention how to change both FUSE kernel and libfuse
sources such as FUSE_MAX_PAGES_PER_REQ and MIN_BUFSIZE, but the
changed and increased values have not been default yet. We guess this
is because it will be applied to the FUSE filesystems that do not need
the increased value.

One of the ways to solve this is to make them tunable.
In this series, the new sysfs parameter max_pages_per_req is introduced.
It limits the maximum read/write size in fuse request and it can be
changed to arbitrary number between 32 pages and nr_pages equivalent to
the maximum pipe size. When the max_read/max_write mount option is
specified, FUSE request size is set per mount. (The size is rounded-up
to page size and limited up to max_pages_per_req.)

We think the sysfs parameter control is required, as follows.

- The libfuse should change the current MIN_BUFSIZE limitation according
  to this value. If not, The libfuse must always set it to the maximum
  request limit (= [nr_pages (equivalent to pipe_max_size) * 4KB + 0x1000]),
  which leads to waste of memory.

- It is easy to find and set it to the optimized value in order to
  improve the throughput.

The 32 pages are set by default and the minimum value. The upper limit
is the number of pages equivalent to the maximum pipe size that can
be changed by only privileged user. So, we can flexibly set it to the
optimized value considering the system configuration. 

Also, the patch set for libfuse to change current MIN_BUFSIZE limitation
according to the sysfs parameter will be sent soon.


* Performance example

We evaluated the performance improvement due to this patch series.
FUSE filesystems are mounted on tmpfs and we measured the read/write
throughput using 512MB random data.

The results of average read/write throughput are shown as follows.
 - we measured 10 times throughput for read and write operations,
   and calculated their average.
 - the results in 512 and 1024 pages are measured after changing and
   increasing both the maximum pipe size via /proc/sys/fs/pipe-max-size
   and the max_pages_per_req.

** write

For without direct_io option,
# of pages   |original(32)|tuning(32)|(64) |(128)|(256)|(512)|(1024)

thruput(MB/s)|305.4   |303.9 |364.6|414.4|441.5|442.4|437.4


For with direct_io option,
# of pages   |original(32)|tuning(32)|(64) |(128)|(256)|(512)|(1024)
-
thruput(MB/s)|391.6   |387.7 |502.6|595.4|675.7|762.4|743.9


** read

For without direct_io option, there is no difference between
original 32 pages and tuning patches since the read request size
is not changed even if changing the sysfs parameter.


For with direct_io option,
# of pages   |original(32)|tuning(32)|(64) |(128)|(256)|(512)|(1024)
-
thruput(MB/s)|484.6   |485.1 |567.7|611.9|653.5|794.5|788.2


 From these evaluations, this patch series can improve the
performance with an increase of the sysfs parameter. In
particular, the read/write throughput with direct_io achieves
a high improvement. However, it is clear that the results for
1024 pages do not always lead to the highest improvement.
These are just an example and the results may be changed in
different systems. Therefore, we think a tunable functionality
of read/write request size is useful.

Changed in v2:
 - add a functionality to get the maximum pipe size from kernel
   module.
 - change the upper limit of fuse request size from 256 to
   nr_pages equivalent to the maximum pipe size.
 - revise the documentation in /Documentation/filesystems/
   fuse.txt

Thanks,

---

Mitsuo Hayasaka (6):
  fuse: add documentation of sysfs parameter to limit maximum fuse request 
size
  fuse: set default global limit considering tunable request size
  fuse: add a sysfs parameter to control the maximum request size
  fuse: remove cache for fuse request

[PATCH -v2 0/6] fuse: make maximum read/write request size tunable

2012-07-19 Thread Mitsuo Hayasaka
Hi,

This patch series makes maximum read/write request size tunable in FUSE.
Currently, it is limited to FUSE_MAX_PAGES_PER_REQ which is equal
to 32 pages. It is required to change it in order to improve the
throughput since optimized value depends on various factors such
as type and version of local filesystems used and HW specs, etc.

In addition, recently FUSE is widely used as a gateway to connect
cloud storage services and distributed filesystems. Larger data
might be stored in them over networking via FUSE and the overhead
might affect the throughput.

It seems there were many requests to increase FUSE_MAX_PAGES_PER_REQ
to improve the throughput, as follows.

http://sourceforge.net/mailarchive/forum.php?thread_name=4FC2F7A1.4010609%40gmail.com&forum_name=fuse-devel

http://old.nabble.com/-Fuse-2.8--big_write-option---%3E-128kb-write-syscall-...-howto-set-higher-value-td22292589.html

http://old.nabble.com/Block-size-%3E128k-td18675772.html

These discussions mention how to change both FUSE kernel and libfuse
sources such as FUSE_MAX_PAGES_PER_REQ and MIN_BUFSIZE, but the
changed and increased values have not been default yet. We guess this
is because it will be applied to the FUSE filesystems that do not need
the increased value.

One of the ways to solve this is to make them tunable.
In this series, the new sysfs parameter max_pages_per_req is introduced.
It limits the maximum read/write size in fuse request and it can be
changed to arbitrary number between 32 pages and nr_pages equivalent to
the maximum pipe size. When the max_read/max_write mount option is
specified, FUSE request size is set per mount. (The size is rounded-up
to page size and limited up to max_pages_per_req.)

We think the sysfs parameter control is required, as follows.

- The libfuse should change the current MIN_BUFSIZE limitation according
  to this value. If not, The libfuse must always set it to the maximum
  request limit (= [nr_pages (equivalent to pipe_max_size) * 4KB + 0x1000]),
  which leads to waste of memory.

- It is easy to find and set it to the optimized value in order to
  improve the throughput.

The 32 pages are set by default and the minimum value. The upper limit
is the number of pages equivalent to the maximum pipe size that can
be changed by only privileged user. So, we can flexibly set it to the
optimized value considering the system configuration. 

Also, the patch set for libfuse to change current MIN_BUFSIZE limitation
according to the sysfs parameter will be sent soon.


* Performance example

We evaluated the performance improvement due to this patch series.
FUSE filesystems are mounted on tmpfs and we measured the read/write
throughput using 512MB random data.

The results of average read/write throughput are shown as follows.
 - we measured 10 times throughput for read and write operations,
   and calculated their average.
 - the results in 512 and 1024 pages are measured after changing and
   increasing both the maximum pipe size via /proc/sys/fs/pipe-max-size
   and the max_pages_per_req.

** write

For without direct_io option,
# of pages   |original(32)|tuning(32)|(64) |(128)|(256)|(512)|(1024)

thruput(MB/s)|305.4   |303.9 |364.6|414.4|441.5|442.4|437.4


For with direct_io option,
# of pages   |original(32)|tuning(32)|(64) |(128)|(256)|(512)|(1024)
-
thruput(MB/s)|391.6   |387.7 |502.6|595.4|675.7|762.4|743.9


** read

For without direct_io option, there is no difference between
original 32 pages and tuning patches since the read request size
is not changed even if changing the sysfs parameter.


For with direct_io option,
# of pages   |original(32)|tuning(32)|(64) |(128)|(256)|(512)|(1024)
-
thruput(MB/s)|484.6   |485.1 |567.7|611.9|653.5|794.5|788.2


 From these evaluations, this patch series can improve the
performance with an increase of the sysfs parameter. In
particular, the read/write throughput with direct_io achieves
a high improvement. However, it is clear that the results for
1024 pages do not always lead to the highest improvement.
These are just an example and the results may be changed in
different systems. Therefore, we think a tunable functionality
of read/write request size is useful.

Changed in v2:
 - add a functionality to get the maximum pipe size from kernel
   module.
 - change the upper limit of fuse request size from 256 to
   nr_pages equivalent to the maximum pipe size.
 - revise the documentation in /Documentation/filesystems/
   fuse.txt

Thanks,

---

Mitsuo Hayasaka (6):
  fuse: add documentation of sysfs parameter to limit maximum fuse request 
size
  fuse: set default global limit considering tunable request size
  fuse: add a sysfs parameter to control the maximum request size
  fuse: remove cache for fuse request

[PATCH -v2 1/6] pipe: make the maximum pipe size referable from kernel module

2012-07-19 Thread Mitsuo Hayasaka
Make the maximum pipe size referable from a kernel module.

The /proc/sys/fs/pipe-max-size defines an upper limit for the
capacity of a pipe. It is also used as an upper limit of a
fuse read/write request size in this patch series. So, it
is necessary to make it referable from a kernel module.

Signed-off-by: Mitsuo Hayasaka mitsuo.hayasaka...@hitachi.com
Cc: Alexander Viro v...@zeniv.linux.org.uk
Cc: Andrew Morton a...@linux-foundation.org
Cc: Muthukumar R mut...@gmail.com
Cc: Miklos Szeredi mik...@szeredi.hu
---

 fs/pipe.c |7 +++
 include/linux/pipe_fs_i.h |3 +++
 2 files changed, 10 insertions(+), 0 deletions(-)

diff --git a/fs/pipe.c b/fs/pipe.c
index 49c1065..f0f3768 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -36,6 +36,13 @@ unsigned int pipe_max_size = 1048576;
  */
 unsigned int pipe_min_size = PAGE_SIZE;
 
+/* get pipe_max_size */
+unsigned int pipe_get_max_size(void)
+{
+   return pipe_max_size;
+}
+EXPORT_SYMBOL(pipe_get_max_size);
+
 /*
  * We use a start+len construction, which provides full use of the 
  * allocated memory.
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index e1ac1ce..50a16dd 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -134,6 +134,9 @@ struct pipe_buf_operations {
memory allocation, whereas PIPE_BUF makes atomicity guarantees.  */
 #define PIPE_SIZE  PAGE_SIZE
 
+/* get pipe_max_size */
+unsigned int pipe_get_max_size(void);
+
 /* Pipe lock and unlock operations */
 void pipe_lock(struct pipe_inode_info *);
 void pipe_unlock(struct pipe_inode_info *);

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH -v2 3/6] fuse: remove cache for fuse request allocation

2012-07-19 Thread Mitsuo Hayasaka
Remove fuse_req_cachep, which was used for fuse request buffer.
It is no longer used since the buffer is allocated dynamically
due to the tunable maximum read/write request size.

Signed-off-by: Mitsuo Hayasaka mitsuo.hayasaka...@hitachi.com
Cc: Miklos Szeredi mik...@szeredi.hu
Cc: Nikolaus Rath nikol...@rath.org
Cc: Liu Yuan namei.u...@gmail.com
Cc: Has-Wen Nienhuys han...@xs4all.nl
---

 fs/fuse/dev.c |   21 +
 1 files changed, 1 insertions(+), 20 deletions(-)

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 511560b..4087ff4 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -23,8 +23,6 @@
 MODULE_ALIAS_MISCDEV(FUSE_MINOR);
 MODULE_ALIAS(devname:fuse);
 
-static struct kmem_cache *fuse_req_cachep;
-
 static struct fuse_conn *fuse_get_conn(struct file *file)
 {
/*
@@ -2075,27 +2073,10 @@ static struct miscdevice fuse_miscdevice = {
 
 int __init fuse_dev_init(void)
 {
-   int err = -ENOMEM;
-   fuse_req_cachep = kmem_cache_create(fuse_request,
-   sizeof(struct fuse_req),
-   0, 0, NULL);
-   if (!fuse_req_cachep)
-   goto out;
-
-   err = misc_register(fuse_miscdevice);
-   if (err)
-   goto out_cache_clean;
-
-   return 0;
-
- out_cache_clean:
-   kmem_cache_destroy(fuse_req_cachep);
- out:
-   return err;
+   return misc_register(fuse_miscdevice);
 }
 
 void fuse_dev_cleanup(void)
 {
misc_deregister(fuse_miscdevice);
-   kmem_cache_destroy(fuse_req_cachep);
 }

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH -v2 2/6] fuse: make the maximum read/write request size tunable

2012-07-19 Thread Mitsuo Hayasaka
Make the maximum read/write request size tunable between
32 pages and the number of pages equivalent to pipe_max_size.
The max_read/max_write mount options affect the size. The
32 pages are used by default without these options.

Currently, the maximum read/write request size is limited to
FUSE_MAX_PAGES_PER_REQ which is equal to 32 pages. It is
required to change it in order to maximize the throughput
since the optimized value depends on various factors such as
type and version of local filesystems used and hardware specs,
etc.

In addition, FUSE has recently become widely used as a gateway to
connect cloud storage services and distributed filesystems.
Larger data might be stored in them over networking via FUSE
and the overhead might affect the read/write throughput.

So, a tunable functionality of read/write request size is
useful.

Signed-off-by: Mitsuo Hayasaka mitsuo.hayasaka...@hitachi.com
Cc: Miklos Szeredi mik...@szeredi.hu
Cc: Nikolaus Rath nikol...@rath.org
Cc: Liu Yuan namei.u...@gmail.com
Cc: Has-Wen Nienhuys han...@xs4all.nl
---

 fs/fuse/dev.c|   27 ++-
 fs/fuse/file.c   |   32 +---
 fs/fuse/fuse_i.h |   27 +--
 fs/fuse/inode.c  |   42 +++---
 4 files changed, 83 insertions(+), 45 deletions(-)

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 7df2b5e..511560b 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -34,35 +34,36 @@ static struct fuse_conn *fuse_get_conn(struct file *file)
return file-private_data;
 }
 
-static void fuse_request_init(struct fuse_req *req)
+static void fuse_request_init(struct fuse_conn *fc, struct fuse_req *req)
 {
-   memset(req, 0, sizeof(*req));
+   memset(req, 0, fc-fuse_req_size);
INIT_LIST_HEAD(req-list);
INIT_LIST_HEAD(req-intr_entry);
init_waitqueue_head(req-waitq);
atomic_set(req-count, 1);
 }
 
-struct fuse_req *fuse_request_alloc(void)
+struct fuse_req *fuse_request_alloc(struct fuse_conn *fc)
 {
-   struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, GFP_KERNEL);
+   struct fuse_req *req = kmalloc(fc-fuse_req_size, GFP_KERNEL);
+
if (req)
-   fuse_request_init(req);
+   fuse_request_init(fc, req);
return req;
 }
 EXPORT_SYMBOL_GPL(fuse_request_alloc);
 
-struct fuse_req *fuse_request_alloc_nofs(void)
+struct fuse_req *fuse_request_alloc_nofs(struct fuse_conn *fc)
 {
-   struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, GFP_NOFS);
+   struct fuse_req *req = kmalloc(fc-fuse_req_size, GFP_NOFS);
if (req)
-   fuse_request_init(req);
+   fuse_request_init(fc, req);
return req;
 }
 
 void fuse_request_free(struct fuse_req *req)
 {
-   kmem_cache_free(fuse_req_cachep, req);
+   kfree(req);
 }
 
 static void block_sigs(sigset_t *oldset)
@@ -116,7 +117,7 @@ struct fuse_req *fuse_get_req(struct fuse_conn *fc)
if (!fc-connected)
goto out;
 
-   req = fuse_request_alloc();
+   req = fuse_request_alloc(fc);
err = -ENOMEM;
if (!req)
goto out;
@@ -166,7 +167,7 @@ static void put_reserved_req(struct fuse_conn *fc, struct 
fuse_req *req)
struct fuse_file *ff = file-private_data;
 
spin_lock(fc-lock);
-   fuse_request_init(req);
+   fuse_request_init(fc, req);
BUG_ON(ff-reserved_req);
ff-reserved_req = req;
wake_up_all(fc-reserved_req_waitq);
@@ -193,7 +194,7 @@ struct fuse_req *fuse_get_req_nofail(struct fuse_conn *fc, 
struct file *file)
 
atomic_inc(fc-num_waiting);
wait_event(fc-blocked_waitq, !fc-blocked);
-   req = fuse_request_alloc();
+   req = fuse_request_alloc(fc);
if (!req)
req = get_reserved_req(fc, file);
 
@@ -1564,7 +1565,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct 
inode *inode,
else if (outarg-offset + num  file_size)
num = file_size - outarg-offset;
 
-   while (num  req-num_pages  FUSE_MAX_PAGES_PER_REQ) {
+   while (num  req-num_pages  fc-max_pages) {
struct page *page;
unsigned int this_num;
 
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index b321a68..7b96b00 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -57,7 +57,7 @@ struct fuse_file *fuse_file_alloc(struct fuse_conn *fc)
return NULL;
 
ff-fc = fc;
-   ff-reserved_req = fuse_request_alloc();
+   ff-reserved_req = fuse_request_alloc(fc);
if (unlikely(!ff-reserved_req)) {
kfree(ff);
return NULL;
@@ -653,7 +653,7 @@ static int fuse_readpages_fill(void *_data, struct page 
*page)
fuse_wait_on_page_writeback(inode, page-index);
 
if (req-num_pages 
-   (req-num_pages == FUSE_MAX_PAGES_PER_REQ ||
+   (req-num_pages == fc-max_pages ||
 (req-num_pages + 1) * PAGE_CACHE_SIZE  fc

[PATCH -v2 4/6] fuse: add a sysfs parameter to control the maximum request size

2012-07-19 Thread Mitsuo Hayasaka
Add a max_pages_per_req sysfs parameter to limit the maximum
read/write request size. It can be changed to an arbitrary number
between 32 and the nr_pages equivalent to pipe_max_size, and the
32 pages are set by default.

The sysfs parameter control is required for the following reasons.

* The libfuse should change the current MIN_BUFSIZE limitation
  according to the current maximum request size in FUSE. If not,
  the libfuse must always set MIN_BUFSIZE to the maximum request
  limit (= [nr_pages (equivalent to pipe_max_size) * 4KB + 0x1000]),
  which leads to waste of memory.

* It is easy to find and set the parameter to the optimized value
  in order to improve the read/write throughput, since the
  maximum request limit does not always provide the highest
  throughput.

So, it is necessary to get and set the maximum size from userspace.

Existing FUSE mounts must be remounted for this change to take
effect.

Signed-off-by: Mitsuo Hayasaka mitsuo.hayasaka...@hitachi.com
Cc: Miklos Szeredi mik...@szeredi.hu
Cc: Nikolaus Rath nikol...@rath.org
Cc: Liu Yuan namei.u...@gmail.com
Cc: Has-Wen Nienhuys han...@xs4all.nl
---

 fs/fuse/inode.c |   58 +++
 1 files changed, 54 insertions(+), 4 deletions(-)

diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index f7f3c5d..5f84a40 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -47,6 +47,14 @@ MODULE_PARM_DESC(max_user_congthresh,
  Global limit for the maximum congestion threshold an 
  unprivileged user can set);
 
+/**
+ * Maximum number of pages allocated for struct fuse_req.
+ * It can be changed via sysfs to arbitrary number between
+ * FUSE_DEFAULT_MAX_PAGES_PER_REQ and nr_pages equivalent
+ * to pipe_max_size.
+ */
+static unsigned sysfs_max_req_pages = FUSE_DEFAULT_MAX_PAGES_PER_REQ;
+
 #define FUSE_SUPER_MAGIC 0x65735546
 
 #define FUSE_DEFAULT_BLKSIZE 512
@@ -779,11 +787,8 @@ static int set_global_limit(const char *val, struct 
kernel_param *kp)
 
 static void set_conn_max_pages(struct fuse_conn *fc, unsigned max_pages)
 {
-   unsigned pipe_max_size = pipe_get_max_size();
-   unsigned pipe_max_pages = DIV_ROUND_UP(pipe_max_size, PAGE_SIZE);
-
if (max_pages  fc-max_pages) {
-   fc-max_pages = min_t(unsigned, pipe_max_pages, max_pages);
+   fc-max_pages = min_t(unsigned, sysfs_max_req_pages, max_pages);
fc-fuse_req_size = sizeof(struct fuse_req) +
fc-max_pages * sizeof(struct page *);
}
@@ -1205,6 +1210,45 @@ static void fuse_fs_cleanup(void)
 static struct kobject *fuse_kobj;
 static struct kobject *connections_kobj;
 
+static ssize_t max_req_pages_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+   return sprintf(buf, %u\n, sysfs_max_req_pages);
+}
+
+static ssize_t max_req_pages_store(struct kobject *kobj,
+  struct kobj_attribute *attr,
+  const char *buf, size_t count)
+{
+   int err;
+   unsigned long t;
+   unsigned pipe_max_size = pipe_get_max_size();
+   unsigned pipe_max_pages = DIV_ROUND_UP(pipe_max_size, PAGE_SIZE);
+
+   err = kstrtoul(skip_spaces(buf), 0, t);
+   if (err)
+   return err;
+
+   t = max_t(unsigned long, t, FUSE_DEFAULT_MAX_PAGES_PER_REQ);
+   t = min_t(unsigned long, t, pipe_max_pages);
+
+   sysfs_max_req_pages = t;
+   return count;
+}
+
+static struct kobj_attribute max_req_pages_attr =
+   __ATTR(max_pages_per_req, 0644, max_req_pages_show,
+  max_req_pages_store);
+
+static struct attribute *fuse_attrs[] = {
+   max_req_pages_attr.attr,
+   NULL,
+};
+
+static struct attribute_group fuse_attr_grp = {
+   .attrs = fuse_attrs,
+};
+
 static int fuse_sysfs_init(void)
 {
int err;
@@ -1221,8 +1265,14 @@ static int fuse_sysfs_init(void)
goto out_fuse_unregister;
}
 
+   err = sysfs_create_group(fuse_kobj, fuse_attr_grp);
+   if (err)
+   goto out_conn_unregister;
+
return 0;
 
+ out_conn_unregister:
+   kobject_put(connections_kobj);
  out_fuse_unregister:
kobject_put(fuse_kobj);
  out_err:

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH -v2 6/6] fuse: add documentation of sysfs parameter to limit maximum fuse request size

2012-07-19 Thread Mitsuo Hayasaka
Add an explanation about the sysfs parameter to limit the
maximum read/write request size.

Signed-off-by: Mitsuo Hayasaka mitsuo.hayasaka...@hitachi.com
Cc: Rob Landley r...@landley.net
Cc: Miklos Szeredi mik...@szeredi.hu
Cc: Nikolaus Rath nikol...@rath.org
Cc: Liu Yuan namei.u...@gmail.com
Cc: Has-Wen Nienhuys han...@xs4all.nl
---

 Documentation/filesystems/fuse.txt |   15 ++-
 1 files changed, 14 insertions(+), 1 deletions(-)

diff --git a/Documentation/filesystems/fuse.txt 
b/Documentation/filesystems/fuse.txt
index 13af4a4..4e706ec 100644
--- a/Documentation/filesystems/fuse.txt
+++ b/Documentation/filesystems/fuse.txt
@@ -108,13 +108,26 @@ Mount options
 
   With this option the maximum size of read operations can be set.
   The default is infinite.  Note that the size of read requests is
-  limited anyway to 32 pages (which is 128kbyte on i386).
+  limited by max_pages_per_req sysfs parameter (See below for details.)
 
 'blksize=N'
 
   Set the block size for the filesystem.  The default is 512.  This
   option is only valid for 'fuseblk' type mounts.
 
+Sysfs parameter
+~~~
+
+  '/sys/fs/fuse/max_pages_per_req'
+
+Specify max request size in pages, which limits max_read/max_write
+mount option. The default is 32 pages (which is 128kbyte on i386).
+It can be changed to arbitrary number between 32 and the number of
+pages equivalent to pipe_max_size.
+
+Changing it may improve read/write throughput on systems. Existing
+FUSE mount must be remounted for this change to take effect.
+
 Control filesystem
 ~~
 

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH -v2 5/6] fuse: set default global limit considering tunable request size

2012-07-19 Thread Mitsuo Hayasaka
Set default global limits for backgrounded requests and congestion
threshold considering the tunable maximum request size.

They are calculated using the size of the fuse_req structure, which
now varies with the tunable maximum request size. This patch sets
them according to the current request size unless they are set via
mod_param by the system administrator.

Signed-off-by: Mitsuo Hayasaka mitsuo.hayasaka...@hitachi.com
Cc: Miklos Szeredi mik...@szeredi.hu
Cc: Nikolaus Rath nikol...@rath.org
Cc: Liu Yuan namei.u...@gmail.com
Cc: Has-Wen Nienhuys han...@xs4all.nl
---

 fs/fuse/fuse_i.h |4 +++
 fs/fuse/inode.c  |   62 --
 2 files changed, 55 insertions(+), 11 deletions(-)

diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 46df615..2dda6eb 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -27,6 +27,10 @@
 /** Default number of pages that can be used in a single read/write request */
 #define FUSE_DEFAULT_MAX_PAGES_PER_REQ 32
 
+/** Maximum size of struct fuse_req */
+#define FUSE_CURRENT_REQ_SIZE (sizeof(struct fuse_req) +\
+  sysfs_max_req_pages * sizeof(struct page *))
+
 /** Bias for fi-writectr, meaning new writepages must not be sent */
 #define FUSE_NOWRITE INT_MIN
 
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 5f84a40..dc0302f 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -29,25 +29,36 @@ static struct kmem_cache *fuse_inode_cachep;
 struct list_head fuse_conn_list;
 DEFINE_MUTEX(fuse_mutex);
 
-static int set_global_limit(const char *val, struct kernel_param *kp);
+static int set_global_limit_bgreq(const char *val, struct kernel_param *kp);
+static int set_global_limit_thresh(const char *val, struct kernel_param *kp);
 
 unsigned max_user_bgreq;
-module_param_call(max_user_bgreq, set_global_limit, param_get_uint,
- max_user_bgreq, 0644);
+module_param_call(max_user_bgreq, set_global_limit_bgreq,
+ param_get_uint, max_user_bgreq, 0644);
 __MODULE_PARM_TYPE(max_user_bgreq, uint);
 MODULE_PARM_DESC(max_user_bgreq,
  Global limit for the maximum number of backgrounded requests an 
  unprivileged user can set);
 
 unsigned max_user_congthresh;
-module_param_call(max_user_congthresh, set_global_limit, param_get_uint,
- max_user_congthresh, 0644);
+module_param_call(max_user_congthresh, set_global_limit_thresh,
+ param_get_uint, max_user_congthresh, 0644);
 __MODULE_PARM_TYPE(max_user_congthresh, uint);
 MODULE_PARM_DESC(max_user_congthresh,
  Global limit for the maximum congestion threshold an 
  unprivileged user can set);
 
 /**
+ * The flags below are used in order to distinguish how to set
+ * max_user_bgreq and max_user_congthresh, respectively. They
+ * should be used if they are set via mod_param. If not, we should
+ * check their current limitation using check_global_limit() any
+ * time due to the tunable read/write request size.
+ */
+static bool mod_param_set_flg_bgreq;
+static bool mod_param_set_flg_thresh;
+
+/**
  * Maximum number of pages allocated for struct fuse_req.
  * It can be changed via sysfs to arbitrary number between
  * FUSE_DEFAULT_MAX_PAGES_PER_REQ and nr_pages equivalent
@@ -766,13 +777,39 @@ static void sanitize_global_limit(unsigned *limit)
 {
if (*limit == 0)
*limit = ((num_physpages  PAGE_SHIFT)  13) /
-sizeof(struct fuse_req);
+FUSE_CURRENT_REQ_SIZE;
 
if (*limit = 1  16)
*limit = (1  16) - 1;
 }
 
-static int set_global_limit(const char *val, struct kernel_param *kp)
+static void check_global_limit(unsigned *limit, bool mod_param_flg)
+{
+   if (!mod_param_flg) {
+   unsigned cur_global_limit = 0;
+
+   sanitize_global_limit(cur_global_limit);
+   *limit = cur_global_limit;
+   }
+}
+
+static int set_global_limit_bgreq(const char *val, struct kernel_param *kp)
+{
+   int rv;
+
+   rv = param_set_uint(val, kp);
+   if (rv)
+   return rv;
+
+   sanitize_global_limit((unsigned *)kp-arg);
+
+   /* max_user_bgreq is set via mod_param */
+   mod_param_set_flg_bgreq = true;
+
+   return 0;
+}
+
+static int set_global_limit_thresh(const char *val, struct kernel_param *kp)
 {
int rv;
 
@@ -782,6 +819,9 @@ static int set_global_limit(const char *val, struct 
kernel_param *kp)
 
sanitize_global_limit((unsigned *)kp-arg);
 
+   /* max_user_congthresh is set via mod_param */
+   mod_param_set_flg_thresh = true;
+
return 0;
 }
 
@@ -801,8 +841,8 @@ static void process_init_limits(struct fuse_conn *fc, 
struct fuse_init_out *arg)
if (arg-minor  13)
return;
 
-   sanitize_global_limit(max_user_bgreq);
-   sanitize_global_limit(max_user_congthresh);
+   check_global_limit(max_user_bgreq, mod_param_set_flg_bgreq);
+   check_global_limit(max_user_congthresh, mod_param_set_flg_thresh