from:"Steven Whitehouse"

Re: [ 64/66] GFS2: Test bufdata with buffer locked and gfs2_log_lock held

2012-11-26 Thread Steven Whitehouse

Hi,

On Sun, 2012-11-25 at 14:11 +0100, Ben Hutchings wrote:
> On Wed, 2012-11-14 at 20:11 -0800, Greg Kroah-Hartman wrote:
> > 3.6-stable review patch.  If anyone has any objections, please let me know.
> > 
> > --
> > 
> > From: Benjamin Marzinski 
> > 
> > commit 96e5d1d3adf56f1c7eeb07258f6a1a0a7ae9c489 upstream.
> > 
> > In gfs2_trans_add_bh(), gfs2 was testing if there was a bd attached to the
> > buffer without having the gfs2_log_lock held. It was then assuming it would
> > stay attached for the rest of the function. However, without either the log
> > lock being held or the buffer locked, __gfs2_ail_flush() could detach bd at
> > any time.  This patch moves the locking before the test.  If there isn't a bd
> > already attached, gfs2 can safely allocate one and attach it before locking.
> > There is no way that the newly allocated bd could be on the ail list,
> > and thus no way for __gfs2_ail_flush() to detach it.
> > 
> > Signed-off-by: Benjamin Marzinski 
> > Signed-off-by: Steven Whitehouse 
> > Signed-off-by: Greg Kroah-Hartman 
> [...]
> 
> Is this needed for any earlier versions?  It looks applicable to 3.2
> (with minor changes).
> 
> Ben.
> 

Potentially yes, although I don't think we've had any reports from that
far back,

Steve.


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 01/16] GFS2: Review bug traps in glops.c

2012-11-30 Thread Steven Whitehouse

Two of the bug traps here could really be warnings. The others are
converted from BUG() to GLOCK_BUG_ON() since we'll most likely
need to know the glock state in order to debug any issues which
arise. As a result of this, __dump_glock has to be renamed and
is no longer static.

Signed-off-by: Steven Whitehouse 

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index e6c2fd5..e543871 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -55,8 +55,6 @@ struct gfs2_glock_iter {
 
 typedef void (*glock_examiner) (struct gfs2_glock * gl);
 
-static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl);
-#define GLOCK_BUG_ON(gl,x) do { if (unlikely(x)) { __dump_glock(NULL, gl); 
BUG(); } } while(0)
 static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned 
int target);
 
 static struct dentry *gfs2_root;
@@ -1013,7 +1011,7 @@ trap_recursive:
printk(KERN_ERR "pid: %d\n", pid_nr(gh->gh_owner_pid));
printk(KERN_ERR "lock type: %d req lock state : %d\n",
   gh->gh_gl->gl_name.ln_type, gh->gh_state);
-   __dump_glock(NULL, gl);
+   gfs2_dump_glock(NULL, gl);
BUG();
 }
 
@@ -1508,7 +1506,7 @@ static int dump_glock(struct seq_file *seq, struct 
gfs2_glock *gl)
 {
int ret;
spin_lock(&gl->gl_spin);
-   ret = __dump_glock(seq, gl);
+   ret = gfs2_dump_glock(seq, gl);
spin_unlock(&gl->gl_spin);
return ret;
 }
@@ -1655,7 +1653,7 @@ static const char *gflags2str(char *buf, const struct 
gfs2_glock *gl)
 }
 
 /**
- * __dump_glock - print information about a glock
+ * gfs2_dump_glock - print information about a glock
  * @seq: The seq_file struct
  * @gl: the glock
  *
@@ -1672,7 +1670,7 @@ static const char *gflags2str(char *buf, const struct 
gfs2_glock *gl)
  * Returns: 0 on success, -ENOBUFS when we run out of space
  */
 
-static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl)
+int gfs2_dump_glock(struct seq_file *seq, const struct gfs2_glock *gl)
 {
const struct gfs2_glock_operations *glops = gl->gl_ops;
unsigned long long dtime;
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index 307ac31..fd580b7 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -178,33 +178,33 @@ static inline struct address_space 
*gfs2_glock2aspace(struct gfs2_glock *gl)
return NULL;
 }
 
-int gfs2_glock_get(struct gfs2_sbd *sdp,
-  u64 number, const struct gfs2_glock_operations *glops,
-  int create, struct gfs2_glock **glp);
-void gfs2_glock_hold(struct gfs2_glock *gl);
-void gfs2_glock_put_nolock(struct gfs2_glock *gl);
-void gfs2_glock_put(struct gfs2_glock *gl);
-void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, unsigned 
flags,
- struct gfs2_holder *gh);
-void gfs2_holder_reinit(unsigned int state, unsigned flags,
-   struct gfs2_holder *gh);
-void gfs2_holder_uninit(struct gfs2_holder *gh);
-int gfs2_glock_nq(struct gfs2_holder *gh);
-int gfs2_glock_poll(struct gfs2_holder *gh);
-int gfs2_glock_wait(struct gfs2_holder *gh);
-void gfs2_glock_dq(struct gfs2_holder *gh);
-void gfs2_glock_dq_wait(struct gfs2_holder *gh);
-
-void gfs2_glock_dq_uninit(struct gfs2_holder *gh);
-int gfs2_glock_nq_num(struct gfs2_sbd *sdp,
- u64 number, const struct gfs2_glock_operations *glops,
- unsigned int state, int flags, struct gfs2_holder *gh);
-
-int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs);
-void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs);
-void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs);
-
-__printf(2, 3)
+extern int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
+ const struct gfs2_glock_operations *glops,
+ int create, struct gfs2_glock **glp);
+extern void gfs2_glock_hold(struct gfs2_glock *gl);
+extern void gfs2_glock_put_nolock(struct gfs2_glock *gl);
+extern void gfs2_glock_put(struct gfs2_glock *gl);
+extern void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state,
+unsigned flags, struct gfs2_holder *gh);
+extern void gfs2_holder_reinit(unsigned int state, unsigned flags,
+  struct gfs2_holder *gh);
+extern void gfs2_holder_uninit(struct gfs2_holder *gh);
+extern int gfs2_glock_nq(struct gfs2_holder *gh);
+extern int gfs2_glock_poll(struct gfs2_holder *gh);
+extern int gfs2_glock_wait(struct gfs2_holder *gh);
+extern void gfs2_glock_dq(struct gfs2_holder *gh);
+extern void gfs2_glock_dq_wait(struct gfs2_holder *gh);
+extern void gfs2_glock_dq_uninit(struct gfs2_holder *gh);
+extern int gfs2_glock_nq_num(struct gfs2_sbd *sdp, u64 number,
+const struct gfs2_glock_operations *glops,
+unsigned int state, int flags,
+struct gfs2_holder *gh);
+extern int g

[PATCH 03/16] GFS2: Rename glops go_xmote_th to go_sync

2012-11-30 Thread Steven Whitehouse

From: Bob Peterson 

[Editorial: This is a nit, but has been a minor irritation for a long time:]

This patch renames the glops structure item go_xmote_th to go_sync.
The functionality is unchanged; it's just for readability.

Signed-off-by: Bob Peterson 
Signed-off-by: Steven Whitehouse 

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index e543871..6114571 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -535,8 +535,8 @@ __acquires(&gl->gl_spin)
(lck_flags & (LM_FLAG_TRY|LM_FLAG_TRY_1CB)))
clear_bit(GLF_BLOCKING, &gl->gl_flags);
spin_unlock(&gl->gl_spin);
-   if (glops->go_xmote_th)
-   glops->go_xmote_th(gl);
+   if (glops->go_sync)
+   glops->go_sync(gl);
if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags))
glops->go_inval(gl, target == LM_ST_DEFERRED ? 0 : 
DIO_METADATA);
clear_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags);
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 0a3e7c7..e86fe26 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -536,7 +536,7 @@ const struct gfs2_glock_operations gfs2_meta_glops = {
 };
 
 const struct gfs2_glock_operations gfs2_inode_glops = {
-   .go_xmote_th = inode_go_sync,
+   .go_sync = inode_go_sync,
.go_inval = inode_go_inval,
.go_demote_ok = inode_go_demote_ok,
.go_lock = inode_go_lock,
@@ -546,7 +546,7 @@ const struct gfs2_glock_operations gfs2_inode_glops = {
 };
 
 const struct gfs2_glock_operations gfs2_rgrp_glops = {
-   .go_xmote_th = rgrp_go_sync,
+   .go_sync = rgrp_go_sync,
.go_inval = rgrp_go_inval,
.go_lock = gfs2_rgrp_go_lock,
.go_unlock = gfs2_rgrp_go_unlock,
@@ -556,7 +556,7 @@ const struct gfs2_glock_operations gfs2_rgrp_glops = {
 };
 
 const struct gfs2_glock_operations gfs2_trans_glops = {
-   .go_xmote_th = trans_go_sync,
+   .go_sync = trans_go_sync,
.go_xmote_bh = trans_go_xmote_bh,
.go_demote_ok = trans_go_demote_ok,
.go_type = LM_TYPE_NONDISK,
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 24bb0b8..a46f034 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -205,7 +205,7 @@ struct lm_lockname {
 
 
 struct gfs2_glock_operations {
-   void (*go_xmote_th) (struct gfs2_glock *gl);
+   void (*go_sync) (struct gfs2_glock *gl);
int (*go_xmote_bh) (struct gfs2_glock *gl, struct gfs2_holder *gh);
void (*go_inval) (struct gfs2_glock *gl, int flags);
int (*go_demote_ok) (const struct gfs2_glock *gl);
-- 
1.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 12/16] GFS2: only use lvb on glocks that need it

2012-11-30 Thread Steven Whitehouse

From: David Teigland 

Save the effort of allocating, reading and writing
the lvb for most glocks that do not use it.

Signed-off-by: David Teigland 
Signed-off-by: Steven Whitehouse 

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 9d29a51..2284de4 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -105,10 +105,12 @@ static void gfs2_glock_dealloc(struct rcu_head *rcu)
 {
struct gfs2_glock *gl = container_of(rcu, struct gfs2_glock, gl_rcu);
 
-   if (gl->gl_ops->go_flags & GLOF_ASPACE)
+   if (gl->gl_ops->go_flags & GLOF_ASPACE) {
kmem_cache_free(gfs2_glock_aspace_cachep, gl);
-   else
+   } else {
+   kfree(gl->gl_lvb);
kmem_cache_free(gfs2_glock_cachep, gl);
+   }
 }
 
 void gfs2_glock_free(struct gfs2_glock *gl)
@@ -545,7 +547,10 @@ __acquires(&gl->gl_spin)
if (sdp->sd_lockstruct.ls_ops->lm_lock) {
/* lock_dlm */
ret = sdp->sd_lockstruct.ls_ops->lm_lock(gl, target, lck_flags);
-   GLOCK_BUG_ON(gl, ret);
+   if (ret) {
+   printk(KERN_ERR "GFS2: lm_lock ret %d\n", ret);
+   GLOCK_BUG_ON(gl, 1);
+   }
} else { /* lock_nolock */
finish_xmote(gl, target);
if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
@@ -734,6 +739,18 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
if (!gl)
return -ENOMEM;
 
+   memset(&gl->gl_lksb, 0, sizeof(struct dlm_lksb));
+   gl->gl_lvb = NULL;
+
+   if (glops->go_flags & GLOF_LVB) {
+   gl->gl_lvb = kzalloc(GFS2_MIN_LVB_SIZE, GFP_KERNEL);
+   if (!gl->gl_lvb) {
+   kmem_cache_free(cachep, gl);
+   return -ENOMEM;
+   }
+   gl->gl_lksb.sb_lvbptr = gl->gl_lvb;
+   }
+
atomic_inc(&sdp->sd_glock_disposal);
gl->gl_sbd = sdp;
gl->gl_flags = 0;
@@ -751,9 +768,6 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
preempt_enable();
gl->gl_stats.stats[GFS2_LKS_DCOUNT] = 0;
gl->gl_stats.stats[GFS2_LKS_QCOUNT] = 0;
-   memset(&gl->gl_lksb, 0, sizeof(struct dlm_lksb));
-   memset(gl->gl_lvb, 0, 32 * sizeof(char));
-   gl->gl_lksb.sb_lvbptr = gl->gl_lvb;
gl->gl_tchange = jiffies;
gl->gl_object = NULL;
gl->gl_hold_time = GL_GLOCK_DFT_HOLD;
@@ -775,6 +789,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
tmp = search_bucket(hash, sdp, &name);
if (tmp) {
spin_unlock_bucket(hash);
+   kfree(gl->gl_lvb);
kmem_cache_free(cachep, gl);
atomic_dec(&sdp->sd_glock_disposal);
gl = tmp;
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index e86fe26..78d4184 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -552,7 +552,7 @@ const struct gfs2_glock_operations gfs2_rgrp_glops = {
.go_unlock = gfs2_rgrp_go_unlock,
.go_dump = gfs2_rgrp_dump,
.go_type = LM_TYPE_RGRP,
-   .go_flags = GLOF_ASPACE,
+   .go_flags = GLOF_ASPACE | GLOF_LVB,
 };
 
 const struct gfs2_glock_operations gfs2_trans_glops = {
@@ -577,6 +577,7 @@ const struct gfs2_glock_operations gfs2_nondisk_glops = {
 
 const struct gfs2_glock_operations gfs2_quota_glops = {
.go_type = LM_TYPE_QUOTA,
+   .go_flags = GLOF_LVB,
 };
 
 const struct gfs2_glock_operations gfs2_journal_glops = {
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index a35ef5c..bd577fc 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -216,6 +216,7 @@ struct gfs2_glock_operations {
const int go_type;
const unsigned long go_flags;
 #define GLOF_ASPACE 1
+#define GLOF_LVB    2
 };
 
 enum {
@@ -321,7 +322,7 @@ struct gfs2_glock {
ktime_t gl_dstamp;
struct gfs2_lkstats gl_stats;
struct dlm_lksb gl_lksb;
-   char gl_lvb[32];
+   char *gl_lvb;
unsigned long gl_tchange;
void *gl_object;
 
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index f6504d3..d28ae37 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -120,7 +120,7 @@ static void gdlm_ast(void *arg)
gfs2_update_reply_times(gl);
BUG_ON(gl->gl_lksb.sb_flags & DLM_SBF_DEMOTED);
 
-   if (gl->gl_lksb.sb_flags & DLM_SBF_VALNOTVALID)
+   if (gl->gl_lksb.sb_flags & DLM_SBF_VALNOTVALID && gl->gl_lvb)
memset(gl->gl_lvb, 0, GDLM_LVB_SIZE);
 
switch (gl->gl_lksb.sb_status) {
@@ -203,8 +203,10 @@ static int make_mode(const unsigned int lmstate)
 static u32 make_flags(struct gfs2_glock *gl, const unsigned int gfs_flags,
  const int req)
 {
-   u32 lkf = DLM_LKF_VALBLK;
-   u32 lkid = gl->gl_l

[PATCH 15/16] GFS2: add error check while allocating new inodes

2012-11-30 Thread Steven Whitehouse

From: Bob Peterson 

This patch adds a return code check after attempting to allocate
a new inode during dinode creation.

Signed-off-by: Bob Peterson 
Signed-off-by: Steven Whitehouse 

diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index e321333..2405695 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -674,6 +674,10 @@ static int gfs2_create_inode(struct inode *dir, struct 
dentry *dentry,
goto fail_gunlock;
 
inode = new_inode(sdp->sd_vfs);
+   if (!inode) {
+   gfs2_glock_dq_uninit(ghs);
+   return -ENOMEM;
+   }
ip = GFS2_I(inode);
error = gfs2_rs_alloc(ip);
if (error)
-- 
1.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 14/16] GFS2: don't reference inode's glock during block allocation trace

2012-11-30 Thread Steven Whitehouse

From: Bob Peterson 

This patch changes the block allocation trace so that it references
the rgd's glock rather than the inode's glock. Now that the order
of inode creation is switched, this prevents a reference to the
glock which may not be set yet.

Signed-off-by: Bob Peterson 
Signed-off-by: Steven Whitehouse 

diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h
index bbdc78a..2ee13e8 100644
--- a/fs/gfs2/trace_gfs2.h
+++ b/fs/gfs2/trace_gfs2.h
@@ -486,7 +486,7 @@ TRACE_EVENT(gfs2_block_alloc,
),
 
TP_fast_assign(
-   __entry->dev= ip->i_gl->gl_sbd->sd_vfs->s_dev;
+   __entry->dev= rgd->rd_gl->gl_sbd->sd_vfs->s_dev;
__entry->start  = block;
__entry->inum   = ip->i_no_addr;
__entry->len= len;
-- 
1.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 16/16] GFS2: Set gl_object during inode create

2012-11-30 Thread Steven Whitehouse

From: Bob Peterson 

This patch fixes a cluster coherency problem that occurs when one
node creates a file, does several writes, then a different node
tries to write to the same file. When the inode's glock is demoted,
the inode wasn't synced to the media properly because the gl_object
wasn't set. Later, the flush daemon noticed the uncommitted data
and tried to flush it, only to discover the glock was no longer locked
properly in exclusive mode. That caused an assert withdraw.

Signed-off-by: Bob Peterson 
Signed-off-by: Steven Whitehouse 

diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 2405695..2b6f569 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -702,6 +702,7 @@ static int gfs2_create_inode(struct inode *dir, struct 
dentry *dentry,
if (error)
goto fail_free_inode;
 
+   ip->i_gl->gl_object = ip;
error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, ghs + 1);
if (error)
goto fail_free_inode;
-- 
1.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 11/16] GFS2: skip dlm_unlock calls in unmount

2012-11-30 Thread Steven Whitehouse

From: David Teigland 

When unmounting, gfs2 does a full dlm_unlock operation on every
cached lock.  This can create a very large amount of work and can
take a long time to complete.  However, the vast majority of these
dlm unlock operations are unnecessary because after all the unlocks
are done, gfs2 leaves the dlm lockspace, which automatically clears
the locks of the leaving node, without unlocking each one individually.
So, gfs2 can skip explicit dlm unlocks, and use dlm_release_lockspace to
remove the locks implicitly.  The one exception is when the lock's lvb is
being used.  In this case, dlm_unlock is called because it may update the
lvb of the resource.

Signed-off-by: David Teigland 
Signed-off-by: Steven Whitehouse 

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 6114571..9d29a51 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1526,6 +1526,7 @@ static void dump_glock_func(struct gfs2_glock *gl)
 
 void gfs2_gl_hash_clear(struct gfs2_sbd *sdp)
 {
+   set_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags);
glock_hash_walk(clear_glock, sdp);
flush_workqueue(glock_workqueue);
wait_event(sdp->sd_glock_wait, atomic_read(&sdp->sd_glock_disposal) == 
0);
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index a46f034..a35ef5c 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -539,6 +539,7 @@ enum {
SDF_DEMOTE  = 5,
SDF_NOJOURNALID = 6,
SDF_RORECOVERY  = 7, /* read only recovery */
+   SDF_SKIP_DLM_UNLOCK = 8,
 };
 
 #define GFS2_FSNAME_LEN 256
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index 0fb6539..f6504d3 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -289,6 +289,14 @@ static void gdlm_put_lock(struct gfs2_glock *gl)
gfs2_glstats_inc(gl, GFS2_LKS_DCOUNT);
gfs2_sbstats_inc(gl, GFS2_LKS_DCOUNT);
gfs2_update_request_times(gl);
+
+   /* don't want to skip dlm_unlock writing the lvb when lock is ex */
+   if (test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags) &&
+   gl->gl_state != LM_ST_EXCLUSIVE) {
+   gfs2_glock_free(gl);
+   return;
+   }
+
error = dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_VALBLK,
   NULL, gl);
if (error) {
-- 
1.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 13/16] GFS2: remove redundant lvb pointer

2012-11-30 Thread Steven Whitehouse

From: David Teigland 

The lksb struct already contains a pointer to the lvb,
so another directly from the glock struct is not needed.

Signed-off-by: David Teigland 
Signed-off-by: Steven Whitehouse 

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 2284de4..274b6be 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -108,7 +108,7 @@ static void gfs2_glock_dealloc(struct rcu_head *rcu)
if (gl->gl_ops->go_flags & GLOF_ASPACE) {
kmem_cache_free(gfs2_glock_aspace_cachep, gl);
} else {
-   kfree(gl->gl_lvb);
+   kfree(gl->gl_lksb.sb_lvbptr);
kmem_cache_free(gfs2_glock_cachep, gl);
}
 }
@@ -740,15 +740,13 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
return -ENOMEM;
 
memset(&gl->gl_lksb, 0, sizeof(struct dlm_lksb));
-   gl->gl_lvb = NULL;
 
if (glops->go_flags & GLOF_LVB) {
-   gl->gl_lvb = kzalloc(GFS2_MIN_LVB_SIZE, GFP_KERNEL);
-   if (!gl->gl_lvb) {
+   gl->gl_lksb.sb_lvbptr = kzalloc(GFS2_MIN_LVB_SIZE, GFP_KERNEL);
+   if (!gl->gl_lksb.sb_lvbptr) {
kmem_cache_free(cachep, gl);
return -ENOMEM;
}
-   gl->gl_lksb.sb_lvbptr = gl->gl_lvb;
}
 
atomic_inc(&sdp->sd_glock_disposal);
@@ -789,7 +787,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
tmp = search_bucket(hash, sdp, &name);
if (tmp) {
spin_unlock_bucket(hash);
-   kfree(gl->gl_lvb);
+   kfree(gl->gl_lksb.sb_lvbptr);
kmem_cache_free(cachep, gl);
atomic_dec(&sdp->sd_glock_disposal);
gl = tmp;
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index bd577fc..c373a24 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -322,7 +322,6 @@ struct gfs2_glock {
ktime_t gl_dstamp;
struct gfs2_lkstats gl_stats;
struct dlm_lksb gl_lksb;
-   char *gl_lvb;
unsigned long gl_tchange;
void *gl_object;
 
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index d28ae37..8dad6b0 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -120,8 +120,8 @@ static void gdlm_ast(void *arg)
gfs2_update_reply_times(gl);
BUG_ON(gl->gl_lksb.sb_flags & DLM_SBF_DEMOTED);
 
-   if (gl->gl_lksb.sb_flags & DLM_SBF_VALNOTVALID && gl->gl_lvb)
-   memset(gl->gl_lvb, 0, GDLM_LVB_SIZE);
+   if ((gl->gl_lksb.sb_flags & DLM_SBF_VALNOTVALID) && 
gl->gl_lksb.sb_lvbptr)
+   memset(gl->gl_lksb.sb_lvbptr, 0, GDLM_LVB_SIZE);
 
switch (gl->gl_lksb.sb_status) {
case -DLM_EUNLOCK: /* Unlocked, so glock can be freed */
@@ -205,7 +205,7 @@ static u32 make_flags(struct gfs2_glock *gl, const unsigned 
int gfs_flags,
 {
u32 lkf = 0;
 
-   if (gl->gl_lvb)
+   if (gl->gl_lksb.sb_lvbptr)
lkf |= DLM_LKF_VALBLK;
 
if (gfs_flags & LM_FLAG_TRY)
@@ -294,7 +294,7 @@ static void gdlm_put_lock(struct gfs2_glock *gl)
 
/* don't want to skip dlm_unlock writing the lvb when lock is ex */
if (test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags) &&
-   gl->gl_lvb && gl->gl_state != LM_ST_EXCLUSIVE) {
+   gl->gl_lksb.sb_lvbptr && (gl->gl_state != LM_ST_EXCLUSIVE)) {
gfs2_glock_free(gl);
return;
}
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 6bbf64f..ae55e24 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -869,7 +869,7 @@ static int update_qd(struct gfs2_sbd *sdp, struct 
gfs2_quota_data *qd)
if (error < 0)
return error;
 
-   qlvb = (struct gfs2_quota_lvb *)qd->qd_gl->gl_lvb;
+   qlvb = (struct gfs2_quota_lvb *)qd->qd_gl->gl_lksb.sb_lvbptr;
qlvb->qb_magic = cpu_to_be32(GFS2_MAGIC);
qlvb->__pad = 0;
qlvb->qb_limit = q.qu_limit;
@@ -893,7 +893,7 @@ restart:
if (error)
return error;
 
-   qd->qd_qb = *(struct gfs2_quota_lvb *)qd->qd_gl->gl_lvb;
+   qd->qd_qb = *(struct gfs2_quota_lvb *)qd->qd_gl->gl_lksb.sb_lvbptr;
 
if (force_refresh || qd->qd_qb.qb_magic != cpu_to_be32(GFS2_MAGIC)) {
gfs2_glock_dq_uninit(q_gh);
@@ -1506,7 +1506,7 @@ static int gfs2_get_dqblk(struct super_block *sb, struct 
kqid qid,
if (error)
goto out;
 
-   qlvb = (struct gfs2_quota_lvb *)qd->qd_gl->gl_lvb;
+   qlvb = (struct gfs2_quota_lvb *)qd->qd_gl->gl_lksb.sb_lvbptr;
fdq->d_version = FS_DQUOT_VERSION;
fdq->d_flags = (type == QUOTA_USER) ? FS_USER_QUOTA : FS_GROUP_QUOTA;
fdq->d_id = from_kqid(&init_user_ns, qid);
diff --git a/fs/gfs

[PATCH 09/16] GFS2: Eliminate redundant buffer_head manipulation in gfs2_unlink_inode

2012-11-30 Thread Steven Whitehouse

From: Bob Peterson 

Since we now have a dirty_inode that takes care of manipulating the
inode buffer and writing from the inode to the buffer, we can
eliminate some unnecessary buffer manipulations in gfs2_unlink_inode
that are now redundant.

Signed-off-by: Bob Peterson 
Signed-off-by: Steven Whitehouse 

diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index ef3ce00..e321333 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -995,7 +995,6 @@ static int gfs2_unlink_ok(struct gfs2_inode *dip, const 
struct qstr *name,
  * gfs2_unlink_inode - Removes an inode from its parent dir and unlinks it
  * @dip: The parent directory
  * @name: The name of the entry in the parent directory
- * @bh: The inode buffer for the inode to be removed
  * @inode: The inode to be removed
  *
  * Called with all the locks and in a transaction. This will only be
@@ -1005,8 +1004,7 @@ static int gfs2_unlink_ok(struct gfs2_inode *dip, const 
struct qstr *name,
  */
 
 static int gfs2_unlink_inode(struct gfs2_inode *dip,
-const struct dentry *dentry,
-struct buffer_head *bh)
+const struct dentry *dentry)
 {
struct inode *inode = dentry->d_inode;
struct gfs2_inode *ip = GFS2_I(inode);
@@ -1046,7 +1044,6 @@ static int gfs2_unlink(struct inode *dir, struct dentry 
*dentry)
struct gfs2_sbd *sdp = GFS2_SB(dir);
struct inode *inode = dentry->d_inode;
struct gfs2_inode *ip = GFS2_I(inode);
-   struct buffer_head *bh;
struct gfs2_holder ghs[3];
struct gfs2_rgrpd *rgd;
int error;
@@ -1095,14 +1092,9 @@ static int gfs2_unlink(struct inode *dir, struct dentry 
*dentry)
 
error = gfs2_trans_begin(sdp, 2*RES_DINODE + 3*RES_LEAF + RES_RG_BIT, 
0);
if (error)
-   goto out_gunlock;
-
-   error = gfs2_meta_inode_buffer(ip, &bh);
-   if (error)
goto out_end_trans;
 
-   error = gfs2_unlink_inode(dip, dentry, bh);
-   brelse(bh);
+   error = gfs2_unlink_inode(dip, dentry);
 
 out_end_trans:
gfs2_trans_end(sdp);
@@ -1402,14 +1394,8 @@ static int gfs2_rename(struct inode *odir, struct dentry 
*odentry,
 
/* Remove the target file, if it exists */
 
-   if (nip) {
-   struct buffer_head *bh;
-   error = gfs2_meta_inode_buffer(nip, &bh);
-   if (error)
-   goto out_end_trans;
-   error = gfs2_unlink_inode(ndip, ndentry, bh);
-   brelse(bh);
-   }
+   if (nip)
+   error = gfs2_unlink_inode(ndip, ndentry);
 
if (dir_rename) {
error = gfs2_dir_mvino(ip, &gfs2_qdotdot, ndip, DT_DIR);
-- 
1.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

GFS2: Pre-pull patch posting (merge window)

2012-11-30 Thread Steven Whitehouse

Hi,

So yes, this is a bit early, but the tree seems to have settled down
now, and I'd like to hold off any further feature patches until the
subsequent merge window at this stage.

The main feature this time is the new Orlov allocator and the patches
leading up to it which allow us to allocate new inodes from their own
allocation context, rather than borrowing that of their parent directory.
It is this change which then allows us to choose a different location
for subdirectories when required. This works exactly as per the ext3
implementation from the user's point of view.

In addition to that, we've got a speed up in gfs2_rbm_from_block()
from Bob Peterson, three locking related improvements from Dave
Teigland plus a selection of smaller bug fixes and clean ups.

Steve.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 05/16] GFS2: Use proper allocation context for new inodes

2012-11-30 Thread Steven Whitehouse

Rather than using the parent directory's allocation context, this
patch allocates the new inode earlier in the process and then uses
it to contain all the information required. As a result, we can now
use the new inode's own allocation context to allocate it rather
than having to use the parent directory's context. This gives us a
lot more flexibility in where the inode is placed on disk.

Signed-off-by: Steven Whitehouse 

diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 381893c..749b05a 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -364,34 +364,34 @@ static int create_ok(struct gfs2_inode *dip, const struct 
qstr *name,
return 0;
 }
 
-static void munge_mode_uid_gid(struct gfs2_inode *dip, umode_t *mode,
-  unsigned int *uid, unsigned int *gid)
+static void munge_mode_uid_gid(const struct gfs2_inode *dip,
+  struct inode *inode)
 {
if (GFS2_SB(&dip->i_inode)->sd_args.ar_suiddir &&
(dip->i_inode.i_mode & S_ISUID) && dip->i_inode.i_uid) {
-   if (S_ISDIR(*mode))
-   *mode |= S_ISUID;
+   if (S_ISDIR(inode->i_mode))
+   inode->i_mode |= S_ISUID;
else if (dip->i_inode.i_uid != current_fsuid())
-   *mode &= ~07111;
-   *uid = dip->i_inode.i_uid;
+   inode->i_mode &= ~07111;
+   inode->i_uid = dip->i_inode.i_uid;
} else
-   *uid = current_fsuid();
+   inode->i_uid = current_fsuid();
 
if (dip->i_inode.i_mode & S_ISGID) {
-   if (S_ISDIR(*mode))
-   *mode |= S_ISGID;
-   *gid = dip->i_inode.i_gid;
+   if (S_ISDIR(inode->i_mode))
+   inode->i_mode |= S_ISGID;
+   inode->i_gid = dip->i_inode.i_gid;
} else
-   *gid = current_fsgid();
+   inode->i_gid = current_fsgid();
 }
 
-static int alloc_dinode(struct gfs2_inode *dip, u64 *no_addr, u64 *generation)
+static int alloc_dinode(struct gfs2_inode *ip)
 {
-   struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
+   struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
int error;
int dblocks = 1;
 
-   error = gfs2_inplace_reserve(dip, RES_DINODE);
+   error = gfs2_inplace_reserve(ip, RES_DINODE);
if (error)
goto out;
 
@@ -399,12 +399,15 @@ static int alloc_dinode(struct gfs2_inode *dip, u64 
*no_addr, u64 *generation)
if (error)
goto out_ipreserv;
 
-   error = gfs2_alloc_blocks(dip, no_addr, &dblocks, 1, generation);
+   error = gfs2_alloc_blocks(ip, &ip->i_no_addr, &dblocks, 1, 
&ip->i_generation);
+   ip->i_no_formal_ino = ip->i_generation;
+   ip->i_inode.i_ino = ip->i_no_addr;
+   ip->i_goal = ip->i_no_addr;
 
gfs2_trans_end(sdp);
 
 out_ipreserv:
-   gfs2_inplace_release(dip);
+   gfs2_inplace_release(ip);
 out:
return error;
 }
@@ -429,52 +432,42 @@ static void gfs2_init_dir(struct buffer_head *dibh,
 /**
  * init_dinode - Fill in a new dinode structure
  * @dip: The directory this inode is being created in
- * @gl: The glock covering the new inode
- * @inum: The inode number
- * @mode: The file permissions
- * @uid: The uid of the new inode
- * @gid: The gid of the new inode
- * @generation: The generation number of the new inode
- * @dev: The device number (if a device node)
+ * @ip: The inode
  * @symname: The symlink destination (if a symlink)
- * @size: The inode size (ignored for directories)
  * @bhp: The buffer head (returned to caller)
  *
  */
 
-static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
-   const struct gfs2_inum_host *inum, umode_t mode,
-   unsigned int uid, unsigned int gid,
-   const u64 *generation, dev_t dev, const char *symname,
-   unsigned size, struct buffer_head **bhp)
+static void init_dinode(struct gfs2_inode *dip, struct gfs2_inode *ip,
+   const char *symname, struct buffer_head **bhp)
 {
struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
struct gfs2_dinode *di;
struct buffer_head *dibh;
struct timespec tv = CURRENT_TIME;
 
-   dibh = gfs2_meta_new(gl, inum->no_addr);
-   gfs2_trans_add_bh(gl, dibh, 1);
+   dibh = gfs2_meta_new(ip->i_gl, ip->i_no_addr);
+   gfs2_trans_add_bh(ip->i_gl, dibh, 1);
gfs2_metatype_set(dibh, GFS2_METATYPE_DI, GFS2_FORMAT_DI);
gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
di = (struct gfs2_dinode *)dibh->b_data;
 
-   di->di_num.no_formal_ino = cpu_to_be64(inum->no_formal_ino);
-   di->di_num.no_addr = cpu

[PATCH 08/16] GFS2: Use dirty_inode in gfs2_dir_add

2012-11-30 Thread Steven Whitehouse

From: Bob Peterson 

This patch changes the gfs2_dir_add function so that it uses
the dirty_inode function (via mark_inode_dirty) rather than manually
updating the dinode.

Signed-off-by: Bob Peterson 
Signed-off-by: Steven Whitehouse 

diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 259b088..9a35670 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -1676,16 +1676,11 @@ int gfs2_dir_add(struct inode *inode, const struct qstr 
*name,
be16_add_cpu(&leaf->lf_entries, 1);
}
brelse(bh);
-   error = gfs2_meta_inode_buffer(ip, &bh);
-   if (error)
-   break;
-   gfs2_trans_add_bh(ip->i_gl, bh, 1);
ip->i_entries++;
ip->i_inode.i_mtime = ip->i_inode.i_ctime = 
CURRENT_TIME;
if (S_ISDIR(nip->i_inode.i_mode))
inc_nlink(&ip->i_inode);
-   gfs2_dinode_out(ip, bh->b_data);
-   brelse(bh);
+   mark_inode_dirty(inode);
error = 0;
break;
}
-- 
1.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 10/16] GFS2: Fix one RG corner case

2012-11-30 Thread Steven Whitehouse

For filesystems with only a single resource group, we need to be careful
that the allocation loop will not land up with a NULL resource group. This
fixes a bug in a previous patch where the gfs2_rgrpd_get_next() function
was being used instead of gfs2_rgrpd_get_first()

Signed-off-by: Steven Whitehouse 

diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 99a6197..5625e93 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1776,10 +1776,11 @@ static u32 gfs2_orlov_skip(const struct gfs2_inode *ip)
 static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd 
*begin)
 {
struct gfs2_rgrpd *rgd = *pos;
+   struct gfs2_sbd *sdp = rgd->rd_sbd;
 
rgd = gfs2_rgrpd_get_next(rgd);
if (rgd == NULL)
-   rgd = gfs2_rgrpd_get_next(NULL);
+   rgd = gfs2_rgrpd_get_first(sdp);
*pos = rgd;
if (rgd != begin) /* If we didn't wrap */
return true;
-- 
1.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 07/16] GFS2: Fix truncation of journaled data files

2012-11-30 Thread Steven Whitehouse

This patch fixes an issue relating to not having enough revokes
available when truncating journaled data files. In order to ensure
that we do not run out, the truncation is broken into separate pieces
if it is large enough.

Tested using fsx on a journaled data file.

Signed-off-by: Steven Whitehouse 

diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index de70e52..a68e91b 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -991,6 +991,41 @@ unlock:
return err;
 }
 
+/**
+ * gfs2_journaled_truncate - Wrapper for truncate_pagecache for jdata files
+ * @inode: The inode being truncated
+ * @oldsize: The original (larger) size
+ * @newsize: The new smaller size
+ *
+ * With jdata files, we have to journal a revoke for each block which is
+ * truncated. As a result, we need to split this into separate transactions
+ * if the number of pages being truncated gets too large.
+ */
+
+#define GFS2_JTRUNC_REVOKES 8192
+
+static int gfs2_journaled_truncate(struct inode *inode, u64 oldsize, u64 
newsize)
+{
+   struct gfs2_sbd *sdp = GFS2_SB(inode);
+   u64 max_chunk = GFS2_JTRUNC_REVOKES * sdp->sd_vfs->s_blocksize;
+   u64 chunk;
+   int error;
+
+   while (oldsize != newsize) {
+   chunk = oldsize - newsize;
+   if (chunk > max_chunk)
+   chunk = max_chunk;
+   truncate_pagecache(inode, oldsize, oldsize - chunk);
+   oldsize -= chunk;
+   gfs2_trans_end(sdp);
+   error = gfs2_trans_begin(sdp, RES_DINODE, GFS2_JTRUNC_REVOKES);
+   if (error)
+   return error;
+   }
+
+   return 0;
+}
+
 static int trunc_start(struct inode *inode, u64 oldsize, u64 newsize)
 {
struct gfs2_inode *ip = GFS2_I(inode);
@@ -1000,8 +1035,10 @@ static int trunc_start(struct inode *inode, u64 oldsize, 
u64 newsize)
int journaled = gfs2_is_jdata(ip);
int error;
 
-   error = gfs2_trans_begin(sdp,
-RES_DINODE + (journaled ? RES_JDATA : 0), 0);
+   if (journaled)
+   error = gfs2_trans_begin(sdp, RES_DINODE + RES_JDATA, 
GFS2_JTRUNC_REVOKES);
+   else
+   error = gfs2_trans_begin(sdp, RES_DINODE, 0);
if (error)
return error;
 
@@ -1026,7 +1063,16 @@ static int trunc_start(struct inode *inode, u64 oldsize, 
u64 newsize)
ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
gfs2_dinode_out(ip, dibh->b_data);
 
-   truncate_pagecache(inode, oldsize, newsize);
+   if (journaled)
+   error = gfs2_journaled_truncate(inode, oldsize, newsize);
+   else
+   truncate_pagecache(inode, oldsize, newsize);
+
+   if (error) {
+   brelse(dibh);
+   return error;
+   }
+
 out_brelse:
brelse(dibh);
 out:
-- 
1.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 06/16] GFS2: Add Orlov allocator

2012-11-30 Thread Steven Whitehouse

Just like ext3, this works on the root directory and any directory
with the +T flag set. Also, just like ext3, any subdirectory created
in one of the just mentioned cases will be allocated to a random
resource group (GFS2 equivalent of a block group).

If you are creating a set of directories, each of which will contain a
job running on a different node, then by setting +T on the parent
directory before creating the subdirectories, each will land up in a
different resource group, and thus resource group contention between
nodes will be kept to a minimum.

Signed-off-by: Steven Whitehouse 

diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 01c4975..30de4f2 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -643,7 +643,7 @@ static int gfs2_write_begin(struct file *file, struct 
address_space *mapping,
goto out_unlock;
 
requested = data_blocks + ind_blocks;
-   error = gfs2_inplace_reserve(ip, requested);
+   error = gfs2_inplace_reserve(ip, requested, 0);
if (error)
goto out_qunlock;
}
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 1fd3ae2..de70e52 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -1178,7 +1178,7 @@ static int do_grow(struct inode *inode, u64 size)
if (error)
return error;
 
-   error = gfs2_inplace_reserve(ip, 1);
+   error = gfs2_inplace_reserve(ip, 1, 0);
if (error)
goto do_grow_qunlock;
unstuff = 1;
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index e056b4c..dfe2d8c 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -432,7 +432,7 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, 
struct vm_fault *vmf)
if (ret)
goto out_unlock;
gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks);
-   ret = gfs2_inplace_reserve(ip, data_blocks + ind_blocks);
+   ret = gfs2_inplace_reserve(ip, data_blocks + ind_blocks, 0);
if (ret)
goto out_quota_unlock;
 
@@ -825,7 +825,7 @@ static long gfs2_fallocate(struct file *file, int mode, 
loff_t offset,
 retry:
gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks);
 
-   error = gfs2_inplace_reserve(ip, data_blocks + ind_blocks);
+   error = gfs2_inplace_reserve(ip, data_blocks + ind_blocks, 0);
if (error) {
if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) {
bytes >>= 1;
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 749b05a..ef3ce00 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -385,13 +385,13 @@ static void munge_mode_uid_gid(const struct gfs2_inode 
*dip,
inode->i_gid = current_fsgid();
 }
 
-static int alloc_dinode(struct gfs2_inode *ip)
+static int alloc_dinode(struct gfs2_inode *ip, u32 flags)
 {
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
int error;
int dblocks = 1;
 
-   error = gfs2_inplace_reserve(ip, RES_DINODE);
+   error = gfs2_inplace_reserve(ip, RES_DINODE, flags);
if (error)
goto out;
 
@@ -560,7 +560,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct 
qstr *name,
if (error)
goto fail_quota_locks;
 
-   error = gfs2_inplace_reserve(dip, sdp->sd_max_dirres);
+   error = gfs2_inplace_reserve(dip, sdp->sd_max_dirres, 0);
if (error)
goto fail_quota_locks;
 
@@ -650,6 +650,7 @@ static int gfs2_create_inode(struct inode *dir, struct 
dentry *dentry,
struct gfs2_glock *io_gl;
int error;
struct buffer_head *bh = NULL;
+   u32 aflags = 0;
 
if (!name->len || name->len > GFS2_FNAMESIZE)
return -ENAMETOOLONG;
@@ -685,7 +686,11 @@ static int gfs2_create_inode(struct inode *dir, struct 
dentry *dentry,
munge_mode_uid_gid(dip, inode);
ip->i_goal = dip->i_goal;
 
-   error = alloc_dinode(ip);
+   if ((GFS2_I(sdp->sd_root_dir->d_inode) == dip) ||
+   (dip->i_diskflags & GFS2_DIF_TOPDIR))
+   aflags |= GFS2_AF_ORLOV;
+
+   error = alloc_dinode(ip, aflags);
if (error)
goto fail_free_inode;
 
@@ -897,7 +902,7 @@ static int gfs2_link(struct dentry *old_dentry, struct 
inode *dir,
if (error)
goto out_gunlock;
 
-   error = gfs2_inplace_reserve(dip, sdp->sd_max_dirres);
+   error = gfs2_inplace_reserve(dip, sdp->sd_max_dirres, 0);
if (error)
goto out_gunlock_q;
 
@@ -1378,7 +1383,7 @@ static int gfs2_rename(struct inode *odir, struct dentry 
*odentry,
if (error)
got

[PATCH 04/16] GFS2: Add test for resource group congestion status

2012-11-30 Thread Steven Whitehouse

This patch uses information gathered by the recent glock statistics
patch in order to derive a boolean verdict on the congestion
status of a resource group. This is then used when making decisions
on which resource group to choose during block allocation.

The aim is to avoid resource groups which are heavily contended
by other nodes, while still ensuring locality of access wherever
possible.

Once a reservation has been made in a particular resource group
we continue to use that resource group until a new reservation is
required. This should help to ensure that we do not change resource
groups too often.

Signed-off-by: Steven Whitehouse 

diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 669b89b..bdf3e64 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1681,6 +1681,88 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 
*last_unlinked, u64 skip
return;
 }
 
+/**
+ * gfs2_rgrp_congested - Use stats to figure out whether an rgrp is congested
+ * @rgd: The rgrp in question
+ * @loops: An indication of how picky we can be (0=very, 1=less so)
+ *
+ * This function uses the recently added glock statistics in order to
+ * figure out whether a particular resource group is suffering from
+ * contention from multiple nodes. This is done purely on the basis
+ * of timings, since this is the only data we have to work with and
+ * our aim here is to reject a resource group which is highly contended
+ * but (very important) not to do this too often in order to ensure that
+ * we do not land up introducing fragmentation by changing resource
+ * groups when not actually required.
+ *
+ * The calculation is fairly simple, we want to know whether the SRTTB
+ * (i.e. smoothed round trip time for blocking operations) to acquire
+ * the lock for this rgrp's glock is significantly greater than the
+ * time taken for resource groups on average. We introduce a margin in
+ * the form of the variable @var which is computed as the sum of the two
+ * respective variances, and multiplied by a factor depending on @loops
+ * and whether we have a lot of data to base the decision on. This is
+ * then tested against the square difference of the means in order to
+ * decide whether the result is statistically significant or not.
+ *
+ * Returns: A boolean verdict on the congestion status
+ */
+
+static bool gfs2_rgrp_congested(const struct gfs2_rgrpd *rgd, int loops)
+{
+   const struct gfs2_glock *gl = rgd->rd_gl;
+   const struct gfs2_sbd *sdp = gl->gl_sbd;
+   struct gfs2_lkstats *st;
+   s64 r_dcount, l_dcount;
+   s64 r_srttb, l_srttb;
+   s64 srttb_diff;
+   s64 sqr_diff;
+   s64 var;
+
+   preempt_disable();
+   st = &this_cpu_ptr(sdp->sd_lkstats)->lkstats[LM_TYPE_RGRP];
+   r_srttb = st->stats[GFS2_LKS_SRTTB];
+   r_dcount = st->stats[GFS2_LKS_DCOUNT];
+   var = st->stats[GFS2_LKS_SRTTVARB] +
+ gl->gl_stats.stats[GFS2_LKS_SRTTVARB];
+   preempt_enable();
+
+   l_srttb = gl->gl_stats.stats[GFS2_LKS_SRTTB];
+   l_dcount = gl->gl_stats.stats[GFS2_LKS_DCOUNT];
+
+   if ((l_dcount < 1) || (r_dcount < 1) || (r_srttb == 0))
+   return false;
+
+   srttb_diff = r_srttb - l_srttb;
+   sqr_diff = srttb_diff * srttb_diff;
+
+   var *= 2;
+   if (l_dcount < 8 || r_dcount < 8)
+   var *= 2;
+   if (loops == 1)
+   var *= 2;
+
+   return ((srttb_diff < 0) && (sqr_diff > var));
+}
+
+/**
+ * gfs2_rgrp_used_recently
+ * @rs: The block reservation with the rgrp to test
+ * @msecs: The time limit in milliseconds
+ *
+ * Returns: True if the rgrp glock has been used within the time limit
+ */
+static bool gfs2_rgrp_used_recently(const struct gfs2_blkreserv *rs,
+   u64 msecs)
+{
+   u64 tdiff;
+
+   tdiff = ktime_to_ns(ktime_sub(ktime_get_real(),
+rs->rs_rbm.rgd->rd_gl->gl_dstamp));
+
+   return tdiff > (msecs * 1000 * 1000);
+}
+
 static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd 
*begin)
 {
struct gfs2_rgrpd *rgd = *pos;
@@ -1707,7 +1789,7 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 
requested)
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct gfs2_rgrpd *begin = NULL;
struct gfs2_blkreserv *rs = ip->i_res;
-   int error = 0, rg_locked, flags = LM_FLAG_TRY;
+   int error = 0, rg_locked, flags = 0;
u64 last_unlinked = NO_BLOCK;
int loops = 0;
 
@@ -1731,13 +1813,18 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 
requested)
 
if (!gfs2_glock_is_locked_by_me(rs->rs_rbm.rgd->rd_gl)) {
rg_locked = 0;
+   if (!gfs2_rs_active(rs) && (loops < 2) &&
+gfs2_rgrp_used_recently(rs, 1000) &&
+g

[PATCH 02/16] GFS2: Speed up gfs2_rbm_from_block

2012-11-30 Thread Steven Whitehouse

From: Bob Peterson 

This patch is a rewrite of function gfs2_rbm_from_block. Rather than
looping to find the right bitmap, the code now does a few simple
math calculations.

I compared the performance of both algorithms side by side and the new
algorithm is noticeably faster. Sample instrumentation output from a
"fast" machine:

5 million calls: millisec spent: Orig: 166 New: 113
5 million calls: millisec spent: Orig: 189 New: 114

In addition, I ran postmark (on a somewhat slower CPU) before and after
the new algorithm was put in place and postmark showed a decent
improvement:

Before the new algorithm:
-
Time:
645 seconds total
584 seconds of transactions (171 per second)

Files:
150087 created (232 per second)
Creation alone: 10 files (2083 per second)
Mixed with transactions: 50087 files (85 per second)
49995 read (85 per second)
49991 appended (85 per second)
150087 deleted (232 per second)
Deletion alone: 100174 files (7705 per second)
Mixed with transactions: 49913 files (85 per second)

Data:
273.42 megabytes read (434.08 kilobytes per second)
852.13 megabytes written (1.32 megabytes per second)

With the new algorithm:
---
Time:
599 seconds total
530 seconds of transactions (188 per second)

Files:
150087 created (250 per second)
Creation alone: 10 files (1886 per second)
Mixed with transactions: 50087 files (94 per second)
49995 read (94 per second)
49991 appended (94 per second)
150087 deleted (250 per second)
Deletion alone: 100174 files (6260 per second)
Mixed with transactions: 49913 files (94 per second)

Data:
273.42 megabytes read (467.42 kilobytes per second)
852.13 megabytes written (1.42 megabytes per second)

Signed-off-by: Bob Peterson 
Signed-off-by: Steven Whitehouse 

diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 3d469d3..24bb0b8 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -621,6 +621,7 @@ struct gfs2_sbd {
u32 sd_hash_bsize_shift;
u32 sd_hash_ptrs;   /* Number of pointers in a hash block */
u32 sd_qc_per_block;
+   u32 sd_blocks_per_bitmap;
u32 sd_max_dirres;  /* Max blocks needed to add a directory entry */
u32 sd_max_height;  /* Max height of a file's metadata tree */
u64 sd_heightsize[GFS2_MAX_META_HEIGHT + 1];
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index e443966..0e3554e 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -278,6 +278,9 @@ static int gfs2_read_sb(struct gfs2_sbd *sdp, int silent)
sdp->sd_qc_per_block = (sdp->sd_sb.sb_bsize -
sizeof(struct gfs2_meta_header)) /
sizeof(struct gfs2_quota_change);
+   sdp->sd_blocks_per_bitmap = (sdp->sd_sb.sb_bsize -
+sizeof(struct gfs2_meta_header))
+   * GFS2_NBBY; /* not the rgrp bitmap, subsequent bitmaps only */
 
/* Compute maximum reservation required to add a entry to a directory */
 
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 38fe18f..669b89b 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -251,22 +251,25 @@ static u32 gfs2_bitfit(const u8 *buf, const unsigned int 
len,
 static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block)
 {
u64 rblock = block - rbm->rgd->rd_data0;
-   u32 goal = (u32)rblock;
-   int x;
+   u32 x;
 
if (WARN_ON_ONCE(rblock > UINT_MAX))
return -EINVAL;
if (block >= rbm->rgd->rd_data0 + rbm->rgd->rd_data)
return -E2BIG;
 
-   for (x = 0; x < rbm->rgd->rd_length; x++) {
-   rbm->bi = rbm->rgd->rd_bits + x;
-   if (goal < (rbm->bi->bi_start + rbm->bi->bi_len) * GFS2_NBBY) {
-   rbm->offset = goal - (rbm->bi->bi_start * GFS2_NBBY);
-   break;
-   }
-   }
+   rbm->bi = rbm->rgd->rd_bits;
+   rbm->offset = (u32)(rblock);
+   /* Check if the block is within the first block */
+   if (rbm->offset < (rbm->bi->bi_start + rbm->bi->bi_len) * GFS2_NBBY)
+   return 0;
 
+   /* Adjust for the size diff between gfs2_meta_header and gfs2_rgrp */
+   rbm->offset += (sizeof(struct gfs2_rgrp) -
+   sizeof(struct gfs2_meta_header)) * GFS2_NBBY;
+   x = rbm->offset / rbm->rgd->rd_sbd->sd_blocks_per_bitmap;
+   rbm->offset -= x * rbm->rgd->rd_sbd->sd_blocks_per_bitmap;
+   rbm->bi += x;
return 0;
 }
 
-- 
1.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux

Re: [Cluster-devel] [PATCH v2 08/18] gfs2: use ->invalidatepage() length argument

2013-02-05 Thread Steven Whitehouse

Hi,

Acked-by: Steven Whitehouse 

Steve.

On Tue, 2013-02-05 at 10:12 +0100, Lukas Czerner wrote:
> ->invalidatepage() aop now accepts range to invalidate so we can make
> use of it in gfs2_invalidatepage().
> 
> Signed-off-by: Lukas Czerner 
> Cc: cluster-de...@redhat.com
> ---
>  fs/gfs2/aops.c |9 +++--
>  1 files changed, 7 insertions(+), 2 deletions(-)
> 
> diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
> index 5bd558c..3cf3dc8 100644
> --- a/fs/gfs2/aops.c
> +++ b/fs/gfs2/aops.c
> @@ -949,24 +949,29 @@ static void gfs2_invalidatepage(struct page *page, 
> unsigned int offset,
>   unsigned int length)
>  {
>   struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host);
> + unsigned int stop = offset + length;
> + int partial_page = (offset || length < PAGE_CACHE_SIZE);
>   struct buffer_head *bh, *head;
>   unsigned long pos = 0;
>  
>   BUG_ON(!PageLocked(page));
> - if (offset == 0)
> + if (!partial_page)
>   ClearPageChecked(page);
>   if (!page_has_buffers(page))
>   goto out;
>  
>   bh = head = page_buffers(page);
>   do {
> + if (pos + bh->b_size > stop)
> + return;
> +
>   if (offset <= pos)
>   gfs2_discard(sdp, bh);
>   pos += bh->b_size;
>   bh = bh->b_this_page;
>   } while (bh != head);
>  out:
> - if (offset == 0)
> + if (!partial_page)
>   try_to_release_page(page, 0);
>  }
>  


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [patch for-3.8] fs, dlm: fix build error when EXPERIMENTAL is disabled

2013-02-12 Thread Steven Whitehouse

Hi,

On Mon, 2013-02-11 at 13:48 -0800, David Rientjes wrote:
> CONFIG_IP_SCTP relies on being able to select things like CONFIG_CRC32C to 
> build.  Thus, nothing should be selecting CONFIG_IP_SCTP that does not 
> meet its requirements.
> 
> For example, if CONFIG_EXPERIMENTAL is disabled and CONFIG_DLM is enabled, 
> the build fails at link time:
> 
>   net/built-in.o: In function `sctp_crc32c':
>   include/net/sctp/checksum.h:51: undefined reference to `crc32c'
>   include/net/sctp/checksum.h:51: undefined reference to `crc32c'
>   include/net/sctp/checksum.h:51: undefined reference to `crc32c'
>   include/net/sctp/checksum.h:51: undefined reference to `crc32c'
>   include/net/sctp/checksum.h:51: undefined reference to `crc32c'
>   net/built-in.o:include/net/sctp/checksum.h:51: more undefined 
> references to `crc32c' follow
> 
> Fix this by making CONFIG_DLM depend on CONFIG_EXPERIMENTAL so that 
> CONFIG_IP_SCTP properly builds.
> 
That doesn't seem right to me... DLM has not been experimental for a
long time now. Why not just select CRC32 in addition to IP_SCTP ?

Steve.

> Signed-off-by: David Rientjes 
> ---
>  fs/dlm/Kconfig | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/fs/dlm/Kconfig b/fs/dlm/Kconfig
> --- a/fs/dlm/Kconfig
> +++ b/fs/dlm/Kconfig
> @@ -2,6 +2,7 @@ menuconfig DLM
>   tristate "Distributed Lock Manager (DLM)"
>   depends on INET
>   depends on SYSFS && CONFIGFS_FS && (IPV6 || IPV6=n)
> + depends on EXPERIMENTAL
>   select IP_SCTP
>   help
>   A general purpose distributed lock manager for kernel or userspace
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

GFS2: Pre-pull patch posting

2013-04-05 Thread Steven Whitehouse

Hi,

Here are a few GFS2 fixes which are pending. There are two patches
which fix up a couple of minor issues in the DLM interface code,
a missing error path in gfs2_rs_alloc(), two patches which fix problems
during "withdraw" and a fix for discards/FITRIM when using 4k sector
sized devices,

Steve.


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 1/6] GFS2: use kmalloc for lvb bitmap

2013-04-05 Thread Steven Whitehouse

From: David Teigland 

The temp lvb bitmap was on the stack, which could
be an alignment problem for __set_bit_le.  Use
kmalloc for it instead.

Signed-off-by: David Teigland 
Signed-off-by: Steven Whitehouse 

diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 156e42e..5c29216 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -588,6 +588,7 @@ struct lm_lockstruct {
struct dlm_lksb ls_control_lksb; /* control_lock */
char ls_control_lvb[GDLM_LVB_SIZE]; /* control_lock lvb */
struct completion ls_sync_wait; /* {control,mounted}_{lock,unlock} */
+   char *ls_lvb_bits;
 
spinlock_t ls_recover_spin; /* protects following fields */
unsigned long ls_recover_flags; /* DFL_ */
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index 9802de0..b15bb45 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -580,7 +580,6 @@ static void gfs2_control_func(struct work_struct *work)
 {
struct gfs2_sbd *sdp = container_of(work, struct gfs2_sbd, 
sd_control_work.work);
struct lm_lockstruct *ls = &sdp->sd_lockstruct;
-   char lvb_bits[GDLM_LVB_SIZE];
uint32_t block_gen, start_gen, lvb_gen, flags;
int recover_set = 0;
int write_lvb = 0;
@@ -634,7 +633,7 @@ static void gfs2_control_func(struct work_struct *work)
return;
}
 
-   control_lvb_read(ls, &lvb_gen, lvb_bits);
+   control_lvb_read(ls, &lvb_gen, ls->ls_lvb_bits);
 
spin_lock(&ls->ls_recover_spin);
if (block_gen != ls->ls_recover_block ||
@@ -664,10 +663,10 @@ static void gfs2_control_func(struct work_struct *work)
 
ls->ls_recover_result[i] = 0;
 
-   if (!test_bit_le(i, lvb_bits + JID_BITMAP_OFFSET))
+   if (!test_bit_le(i, ls->ls_lvb_bits + 
JID_BITMAP_OFFSET))
continue;
 
-   __clear_bit_le(i, lvb_bits + JID_BITMAP_OFFSET);
+   __clear_bit_le(i, ls->ls_lvb_bits + JID_BITMAP_OFFSET);
write_lvb = 1;
}
}
@@ -691,7 +690,7 @@ static void gfs2_control_func(struct work_struct *work)
continue;
if (ls->ls_recover_submit[i] < start_gen) {
ls->ls_recover_submit[i] = 0;
-   __set_bit_le(i, lvb_bits + JID_BITMAP_OFFSET);
+   __set_bit_le(i, ls->ls_lvb_bits + 
JID_BITMAP_OFFSET);
}
}
/* even if there are no bits to set, we need to write the
@@ -705,7 +704,7 @@ static void gfs2_control_func(struct work_struct *work)
spin_unlock(&ls->ls_recover_spin);
 
if (write_lvb) {
-   control_lvb_write(ls, start_gen, lvb_bits);
+   control_lvb_write(ls, start_gen, ls->ls_lvb_bits);
flags = DLM_LKF_CONVERT | DLM_LKF_VALBLK;
} else {
flags = DLM_LKF_CONVERT;
@@ -725,7 +724,7 @@ static void gfs2_control_func(struct work_struct *work)
 */
 
for (i = 0; i < recover_size; i++) {
-   if (test_bit_le(i, lvb_bits + JID_BITMAP_OFFSET)) {
+   if (test_bit_le(i, ls->ls_lvb_bits + JID_BITMAP_OFFSET)) {
fs_info(sdp, "recover generation %u jid %d\n",
start_gen, i);
gfs2_recover_set(sdp, i);
@@ -758,7 +757,6 @@ static void gfs2_control_func(struct work_struct *work)
 static int control_mount(struct gfs2_sbd *sdp)
 {
struct lm_lockstruct *ls = &sdp->sd_lockstruct;
-   char lvb_bits[GDLM_LVB_SIZE];
uint32_t start_gen, block_gen, mount_gen, lvb_gen;
int mounted_mode;
int retries = 0;
@@ -857,7 +855,7 @@ locks_done:
 * lvb_gen will be non-zero.
 */
 
-   control_lvb_read(ls, &lvb_gen, lvb_bits);
+   control_lvb_read(ls, &lvb_gen, ls->ls_lvb_bits);
 
if (lvb_gen == 0xFFFFFFFF) {
/* special value to force mount attempts to fail */
@@ -887,7 +885,7 @@ locks_done:
 * and all lvb bits to be clear (no pending journal recoveries.)
 */
 
-   if (!all_jid_bits_clear(lvb_bits)) {
+   if (!all_jid_bits_clear(ls->ls_lvb_bits)) {
/* journals need recovery, wait until all are clear */
fs_info(sdp, "control_mount wait for journal recovery\n");
goto restart;
@@ -949,7 +947,6 @@ static int dlm_recovery_wait(void *word)
 static int control_first_done(struct gfs2_sbd *sdp)
 {
struct lm_lockstruct *ls = &sdp->sd_lockstruct;
-   char lvb_bits[GDLM_LVB_SIZE];
uint32_t start_gen, block_gen;
int error;
 
@@ -991,8 +988,8 @@ restart:
memset(ls->ls_recover_result, 0, ls->ls_recover_size*sizeof(uint32_t));

[PATCH 2/6] GFS2: use memchr_inv

2013-04-05 Thread Steven Whitehouse

From: Akinobu Mita 

Use memchr_inv to verify that the specified memory range is cleared.

Signed-off-by: Akinobu Mita 
Cc: Steven Whitehouse 
Cc: cluster-de...@redhat.com
Cc: Christine Caulfield 
Cc: David Teigland 

diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index b15bb45..c8423d6 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -483,12 +483,8 @@ static void control_lvb_write(struct lm_lockstruct *ls, 
uint32_t lvb_gen,
 
 static int all_jid_bits_clear(char *lvb)
 {
-   int i;
-   for (i = JID_BITMAP_OFFSET; i < GDLM_LVB_SIZE; i++) {
-   if (lvb[i])
-   return 0;
-   }
-   return 1;
+   return !memchr_inv(lvb + JID_BITMAP_OFFSET, 0,
+   GDLM_LVB_SIZE - JID_BITMAP_OFFSET);
 }
 
 static void sync_wait_cb(void *arg)
-- 
1.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 3/6] GFS2: return error if malloc failed in gfs2_rs_alloc()

2013-04-05 Thread Steven Whitehouse

From: Wei Yongjun 

The error code in gfs2_rs_alloc() is set to ENOMEM on failure
but is never used; instead, gfs2_rs_alloc() always returns 0.
Fix this by returning 'error'.

Signed-off-by: Wei Yongjun 
Signed-off-by: Steven Whitehouse 

diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index d1f51fd..70d1cd0 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -576,7 +576,7 @@ int gfs2_rs_alloc(struct gfs2_inode *ip)
RB_CLEAR_NODE(&ip->i_res->rs_node);
 out:
up_write(&ip->i_rw_mutex);
-   return 0;
+   return error;
 }
 
 static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs)
-- 
1.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 4/6] GFS2: Fix unlock of fcntl locks during withdrawn state

2013-04-05 Thread Steven Whitehouse

When withdraw occurs, we need to continue to allow unlocks of fcntl
locks to occur, however these will only be local, since the node has
withdrawn from the cluster. This prevents triggering a VFS level
bug trap due to locks remaining when a file is closed.

Signed-off-by: Steven Whitehouse 

diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 019f45e..d79c2da 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -923,8 +923,11 @@ static int gfs2_lock(struct file *file, int cmd, struct 
file_lock *fl)
cmd = F_SETLK;
fl->fl_type = F_UNLCK;
}
-   if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
+   if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) {
+   if (fl->fl_type == F_UNLCK)
+   posix_lock_file_wait(file, fl);
return -EIO;
+   }
if (IS_GETLK(cmd))
return dlm_posix_get(ls->ls_dlm, ip->i_no_addr, file, fl);
else if (fl->fl_type == F_UNLCK)
-- 
1.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 5/6] GFS2: Fix unlock of fcntl locks during withdrawn state

2013-04-05 Thread Steven Whitehouse

From: David Teigland 

This reminded me of another old patch I had sitting around which I never
had a chance to test.  I copied this idea from the nfs code.  The problem
is that when the kernel clears flocks/plocks during close, it calls posix
unlock even if there are no posix locks on the file.  Without this patch,
that extraneous unlock propagates up to controld, across the cluster, and
back down to the kernel.  That can amount to a lot of plock activity on a
fs that may have never used a single plock (only flocks).  With this
patch, we should detect that the unlock is extraneous (since it doesn't
exist in the vfs), and skip all the userland traffic.

Signed-off-by: David Teigland 
Signed-off-by: Steven Whitehouse 

diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c
index 01fd5c1..f704458 100644
--- a/fs/dlm/plock.c
+++ b/fs/dlm/plock.c
@@ -247,6 +247,7 @@ int dlm_posix_unlock(dlm_lockspace_t *lockspace, u64 
number, struct file *file,
struct dlm_ls *ls;
struct plock_op *op;
int rv;
+   unsigned char fl_flags = fl->fl_flags;
 
ls = dlm_find_lockspace_local(lockspace);
if (!ls)
@@ -258,9 +259,18 @@ int dlm_posix_unlock(dlm_lockspace_t *lockspace, u64 
number, struct file *file,
goto out;
}
 
-   if (posix_lock_file_wait(file, fl) < 0)
-   log_error(ls, "dlm_posix_unlock: vfs unlock error %llx",
- (unsigned long long)number);
+   /* cause the vfs unlock to return ENOENT if lock is not found */
+   fl->fl_flags |= FL_EXISTS;
+
+   rv = posix_lock_file_wait(file, fl);
+   if (rv == -ENOENT) {
+   rv = 0;
+   goto out_free;
+   }
+   if (rv < 0) {
+   log_error(ls, "dlm_posix_unlock: vfs unlock error %d %llx",
+ rv, (unsigned long long)number);
+   }
 
op->info.optype = DLM_PLOCK_OP_UNLOCK;
op->info.pid= fl->fl_pid;
@@ -296,9 +306,11 @@ int dlm_posix_unlock(dlm_lockspace_t *lockspace, u64 
number, struct file *file,
if (rv == -ENOENT)
rv = 0;
 
+out_free:
kfree(op);
 out:
dlm_put_lockspace(ls);
+   fl->fl_flags = fl_flags;
return rv;
 }
 EXPORT_SYMBOL_GPL(dlm_posix_unlock);
-- 
1.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 6/6] GFS2: Issue discards in 512b sectors

2013-04-05 Thread Steven Whitehouse

From: Bob Peterson 

This patch changes GFS2's discard issuing code so that it calls
function sb_issue_discard rather than blkdev_issue_discard. The
code was calling blkdev_issue_discard and specifying the correct
sector offset and sector size, but blkdev_issue_discard expects
these values to be in terms of 512 byte sectors, even if the native
sector size for the device is different. Calling sb_issue_discard
with the BLOCK size instead ensures the correct block-to-512b-sector
translation. I verified that "minlen" is specified in blocks, so
comparing it to a number of blocks is correct.

Signed-off-by: Bob Peterson 
Signed-off-by: Steven Whitehouse 

diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 70d1cd0..5a51265 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1181,12 +1181,9 @@ int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 
offset,
 const struct gfs2_bitmap *bi, unsigned minlen, u64 
*ptrimmed)
 {
struct super_block *sb = sdp->sd_vfs;
-   struct block_device *bdev = sb->s_bdev;
-   const unsigned int sects_per_blk = sdp->sd_sb.sb_bsize /
-  bdev_logical_block_size(sb->s_bdev);
u64 blk;
sector_t start = 0;
-   sector_t nr_sects = 0;
+   sector_t nr_blks = 0;
int rv;
unsigned int x;
u32 trimmed = 0;
@@ -1206,35 +1203,34 @@ int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 
offset,
if (diff == 0)
continue;
blk = offset + ((bi->bi_start + x) * GFS2_NBBY);
-   blk *= sects_per_blk; /* convert to sectors */
while(diff) {
if (diff & 1) {
-   if (nr_sects == 0)
+   if (nr_blks == 0)
goto start_new_extent;
-   if ((start + nr_sects) != blk) {
-   if (nr_sects >= minlen) {
-   rv = blkdev_issue_discard(bdev,
-   start, nr_sects,
+   if ((start + nr_blks) != blk) {
+   if (nr_blks >= minlen) {
+   rv = sb_issue_discard(sb,
+   start, nr_blks,
GFP_NOFS, 0);
if (rv)
goto fail;
-   trimmed += nr_sects;
+   trimmed += nr_blks;
}
-   nr_sects = 0;
+   nr_blks = 0;
 start_new_extent:
start = blk;
}
-   nr_sects += sects_per_blk;
+   nr_blks++;
}
diff >>= 2;
-   blk += sects_per_blk;
+   blk++;
}
}
-   if (nr_sects >= minlen) {
-   rv = blkdev_issue_discard(bdev, start, nr_sects, GFP_NOFS, 0);
+   if (nr_blks >= minlen) {
+   rv = sb_issue_discard(sb, start, nr_blks, GFP_NOFS, 0);
if (rv)
goto fail;
-   trimmed += nr_sects;
+   trimmed += nr_blks;
}
if (ptrimmed)
*ptrimmed = trimmed;
-- 
1.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

GFS2: Pull request (fixes)

2013-04-05 Thread Steven Whitehouse

Hi,

Please consider pulling the following changes,

Steve.



There are two patches which fix up a couple of minor issues in the DLM
interface code, a missing error path in gfs2_rs_alloc(), two patches
which fix problems during "withdraw" and a fix for discards/FITRIM when
using 4k sector sized devices.


The following changes since commit 66ade474237745a57b7e87da9a93c7ec69fd52bb:

  Merge branch 'fixes' of git://git.linaro.org/people/rmk/linux-arm (2013-04-03 
16:15:17 -0700)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/steve/gfs2-3.0-fixes.git master

Akinobu Mita (1):
  GFS2: use memchr_inv

Bob Peterson (1):
  GFS2: Issue discards in 512b sectors

David Teigland (2):
  GFS2: use kmalloc for lvb bitmap
  GFS2: Fix unlock of fcntl locks during withdrawn state

Steven Whitehouse (1):
  GFS2: Fix unlock of fcntl locks during withdrawn state

Wei Yongjun (1):
  GFS2: return error if malloc failed in gfs2_rs_alloc()

 fs/dlm/plock.c |   18 +++---
 fs/gfs2/file.c |5 -
 fs/gfs2/incore.h   |1 +
 fs/gfs2/lock_dlm.c |   39 ---
 fs/gfs2/rgrp.c |   32 ++--
 5 files changed, 54 insertions(+), 41 deletions(-)


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: GFS2: Pull request (fixes)

2013-04-05 Thread Steven Whitehouse

Hi,

On Fri, 2013-04-05 at 12:27 -0400, David Teigland wrote:
> On Fri, Apr 05, 2013 at 11:34:45AM +0100, Steven Whitehouse wrote:
> > Please consider pulling the following changes,
> 
> There's some mixup here that should be cleared up first.
> 
> > David Teigland (2):
> >   GFS2: Fix unlock of fcntl locks during withdrawn state
> > 
> > Steven Whitehouse (1):
> >   GFS2: Fix unlock of fcntl locks during withdrawn state
> 
> 

Yes, sorry about that. I'll fix that up and resend the pull request in
due course,

Steve.


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: GFS2: Pull request (fixes)

2013-04-05 Thread Steven Whitehouse

Hi,

I've dropped out the patch which shouldn't have made it into the previous
pull request, so this set should be ok now. Apologies for not spotting
that issue sooner,

Steve.

-
There are two patches which fix up a couple of minor issues in the DLM
interface code, a missing error path in gfs2_rs_alloc(), one patch
which fixes a problem during "withdraw" and a fix for discards/FITRIM when
using 4k sector sized devices.

-

The following changes since commit 66ade474237745a57b7e87da9a93c7ec69fd52bb:

  Merge branch 'fixes' of git://git.linaro.org/people/rmk/linux-arm (2013-04-03 
16:15:17 -0700)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/steve/gfs2-3.0-fixes.git master

Akinobu Mita (1):
  GFS2: use memchr_inv

Bob Peterson (1):
  GFS2: Issue discards in 512b sectors

David Teigland (1):
  GFS2: use kmalloc for lvb bitmap

Steven Whitehouse (1):
  GFS2: Fix unlock of fcntl locks during withdrawn state

Wei Yongjun (1):
  GFS2: return error if malloc failed in gfs2_rs_alloc()

 fs/gfs2/file.c |5 -
 fs/gfs2/incore.h   |1 +
 fs/gfs2/lock_dlm.c |   39 ---
 fs/gfs2/rgrp.c |   32 ++--
 4 files changed, 39 insertions(+), 38 deletions(-)



signature.asc
Description: This is a digitally signed message part

rcu: fix hlist_bl_set_first_rcu annotation

2013-01-30 Thread Steven Whitehouse


Abhi noticed that we were getting a complaint from the RCU subsystem
about access of an RCU protected list under the write side bit lock.
This patch adds additional annotation to check both the RCU read
lock and the write side bit lock before printing a message.

Signed-off-by: Steven Whitehouse 
Reported-by: Abhijith Das 
Tested-by: Abhijith Das 

diff --git a/include/linux/list_bl.h b/include/linux/list_bl.h
index 31f9d75..2eb8855 100644
--- a/include/linux/list_bl.h
+++ b/include/linux/list_bl.h
@@ -125,6 +125,11 @@ static inline void hlist_bl_unlock(struct hlist_bl_head *b)
__bit_spin_unlock(0, (unsigned long *)b);
 }
 
+static inline bool hlist_bl_is_locked(struct hlist_bl_head *b)
+{
+   return bit_spin_is_locked(0, (unsigned long *)b);
+}
+
 /**
  * hlist_bl_for_each_entry - iterate over list of given type
  * @tpos:  the type * to use as a loop cursor.
diff --git a/include/linux/rculist_bl.h b/include/linux/rculist_bl.h
index cf1244f..4f216c5 100644
--- a/include/linux/rculist_bl.h
+++ b/include/linux/rculist_bl.h
@@ -20,7 +20,7 @@ static inline void hlist_bl_set_first_rcu(struct 
hlist_bl_head *h,
 static inline struct hlist_bl_node *hlist_bl_first_rcu(struct hlist_bl_head *h)
 {
return (struct hlist_bl_node *)
-   ((unsigned long)rcu_dereference(h->first) & ~LIST_BL_LOCKMASK);
+   ((unsigned long)rcu_dereference_check(h->first, 
hlist_bl_is_locked(h)) & ~LIST_BL_LOCKMASK);
 }
 
 /**


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

GFS2: Pull request (fixes)

2013-01-28 Thread Steven Whitehouse

Hi,

Please consider pulling the following fix. Since there is only one patch
this time, I've attached it below rather than posting it separately,

Steve.

--
The following changes since commit 949db153b6466c6f7cad5a427ecea94985927311:

  Linux 3.8-rc5 (2013-01-25 11:57:28 -0800)

are available in the git repository at:
  git://git.kernel.org/pub/scm/linux/kernel/git/steve/gfs2-3.0-fixes.git master

David Teigland (1):
  GFS2: fix skip unlock condition

 fs/gfs2/lock_dlm.c |7 ++-
 1 files changed, 6 insertions(+), 1 deletions(-)
--
From d4e0bfec9b6fbb9b58640b44e01bb74ae0d29b22 Mon Sep 17 00:00:00 2001
From: David Teigland 
Date: Thu, 3 Jan 2013 17:52:07 -0500
Subject: [PATCH] GFS2: fix skip unlock condition

The recent commit fb6791d100d1bba20b5cdbc4912e1f7086ec60f8
included the wrong logic.  The lvbptr check was incorrectly
added after the patch was tested.

Signed-off-by: David Teigland 
Signed-off-by: Steven Whitehouse 

diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index b906ed1..9802de0 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -281,6 +281,7 @@ static void gdlm_put_lock(struct gfs2_glock *gl)
 {
struct gfs2_sbd *sdp = gl->gl_sbd;
struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+   int lvb_needs_unlock = 0;
int error;
 
if (gl->gl_lksb.sb_lkid == 0) {
@@ -294,8 +295,12 @@ static void gdlm_put_lock(struct gfs2_glock *gl)
gfs2_update_request_times(gl);
 
/* don't want to skip dlm_unlock writing the lvb when lock is ex */
+
+   if (gl->gl_lksb.sb_lvbptr && (gl->gl_state == LM_ST_EXCLUSIVE))
+   lvb_needs_unlock = 1;
+
if (test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags) &&
-   gl->gl_lksb.sb_lvbptr && (gl->gl_state != LM_ST_EXCLUSIVE)) {
+   !lvb_needs_unlock) {
gfs2_glock_free(gl);
return;
}
-- 
1.7.4




signature.asc
Description: This is a digitally signed message part

[PATCH 02/10] GFS2: Merge revoke adding functions

2013-02-19 Thread Steven Whitehouse

This moves the lo_add function for revokes into trans.c, removing
a function call and making the code easier to read.

Signed-off-by: Steven Whitehouse 

diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 9ceccb1..9c80742 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -600,20 +600,6 @@ static void buf_lo_after_scan(struct gfs2_jdesc *jd, int 
error, int pass)
jd->jd_jid, sdp->sd_replayed_blocks, sdp->sd_found_blocks);
 }
 
-static void revoke_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
-{
-   struct gfs2_glock *gl = bd->bd_gl;
-   struct gfs2_trans *tr;
-
-   tr = current->journal_info;
-   tr->tr_touched = 1;
-   tr->tr_num_revoke++;
-   sdp->sd_log_num_revoke++;
-   atomic_inc(&gl->gl_revokes);
-   set_bit(GLF_LFLUSH, &gl->gl_flags);
-   list_add(&bd->bd_list, &sdp->sd_log_le_revoke);
-}
-
 static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
 {
struct gfs2_meta_header *mh;
@@ -895,7 +881,6 @@ const struct gfs2_log_operations gfs2_buf_lops = {
 };
 
 const struct gfs2_log_operations gfs2_revoke_lops = {
-   .lo_add = revoke_lo_add,
.lo_before_commit = revoke_lo_before_commit,
.lo_after_commit = revoke_lo_after_commit,
.lo_before_scan = revoke_lo_before_scan,
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c
index 4136270..6f3ddbc 100644
--- a/fs/gfs2/trans.c
+++ b/fs/gfs2/trans.c
@@ -175,11 +175,19 @@ void gfs2_trans_add_bh(struct gfs2_glock *gl, struct 
buffer_head *bh, int meta)
 
 void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
 {
+   struct gfs2_glock *gl = bd->bd_gl;
+   struct gfs2_trans *tr = current->journal_info;
+
BUG_ON(!list_empty(&bd->bd_list));
BUG_ON(!list_empty(&bd->bd_ail_st_list));
BUG_ON(!list_empty(&bd->bd_ail_gl_list));
lops_init_le(bd, &gfs2_revoke_lops);
-   lops_add(sdp, bd);
+   tr->tr_touched = 1;
+   tr->tr_num_revoke++;
+   sdp->sd_log_num_revoke++;
+   atomic_inc(&gl->gl_revokes);
+   set_bit(GLF_LFLUSH, &gl->gl_flags);
+   list_add(&bd->bd_list, &sdp->sd_log_le_revoke);
 }
 
 void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len)
-- 
1.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 05/10] GFS2: Merge gfs2_attach_bufdata() into trans.c

2013-02-19 Thread Steven Whitehouse

The locking in gfs2_attach_bufdata() was type specific (data/meta)
which made the function rather confusing. This patch moves the core
of gfs2_attach_bufdata() into trans.c, renaming it gfs2_alloc_bufdata()
and moving the locking into gfs2_trans_add_data()/gfs2_trans_add_meta().

As a result all of the locking related to adding data and metadata to
the journal is now in these two functions. This should help to clarify
what is going on, and give us some opportunities to simplify in
some cases.

Signed-off-by: Steven Whitehouse 

diff --git a/fs/gfs2/lops.h b/fs/gfs2/lops.h
index d85b376..ba77b7d 100644
--- a/fs/gfs2/lops.h
+++ b/fs/gfs2/lops.h
@@ -47,13 +47,6 @@ static inline unsigned int databuf_limit(struct gfs2_sbd 
*sdp)
return limit;
 }
 
-static inline void lops_init_le(struct gfs2_bufdata *bd,
-   const struct gfs2_log_operations *lops)
-{
-   INIT_LIST_HEAD(&bd->bd_list);
-   bd->bd_ops = lops;
-}
-
 static inline void lops_before_commit(struct gfs2_sbd *sdp)
 {
int x;
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 22255d9..b059bbb 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -271,41 +271,6 @@ int gfs2_meta_wait(struct gfs2_sbd *sdp, struct 
buffer_head *bh)
return 0;
 }
 
-/**
- * gfs2_attach_bufdata - attach a struct gfs2_bufdata structure to a buffer
- * @gl: the glock the buffer belongs to
- * @bh: The buffer to be attached to
- * @meta: Flag to indicate whether its metadata or not
- */
-
-void gfs2_attach_bufdata(struct gfs2_glock *gl, struct buffer_head *bh,
-int meta)
-{
-   struct gfs2_bufdata *bd;
-
-   if (meta)
-   lock_page(bh->b_page);
-
-   if (bh->b_private) {
-   if (meta)
-   unlock_page(bh->b_page);
-   return;
-   }
-
-   bd = kmem_cache_zalloc(gfs2_bufdata_cachep, GFP_NOFS | __GFP_NOFAIL);
-   bd->bd_bh = bh;
-   bd->bd_gl = gl;
-
-   if (meta)
-   lops_init_le(bd, &gfs2_buf_lops);
-   else
-   lops_init_le(bd, &gfs2_databuf_lops);
-   bh->b_private = bd;
-
-   if (meta)
-   unlock_page(bh->b_page);
-}
-
 void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, 
int meta)
 {
struct address_space *mapping = bh->b_page->mapping;
diff --git a/fs/gfs2/meta_io.h b/fs/gfs2/meta_io.h
index c30973b..0d4c843 100644
--- a/fs/gfs2/meta_io.h
+++ b/fs/gfs2/meta_io.h
@@ -56,9 +56,6 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno,
 int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh);
 struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, int create);
 
-void gfs2_attach_bufdata(struct gfs2_glock *gl, struct buffer_head *bh,
-int meta);
-
 void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr,
  int meta);
 
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c
index 1fbd57e..14dbf6d 100644
--- a/fs/gfs2/trans.c
+++ b/fs/gfs2/trans.c
@@ -143,6 +143,21 @@ void gfs2_trans_end(struct gfs2_sbd *sdp)
sb_end_intwrite(sdp->sd_vfs);
 }
 
+static struct gfs2_bufdata *gfs2_alloc_bufdata(struct gfs2_glock *gl,
+  struct buffer_head *bh,
+  const struct gfs2_log_operations 
*lops)
+{
+   struct gfs2_bufdata *bd;
+
+   bd = kmem_cache_zalloc(gfs2_bufdata_cachep, GFP_NOFS | __GFP_NOFAIL);
+   bd->bd_bh = bh;
+   bd->bd_gl = gl;
+   bd->bd_ops = lops;
+   INIT_LIST_HEAD(&bd->bd_list);
+   bh->b_private = bd;
+   return bd;
+}
+
 /**
  * databuf_lo_add - Add a databuf to the transaction.
  *
@@ -190,16 +205,15 @@ void gfs2_trans_add_data(struct gfs2_glock *gl, struct 
buffer_head *bh)
lock_buffer(bh);
gfs2_log_lock(sdp);
bd = bh->b_private;
-   if (bd)
-   gfs2_assert(sdp, bd->bd_gl == gl);
-   else {
+   if (bd == NULL) {
gfs2_log_unlock(sdp);
unlock_buffer(bh);
-   gfs2_attach_bufdata(gl, bh, 0);
-   bd = bh->b_private;
+   if (bh->b_private == NULL)
+   bd = gfs2_alloc_bufdata(gl, bh, &gfs2_databuf_lops);
lock_buffer(bh);
gfs2_log_lock(sdp);
}
+   gfs2_assert(sdp, bd->bd_gl == gl);
databuf_lo_add(sdp, bd);
gfs2_log_unlock(sdp);
unlock_buffer(bh);
@@ -240,16 +254,17 @@ void gfs2_trans_add_meta(struct gfs2_glock *gl, struct 
buffer_head *bh)
lock_buffer(bh);
gfs2_log_lock(sdp);
bd = bh->b_private;
-   if (bd)
-   gfs2_assert(sdp, bd->bd_gl == gl);
-   else {
+   if (bd == NULL) {
gfs2_log_unlock(sdp);
unlock_buffer(bh);
-   gfs2_attach_

[PATCH 10/10] GFS2: Reinstate withdraw ack system

2013-02-19 Thread Steven Whitehouse

This patch reinstates the ack system which withdraw should be using. It
appears to have been accidentally forgotten when the lock module was
merged into GFS2, due to two different sysfs files having the same name.

Reported-by: David Teigland 
Signed-off-by: Steven Whitehouse 

diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 1533cf8..e2601ba 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -642,6 +642,7 @@ struct gfs2_sbd {
wait_queue_head_t sd_glock_wait;
atomic_t sd_glock_disposal;
struct completion sd_locking_init;
+   struct completion sd_wdack;
struct delayed_work sd_control_work;
 
/* Inode Stuff */
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index e063f22..1b612be 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -81,6 +81,7 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
init_waitqueue_head(&sdp->sd_glock_wait);
atomic_set(&sdp->sd_glock_disposal, 0);
init_completion(&sdp->sd_locking_init);
+   init_completion(&sdp->sd_wdack);
spin_lock_init(&sdp->sd_statfs_spin);
 
spin_lock_init(&sdp->sd_rindex_spin);
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 462e841..4fb9ad8 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -330,6 +330,28 @@ static ssize_t block_store(struct gfs2_sbd *sdp, const 
char *buf, size_t len)
return ret;
 }
 
+static ssize_t wdack_show(struct gfs2_sbd *sdp, char *buf)
+{
+   int val = completion_done(&sdp->sd_wdack) ? 1 : 0;
+
+   return sprintf(buf, "%d\n", val);
+}
+
+static ssize_t wdack_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
+{
+   ssize_t ret = len;
+   int val;
+
+   val = simple_strtol(buf, NULL, 0);
+
+   if ((val == 1) &&
+   !strcmp(sdp->sd_lockstruct.ls_ops->lm_proto_name, "lock_dlm"))
+   complete(&sdp->sd_wdack);
+   else
+   ret = -EINVAL;
+   return ret;
+}
+
 static ssize_t lkfirst_show(struct gfs2_sbd *sdp, char *buf)
 {
struct lm_lockstruct *ls = &sdp->sd_lockstruct;
@@ -461,7 +483,7 @@ static struct gfs2_attr gdlm_attr_##_name = 
__ATTR(_name,_mode,_show,_store)
 
 GDLM_ATTR(proto_name,  0444, proto_name_show,  NULL);
 GDLM_ATTR(block,   0644, block_show,   block_store);
-GDLM_ATTR(withdraw,0644, withdraw_show,withdraw_store);
+GDLM_ATTR(withdraw,0644, wdack_show,   wdack_store);
 GDLM_ATTR(jid, 0644, jid_show, jid_store);
 GDLM_ATTR(first,   0644, lkfirst_show, lkfirst_store);
 GDLM_ATTR(first_done,  0444, first_done_show,  NULL);
diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
index f00d7c5..6402fb6 100644
--- a/fs/gfs2/util.c
+++ b/fs/gfs2/util.c
@@ -54,6 +54,9 @@ int gfs2_lm_withdraw(struct gfs2_sbd *sdp, char *fmt, ...)
 
kobject_uevent(&sdp->sd_kobj, KOBJ_OFFLINE);
 
+   if (!strcmp(sdp->sd_lockstruct.ls_ops->lm_proto_name, 
"lock_dlm"))
+   wait_for_completion(&sdp->sd_wdack);
+
if (lm->lm_unmount) {
fs_err(sdp, "telling LM to unmount\n");
lm->lm_unmount(sdp);
-- 
1.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 09/10] GFS2: Get a block reservation before resizing a file

2013-02-19 Thread Steven Whitehouse

From: Bob Peterson 

This patch allocates a block reservation structure before growing
or shrinking a file. Without this structure, the grow or shrink code
can reference a bad pointer.

Signed-off-by: Bob Peterson 
Signed-off-by: Steven Whitehouse 

diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index d29d779..df686d1 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -1288,6 +1288,10 @@ int gfs2_setattr_size(struct inode *inode, u64 newsize)
 
inode_dio_wait(inode);
 
+   ret = gfs2_rs_alloc(GFS2_I(inode));
+   if (ret)
+   return ret;
+
oldsize = inode->i_size;
if (newsize >= oldsize)
return do_grow(inode, newsize);
-- 
1.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 06/10] GFS2: Clean up freeze code

2013-02-19 Thread Steven Whitehouse

The freeze code has not been looked at a lot recently. Upstream has
moved on, and this is an attempt to catch us back up again. There
is a vfs level interface for the freeze code which can be called
from our (obsolete, but kept for backward compatibility purposes)
sysfs freeze interface. This means freezing this way vs. doing it
from the ioctl should now work in identical fashion.

As a result of this, the freeze function is only called once
and we can drop our own special purpose code for counting the
number of freezes.

Signed-off-by: Steven Whitehouse 

diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 5d129ab..19750bc 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -757,10 +757,7 @@ struct gfs2_sbd {
unsigned int sd_replayed_blocks;
 
/* For quiescing the filesystem */
-
struct gfs2_holder sd_freeze_gh;
-   struct mutex sd_freeze_lock;
-   unsigned int sd_freeze_count;
 
char sd_fsname[GFS2_FSNAME_LEN];
char sd_table_name[GFS2_FSNAME_LEN];
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 0e3554e..5f5aba5 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -115,8 +115,6 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
 
INIT_LIST_HEAD(&sdp->sd_revoke_list);
 
-   mutex_init(&sdp->sd_freeze_lock);
-
return sdp;
 }
 
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 4dfda4c..c075b62 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -663,54 +663,6 @@ out:
return error;
 }
 
-/**
- * gfs2_freeze_fs - freezes the file system
- * @sdp: the file system
- *
- * This function flushes data and meta data for all machines by
- * acquiring the transaction log exclusively.  All journals are
- * ensured to be in a clean state as well.
- *
- * Returns: errno
- */
-
-int gfs2_freeze_fs(struct gfs2_sbd *sdp)
-{
-   int error = 0;
-
-   mutex_lock(&sdp->sd_freeze_lock);
-
-   if (!sdp->sd_freeze_count++) {
-   error = gfs2_lock_fs_check_clean(sdp, &sdp->sd_freeze_gh);
-   if (error)
-   sdp->sd_freeze_count--;
-   }
-
-   mutex_unlock(&sdp->sd_freeze_lock);
-
-   return error;
-}
-
-/**
- * gfs2_unfreeze_fs - unfreezes the file system
- * @sdp: the file system
- *
- * This function allows the file system to proceed by unlocking
- * the exclusively held transaction lock.  Other GFS2 nodes are
- * now free to acquire the lock shared and go on with their lives.
- *
- */
-
-void gfs2_unfreeze_fs(struct gfs2_sbd *sdp)
-{
-   mutex_lock(&sdp->sd_freeze_lock);
-
-   if (sdp->sd_freeze_count && !--sdp->sd_freeze_count)
-   gfs2_glock_dq_uninit(&sdp->sd_freeze_gh);
-
-   mutex_unlock(&sdp->sd_freeze_lock);
-}
-
 void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
 {
struct gfs2_dinode *str = buf;
@@ -888,13 +840,6 @@ static void gfs2_put_super(struct super_block *sb)
int error;
struct gfs2_jdesc *jd;
 
-   /*  Unfreeze the filesystem, if we need to  */
-
-   mutex_lock(&sdp->sd_freeze_lock);
-   if (sdp->sd_freeze_count)
-   gfs2_glock_dq_uninit(&sdp->sd_freeze_gh);
-   mutex_unlock(&sdp->sd_freeze_lock);
-
/* No more recovery requests */
set_bit(SDF_NORECOVERY, &sdp->sd_flags);
smp_mb();
@@ -985,7 +930,7 @@ static int gfs2_freeze(struct super_block *sb)
return -EINVAL;
 
for (;;) {
-   error = gfs2_freeze_fs(sdp);
+   error = gfs2_lock_fs_check_clean(sdp, &sdp->sd_freeze_gh);
if (!error)
break;
 
@@ -1013,7 +958,9 @@ static int gfs2_freeze(struct super_block *sb)
 
 static int gfs2_unfreeze(struct super_block *sb)
 {
-   gfs2_unfreeze_fs(sb->s_fs_info);
+   struct gfs2_sbd *sdp = sb->s_fs_info;
+
+   gfs2_glock_dq_uninit(&sdp->sd_freeze_gh);
return 0;
 }
 
diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h
index a046468..90e3322 100644
--- a/fs/gfs2/super.h
+++ b/fs/gfs2/super.h
@@ -46,9 +46,6 @@ extern void update_statfs(struct gfs2_sbd *sdp, struct 
buffer_head *m_bh,
  struct buffer_head *l_bh);
 extern int gfs2_statfs_sync(struct super_block *sb, int type);
 
-extern int gfs2_freeze_fs(struct gfs2_sbd *sdp);
-extern void gfs2_unfreeze_fs(struct gfs2_sbd *sdp);
-
 extern struct file_system_type gfs2_fs_type;
 extern struct file_system_type gfs2meta_fs_type;
 extern const struct export_operations gfs2_export_ops;
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 8056b7b..462e841 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -91,19 +91,15 @@ static ssize_t uuid_show(struct gfs2_sbd *sdp, char *buf)
 
 static ssize_t freeze_show(struct gfs2_sbd *sdp, char *buf)
 {
-   unsigned int count;
-
-   mutex_lock(&sdp->sd_freeze_lock);
-   count = sd

[PATCH 08/10] GFS2: Split glock lru processing into two parts

2013-02-19 Thread Steven Whitehouse

The intent here is to split the processing of the glock lru
list into two parts, so that the selection of glocks and the
disposal are separate functions. The plan is that further
updates can then be made to these functions in the future
to improve the selection of glocks and also the efficiency of
glock disposal.

The new feature which this patch brings is sorting the
glocks to be disposed of into glock number (and thus also
disk block number) order. Not all glocks will need i/o in
order to dispose of them, but some will, and at least we'll
generate mostly disk block order i/o now.

Signed-off-by: Steven Whitehouse 

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 3ad8fd3..cf35155 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -30,6 +30,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "gfs2.h"
 #include "incore.h"
@@ -1376,50 +1377,93 @@ void gfs2_glock_complete(struct gfs2_glock *gl, int ret)
gfs2_glock_put(gl);
 }
 
+static int glock_cmp(void *priv, struct list_head *a, struct list_head *b)
+{
+   struct gfs2_glock *gla, *glb;
+
+   gla = list_entry(a, struct gfs2_glock, gl_lru);
+   glb = list_entry(b, struct gfs2_glock, gl_lru);
+
+   if (gla->gl_name.ln_number > glb->gl_name.ln_number)
+   return 1;
+   if (gla->gl_name.ln_number < glb->gl_name.ln_number)
+   return -1;
+
+   return 0;
+}
+
+/**
+ * gfs2_dispose_glock_lru - Demote a list of glocks
+ * @list: The list to dispose of
+ *
+ * Disposing of glocks may involve disk accesses, so that here we sort
+ * the glocks by number (i.e. disk location of the inodes) so that if
+ * there are any such accesses, they'll be sent in order (mostly).
+ *
+ * Must be called under the lru_lock, but may drop and retake this
+ * lock. While the lru_lock is dropped, entries may vanish from the
+ * list, but no new entries will appear on the list (since it is
+ * private)
+ */
+
+static void gfs2_dispose_glock_lru(struct list_head *list)
+__releases(&lru_lock)
+__acquires(&lru_lock)
+{
+   struct gfs2_glock *gl;
+
+   list_sort(NULL, list, glock_cmp);
+
+   while(!list_empty(list)) {
+   gl = list_entry(list->next, struct gfs2_glock, gl_lru);
+   list_del_init(&gl->gl_lru);
+   clear_bit(GLF_LRU, &gl->gl_flags);
+   gfs2_glock_hold(gl);
+   spin_unlock(&lru_lock);
+   spin_lock(&gl->gl_spin);
+   if (demote_ok(gl))
+   handle_callback(gl, LM_ST_UNLOCKED, 0);
+   WARN_ON(!test_and_clear_bit(GLF_LOCK, &gl->gl_flags));
+   smp_mb__after_clear_bit();
+   if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
+   gfs2_glock_put_nolock(gl);
+   spin_unlock(&gl->gl_spin);
+   spin_lock(&lru_lock);
+   }
+}
+
 /**
  * gfs2_scan_glock_lru - Scan the LRU looking for locks to demote
  * @nr: The number of entries to scan
  *
+ * This function selects the entries on the LRU which are able to
+ * be demoted, and then kicks off the process by calling
+ * gfs2_dispose_glock_lru() above.
  */
 
 static void gfs2_scan_glock_lru(int nr)
 {
struct gfs2_glock *gl;
-   int may_demote;
-   int nr_skipped = 0;
LIST_HEAD(skipped);
+   LIST_HEAD(dispose);
 
spin_lock(&lru_lock);
while(nr && !list_empty(&lru_list)) {
gl = list_entry(lru_list.next, struct gfs2_glock, gl_lru);
-   list_del_init(&gl->gl_lru);
-   clear_bit(GLF_LRU, &gl->gl_flags);
-   atomic_dec(&lru_count);
 
/* Test for being demotable */
if (!test_and_set_bit(GLF_LOCK, &gl->gl_flags)) {
-   gfs2_glock_hold(gl);
-   spin_unlock(&lru_lock);
-   spin_lock(&gl->gl_spin);
-   may_demote = demote_ok(gl);
-   if (may_demote) {
-   handle_callback(gl, LM_ST_UNLOCKED, 0);
-   nr--;
-   }
-   clear_bit(GLF_LOCK, &gl->gl_flags);
-   smp_mb__after_clear_bit();
-   if (queue_delayed_work(glock_workqueue, &gl->gl_work, 
0) == 0)
-   gfs2_glock_put_nolock(gl);
-   spin_unlock(&gl->gl_spin);
-   spin_lock(&lru_lock);
+   list_move(&gl->gl_lru, &dispose);
+   atomic_dec(&lru_count);
+   nr--;
continue;
}
-   nr_skipped++;
-   list_add(&gl->gl_lru, &skipped);
-   s

[PATCH 07/10] GFS2: Use ->writepages for ordered writes

2013-02-19 Thread Steven Whitehouse

Instead of using a list of buffers to write ahead of the journal
flush, this now uses a list of inodes and calls ->writepages
via filemap_fdatawrite() in order to achieve the same thing. For
most use cases this results in a shorter ordered write list,
as well as much larger i/os being issued.

The ordered write list is sorted by inode number before writing
in order to retain the disk block ordering between inodes as
per the previous code.

The previous ordered write code conflicted with mpage_writepages()
in its assumptions about how to write out the disk blocks. With
this updated version we can also use mpage_writepages() for GFS2's
ordered write ->writepages implementation, so we will also send
larger i/os from writeback too.

Signed-off-by: Steven Whitehouse 

diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 92340dd..24f414f 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -230,16 +230,14 @@ out_ignore:
 }
 
 /**
- * gfs2_writeback_writepages - Write a bunch of dirty pages back to disk
+ * gfs2_writepages - Write a bunch of dirty pages back to disk
  * @mapping: The mapping to write
  * @wbc: Write-back control
  *
- * For the data=writeback case we can already ignore buffer heads
- * and write whole extents at once. This is a big reduction in the
- * number of I/O requests we send and the bmap calls we make in this case.
+ * Used for both ordered and writeback modes.
  */
-static int gfs2_writeback_writepages(struct address_space *mapping,
-struct writeback_control *wbc)
+static int gfs2_writepages(struct address_space *mapping,
+  struct writeback_control *wbc)
 {
return mpage_writepages(mapping, wbc, gfs2_get_block_noalloc);
 }
@@ -1102,7 +1100,7 @@ cannot_release:
 
 static const struct address_space_operations gfs2_writeback_aops = {
.writepage = gfs2_writeback_writepage,
-   .writepages = gfs2_writeback_writepages,
+   .writepages = gfs2_writepages,
.readpage = gfs2_readpage,
.readpages = gfs2_readpages,
.write_begin = gfs2_write_begin,
@@ -1118,6 +1116,7 @@ static const struct address_space_operations 
gfs2_writeback_aops = {
 
 static const struct address_space_operations gfs2_ordered_aops = {
.writepage = gfs2_ordered_writepage,
+   .writepages = gfs2_writepages,
.readpage = gfs2_readpage,
.readpages = gfs2_readpages,
.write_begin = gfs2_write_begin,
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 7a86275..d29d779 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -22,6 +22,7 @@
 #include "meta_io.h"
 #include "quota.h"
 #include "rgrp.h"
+#include "log.h"
 #include "super.h"
 #include "trans.h"
 #include "dir.h"
@@ -1137,6 +1138,7 @@ static int trunc_end(struct gfs2_inode *ip)
ip->i_height = 0;
ip->i_goal = ip->i_no_addr;
gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
+   gfs2_ordered_del_inode(ip);
}
ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
ip->i_diskflags &= ~GFS2_DIF_TRUNC_IN_PROG;
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 19750bc..1533cf8 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -340,6 +340,7 @@ enum {
GIF_QD_LOCKED   = 1,
GIF_ALLOC_FAILED= 2,
GIF_SW_PAGED= 3,
+   GIF_ORDERED = 4,
 };
 
 struct gfs2_inode {
@@ -356,6 +357,7 @@ struct gfs2_inode {
struct gfs2_rgrpd *i_rgd;
u64 i_goal; /* goal block for allocations */
struct rw_semaphore i_rw_mutex;
+   struct list_head i_ordered;
struct list_head i_trunc_list;
__be64 *i_hash_cache;
u32 i_entries;
@@ -722,6 +724,7 @@ struct gfs2_sbd {
struct list_head sd_log_le_revoke;
struct list_head sd_log_le_databuf;
struct list_head sd_log_le_ordered;
+   spinlock_t sd_ordered_lock;
 
atomic_t sd_log_thresh1;
atomic_t sd_log_thresh2;
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index f4beeb9..9a2ca8b 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -482,70 +482,66 @@ static void log_flush_wait(struct gfs2_sbd *sdp)
}
 }
 
-static int bd_cmp(void *priv, struct list_head *a, struct list_head *b)
+static int ip_cmp(void *priv, struct list_head *a, struct list_head *b)
 {
-   struct gfs2_bufdata *bda, *bdb;
+   struct gfs2_inode *ipa, *ipb;
 
-   bda = list_entry(a, struct gfs2_bufdata, bd_list);
-   bdb = list_entry(b, struct gfs2_bufdata, bd_list);
+   ipa = list_entry(a, struct gfs2_inode, i_ordered);
+   ipb = list_entry(b, struct gfs2_inode, i_ordered);
 
-   if (bda->bd_bh->b_blocknr < bdb->bd_bh->b_blocknr)
+   if (ipa->i_no_addr < ipb->i_no_addr)
return -1;
-   if (bda->bd_bh->b_blocknr &

[PATCH 04/10] GFS2: Copy gfs2_trans_add_bh into new data/meta functions

2013-02-19 Thread Steven Whitehouse

This patch copies the body of gfs2_trans_add_bh into the two newly
added gfs2_trans_add_data and gfs2_trans_add_meta functions. We can
then move the .lo_add functions from lops.c into trans.c and call
them directly.

As a result of this, we no longer need to use the .lo_add functions
at all, so that operation is removed from the log operations structure.

Signed-off-by: Steven Whitehouse 

diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index c373a24..5d129ab 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -52,7 +52,6 @@ struct gfs2_log_header_host {
  */
 
 struct gfs2_log_operations {
-   void (*lo_add) (struct gfs2_sbd *sdp, struct gfs2_bufdata *bd);
void (*lo_before_commit) (struct gfs2_sbd *sdp);
void (*lo_after_commit) (struct gfs2_sbd *sdp, struct gfs2_ail *ai);
void (*lo_before_scan) (struct gfs2_jdesc *jd,
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 9c80742..a505597 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -37,7 +37,7 @@
  *
  * The log lock must be held when calling this function
  */
-static void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh)
+void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh)
 {
struct gfs2_bufdata *bd;
 
@@ -388,32 +388,6 @@ static struct page *gfs2_get_log_desc(struct gfs2_sbd 
*sdp, u32 ld_type,
return page;
 }
 
-static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
-{
-   struct gfs2_meta_header *mh;
-   struct gfs2_trans *tr;
-
-   tr = current->journal_info;
-   tr->tr_touched = 1;
-   if (!list_empty(&bd->bd_list))
-   return;
-   set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
-   set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags);
-   mh = (struct gfs2_meta_header *)bd->bd_bh->b_data;
-   if (unlikely(mh->mh_magic != cpu_to_be32(GFS2_MAGIC))) {
-   printk(KERN_ERR
-  "Attempting to add uninitialised block to journal 
(inplace block=%lld)\n",
-  (unsigned long long)bd->bd_bh->b_blocknr);
-   BUG();
-   }
-   gfs2_pin(sdp, bd->bd_bh);
-   mh->__pad0 = cpu_to_be64(0);
-   mh->mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid);
-   sdp->sd_log_num_buf++;
-   list_add(&bd->bd_list, &sdp->sd_log_le_buf);
-   tr->tr_num_buf_new++;
-}
-
 static void gfs2_check_magic(struct buffer_head *bh)
 {
void *kaddr;
@@ -735,44 +709,6 @@ static void revoke_lo_after_scan(struct gfs2_jdesc *jd, 
int error, int pass)
 }
 
 /**
- * databuf_lo_add - Add a databuf to the transaction.
- *
- * This is used in two distinct cases:
- * i) In ordered write mode
- *We put the data buffer on a list so that we can ensure that its
- *synced to disk at the right time
- * ii) In journaled data mode
- *We need to journal the data block in the same way as metadata in
- *the functions above. The difference is that here we have a tag
- *which is two __be64's being the block number (as per meta data)
- *and a flag which says whether the data block needs escaping or
- *not. This means we need a new log entry for each 251 or so data
- *blocks, which isn't an enormous overhead but twice as much as
- *for normal metadata blocks.
- */
-static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
-{
-   struct gfs2_trans *tr = current->journal_info;
-   struct address_space *mapping = bd->bd_bh->b_page->mapping;
-   struct gfs2_inode *ip = GFS2_I(mapping->host);
-
-   if (tr)
-   tr->tr_touched = 1;
-   if (!list_empty(&bd->bd_list))
-   return;
-   set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
-   set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags);
-   if (gfs2_is_jdata(ip)) {
-   gfs2_pin(sdp, bd->bd_bh);
-   tr->tr_num_databuf_new++;
-   sdp->sd_log_num_databuf++;
-   list_add_tail(&bd->bd_list, &sdp->sd_log_le_databuf);
-   } else {
-   list_add_tail(&bd->bd_list, &sdp->sd_log_le_ordered);
-   }
-}
-
-/**
  * databuf_lo_before_commit - Scan the data buffers, writing as we go
  *
  */
@@ -871,7 +807,6 @@ static void databuf_lo_after_commit(struct gfs2_sbd *sdp, 
struct gfs2_ail *ai)
 
 
 const struct gfs2_log_operations gfs2_buf_lops = {
-   .lo_add = buf_lo_add,
.lo_before_commit = buf_lo_before_commit,
.lo_after_commit = buf_lo_after_commit,
.lo_before_scan = buf_lo_before_scan,
@@ -894,7 +829,6 @@ const struct gfs2_log_operations gfs2_rg_lops = {
 };
 
 const struct gfs2_log_operations gfs2_databuf_lops = {
-   .lo_add = databuf_lo_add,
.lo_before_commit = databuf_lo_before_commit,
.lo_after_commit = databuf_lo_after_commit,
.lo_scan_elements = databuf_lo_scan_elements,
diff --git a/fs/gfs2/lops.h b

GFS2: Pre-pull patch posting (merge window)

2013-02-19 Thread Steven Whitehouse

This is one of the smallest collections of patches for the merge
window for some time. There are some clean ups relating to the
transaction code and the shrinker, which are mostly in preparation
for further development, but also make the code much easier to
follow in these areas.

There is a patch which allows the use of ->writepages even in the
default ordered write mode for all writebacks. This results in
sending larger i/os to the block layer, and a subsequent increase
in performance. It also reduces the number of different i/o paths
by one.

There is also a bug fix reinstating the withdraw ack system which
somehow got lost when the lock modules were merged into GFS2.

And that's all this time around,

Steve.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 03/10] GFS2: Split gfs2_trans_add_bh() into two

2013-02-19 Thread Steven Whitehouse

There is little common content in gfs2_trans_add_bh() between the data
and meta classes by the time that the functions which it calls are
taken into account. The intent here is to split this into two
separate functions. Stage one is to introduce gfs2_trans_add_data()
and gfs2_trans_add_meta() and update the callers accordingly.

Later patches will then pull in the content of gfs2_trans_add_bh()
and its dependent functions in order to clean up the code in this
area.

Signed-off-by: Steven Whitehouse 

diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 30de4f2..92340dd 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -51,7 +51,7 @@ static void gfs2_page_add_databufs(struct gfs2_inode *ip, 
struct page *page,
continue;
if (gfs2_is_jdata(ip))
set_buffer_uptodate(bh);
-   gfs2_trans_add_bh(ip->i_gl, bh, 0);
+   gfs2_trans_add_data(ip->i_gl, bh);
}
 }
 
@@ -852,7 +852,7 @@ static int gfs2_write_end(struct file *file, struct 
address_space *mapping,
goto failed;
}
 
-   gfs2_trans_add_bh(ip->i_gl, dibh, 1);
+   gfs2_trans_add_meta(ip->i_gl, dibh);
 
if (gfs2_is_stuffed(ip))
return gfs2_stuffed_write_end(inode, dibh, pos, len, copied, 
page);
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index a68e91b..7a86275 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -93,7 +93,7 @@ static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct 
buffer_head *dibh,
if (!gfs2_is_jdata(ip))
mark_buffer_dirty(bh);
if (!gfs2_is_writeback(ip))
-   gfs2_trans_add_bh(ip->i_gl, bh, 0);
+   gfs2_trans_add_data(ip->i_gl, bh);
 
if (release) {
unlock_page(page);
@@ -153,7 +153,7 @@ int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page 
*page)
 
/*  Set up the pointer to the new block  */
 
-   gfs2_trans_add_bh(ip->i_gl, dibh, 1);
+   gfs2_trans_add_meta(ip->i_gl, dibh);
di = (struct gfs2_dinode *)dibh->b_data;
gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
 
@@ -405,7 +405,7 @@ static inline __be64 *gfs2_indirect_init(struct metapath 
*mp,
BUG_ON(i < 1);
BUG_ON(mp->mp_bh[i] != NULL);
mp->mp_bh[i] = gfs2_meta_new(gl, bn);
-   gfs2_trans_add_bh(gl, mp->mp_bh[i], 1);
+   gfs2_trans_add_meta(gl, mp->mp_bh[i]);
gfs2_metatype_set(mp->mp_bh[i], GFS2_METATYPE_IN, GFS2_FORMAT_IN);
gfs2_buffer_clear_tail(mp->mp_bh[i], sizeof(struct gfs2_meta_header));
ptr += offset;
@@ -468,7 +468,7 @@ static int gfs2_bmap_alloc(struct inode *inode, const 
sector_t lblock,
BUG_ON(sheight < 1);
BUG_ON(dibh == NULL);
 
-   gfs2_trans_add_bh(ip->i_gl, dibh, 1);
+   gfs2_trans_add_meta(ip->i_gl, dibh);
 
if (height == sheight) {
struct buffer_head *bh;
@@ -544,7 +544,7 @@ static int gfs2_bmap_alloc(struct inode *inode, const 
sector_t lblock,
/* Branching from existing tree */
case ALLOC_GROW_DEPTH:
if (i > 1 && i < height)
-   gfs2_trans_add_bh(ip->i_gl, mp->mp_bh[i-1], 1);
+   gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[i-1]);
for (; i < height && n > 0; i++, n--)
gfs2_indirect_init(mp, ip->i_gl, i,
   mp->mp_list[i-1], bn++);
@@ -556,7 +556,7 @@ static int gfs2_bmap_alloc(struct inode *inode, const 
sector_t lblock,
case ALLOC_DATA:
BUG_ON(n > dblks);
BUG_ON(mp->mp_bh[end_of_metadata] == NULL);
-   gfs2_trans_add_bh(ip->i_gl, mp->mp_bh[end_of_metadata], 
1);
+   gfs2_trans_add_meta(ip->i_gl, 
mp->mp_bh[end_of_metadata]);
dblks = n;
ptr = metapointer(end_of_metadata, mp);
dblock = bn;
@@ -796,8 +796,8 @@ static int do_strip(struct gfs2_inode *ip, struct 
buffer_head *dibh,
 
down_write(&ip->i_rw_mutex);
 
-   gfs2_trans_add_bh(ip->i_gl, dibh, 1);
-   gfs2_trans_add_bh(ip->i_gl, bh, 1);
+   gfs2_trans_add_meta(ip->i_gl, dibh);
+   gfs2_trans_add_meta(ip->i_gl, bh);
 
bstart = 0;
blen = 0;
@@ -981,7 +981,7 @@ static int gfs2_block_truncate_page(struct address_space 
*mapping, loff_t from)
}
 
if (!gfs2_is_writeback(ip))
-   gfs2_trans_add_bh(ip->i_gl, bh, 0);
+   gfs2_trans_add_data(ip->i_gl, bh);
 
zero_user(page, offset, length);
mark_buffer_dirty(bh);
@@ -1046,7 +1046,7 @@ static int trunc_start(struct inode *inode, u64 oldsize, 
u64 newsize)
if (error)

[PATCH 01/10] GFS2: Separate LRU scanning from shrinker

2013-02-19 Thread Steven Whitehouse

This breaks out the LRU scanning function from the shrinker in
preparation for adding other callers to the LRU scanner.

Signed-off-by: Steven Whitehouse 

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 992c5c0..3ad8fd3 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1376,23 +1376,19 @@ void gfs2_glock_complete(struct gfs2_glock *gl, int ret)
gfs2_glock_put(gl);
 }
 
+/**
+ * gfs2_scan_glock_lru - Scan the LRU looking for locks to demote
+ * @nr: The number of entries to scan
+ *
+ */
 
-static int gfs2_shrink_glock_memory(struct shrinker *shrink,
-   struct shrink_control *sc)
+static void gfs2_scan_glock_lru(int nr)
 {
struct gfs2_glock *gl;
int may_demote;
int nr_skipped = 0;
-   int nr = sc->nr_to_scan;
-   gfp_t gfp_mask = sc->gfp_mask;
LIST_HEAD(skipped);
 
-   if (nr == 0)
-   goto out;
-
-   if (!(gfp_mask & __GFP_FS))
-   return -1;
-
spin_lock(&lru_lock);
while(nr && !list_empty(&lru_list)) {
gl = list_entry(lru_list.next, struct gfs2_glock, gl_lru);
@@ -1425,7 +1421,17 @@ static int gfs2_shrink_glock_memory(struct shrinker 
*shrink,
list_splice(&skipped, &lru_list);
atomic_add(nr_skipped, &lru_count);
spin_unlock(&lru_lock);
-out:
+}
+
+static int gfs2_shrink_glock_memory(struct shrinker *shrink,
+   struct shrink_control *sc)
+{
+   if (sc->nr_to_scan) {
+   if (!(sc->gfp_mask & __GFP_FS))
+   return -1;
+   gfs2_scan_glock_lru(sc->nr_to_scan);
+   }
+
return (atomic_read(&lru_count) / 100) * sysctl_vfs_cache_pressure;
 }
 
-- 
1.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

GFS2: Pull request (merge window)

2013-02-19 Thread Steven Whitehouse

Hi,

Please consider pulling the following GFS2 patches for the merge window,

Steve.

---
This is one of the smallest collections of patches for the merge
window for some time. There are some clean ups relating to the
transaction code and the shrinker, which are mostly in preparation
for further development, but also make the code much easier to
follow in these areas.

There is a patch which allows the use of ->writepages even in the
default ordered write mode for all writebacks. This results in
sending larger i/os to the block layer, and a subsequent increase
in performance. It also reduces the number of different i/o paths
by one.

There is also a bug fix reinstating the withdraw ack system which
somehow got lost when the lock modules were merged into GFS2.

---
The following changes since commit 6abb7c25775b7fb2225ad0508236d63ca710e65f:

  Merge tag 'regulator-3.8-rc5' of 
git://git.kernel.org/pub/scm/linux/kernel/git/broonie/regulator (2013-01-28 
22:44:53 -0800)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/steve/gfs2-3.0-nmw.git master

Bob Peterson (1):
  GFS2: Get a block reservation before resizing a file

Steven Whitehouse (9):
  GFS2: Separate LRU scanning from shrinker
  GFS2: Merge revoke adding functions
  GFS2: Split gfs2_trans_add_bh() into two
  GFS2: Copy gfs2_trans_add_bh into new data/meta functions
  GFS2: Merge gfs2_attach_bufdata() into trans.c
  GFS2: Clean up freeze code
  GFS2: Use ->writepages for ordered writes
  GFS2: Split glock lru processing into two parts
  GFS2: Reinstate withdraw ack system

 fs/gfs2/aops.c   |   17 +++
 fs/gfs2/bmap.c   |   30 +++-
 fs/gfs2/dir.c|   30 ++--
 fs/gfs2/file.c   |4 +-
 fs/gfs2/glock.c  |  116 +-
 fs/gfs2/incore.h |8 ++--
 fs/gfs2/inode.c  |8 ++--
 fs/gfs2/log.c|   76 ++
 fs/gfs2/log.h|   12 +
 fs/gfs2/lops.c   |   83 +-
 fs/gfs2/lops.h   |   14 +-
 fs/gfs2/meta_io.c|   35 --
 fs/gfs2/meta_io.h|3 -
 fs/gfs2/ops_fstype.c |4 +-
 fs/gfs2/quota.c  |4 +-
 fs/gfs2/rgrp.c   |   18 
 fs/gfs2/super.c  |   70 
 fs/gfs2/super.h  |3 -
 fs/gfs2/sys.c|   48 ++--
 fs/gfs2/trans.c  |  124 +-
 fs/gfs2/trans.h  |3 +-
 fs/gfs2/util.c   |3 +
 fs/gfs2/xattr.c  |   36 +++---
 23 files changed, 375 insertions(+), 374 deletions(-)



signature.asc
Description: This is a digitally signed message part

Re: [PATCH] decnet: fix shutdown parameter checking

2012-09-05 Thread Steven Whitehouse

Hi,

On Fri, 2012-08-31 at 15:57 -0400, David Miller wrote:
> From: Steven Whitehouse 
> Date: Mon, 27 Aug 2012 10:16:41 +0100
> 
> > On Sun, 2012-08-26 at 22:37 -0400, Xi Wang wrote:
> >> The allowed value of "how" is SHUT_RD/SHUT_WR/SHUT_RDWR (0/1/2),
> >> rather than SHUTDOWN_MASK (3).
> >> 
> >> Signed-off-by: Xi Wang 
> > Acked-by: Steven Whitehouse 
> 
> Applied to net-next.
> 
> > Although it could be argued that we should also continue to accept the
> > value 3 just in case there is any userland software out there which
> > sends that value,
> 
> True, but this is a rather standard BSD socket interface with a very
> specific small set of legitimate input parameters.  Allowing
> deviation, even for compatibility for specific protocols, is largely
> unwise.

Yes, I'd agree on the whole, and certainly if this was a recent
addition. However since this code has been around for somewhere close to
16 years now, I'd say that means that either (a) nobody calls shutdown
for DECnet or (b) existing users are buggy too.

We do have a precedent for this kind of compatibility, such as the AX.25
use of SOCK_SEQPACKET.

However, I'm not overly worried and we'll soon know if it will cause any
problems or not,

Steve.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] fs: encode_fh: return FILEID_INVALID if invalid fid_type

2012-09-05 Thread Steven Whitehouse

Hi,

On Fri, 2012-08-31 at 12:46 -0400, Namjae Jeon wrote:
> This patch is a follow up on below patch:
> 
> [PATCH] exportfs: add FILEID_INVALID to indicate invalid fid_type
> https://patchwork.kernel.org/patch/1385131/
> 
> Signed-off-by: Namjae Jeon 
> Signed-off-by: Vivek Trivedi 
Acked-by: Steven Whitehouse 

for the gfs2 bits,

Steve.

> ---
>  fs/btrfs/export.c   |4 ++--
>  fs/ceph/export.c|4 ++--
>  fs/fuse/inode.c |2 +-
>  fs/gfs2/export.c|4 ++--
>  fs/isofs/export.c   |4 ++--
>  fs/nilfs2/namei.c   |4 ++--
>  fs/ocfs2/export.c   |4 ++--
>  fs/reiserfs/inode.c |4 ++--
>  fs/udf/namei.c  |4 ++--
>  fs/xfs/xfs_export.c |4 ++--
>  mm/cleancache.c |2 +-
>  mm/shmem.c  |2 +-
>  12 files changed, 21 insertions(+), 21 deletions(-)
> 
> diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
> index 614f34a..81ee29e 100644
> --- a/fs/btrfs/export.c
> +++ b/fs/btrfs/export.c
> @@ -22,10 +22,10 @@ static int btrfs_encode_fh(struct inode *inode, u32 *fh, 
> int *max_len,
>  
>   if (parent && (len < BTRFS_FID_SIZE_CONNECTABLE)) {
>   *max_len = BTRFS_FID_SIZE_CONNECTABLE;
> - return 255;
> + return FILEID_INVALID;
>   } else if (len < BTRFS_FID_SIZE_NON_CONNECTABLE) {
>   *max_len = BTRFS_FID_SIZE_NON_CONNECTABLE;
> - return 255;
> + return FILEID_INVALID;
>   }
>  
>   len  = BTRFS_FID_SIZE_NON_CONNECTABLE;
> diff --git a/fs/ceph/export.c b/fs/ceph/export.c
> index 8e1b60e..98bde89 100644
> --- a/fs/ceph/export.c
> +++ b/fs/ceph/export.c
> @@ -79,7 +79,7 @@ static int ceph_encode_fh(struct inode *inode, u32 *rawfh, 
> int *max_len,
>   if (parent_inode) {
>   /* nfsd wants connectable */
>   *max_len = connected_handle_length;
> - type = 255;
> + type = FILEID_INVALID;
>   } else {
>   dout("encode_fh %p\n", dentry);
>   fh->ino = ceph_ino(inode);
> @@ -88,7 +88,7 @@ static int ceph_encode_fh(struct inode *inode, u32 *rawfh, 
> int *max_len,
>   }
>   } else {
>   *max_len = handle_length;
> - type = 255;
> + type = FILEID_INVALID;
>   }
>   return type;
>  }
> diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
> index 2690a76..b787a6f 100644
> --- a/fs/fuse/inode.c
> +++ b/fs/fuse/inode.c
> @@ -672,7 +672,7 @@ static int fuse_encode_fh(struct inode *inode, u32 *fh, 
> int *max_len,
>  
>   if (*max_len < len) {
>   *max_len = len;
> - return  255;
> + return  FILEID_INVALID;
>   }
>  
>   nodeid = get_fuse_inode(inode)->nodeid;
> diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c
> index e8ed6d4..f7a8092 100644
> --- a/fs/gfs2/export.c
> +++ b/fs/gfs2/export.c
> @@ -37,10 +37,10 @@ static int gfs2_encode_fh(struct inode *inode, __u32 *p, 
> int *len,
>  
>   if (parent && (*len < GFS2_LARGE_FH_SIZE)) {
>   *len = GFS2_LARGE_FH_SIZE;
> - return 255;
> + return FILEID_INVALID;
>   } else if (*len < GFS2_SMALL_FH_SIZE) {
>   *len = GFS2_SMALL_FH_SIZE;
> - return 255;
> + return FILEID_INVALID;
>   }
>  
>   fh[0] = cpu_to_be32(ip->i_no_formal_ino >> 32);
> diff --git a/fs/isofs/export.c b/fs/isofs/export.c
> index 1d38044..5e693b3 100644
> --- a/fs/isofs/export.c
> +++ b/fs/isofs/export.c
> @@ -125,10 +125,10 @@ isofs_export_encode_fh(struct inode *inode,
>*/
>   if (parent && (len < 5)) {
>   *max_len = 5;
> - return 255;
> + return FILEID_INVALID;
>   } else if (len < 3) {
>   *max_len = 3;
> - return 255;
> + return FILEID_INVALID;
>   }
>  
>   len = 3;
> diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
> index 1d0c0b8..9de78f0 100644
> --- a/fs/nilfs2/namei.c
> +++ b/fs/nilfs2/namei.c
> @@ -517,11 +517,11 @@ static int nilfs_encode_fh(struct inode *inode, __u32 
> *fh, int *lenp,
>  
>   if (parent && *lenp < NILFS_FID_SIZE_CONNECTABLE) {
>   *lenp = NILFS_FID_SIZE_CONNECTABLE;
> - return 255;
> + return FILEID_INVALID;
>   }
>   if (*lenp < NILFS_FID_SIZE_NON_CONNECTABLE) {
>   *lenp = NILFS_FID_SIZE_NON_CONNECTABLE;
> - return 255;
> + return FILEID_INVALID;

[PATCH 2/3] GFS2: Fix missing allocation data for set/remove xattr

2012-09-13 Thread Steven Whitehouse

These entry points were missed in the original patch to allocate
this data structure.

Signed-off-by: Steven Whitehouse 

diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 4ce22e5..753af3d 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -1722,7 +1722,9 @@ static int gfs2_setxattr(struct dentry *dentry, const 
char *name,
gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
ret = gfs2_glock_nq(&gh);
if (ret == 0) {
-   ret = generic_setxattr(dentry, name, data, size, flags);
+   ret = gfs2_rs_alloc(ip);
+   if (ret == 0)
+   ret = generic_setxattr(dentry, name, data, size, flags);
gfs2_glock_dq(&gh);
}
gfs2_holder_uninit(&gh);
@@ -1757,7 +1759,9 @@ static int gfs2_removexattr(struct dentry *dentry, const 
char *name)
gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
ret = gfs2_glock_nq(&gh);
if (ret == 0) {
-   ret = generic_removexattr(dentry, name);
+   ret = gfs2_rs_alloc(ip);
+   if (ret == 0)
+   ret = generic_removexattr(dentry, name);
gfs2_glock_dq(&gh);
}
gfs2_holder_uninit(&gh);
-- 
1.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 3/3] GFS2: Take account of blockages when using reserved blocks

2012-09-13 Thread Steven Whitehouse

The claim_reserved_blks() function was not taking account of
the possibility of "blockages" while performing allocation.
This can be caused by another node allocating something in
the same extent which has been reserved locally.

This patch tests for this condition and then skips the remainder
of the reservation in this case. This is a relatively rare event,
so it should not affect the general performance improvement
which the block reservations provide.

The claim_reserved_blks() function also appears not to be able
to deal with reservations which cross bitmap boundaries, but
that can be dealt with in a future patch since we don't generate
boundary crossing reservations currently.

Signed-off-by: Steven Whitehouse 
Reported-by: David Teigland 
Cc: Bob Peterson 

diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 4d34887..c9ed814 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1961,7 +1961,7 @@ static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd)
  * @dinode: 1 if this block is a dinode block, otherwise data block
  * @nblocks: desired extent length
  *
- * Lay claim to previously allocated block reservation blocks.
+ * Lay claim to previously reserved blocks.
  * Returns: Starting block number of the blocks claimed.
  * Sets *nblocks to the actual extent length allocated.
  */
@@ -1970,19 +1970,17 @@ static u64 claim_reserved_blks(struct gfs2_inode *ip, 
bool dinode,
 {
struct gfs2_blkreserv *rs = ip->i_res;
struct gfs2_rgrpd *rgd = rs->rs_rgd;
-   struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct gfs2_bitmap *bi;
u64 start_block = gfs2_rs_startblk(rs);
const unsigned int elen = *nblocks;
 
-   /*BUG_ON(!gfs2_glock_is_locked_by_me(ip->i_gl));*/
-   gfs2_assert_withdraw(sdp, rgd);
-   /*BUG_ON(!gfs2_glock_is_locked_by_me(rgd->rd_gl));*/
bi = rs->rs_bi;
gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
 
for (*nblocks = 0; *nblocks < elen && rs->rs_free; (*nblocks)++) {
-   /* Make sure the bitmap hasn't changed */
+   if (gfs2_testbit(rgd, bi->bi_bh->b_data + bi->bi_offset,
+bi->bi_len, rs->rs_biblk) != GFS2_BLKST_FREE)
+   break;
gfs2_setbit(rgd, bi->bi_clone, bi, rs->rs_biblk,
dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
rs->rs_biblk++;
@@ -1991,20 +1989,12 @@ static u64 claim_reserved_blks(struct gfs2_inode *ip, 
bool dinode,
BUG_ON(!rgd->rd_reserved);
rgd->rd_reserved--;
dinode = false;
-   trace_gfs2_rs(ip, rs, TRACE_RS_CLAIM);
}
 
-   if (!rs->rs_free) {
-   struct gfs2_rgrpd *rgd = ip->i_res->rs_rgd;
-
+   trace_gfs2_rs(ip, rs, TRACE_RS_CLAIM);
+   if (!rs->rs_free || *nblocks != elen)
gfs2_rs_deltree(rs);
-   /* -nblocks because we haven't returned to do the math yet.
-  I'm doing the math backwards to prevent negative numbers,
-  but think of it as:
-  if (unclaimed_blocks(rgd) - *nblocks >= RGRP_RSRV_MINBLKS */
-   if (unclaimed_blocks(rgd) >= RGRP_RSRV_MINBLKS + *nblocks)
-   rg_mblk_search(rgd, ip);
-   }
+
return start_block;
 }
 
@@ -2037,34 +2027,34 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, 
unsigned int *nblocks,
if (ip->i_res->rs_requested == 0)
return -ECANCELED;
 
-   /* Check if we have a multi-block reservation, and if so, claim the
-  next free block from it. */
+   /* If we have a reservation, claim blocks from it. */
if (gfs2_rs_active(ip->i_res)) {
BUG_ON(!ip->i_res->rs_free);
rgd = ip->i_res->rs_rgd;
block = claim_reserved_blks(ip, dinode, nblocks);
-   } else {
-   rgd = ip->i_rgd;
+   if (*nblocks)
+   goto found_blocks;
+   }
 
-   if (!dinode && rgrp_contains_block(rgd, ip->i_goal))
-   goal = ip->i_goal - rgd->rd_data0;
-   else
-   goal = rgd->rd_last_alloc;
-
-   blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, &bi);
-
-   /* Since all blocks are reserved in advance, this shouldn't
-  happen */
-   if (blk == BFITNOENT) {
-   printk(KERN_WARNING "BFITNOENT, nblocks=%u\n",
-  *nblocks);
-   printk(KERN_WARNING "FULL=%d\n",
-  test_bit(GBF_FULL, &rgd->rd_bits->bi_flags));
-   goto rgrp_error;
-   }
+   rgd = ip->i_rgd;
 
-   blo

GFS2: Pre-pull patch posting (fixes)

2012-09-13 Thread Steven Whitehouse

Hi,

Here are three GFS2 fixes for the current kernel tree. These are all
related to the block reservation code which was added at the merge
window. That code will be getting an update at the forthcoming merge
window too. In the meantime, though, there are a few smaller issues
which should be fixed.

The first patch resolves an issue with write sizes of greater than
32 bits with the size hinting code. The second ensures that the
allocation data structure is initialised when using xattrs and the
third takes into account allocations which may have been made by
other nodes which affect a reservation on the local node,

Steve.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 1/3] GFS2: Make write size hinting code common

2012-09-13 Thread Steven Whitehouse

This collects up the write size hinting code which is used by the
block reservation subsystem into a single function. At the same
time this also corrects the rounding for this calculation.

Signed-off-by: Steven Whitehouse 

diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index d1d791e..382000f 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -323,6 +323,29 @@ static long gfs2_ioctl(struct file *filp, unsigned int 
cmd, unsigned long arg)
 }
 
 /**
+ * gfs2_size_hint - Give a hint to the size of a write request
+ * @file: The struct file
+ * @offset: The file offset of the write
+ * @size: The length of the write
+ *
+ * When we are about to do a write, this function records the total
+ * write size in order to provide a suitable hint to the lower layers
+ * about how many blocks will be required.
+ *
+ */
+
+static void gfs2_size_hint(struct file *filep, loff_t offset, size_t size)
+{
+   struct inode *inode = filep->f_dentry->d_inode;
+   struct gfs2_sbd *sdp = GFS2_SB(inode);
+   struct gfs2_inode *ip = GFS2_I(inode);
+   size_t blks = (size + sdp->sd_sb.sb_bsize - 1) >> 
sdp->sd_sb.sb_bsize_shift;
+   int hint = min_t(size_t, INT_MAX, blks);
+
+   atomic_set(&ip->i_res->rs_sizehint, hint);
+}
+
+/**
  * gfs2_allocate_page_backing - Use bmap to allocate blocks
  * @page: The (locked) page to allocate backing for
  *
@@ -382,8 +405,7 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, 
struct vm_fault *vmf)
if (ret)
return ret;
 
-   atomic_set(&ip->i_res->rs_sizehint,
-  PAGE_CACHE_SIZE >> sdp->sd_sb.sb_bsize_shift);
+   gfs2_size_hint(vma->vm_file, pos, PAGE_CACHE_SIZE);
 
gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
ret = gfs2_glock_nq(&gh);
@@ -663,7 +685,8 @@ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, 
const struct iovec *iov,
if (ret)
return ret;
 
-   atomic_set(&ip->i_res->rs_sizehint, writesize >> 
sdp->sd_sb.sb_bsize_shift);
+   gfs2_size_hint(file, pos, writesize);
+
if (file->f_flags & O_APPEND) {
struct gfs2_holder gh;
 
@@ -789,7 +812,7 @@ static long gfs2_fallocate(struct file *file, int mode, 
loff_t offset,
if (unlikely(error))
goto out_uninit;
 
-   atomic_set(&ip->i_res->rs_sizehint, len >> sdp->sd_sb.sb_bsize_shift);
+   gfs2_size_hint(file, offset, len);
 
while (len > 0) {
if (len < bytes)
-- 
1.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

GFS2: Pull request (fixes)

2012-09-13 Thread Steven Whitehouse

Hi,

Please consider pulling the following GFS2 fixes

Steve.

--

Here are three GFS2 fixes for the current kernel tree. These are all
related to the block reservation code which was added at the merge
window. That code will be getting an update at the forthcoming merge
window too. In the mean time though there are a few smaller issues
which should be fixed.

The first patch resolves an issue with write sizes of greater than
32 bits with the size hinting code. The second ensures that the
allocation data structure is initialised when using xattrs and the
third takes into account allocations which may have been made by
other nodes which affect a reservation on the local node.

The following changes since commit 0d7614f09c1ebdbaa1599a5aba7593f147bf96ee:

  Linux 3.6-rc1 (2012-08-02 16:38:10 -0700)

are available in the git repository at:
  git://git.kernel.org/pub/scm/linux/kernel/git/steve/gfs2-3.0-fixes.git master

Steven Whitehouse (3):
  GFS2: Make write size hinting code common
  GFS2: Fix missing allocation data for set/remove xattr
  GFS2: Take account of blockages when using reserved blocks

 fs/gfs2/file.c  |   31 ++---
 fs/gfs2/inode.c |8 +-
 fs/gfs2/rgrp.c  |   66 +++---
 3 files changed, 61 insertions(+), 44 deletions(-)



signature.asc
Description: This is a digitally signed message part

Re: [PATCH] gfs2: be*_add_cpu conversion

2008-02-13 Thread Steven Whitehouse

Hi,

Now in the GFS2 -nmw git tree. Thanks,

Steve.

On Wed, 2008-02-13 at 00:06 +0100, [EMAIL PROTECTED] wrote:
> From: Marcin Slusarz <[EMAIL PROTECTED]>
> 
> replace all:
> big_endian_variable = cpu_to_beX(beX_to_cpu(big_endian_variable) +
>   expression_in_cpu_byteorder);
> with:
>   beX_add_cpu(&big_endian_variable, expression_in_cpu_byteorder);
> generated with semantic patch
> 
> Signed-off-by: Marcin Slusarz <[EMAIL PROTECTED]>
> Cc: Steven Whitehouse <[EMAIL PROTECTED]>
> Cc: [EMAIL PROTECTED]
> ---
>  fs/gfs2/dir.c |6 +++---
>  1 files changed, 3 insertions(+), 3 deletions(-)
> 
> diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
> index c347095..6f2e382 100644
> --- a/fs/gfs2/dir.c
> +++ b/fs/gfs2/dir.c
> @@ -1021,13 +1021,13 @@ static int dir_split_leaf(struct inode *inode, const 
> struct qstr *name)
>  
>   new->de_inum = dent->de_inum; /* No endian worries */
>   new->de_type = dent->de_type; /* No endian worries */
> - nleaf->lf_entries = 
> cpu_to_be16(be16_to_cpu(nleaf->lf_entries)+1);
> + be16_add_cpu(&nleaf->lf_entries, 1);
>  
>   dirent_del(dip, obh, prev, dent);
>  
>   if (!oleaf->lf_entries)
>   gfs2_consist_inode(dip);
> - oleaf->lf_entries = 
> cpu_to_be16(be16_to_cpu(oleaf->lf_entries)-1);
> + be16_add_cpu(&oleaf->lf_entries, -1);
>  
>   if (!prev)
>   prev = dent;
> @@ -1616,7 +1616,7 @@ int gfs2_dir_add(struct inode *inode, const struct qstr 
> *name,
>   dent->de_type = cpu_to_be16(type);
>   if (ip->i_di.di_flags & GFS2_DIF_EXHASH) {
>   leaf = (struct gfs2_leaf *)bh->b_data;
> - leaf->lf_entries = 
> cpu_to_be16(be16_to_cpu(leaf->lf_entries) + 1);
> + be16_add_cpu(&leaf->lf_entries, 1);
>   }
>   brelse(bh);
>   error = gfs2_meta_inode_buffer(ip, &bh);

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: gfs2_fh_to_parent() array overflow

2008-02-14 Thread Steven Whitehouse

Hi,

On Wed, 2008-02-13 at 23:31 +0200, Adrian Bunk wrote:
> On Mon, Oct 29, 2007 at 09:11:21AM +0000, Steven Whitehouse wrote:
> > Hi,
> > 
> > On Sat, 2007-10-27 at 23:00 +0200, Christoph Hellwig wrote:
> > > On Wed, Oct 24, 2007 at 06:26:26PM +0200, Adrian Bunk wrote:
> > > > The Coverity checker spotted the following array overflow caused by
> > > > commit 34c0d154243dd913c5690ae6ceb9557017429b9c:
> > > 
> > > The line is a left-over from times when gfs stored the mode of the
> > > inode in the file handle.  It can simply be deleted.  Steve, do you
> > > want a patch for that or could you commit that one-liner directly?
> > > 
> > 
> > I'm just back from holiday this morning and this is looking a bit more
> > complicated than that... give me a day or two and I'll try and come up
> > with a solution,
> 
> This issue is still present in 2.6.25-rc1.
> 
Yes, it seems to have slipped off my list somehow... I've opened a bz
(#432775 at bugzilla.redhat.com) to ensure that it doesn't get missed
again,

Steve.


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: VFS hot tracking: How to calculate data temperature?

2012-11-05 Thread Steven Whitehouse

Hi,

On Mon, 2012-11-05 at 16:44 +0800, Zhi Yong Wu wrote:
> On Mon, Nov 5, 2012 at 4:28 PM, Dave Chinner  wrote:
> > On Mon, Nov 05, 2012 at 10:35:50AM +0800, Zhi Yong Wu wrote:
> >> On Sat, Nov 3, 2012 at 5:27 AM, Mingming.cao  wrote:
> >> > On Fri, 2012-11-02 at 14:38 +0800, Zhi Yong Wu wrote:
> >> >> Here also has another question.
> >> >>
> >> >> How to save the file temperature among the umount to be able to
> >> >> preserve the file tempreture after reboot?
> >> >>
> >> >> This above is the requirement from DB product.
> >> >> I thought that we can save file temperature in its inode struct, that
> >> >> is, add one new field in struct inode, then this info will be written
> >> >> to disk with inode.
> >> >>
> >> >> Any comments or ideas are appreciated, thanks.
> >> >>
> >> >>
> >> >
> >> > Maybe could save the last file temperature with extended attributes.
> >> It seems that only ext4 has the concept of extended attributes.
> >
> > All major filesystems have xattr support. They are used extensively
> > by the security and integrity subsystems, for example.
> got it, thanks.
> >
> > Saving the information might be something that is useful to certian
> > applications, but lets have the people that need that functionality
> > spell out their requirements before discussing how or what to
> > implement.  Indeed, discussion shoul dreally focus on getting the
> > core, in-memory infrastructure sorted out first before trying to
> > expand the functionality further...
> ah, but the latest patchset need some love from experienced FS guys:)...

There is one other possible issue with saving the data into the
filesystem, which is that it may disturb what you are trying to measure.
Some filesystems (GFS2 is one) store data for small inodes in the same
block as the inode itself. So that means the accesses to the saved hot
tracking info may potentially affect the data access times too. Also
there is a very limited amount of space to expand the number of fields
in the inode, so xattr may be the only solution, depending on how much
data needs to be stored in each case.

In the GFS2 case (I don't think it is unique in this) xattrs are stored
out of line and having to access them in every open means an extra block
read per inode, which again has performance implications.

So that is not an insurmountable problem, but something to take into
account in selecting a solution,

Steve.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [RFC v4+ hot_track 03/19] vfs: add I/O frequency update function

2012-11-05 Thread Steven Whitehouse

Hi,

On Mon, 2012-10-29 at 12:30 +0800, zwu.ker...@gmail.com wrote:
> From: Zhi Yong Wu 
> 
>   Add some util helpers to update access frequencies
> for one file or its range.
> 
> Signed-off-by: Zhi Yong Wu 
> ---
>  fs/hot_tracking.c|  179 
> ++
>  fs/hot_tracking.h|7 ++
>  include/linux/hot_tracking.h |2 +
>  3 files changed, 188 insertions(+), 0 deletions(-)
> 
> diff --git a/fs/hot_tracking.c b/fs/hot_tracking.c
> index 68591f0..0a7d9a3 100644
> --- a/fs/hot_tracking.c
> +++ b/fs/hot_tracking.c
> @@ -172,6 +172,137 @@ static void hot_inode_tree_exit(struct hot_info *root)
>   }
>  }
>  
> +struct hot_inode_item
> +*hot_inode_item_find(struct hot_info *root, u64 ino)
> +{
> + struct hot_inode_item *he;
> + int ret;
> +
> +again:
> + spin_lock(&root->lock);
> + he = radix_tree_lookup(&root->hot_inode_tree, ino);
> + if (he) {
> + kref_get(&he->hot_inode.refs);
> + spin_unlock(&root->lock);
> + return he;
> + }
> + spin_unlock(&root->lock);
> +
> + he = kmem_cache_zalloc(hot_inode_item_cachep,
> + GFP_KERNEL | GFP_NOFS);
This doesn't look quite right... which of these two did you mean? I
assume probably just GFP_NOFS

> + if (!he)
> + return ERR_PTR(-ENOMEM);
> +
> + hot_inode_item_init(he, ino, &root->hot_inode_tree);
> +
> + ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
> + if (ret) {
> + kmem_cache_free(hot_inode_item_cachep, he);
> + return ERR_PTR(ret);
> + }
> +
> + spin_lock(&root->lock);
> + ret = radix_tree_insert(&root->hot_inode_tree, ino, he);
> + if (ret == -EEXIST) {
> + kmem_cache_free(hot_inode_item_cachep, he);
> + spin_unlock(&root->lock);
> + radix_tree_preload_end();
> + goto again;
> + }
> + spin_unlock(&root->lock);
> + radix_tree_preload_end();
> +
> + kref_get(&he->hot_inode.refs);
> + return he;
> +}
> +EXPORT_SYMBOL_GPL(hot_inode_item_find);
> +
> +static struct hot_range_item
> +*hot_range_item_find(struct hot_inode_item *he,
> + u32 start)
> +{
> + struct hot_range_item *hr;
> + int ret;
> +
> +again:
> + spin_lock(&he->lock);
> + hr = radix_tree_lookup(&he->hot_range_tree, start);
> + if (hr) {
> + kref_get(&hr->hot_range.refs);
> + spin_unlock(&he->lock);
> + return hr;
> + }
> + spin_unlock(&he->lock);
> +
> + hr = kmem_cache_zalloc(hot_range_item_cachep,
> + GFP_KERNEL | GFP_NOFS);
Likewise, here too.

Steve.



--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [RFC v4+ hot_track 09/19] vfs: add one work queue

2012-11-05 Thread Steven Whitehouse

Hi,

On Mon, 2012-10-29 at 12:30 +0800, zwu.ker...@gmail.com wrote:
> From: Zhi Yong Wu 
> 
>   Add a per-superblock workqueue and a delayed_work
> to run periodic work to update map info on each superblock.
> 
> Signed-off-by: Zhi Yong Wu 
> ---
>  fs/hot_tracking.c|   85 
> ++
>  fs/hot_tracking.h|3 +
>  include/linux/hot_tracking.h |3 +
>  3 files changed, 91 insertions(+), 0 deletions(-)
> 
> diff --git a/fs/hot_tracking.c b/fs/hot_tracking.c
> index fff0038..0ef9cad 100644
> --- a/fs/hot_tracking.c
> +++ b/fs/hot_tracking.c
> @@ -15,9 +15,12 @@
>  #include 
>  #include 
>  #include 
> +#include 
> +#include 
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include "hot_tracking.h"
>  
> @@ -557,6 +560,67 @@ static void hot_map_array_exit(struct hot_info *root)
>   }
>  }
>  
> +/* Temperature compare function*/
> +static int hot_temp_cmp(void *priv, struct list_head *a,
> + struct list_head *b)
> +{
> + struct hot_comm_item *ap =
> + container_of(a, struct hot_comm_item, n_list);
> + struct hot_comm_item *bp =
> + container_of(b, struct hot_comm_item, n_list);
> +
> + int diff = ap->hot_freq_data.last_temp
> + - bp->hot_freq_data.last_temp;
> + if (diff > 0)
> + return -1;
> + if (diff < 0)
> + return 1;
> + return 0;
> +}
> +
> +/*
> + * Every sync period we update temperatures for
> + * each hot inode item and hot range item for aging
> + * purposes.
> + */
> +static void hot_update_worker(struct work_struct *work)
> +{
> + struct hot_info *root = container_of(to_delayed_work(work),
> + struct hot_info, update_work);
> + struct hot_inode_item *hi_nodes[8];
> + u64 ino = 0;
> + int i, n;
> +
> + while (1) {
> + n = radix_tree_gang_lookup(&root->hot_inode_tree,
> +(void **)hi_nodes, ino,
> +ARRAY_SIZE(hi_nodes));
> + if (!n)
> + break;
> +
> + ino = hi_nodes[n - 1]->i_ino + 1;
> + for (i = 0; i < n; i++) {
> + kref_get(&hi_nodes[i]->hot_inode.refs);
> + hot_map_array_update(
> + &hi_nodes[i]->hot_inode.hot_freq_data, root);
> + hot_range_update(hi_nodes[i], root);
> + hot_inode_item_put(hi_nodes[i]);
> + }
> + }
> +
> + /* Sort temperature map info */
> + for (i = 0; i < HEAT_MAP_SIZE; i++) {
> + list_sort(NULL, &root->heat_inode_map[i].node_list,
> + hot_temp_cmp);
> + list_sort(NULL, &root->heat_range_map[i].node_list,
> + hot_temp_cmp);
> + }
> +

If this list can potentially have one (or more) entries per inode, then
filesystems with a lot of inodes (millions) may potentially exceed the
max size of list which list_sort() can handle. If that happens it still
works, but you'll get a warning message and it won't be as efficient.

It is something that we've run into with list_sort() and GFS2, but it
only happens very rarely,

Steve.



--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: VFS hot tracking: How to calculate data temperature?

2012-11-05 Thread Steven Whitehouse

Hi,

On Mon, 2012-11-05 at 19:46 +0800, Zhi Yong Wu wrote:
> On Mon, Nov 5, 2012 at 6:33 PM, Steven Whitehouse  wrote:
> > Hi,
> >
> > On Mon, 2012-11-05 at 16:44 +0800, Zhi Yong Wu wrote:
> >> On Mon, Nov 5, 2012 at 4:28 PM, Dave Chinner  wrote:
> >> > On Mon, Nov 05, 2012 at 10:35:50AM +0800, Zhi Yong Wu wrote:
> >> >> On Sat, Nov 3, 2012 at 5:27 AM, Mingming.cao  wrote:
> >> >> > On Fri, 2012-11-02 at 14:38 +0800, Zhi Yong Wu wrote:
> >> >> >> Here also has another question.
> >> >> >>
> >> >> >> How to save the file temperature among the umount to be able to
> >> >> >> preserve the file tempreture after reboot?
> >> >> >>
> >> >> >> This above is the requirement from DB product.
> >> >> >> I thought that we can save file temperature in its inode struct, that
> >> >> >> is, add one new field in struct inode, then this info will be written
> >> >> >> to disk with inode.
> >> >> >>
> >> >> >> Any comments or ideas are appreciated, thanks.
> >> >> >>
> >> >> >>
> >> >> >
> >> >> > Maybe could save the last file temperature with extended attributes.
> >> >> It seems that only ext4 has the concept of extended attributes.
> >> >
> >> > All major filesystems have xattr support. They are used extensively
> >> > by the security and integrity subsystems, for example.
> >> got it, thanks.
> >> >
> >> > Saving the information might be something that is useful to certian
> >> > applications, but lets have the people that need that functionality
> >> > spell out their requirements before discussing how or what to
> >> > implement.  Indeed, discussion shoul dreally focus on getting the
> >> > core, in-memory infrastructure sorted out first before trying to
> >> > expand the functionality further...
> >> ah, but the latest patchset need some love from experienced FS 
> >> guys:)...
> >
> > There is one other possible issue with saving the data into the
> > filesystem, which is that it may disturb what you are trying to measure.
> > Some filesystems (GFS2 is one) store data for small inodes in the same
> > block as the inode itself. So that means the accesses to the saved hot
> > tracking info may potentially affect the data access times too. Also
> > there is a very limited amount of space to expand the number of fields
> > in the inode, so xattr may be the only solution, depending on how much
> > data needs to be stored in each case.
> Very good analysis, two possible issues are very meanful, thanks.
> >
> > In the GFS2 case (I don't think it is unique in this) xattrs are stored
> > out of line and having to access them in every open means an extra block
> > read per inode, which again has performance implications.
> >
> > So that is not an insurmountable problem, but something to take into
> > account in selecting a solution,
> In summary, you look like preferring to xattr as its solution.
> 

Well, that depends on exactly how large the data to be stored is, and
other factors. It will add overhead to the storage/retrieval but at
least it is fairly generic (wrt on-disk format) so likely to be easier
to retrofit to existing filesystems.

I suspect this may be one of those cases where there is no obvious right
answer and it is a case of selecting the least worst option, if that
makes sense?

Steve.


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [RFC v4+ hot_track 09/19] vfs: add one work queue

2012-11-05 Thread Steven Whitehouse

Hi,

On Mon, 2012-11-05 at 19:55 +0800, Zhi Yong Wu wrote:
> On Mon, Nov 5, 2012 at 7:21 PM, Steven Whitehouse  wrote:
> > Hi,
> >
> > On Mon, 2012-10-29 at 12:30 +0800, zwu.ker...@gmail.com wrote:
> >> From: Zhi Yong Wu 
> >>
> >>   Add a per-superblock workqueue and a delayed_work
> >> to run periodic work to update map info on each superblock.
> >>
> >> Signed-off-by: Zhi Yong Wu 
> >> ---
> >>  fs/hot_tracking.c|   85 
> >> ++
> >>  fs/hot_tracking.h|3 +
> >>  include/linux/hot_tracking.h |3 +
> >>  3 files changed, 91 insertions(+), 0 deletions(-)
> >>
> >> diff --git a/fs/hot_tracking.c b/fs/hot_tracking.c
> >> index fff0038..0ef9cad 100644
> >> --- a/fs/hot_tracking.c
> >> +++ b/fs/hot_tracking.c
> >> @@ -15,9 +15,12 @@
> >>  #include 
> >>  #include 
> >>  #include 
> >> +#include 
> >> +#include 
> >>  #include 
> >>  #include 
> >>  #include 
> >> +#include 
> >>  #include 
> >>  #include "hot_tracking.h"
> >>
> >> @@ -557,6 +560,67 @@ static void hot_map_array_exit(struct hot_info *root)
> >>   }
> >>  }
> >>
> >> +/* Temperature compare function*/
> >> +static int hot_temp_cmp(void *priv, struct list_head *a,
> >> + struct list_head *b)
> >> +{
> >> + struct hot_comm_item *ap =
> >> + container_of(a, struct hot_comm_item, n_list);
> >> + struct hot_comm_item *bp =
> >> + container_of(b, struct hot_comm_item, n_list);
> >> +
> >> + int diff = ap->hot_freq_data.last_temp
> >> + - bp->hot_freq_data.last_temp;
> >> + if (diff > 0)
> >> + return -1;
> >> + if (diff < 0)
> >> + return 1;
> >> + return 0;
> >> +}
> >> +
> >> +/*
> >> + * Every sync period we update temperatures for
> >> + * each hot inode item and hot range item for aging
> >> + * purposes.
> >> + */
> >> +static void hot_update_worker(struct work_struct *work)
> >> +{
> >> + struct hot_info *root = container_of(to_delayed_work(work),
> >> + struct hot_info, update_work);
> >> + struct hot_inode_item *hi_nodes[8];
> >> + u64 ino = 0;
> >> + int i, n;
> >> +
> >> + while (1) {
> >> + n = radix_tree_gang_lookup(&root->hot_inode_tree,
> >> +(void **)hi_nodes, ino,
> >> +ARRAY_SIZE(hi_nodes));
> >> + if (!n)
> >> + break;
> >> +
> >> + ino = hi_nodes[n - 1]->i_ino + 1;
> >> + for (i = 0; i < n; i++) {
> >> + kref_get(&hi_nodes[i]->hot_inode.refs);
> >> + hot_map_array_update(
> >> + &hi_nodes[i]->hot_inode.hot_freq_data, root);
> >> + hot_range_update(hi_nodes[i], root);
> >> + hot_inode_item_put(hi_nodes[i]);
> >> + }
> >> + }
> >> +
> >> + /* Sort temperature map info */
> >> + for (i = 0; i < HEAT_MAP_SIZE; i++) {
> >> + list_sort(NULL, &root->heat_inode_map[i].node_list,
> >> + hot_temp_cmp);
> >> + list_sort(NULL, &root->heat_range_map[i].node_list,
> >> + hot_temp_cmp);
> >> + }
> >> +
> >
> > If this list can potentially have one (or more) entries per inode, then
> Only one hot_inode_item per inode, while maybe multiple
> hot_range_items per inode.
> > filesystems with a lot of inodes (millions) may potentially exceed the
> > max size of list which list_sort() can handle. If that happens it still
> > works, but you'll get a warning message and it won't be as efficient.
> I haven't do so large scale test. If we want to find that issue, we
> need to do large scale performance test, before that, i want to make
> sure the code change is correct at first.
> To be honest, for that issue you pointed to, i also have such
> concern.But list_sort() performance looks good from the test result of
&g

Re: VFS hot tracking: How to calculate data temperature?

2012-11-05 Thread Steven Whitehouse

Hi,

On Mon, 2012-11-05 at 20:18 +0800, Zhi Yong Wu wrote:
> On Mon, Nov 5, 2012 at 7:57 PM, Steven Whitehouse  wrote:
> > Hi,
> >
> > On Mon, 2012-11-05 at 19:46 +0800, Zhi Yong Wu wrote:
> >> On Mon, Nov 5, 2012 at 6:33 PM, Steven Whitehouse  
> >> wrote:
> >> > Hi,
> >> >
> >> > On Mon, 2012-11-05 at 16:44 +0800, Zhi Yong Wu wrote:
> >> >> On Mon, Nov 5, 2012 at 4:28 PM, Dave Chinner  
> >> >> wrote:
> >> >> > On Mon, Nov 05, 2012 at 10:35:50AM +0800, Zhi Yong Wu wrote:
> >> >> >> On Sat, Nov 3, 2012 at 5:27 AM, Mingming.cao  wrote:
> >> >> >> > On Fri, 2012-11-02 at 14:38 +0800, Zhi Yong Wu wrote:
> >> >> >> >> Here also has another question.
> >> >> >> >>
> >> >> >> >> How to save the file temperature among the umount to be able to
> >> >> >> >> preserve the file tempreture after reboot?
> >> >> >> >>
> >> >> >> >> This above is the requirement from DB product.
> >> >> >> >> I thought that we can save file temperature in its inode struct, 
> >> >> >> >> that
> >> >> >> >> is, add one new field in struct inode, then this info will be 
> >> >> >> >> written
> >> >> >> >> to disk with inode.
> >> >> >> >>
> >> >> >> >> Any comments or ideas are appreciated, thanks.
> >> >> >> >>
> >> >> >> >>
> >> >> >> >
> >> >> >> > Maybe could save the last file temperature with extended 
> >> >> >> > attributes.
> >> >> >> It seems that only ext4 has the concept of extended attributes.
> >> >> >
> >> >> > All major filesystems have xattr support. They are used extensively
> >> >> > by the security and integrity subsystems, for example.
> >> >> got it, thanks.
> >> >> >
> >> >> > Saving the information might be something that is useful to certian
> >> >> > applications, but lets have the people that need that functionality
> >> >> > spell out their requirements before discussing how or what to
> >> >> > implement.  Indeed, discussion shoul dreally focus on getting the
> >> >> > core, in-memory infrastructure sorted out first before trying to
> >> >> > expand the functionality further...
> >> >> ah, but the latest patchset need some love from experienced FS 
> >> >> guys:)...
> >> >
> >> > There is one other possible issue with saving the data into the
> >> > filesystem, which is that it may disturb what you are trying to measure.
> >> > Some filesystems (GFS2 is one) store data for small inodes in the same
> >> > block as the inode itself. So that means the accesses to the saved hot
> >> > tracking info may potentially affect the data access times too. Also
> >> > there is a very limited amount of space to expand the number of fields
> >> > in the inode, so xattr may be the only solution, depending on how much
> >> > data needs to be stored in each case.
> >> Very good analysis, two possible issues are very meanful, thanks.
> >> >
> >> > In the GFS2 case (I don't think it is unique in this) xattrs are stored
> >> > out of line and having to access them in every open means an extra block
> >> > read per inode, which again has performance implications.
> >> >
> >> > So that is not an insurmountable problem, but something to take into
> >> > account in selecting a solution,
> >> In summary, you look like preferring to xattr as its solution.
> >>
> >
> > Well, that depends on exactly how large the data to be stored is, and
> > other factors. It will add overhead to the storage/retrieval but at
> > least it is fairly generic (wrt on-disk format) so likely to be easier
> > to retrofit to existing filesystems.
> Do you have some idea with more details about how to retrofit to existing 
> FS?:)

Well I think we've already covered the obvious ways...

> >
> > I suspect this may be one of those cases where there is no obvious right
> > answer and it is a case of selecting the least worst option, if that
> > makes sense?
> Then we can only check which solution is better via large scale
> performance test.

Indeed, and that will be to a certain extent fs dependent too,

Steve.


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 15/16] gfs2: nuke pdflush from comments

2012-07-26 Thread Steven Whitehouse

Hi,

On Wed, 2012-07-25 at 11:16 -0400, Bob Peterson wrote:
> - Original Message -
> | From: Artem Bityutskiy 
> | 
> | The pdflush thread is long gone, so this patch removes references to
> | pdflush
> | from gfs comments.
> | 
> (snip)
> | -* potentially cause a busy-wait loop from pdflush and kswapd
> | +* potentially cause a busy-wait loop from flusher thread and
> 
> Hi,
> 
> ACK,
> 
> Just FYI: Steve Whitehouse is on holiday today.
> You may want to send this patch to cluster-de...@redhat.com as well,
> which is often where we send and review GFS2 patches.
> 
> Regards,
> 
> Bob Peterson
> Red Hat File Systems

That's ok, I spotted it anyway and it is fairly minor as patches go.
Artem, do you want to keep this patch in your series or should I queue
it up for the GFS2 tree? I don't mind which and it has my ack anyway,

Steve.


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

GFS2: Pre-pull patch posting (merge window)

2012-09-26 Thread Steven Whitehouse

Hi,

We've collected up a goodly number of patches in the -nmw tree now
and we can hold off any further changes until the following merge
window, so here is the current tree content.

The major feature this time is the "rbm" conversion in the resource
group code. The new struct gfs2_rbm specifies the location of an
allocatable block in (resource group, bitmap, offset) form. There
are a number of added helper functions, and later patches then
rewrite some of the resource group code in terms of this new
structure. Not only does this give us a nice code clean up, but
it also removes some of the previous restrictions where extents
could not cross bitmap boundaries, for example.

In addition to that, there are a few bug fixes and clean ups, but
the rbm work is by far the majority of this patch set in terms of
number of changed lines.

Steve.


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 01/27] GFS2: Merge two nearly identical xattr functions

2012-09-26 Thread Steven Whitehouse

There were two functions in the xattr code which were nearly
identical, the only difference being that one was copying data into
the unstuffed xattrs and the other was copying data out from them.

This patch merges the two functions such that the code which deals
with iteration over the unstuffed xattrs is no longer duplicated.

Signed-off-by: Steven Whitehouse 

diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index 27a0b4a..5404ed1 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -448,17 +448,18 @@ ssize_t gfs2_listxattr(struct dentry *dentry, char 
*buffer, size_t size)
 }
 
 /**
- * ea_get_unstuffed - actually copies the unstuffed data into the
- *request buffer
+ * ea_iter_unstuffed - copies the unstuffed xattr data to/from the
+ * request buffer
  * @ip: The GFS2 inode
  * @ea: The extended attribute header structure
- * @data: The data to be copied
+ * @din: The data to be copied in
+ * @dout: The data to be copied out (one of din,dout will be NULL)
  *
  * Returns: errno
  */
 
-static int ea_get_unstuffed(struct gfs2_inode *ip, struct gfs2_ea_header *ea,
-   char *data)
+static int gfs2_iter_unstuffed(struct gfs2_inode *ip, struct gfs2_ea_header 
*ea,
+  const char *din, char *dout)
 {
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct buffer_head **bh;
@@ -467,6 +468,8 @@ static int ea_get_unstuffed(struct gfs2_inode *ip, struct 
gfs2_ea_header *ea,
__be64 *dataptrs = GFS2_EA2DATAPTRS(ea);
unsigned int x;
int error = 0;
+   unsigned char *pos;
+   unsigned cp_size;
 
bh = kcalloc(nptrs, sizeof(struct buffer_head *), GFP_NOFS);
if (!bh)
@@ -497,12 +500,21 @@ static int ea_get_unstuffed(struct gfs2_inode *ip, struct 
gfs2_ea_header *ea,
goto out;
}
 
-   memcpy(data, bh[x]->b_data + sizeof(struct gfs2_meta_header),
-  (sdp->sd_jbsize > amount) ? amount : sdp->sd_jbsize);
+   pos = bh[x]->b_data + sizeof(struct gfs2_meta_header);
+   cp_size = (sdp->sd_jbsize > amount) ? amount : sdp->sd_jbsize;
 
-   amount -= sdp->sd_jbsize;
-   data += sdp->sd_jbsize;
+   if (dout) {
+   memcpy(dout, pos, cp_size);
+   dout += sdp->sd_jbsize;
+   }
+
+   if (din) {
+   gfs2_trans_add_bh(ip->i_gl, bh[x], 1);
+   memcpy(pos, din, cp_size);
+   din += sdp->sd_jbsize;
+   }
 
+   amount -= sdp->sd_jbsize;
brelse(bh[x]);
}
 
@@ -523,7 +535,7 @@ static int gfs2_ea_get_copy(struct gfs2_inode *ip, struct 
gfs2_ea_location *el,
memcpy(data, GFS2_EA2DATA(el->el_ea), len);
return len;
}
-   ret = ea_get_unstuffed(ip, el->el_ea, data);
+   ret = gfs2_iter_unstuffed(ip, el->el_ea, NULL, data);
if (ret < 0)
return ret;
return len;
@@ -1220,69 +1232,23 @@ static int gfs2_xattr_set(struct dentry *dentry, const 
char *name,
size, flags, type);
 }
 
+
 static int ea_acl_chmod_unstuffed(struct gfs2_inode *ip,
  struct gfs2_ea_header *ea, char *data)
 {
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
-   struct buffer_head **bh;
unsigned int amount = GFS2_EA_DATA_LEN(ea);
unsigned int nptrs = DIV_ROUND_UP(amount, sdp->sd_jbsize);
-   __be64 *dataptrs = GFS2_EA2DATAPTRS(ea);
-   unsigned int x;
-   int error;
-
-   bh = kcalloc(nptrs, sizeof(struct buffer_head *), GFP_NOFS);
-   if (!bh)
-   return -ENOMEM;
-
-   error = gfs2_trans_begin(sdp, nptrs + RES_DINODE, 0);
-   if (error)
-   goto out;
-
-   for (x = 0; x < nptrs; x++) {
-   error = gfs2_meta_read(ip->i_gl, be64_to_cpu(*dataptrs), 0,
-  bh + x);
-   if (error) {
-   while (x--)
-   brelse(bh[x]);
-   goto fail;
-   }
-   dataptrs++;
-   }
-
-   for (x = 0; x < nptrs; x++) {
-   error = gfs2_meta_wait(sdp, bh[x]);
-   if (error) {
-   for (; x < nptrs; x++)
-   brelse(bh[x]);
-   goto fail;
-   }
-   if (gfs2_metatype_check(sdp, bh[x], GFS2_METATYPE_ED)) {
-   for (; x < nptrs; x++)
-   brelse(bh[x]);
-   error = -EIO;
-   goto fail;
-   }
-
-   gfs2_trans_add_bh(ip->i_gl, bh[x], 1);
-
-   memcpy(bh[x]->b_data + size

[PATCH 10/27] GFS2: change function gfs2_direct_IO to use a normal gfs2_glock_dq

2012-09-26 Thread Steven Whitehouse

From: Bob Peterson 

This patch changes function gfs2_direct_IO so that it uses a normal
call to gfs2_glock_dq rather than a call to a multiple-dq of one item.

Signed-off-by: Bob Peterson 
Signed-off-by: Steven Whitehouse 

diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 00eaa83..01c4975 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -1024,7 +1024,7 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
  offset, nr_segs, gfs2_get_block_direct,
  NULL, NULL, 0);
 out:
-   gfs2_glock_dq_m(1, &gh);
+   gfs2_glock_dq(&gh);
gfs2_holder_uninit(&gh);
return rv;
 }
-- 
1.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 19/27] GFS2: Fall back to ignoring reservations, if there are no other blocks left

2012-09-26 Thread Steven Whitehouse

When we get to the stage of allocating blocks, we know that the
resource group in question must contain enough free blocks, otherwise
gfs2_inplace_reserve() would have failed. So if we are left with only
free blocks which are reserved, then we must use those. This can happen
if another node has sneaked in and used some blocks reserved on this
node, for example. Generally this will happen very rarely and only
when the resource group is nearly full.

Signed-off-by: Steven Whitehouse 

diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 55a2651..30c864e 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -2012,6 +2012,11 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, 
unsigned int *nblocks,
gfs2_rbm_from_block(&rbm, goal);
error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, ip, false);
 
+   if (error == -ENOSPC) {
+   gfs2_rbm_from_block(&rbm, goal);
+   error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, NULL, false);
+   }
+
/* Since all blocks are reserved in advance, this shouldn't happen */
if (error) {
fs_warn(sdp, "error=%d, nblocks=%u, full=%d\n", error, *nblocks,
-- 
1.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 22/27] GFS2: Stop block extents at the end of bitmaps

2012-09-26 Thread Steven Whitehouse

From: Bob Peterson 

This patch stops multiple block allocations if a nonzero
return code is received from gfs2_rbm_from_block. Without
this patch, if enough pressure is put on the file system,
you get a kernel warning quickly followed by:
BUG: unable to handle kernel NULL pointer dereference at (null)
IP: [] gfs2_alloc_blocks+0x2c8/0x880 [gfs2]
With this patch, things run normally.

Signed-off-by: Bob Peterson 
Signed-off-by: Steven Whitehouse 

diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 8869541..defb826 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1834,8 +1834,7 @@ static void gfs2_alloc_extent(const struct gfs2_rbm *rbm, 
bool dinode,
block++;
while (*n < elen) {
ret = gfs2_rbm_from_block(&pos, block);
-   WARN_ON(ret);
-   if (gfs2_testbit(&pos) != GFS2_BLKST_FREE)
+   if (ret || gfs2_testbit(&pos) != GFS2_BLKST_FREE)
break;
gfs2_trans_add_bh(pos.rgd->rd_gl, pos.bi->bi_bh, 1);
gfs2_setbit(&pos, true, GFS2_BLKST_USED);
-- 
1.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 25/27] GFS2: Fix infinite loop in rbm_find

2012-09-26 Thread Steven Whitehouse

From: Bob Peterson 

This patch fixes an infinite loop in gfs2_rbm_find that was introduced
by the previous patch. The problem occurred when the length was less
than 3 but the rbm block was byte-aligned, causing it to improperly
return an extent length of zero, which caused it to spin.

Signed-off-by: Bob Peterson 
Signed-off-by: Steven Whitehouse 
Tested-by: Bob Peterson 
Tested-by: Barry Marson 

diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index b933cdc..3cc402c 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -329,6 +329,7 @@ static u32 gfs2_free_extlen(const struct gfs2_rbm *rrbm, 
u32 len)
gfs2_unaligned_extlen(&rbm, 4 - n_unaligned, &len))
goto out;
 
+   n_unaligned = len & 3;
/* Start is now byte aligned */
while (len > 3) {
start = rbm.bi->bi_bh->b_data;
-- 
1.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 27/27] GFS2: Write out dirty inode metadata in delayed deletes

2012-09-26 Thread Steven Whitehouse

From: Benjamin Marzinski 

If a dirty GFS2 inode was being deleted but was in use by another node, its
metadata was not getting written out before GFS2 checked for dirty buffers in
gfs2_ail_flush().  GFS2 was relying on inode_go_sync() to write out the
metadata when the other node tried to free the file, but it failed the error
check before it got that far. This patch writes out the metadata before calling
gfs2_ail_flush().

Signed-off-by: Benjamin Marzinski 
Signed-off-by: Steven Whitehouse 

diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 79cac70..a8d90f2 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1545,6 +1545,11 @@ static void gfs2_evict_inode(struct inode *inode)
 
 out_truncate:
gfs2_log_flush(sdp, ip->i_gl);
+   if (test_bit(GLF_DIRTY, &ip->i_gl->gl_flags)) {
+   struct address_space *metamapping = gfs2_glock2aspace(ip->i_gl);
+   filemap_fdatawrite(metamapping);
+   filemap_fdatawait(metamapping);
+   }
write_inode_now(inode, 1);
gfs2_ail_flush(ip->i_gl, 0);
 
-- 
1.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 26/27] GFS2: fix s_writers.counter imbalance in gfs2_ail_empty_gl

2012-09-26 Thread Steven Whitehouse

From: Eric Sandeen 

gfs2_ail_empty_gl() contains an "inline version" of gfs2_trans_begin(),
so it needs an explicit sb_start_intwrite() as well, to balance the
sb_end_intwrite() which will be called by gfs2_trans_end().

With this, xfstest 068 passes on lock_nolock local gfs2.
Without it, we reach a writer count of -1 and get stuck.

Signed-off-by: Eric Sandeen 
Signed-off-by: Steven Whitehouse 

diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 4bdcf37..32cc4fd 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -94,6 +94,7 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
/* A shortened, inline version of gfs2_trans_begin() */
tr.tr_reserved = 1 + gfs2_struct2blk(sdp, tr.tr_revokes, sizeof(u64));
tr.tr_ip = (unsigned long)__builtin_return_address(0);
+   sb_start_intwrite(sdp->sd_vfs);
gfs2_log_reserve(sdp, tr.tr_reserved);
BUG_ON(current->journal_info);
current->journal_info = &tr;
-- 
1.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 24/27] GFS2: Consolidate free block searching functions

2012-09-26 Thread Steven Whitehouse

With the recently added block reservation code, an additional function
was added to search for free blocks. This had a restriction of only being
able to search for aligned extents of free blocks. As a result the
allocation patterns when reserving blocks were suboptimal when the
existing allocation of blocks for an inode was not aligned to the same
boundary.

This patch resolves that problem by adding the ability for gfs2_rbm_find
to search for extents of a particular minimum size. We can then use
gfs2_rbm_find for both looking for reservations, and also looking for
free blocks on an individual basis when we actually come to do the
allocation later on. As a result we only need a single set of code
to deal with both situations.

The function gfs2_rbm_from_block() is moved up rgrp.c so that it
occurs before all of its callers.

Many thanks are due to Bob for helping track down the final issue in
this patch. That fix to the rb_tree traversal and to not share
block reservations from a directory to its children is included here.

Signed-off-by: Steven Whitehouse 
Signed-off-by: Bob Peterson 

diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 6aaa07c..3d469d3 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -99,7 +99,6 @@ struct gfs2_rgrpd {
 #define GFS2_RDF_MASK  0xf0000000 /* mask for internal flags */
spinlock_t rd_rsspin;   /* protects reservation related vars */
struct rb_root rd_rstree;   /* multi-block reservation tree */
-   u32 rd_rs_cnt;  /* count of current reservations */
 };
 
 struct gfs2_rbm {
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index f2709ea..381893c 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -712,14 +712,9 @@ static int gfs2_create_inode(struct inode *dir, struct 
dentry *dentry,
if (error)
goto fail_gunlock2;
 
-   /* The newly created inode needs a reservation so it can allocate
-  xattrs. At the same time, we want new blocks allocated to the new
-  dinode to be as contiguous as possible. Since we allocated the
-  dinode block under the directory's reservation, we transfer
-  ownership of that reservation to the new inode. The directory
-  doesn't need a reservation unless it needs a new allocation. */
-   ip->i_res = dip->i_res;
-   dip->i_res = NULL;
+   error = gfs2_rs_alloc(ip);
+   if (error)
+   goto fail_gunlock2;
 
error = gfs2_acl_create(dip, inode);
if (error)
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index defb826..b933cdc 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -35,9 +35,6 @@
 #define BFITNOENT ((u32)~0)
 #define NO_BLOCK ((u64)~0)
 
-#define RSRV_CONTENTION_FACTOR 4
-#define RGRP_RSRV_MAX_CONTENDERS 2
-
 #if BITS_PER_LONG == 32
 #define LBITMASK   (0x55555555UL)
 #define LBITSKIP55 (0x55555555UL)
@@ -67,6 +64,10 @@ static const char valid_change[16] = {
1, 0, 0, 0
 };
 
+static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 minext,
+ const struct gfs2_inode *ip, bool nowrap);
+
+
 /**
  * gfs2_setbit - Set a bit in the bitmaps
  * @rbm: The position of the bit to set
@@ -235,6 +236,130 @@ static u32 gfs2_bitfit(const u8 *buf, const unsigned int 
len,
 }
 
 /**
+ * gfs2_rbm_from_block - Set the rbm based upon rgd and block number
+ * @rbm: The rbm with rgd already set correctly
+ * @block: The block number (filesystem relative)
+ *
+ * This sets the bi and offset members of an rbm based on a
+ * resource group and a filesystem relative block number. The
+ * resource group must be set in the rbm on entry, the bi and
+ * offset members will be set by this function.
+ *
+ * Returns: 0 on success, or an error code
+ */
+
+static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block)
+{
+   u64 rblock = block - rbm->rgd->rd_data0;
+   u32 goal = (u32)rblock;
+   int x;
+
+   if (WARN_ON_ONCE(rblock > UINT_MAX))
+   return -EINVAL;
+   if (block >= rbm->rgd->rd_data0 + rbm->rgd->rd_data)
+   return -E2BIG;
+
+   for (x = 0; x < rbm->rgd->rd_length; x++) {
+   rbm->bi = rbm->rgd->rd_bits + x;
+   if (goal < (rbm->bi->bi_start + rbm->bi->bi_len) * GFS2_NBBY) {
+   rbm->offset = goal - (rbm->bi->bi_start * GFS2_NBBY);
+   break;
+   }
+   }
+
+   return 0;
+}
+
+/**
+ * gfs2_unaligned_extlen - Look for free blocks which are not byte aligned
+ * @rbm: Position to search (value/result)
+ * @n_unaligned: Number of unaligned blocks to check
+ * @len: Decremented for each block found (terminate on zero)
+ *
+ * Returns: true if a non-free block is encountered
+ */
+
+static bool gfs2_unaligned_extlen(struct gfs2_rbm *rbm, u32 n_unaligned, u32 
*len)
+{
+   u64 block;
+   u32 n;
+   u8 res;
+
+   for (n = 0

[PATCH 04/27] GFS2: Replace rgblk_search with gfs2_rbm_find

2012-09-26 Thread Steven Whitehouse

This is part of a series of patches which are introducing the
gfs2_rbm structure throughout the block allocation code. The
main aim of this part is to create a search function which can
deal directly with struct gfs2_rbm. In this case it specifies
the initial position at which to start the search and also the
point at which the search terminates.

The net result of this is to clean up the search code and make
it rather more readable, and the various possible exceptions which
may occur during the search are partitioned into their own functions.

There are some bug fixes too. We should not be checking the reservations
while allocating extents - the time for that is when we are searching
for where to put the extent, not when we've already made that decision.

Also, rgblk_search had two uses, and in only one of those cases did
it make sense to check for reservations. This is fixed in the new
gfs2_rbm_find function, which has a cleaner interface.

The reservation checking has been improved by always checking for
contiguous reservations, and returning the first free block after
all contiguous reservations. This is done under the spin lock to
ensure consistency of the tree.

The allocation of extents is now in all cases done by the existing
allocation code, and if there is an active reservation, that is updated
after the fact. Again this is done under the spin lock, since it entails
changing the lookup key for the reservation in question.

Signed-off-by: Steven Whitehouse 

diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index d5e2546..99d7c64 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -113,6 +113,13 @@ static inline u64 gfs2_rbm_to_block(const struct gfs2_rbm 
*rbm)
return rbm->rgd->rd_data0 + (rbm->bi->bi_start * GFS2_NBBY) + 
rbm->offset;
 }
 
+static inline bool gfs2_rbm_eq(const struct gfs2_rbm *rbm1,
+  const struct gfs2_rbm *rbm2)
+{
+   return (rbm1->rgd == rbm2->rgd) && (rbm1->bi == rbm2->bi) && 
+  (rbm1->offset == rbm2->offset);
+}
+
 enum gfs2_state_bits {
BH_Pinned = BH_PrivateStart,
BH_Escaped = BH_PrivateStart + 1,
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index eaa4188..bd3b926 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -67,10 +67,6 @@ static const char valid_change[16] = {
1, 0, 0, 0
 };
 
-static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal,
-   unsigned char old_state,
-   struct gfs2_bitmap **rbi);
-
 /**
  * gfs2_setbit - Set a bit in the bitmaps
  * @rgd: the resource group descriptor
@@ -202,36 +198,6 @@ static inline int rs_cmp(u64 blk, u32 len, struct 
gfs2_blkreserv *rs)
 }
 
 /**
- * rs_find - Find a rgrp multi-block reservation that contains a given block
- * @rgd: The rgrp
- * @rgblk: The block we're looking for, relative to the rgrp
- */
-static struct gfs2_blkreserv *rs_find(struct gfs2_rgrpd *rgd, u32 rgblk)
-{
-   struct rb_node **newn;
-   int rc;
-   u64 fsblk = rgblk + rgd->rd_data0;
-
-   spin_lock(&rgd->rd_rsspin);
-   newn = &rgd->rd_rstree.rb_node;
-   while (*newn) {
-   struct gfs2_blkreserv *cur =
-   rb_entry(*newn, struct gfs2_blkreserv, rs_node);
-   rc = rs_cmp(fsblk, 1, cur);
-   if (rc < 0)
-   newn = &((*newn)->rb_left);
-   else if (rc > 0)
-   newn = &((*newn)->rb_right);
-   else {
-   spin_unlock(&rgd->rd_rsspin);
-   return cur;
-   }
-   }
-   spin_unlock(&rgd->rd_rsspin);
-   return NULL;
-}
-
-/**
  * gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing
  *   a block in a given allocation state.
  * @buf: the buffer that holds the bitmaps
@@ -1306,9 +1272,6 @@ static struct gfs2_blkreserv *rs_insert(struct 
gfs2_bitmap *bi,
rb_link_node(&rs->rs_node, parent, newn);
rb_insert_color(&rs->rs_node, &rgd->rd_rstree);
 
-   /* Do our inode accounting for the reservation */
-   /*BUG_ON(!gfs2_glock_is_locked_by_me(ip->i_gl));*/
-
/* Do our rgrp accounting for the reservation */
rgd->rd_reserved += amount; /* blocks reserved */
rgd->rd_rs_cnt++; /* number of in-tree reservations */
@@ -1464,6 +1427,199 @@ static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct 
gfs2_inode *ip,
 }
 
 /**
+ * gfs2_next_unreserved_block - Return next block that is not reserved
+ * @rgd: The resource group
+ * @block: The starting block
+ * @ip: Ignore any reservations for this inode
+ *
+ * If the block does not appear in any reservation, then return the
+ * block number unchanged. If it does appear in the reservation, then
+ * keep looking through the tree of reservations i

[PATCH 12/27] GFS2: Combine functions gfs2_glock_wait and wait_on_holder

2012-09-26 Thread Steven Whitehouse

From: Bob Peterson 

Function gfs2_glock_wait only called function wait_on_holder and
returned its return code, so they were combined for readability.

Signed-off-by: Bob Peterson 
Signed-off-by: Steven Whitehouse 

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 67f3e42..5c87909 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -869,7 +869,14 @@ static int gfs2_glock_demote_wait(void *word)
return 0;
 }
 
-static void wait_on_holder(struct gfs2_holder *gh)
+/**
+ * gfs2_glock_wait - wait on a glock acquisition
+ * @gh: the glock holder
+ *
+ * Returns: 0 on success
+ */
+
+int gfs2_glock_wait(struct gfs2_holder *gh)
 {
unsigned long time1 = jiffies;
 
@@ -880,6 +887,7 @@ static void wait_on_holder(struct gfs2_holder *gh)
gh->gh_gl->gl_hold_time = min(gh->gh_gl->gl_hold_time +
  GL_GLOCK_HOLD_INCR,
  GL_GLOCK_MAX_HOLD);
+   return gh->gh_error;
 }
 
 static void wait_on_demote(struct gfs2_glock *gl)
@@ -915,19 +923,6 @@ static void handle_callback(struct gfs2_glock *gl, 
unsigned int state,
trace_gfs2_demote_rq(gl);
 }
 
-/**
- * gfs2_glock_wait - wait on a glock acquisition
- * @gh: the glock holder
- *
- * Returns: 0 on success
- */
-
-int gfs2_glock_wait(struct gfs2_holder *gh)
-{
-   wait_on_holder(gh);
-   return gh->gh_error;
-}
-
 void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...)
 {
struct va_format vaf;
-- 
1.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 15/27] GFS2: Eliminate unnecessary check for state > 3 in bitfit

2012-09-26 Thread Steven Whitehouse

From: Bob Peterson 

Function gfs2_bitfit was checking for state > 3, but that's
impossible since it is only called from rgblk_search, which receives
only GFS2_BLKST_ constants.

Signed-off-by: Bob Peterson 
Signed-off-by: Steven Whitehouse 

diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index c267118..47d2346 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -228,8 +228,6 @@ static u32 gfs2_bitfit(const u8 *buf, const unsigned int 
len,
u64 mask = 0x5555555555555555ULL;
u32 bit;
 
-   BUG_ON(state > 3);
-
/* Mask off bits we don't care about at the start of the search */
mask <<= spoint;
tmp = gfs2_bit_search(ptr, mask, state);
-- 
1.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 17/27] GFS2: Use rbm for gfs2_setbit()

2012-09-26 Thread Steven Whitehouse

Use the rbm structure for gfs2_setbit() in order to simplify the
arguments to the function. We have to add a bool to control whether
the clone bitmap should be updated (if it exists) but otherwise it
is a more or less direct substitution.

Signed-off-by: Steven Whitehouse 

diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 3a288ce..55a2651 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -69,47 +69,42 @@ static const char valid_change[16] = {
 
 /**
  * gfs2_setbit - Set a bit in the bitmaps
- * @rgd: the resource group descriptor
- * @buf2: the clone buffer that holds the bitmaps
- * @bi: the bitmap structure
- * @block: the block to set
+ * @rbm: The position of the bit to set
+ * @do_clone: Also set the clone bitmap, if it exists
  * @new_state: the new state of the block
  *
  */
 
-static inline void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buf2,
-  struct gfs2_bitmap *bi, u32 block,
+static inline void gfs2_setbit(const struct gfs2_rbm *rbm, bool do_clone,
   unsigned char new_state)
 {
unsigned char *byte1, *byte2, *end, cur_state;
-   unsigned int buflen = bi->bi_len;
-   const unsigned int bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE;
+   unsigned int buflen = rbm->bi->bi_len;
+   const unsigned int bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE;
 
-   byte1 = bi->bi_bh->b_data + bi->bi_offset + (block / GFS2_NBBY);
-   end = bi->bi_bh->b_data + bi->bi_offset + buflen;
+   byte1 = rbm->bi->bi_bh->b_data + rbm->bi->bi_offset + (rbm->offset / 
GFS2_NBBY);
+   end = rbm->bi->bi_bh->b_data + rbm->bi->bi_offset + buflen;
 
BUG_ON(byte1 >= end);
 
cur_state = (*byte1 >> bit) & GFS2_BIT_MASK;
 
if (unlikely(!valid_change[new_state * 4 + cur_state])) {
-   printk(KERN_WARNING "GFS2: buf_blk = 0x%llx old_state=%d, "
-  "new_state=%d\n",
-  (unsigned long long)block, cur_state, new_state);
-   printk(KERN_WARNING "GFS2: rgrp=0x%llx bi_start=0x%lx\n",
-  (unsigned long long)rgd->rd_addr,
-  (unsigned long)bi->bi_start);
-   printk(KERN_WARNING "GFS2: bi_offset=0x%lx bi_len=0x%lx\n",
-  (unsigned long)bi->bi_offset,
-  (unsigned long)bi->bi_len);
+   printk(KERN_WARNING "GFS2: buf_blk = 0x%x old_state=%d, "
+  "new_state=%d\n", rbm->offset, cur_state, new_state);
+   printk(KERN_WARNING "GFS2: rgrp=0x%llx bi_start=0x%x\n",
+  (unsigned long long)rbm->rgd->rd_addr,
+  rbm->bi->bi_start);
+   printk(KERN_WARNING "GFS2: bi_offset=0x%x bi_len=0x%x\n",
+  rbm->bi->bi_offset, rbm->bi->bi_len);
dump_stack();
-   gfs2_consist_rgrpd(rgd);
+   gfs2_consist_rgrpd(rbm->rgd);
return;
}
*byte1 ^= (cur_state ^ new_state) << bit;
 
-   if (buf2) {
-   byte2 = buf2 + bi->bi_offset + (block / GFS2_NBBY);
+   if (do_clone && rbm->bi->bi_clone) {
+   byte2 = rbm->bi->bi_clone + rbm->bi->bi_offset + (rbm->offset / 
GFS2_NBBY);
cur_state = (*byte2 >> bit) & GFS2_BIT_MASK;
*byte2 ^= (cur_state ^ new_state) << bit;
}
@@ -1852,8 +1847,7 @@ static void gfs2_alloc_extent(const struct gfs2_rbm *rbm, 
bool dinode,
*n = 1;
block = gfs2_rbm_to_block(rbm);
gfs2_trans_add_bh(rbm->rgd->rd_gl, rbm->bi->bi_bh, 1);
-   gfs2_setbit(rbm->rgd, rbm->bi->bi_clone, rbm->bi, rbm->offset,
-   dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
+   gfs2_setbit(rbm, true, dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
block++;
while (*n < elen) {
ret = gfs2_rbm_from_block(&pos, block);
@@ -1861,7 +1855,7 @@ static void gfs2_alloc_extent(const struct gfs2_rbm *rbm, 
bool dinode,
if (gfs2_testbit(&pos) != GFS2_BLKST_FREE)
break;
gfs2_trans_add_bh(pos.rgd->rd_gl, pos.bi->bi_bh, 1);
-   gfs2_setbit(pos.rgd, pos.bi->bi_clone, pos.bi, pos.offset, 
GFS2_BLKST_USED);
+   gfs2_setbit(&pos, true, GFS2_BLKST_USED);
(*n)++;
block++;
}
@@ -1900,7 +1894,7 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd 
*sdp, u64 bstart,
   rbm.bi->bi_len);
}
gfs2_trans_add_bh(rbm.rgd->rd_gl, rbm.bi->bi_bh, 1);
-   gfs2_setbit(rbm.rgd, NULL, rbm.bi, rbm.offset, new_sta

[PATCH 20/27] GFS2: Improve block reservation tracing

2012-09-26 Thread Steven Whitehouse

This patch improves the tracing of block reservations by
removing some corner cases and also providing more useful
detail in the traces.

A new field is added to the reservation structure to contain
the inode number. This is used since in certain contexts it is
not possible to access the inode itself to obtain this information.
As a result we can then display the inode number for all tracepoints
and also in case we dump the resource group.

The "del" tracepoint operation has been removed. This could be called
with the reservation rgrp set to NULL. That resulted in not printing
the device number, and thus making the information largely useless
anyway. Also, the conditional on the rgrp being NULL can then be
removed from the tracepoint. After this change, all the block
reservation tracepoint calls will be called with the rgrp information.

The existing ins, clm and tdel calls to the block reservation tracepoint
are sufficient to track the entire life of the block reservation.

In gfs2_block_alloc() the error detection is updated to print out
the inode number of the problematic inode. This can then be compared
against the information in the glock dump, tracepoints, etc.

Signed-off-by: Steven Whitehouse 

diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 99d7c64..6aaa07c 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -268,13 +268,11 @@ struct gfs2_blkreserv {
/* components used during write (step 1): */
atomic_t rs_sizehint; /* hint of the write size */
 
-   /* components used during get_local_rgrp (step 3): */
-   struct gfs2_rbm rs_rbm;
struct gfs2_holder rs_rgd_gh; /* Filled in by get_local_rgrp */
struct rb_node rs_node;   /* link to other block reservations */
-
-   /* components used during block searches and assignments (step 4): */
+   struct gfs2_rbm rs_rbm;   /* Start of reservation */
u32 rs_free;  /* how many blocks are still free */
+   u64 rs_inum;  /* Inode number for reservation */
 
/* ancillary quota stuff */
struct gfs2_quota_data *rs_qa_qd[2 * MAXQUOTAS];
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 30c864e..87ee0b7 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -448,10 +448,11 @@ int gfs2_rs_alloc(struct gfs2_inode *ip)
return error;
 }
 
-static void dump_rs(struct seq_file *seq, struct gfs2_blkreserv *rs)
+static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs)
 {
-   gfs2_print_dbg(seq, "  r: %llu s:%llu b:%u f:%u\n",
-  rs->rs_rbm.rgd->rd_addr, gfs2_rbm_to_block(&rs->rs_rbm),
+   gfs2_print_dbg(seq, "  B: n:%llu s:%llu b:%u f:%u\n",
+  (unsigned long long)rs->rs_inum,
+  (unsigned long long)gfs2_rbm_to_block(&rs->rs_rbm),
   rs->rs_rbm.offset, rs->rs_free);
 }
 
@@ -468,7 +469,7 @@ static void __rs_deltree(struct gfs2_inode *ip, struct 
gfs2_blkreserv *rs)
return;
 
rgd = rs->rs_rbm.rgd;
-   trace_gfs2_rs(ip, rs, TRACE_RS_TREEDEL);
+   trace_gfs2_rs(rs, TRACE_RS_TREEDEL);
rb_erase(&rs->rs_node, &rgd->rd_rstree);
RB_CLEAR_NODE(&rs->rs_node);
BUG_ON(!rgd->rd_rs_cnt);
@@ -511,7 +512,6 @@ void gfs2_rs_delete(struct gfs2_inode *ip)
down_write(&ip->i_rw_mutex);
if (ip->i_res) {
gfs2_rs_deltree(ip, ip->i_res);
-   trace_gfs2_rs(ip, ip->i_res, TRACE_RS_DELETE);
BUG_ON(ip->i_res->rs_free);
kmem_cache_free(gfs2_rsrv_cachep, ip->i_res);
ip->i_res = NULL;
@@ -1253,6 +1253,7 @@ static struct gfs2_blkreserv *rs_insert(struct 
gfs2_bitmap *bi,
rs->rs_free = amount;
rs->rs_rbm.offset = biblk;
rs->rs_rbm.bi = bi;
+   rs->rs_inum = ip->i_no_addr;
rb_link_node(&rs->rs_node, parent, newn);
rb_insert_color(&rs->rs_node, &rgd->rd_rstree);
 
@@ -1260,7 +1261,7 @@ static struct gfs2_blkreserv *rs_insert(struct 
gfs2_bitmap *bi,
rgd->rd_reserved += amount; /* blocks reserved */
rgd->rd_rs_cnt++; /* number of in-tree reservations */
spin_unlock(&rgd->rd_rsspin);
-   trace_gfs2_rs(ip, rs, TRACE_RS_INSERT);
+   trace_gfs2_rs(rs, TRACE_RS_INSERT);
return rs;
 }
 
@@ -1966,7 +1967,7 @@ static void gfs2_adjust_reservation(struct gfs2_inode *ip,
rlen = min(rs->rs_free, len);
rs->rs_free -= rlen;
rgd->rd_reserved -= rlen;
-   trace_gfs2_rs(ip, rs, TRACE_RS_CLAIM);
+   trace_gfs2_rs(rs, TRACE_RS_CLAIM);
if (rs->rs_free && !ret)
goto out;
}
@@ -2005,10 +2006,6 @@ int

[PATCH 23/27] GFS2: Get rid of I_MUTEX_QUOTA usage

2012-09-26 Thread Steven Whitehouse

From: Jan Kara 

GFS2 uses i_mutex on its system quota inode to synchronize writes to
quota file. Since this is an internal inode to GFS2 (not part of directory
hierarchy or visible to the user) we are safe to define locking rules for it. So
let's just give it its own locking class to make it clear.

Signed-off-by: Jan Kara 
Signed-off-by: J. Bruce Fields 
Signed-off-by: Steven Whitehouse 

diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index e5af9dc..e443966 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -19,6 +19,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "gfs2.h"
 #include "incore.h"
@@ -766,6 +767,7 @@ fail:
return error;
 }
 
+static struct lock_class_key gfs2_quota_imutex_key;
 
 static int init_inodes(struct gfs2_sbd *sdp, int undo)
 {
@@ -803,6 +805,12 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo)
fs_err(sdp, "can't get quota file inode: %d\n", error);
goto fail_rindex;
}
+   /*
+* i_mutex on quota files is special. Since this inode is hidden system
+* file, we are safe to define locking ourselves.
+*/
+   lockdep_set_class(&sdp->sd_quota_inode->i_mutex,
+ &gfs2_quota_imutex_key);
 
error = gfs2_rindex_update(sdp);
if (error)
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 420bc38..4021dec 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -782,7 +782,7 @@ static int do_sync(unsigned int num_qd, struct 
gfs2_quota_data **qda)
return -ENOMEM;
 
sort(qda, num_qd, sizeof(struct gfs2_quota_data *), sort_qd, NULL);
-   mutex_lock_nested(&ip->i_inode.i_mutex, I_MUTEX_QUOTA);
+   mutex_lock(&ip->i_inode.i_mutex);
for (qx = 0; qx < num_qd; qx++) {
error = gfs2_glock_nq_init(qda[qx]->qd_gl, LM_ST_EXCLUSIVE,
   GL_NOCACHE, &ghs[qx]);
-- 
1.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 21/27] GFS2: Fix unclaimed_blocks() wrapping bug and clean up

2012-09-26 Thread Steven Whitehouse

When rgd->rd_free_clone is less than rgd->rd_reserved, the
unclaimed_blocks() calculation would wrap and produce
incorrect results. This patch checks for this condition
when this function is called from rg_mblk_search().

In addition, the use of this particular function in other
places in the code has been dropped by means of a general
clean up of gfs2_inplace_reserve(). This function is now
much easier to follow.

Also the setting of the rgd->rd_last_alloc field is corrected.

Signed-off-by: Steven Whitehouse 

diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 87ee0b7..8869541 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1231,7 +1231,7 @@ static struct gfs2_blkreserv *rs_insert(struct 
gfs2_bitmap *bi,
BUG_ON(!ip->i_res);
BUG_ON(gfs2_rs_active(rs));
/* Figure out where to put new node */
-   /*BUG_ON(!gfs2_glock_is_locked_by_me(rgd->rd_gl));*/
+
while (*newn) {
struct gfs2_blkreserv *cur =
rb_entry(*newn, struct gfs2_blkreserv, rs_node);
@@ -1276,17 +1276,16 @@ static u32 unclaimed_blocks(struct gfs2_rgrpd *rgd)
 /**
  * rg_mblk_search - find a group of multiple free blocks
  * @rgd: the resource group descriptor
- * @rs: the block reservation
  * @ip: pointer to the inode for which we're reserving blocks
+ * @requested: number of blocks required for this allocation
  *
  * This is very similar to rgblk_search, except we're looking for whole
  * 64-bit words that represent a chunk of 32 free blocks. I'm only focusing
  * on aligned dwords for speed's sake.
  *
- * Returns: 0 if successful or BFITNOENT if there isn't enough free space
  */
 
-static int rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, 
unsigned requested)
+static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, 
unsigned requested)
 {
struct gfs2_bitmap *bi = rgd->rd_bits;
const u32 length = rgd->rd_length;
@@ -1299,11 +1298,16 @@ static int rg_mblk_search(struct gfs2_rgrpd *rgd, 
struct gfs2_inode *ip, unsigne
u32 best_rs_bytes, unclaimed;
int best_rs_blocks;
 
+   if ((rgd->rd_free_clone < rgd->rd_reserved) ||
+   (unclaimed_blocks(rgd) < max(requested, RGRP_RSRV_MINBLKS)))
+   return;
+
/* Find bitmap block that contains bits for goal block */
if (rgrp_contains_block(rgd, ip->i_goal))
goal = ip->i_goal - rgd->rd_data0;
else
goal = rgd->rd_last_alloc;
+
for (buf = 0; buf < length; buf++) {
bi = rgd->rd_bits + buf;
/* Convert scope of "goal" from rgrp-wide to within
@@ -1366,10 +1370,8 @@ do_search:
BUG_ON(blk >= bi->bi_len * GFS2_NBBY);
rs = rs_insert(bi, ip, blk,
   rsv_bytes * GFS2_NBBY);
-   if (IS_ERR(rs))
-   return PTR_ERR(rs);
if (rs)
-   return 0;
+   return;
}
ptr += ALIGN(search_bytes, sizeof(u64));
}
@@ -1380,35 +1382,6 @@ skip:
buf %= length;
goal = 0;
}
-
-   return BFITNOENT;
-}
-
-/**
- * try_rgrp_fit - See if a given reservation will fit in a given RG
- * @rgd: the RG data
- * @ip: the inode
- *
- * If there's room for the requested blocks to be allocated from the RG:
- * This will try to get a multi-block reservation first, and if that doesn't
- * fit, it will take what it can.
- *
- * Returns: 1 on success (it fits), 0 on failure (it doesn't fit)
- */
-
-static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip,
-   unsigned requested)
-{
-   if (rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR))
-   return 0;
-   /* Look for a multi-block reservation. */
-   if (unclaimed_blocks(rgd) >= RGRP_RSRV_MINBLKS &&
-   rg_mblk_search(rgd, ip, requested) != BFITNOENT)
-   return 1;
-   if (unclaimed_blocks(rgd) >= requested)
-   return 1;
-
-   return 0;
 }
 
 /**
@@ -1678,6 +1651,19 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 
*last_unlinked, u64 skip
return;
 }
 
+static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd 
*begin)
+{
+   struct gfs2_rgrpd *rgd = *pos;
+
+   rgd = gfs2_rgrpd_get_next(rgd);
+   if (rgd == NULL)
+   rgd = gfs2_rgrpd_get_next(NULL);
+   *pos = rgd;
+   if (rgd != begin) /* If we didn't wrap */
+   return true;
+   return false;
+}
+
 /**
  * gfs2_inplace_reserve - Reserve space in the filesystem
  * @ip: the inode to reserve s

[PATCH 18/27] GFS2: Fix ->show_options() for statfs slow

2012-09-26 Thread Steven Whitehouse

The ->show_options() function for GFS2 was not correctly displaying
the value when statfs slow is in use.

Signed-off-by: Steven Whitehouse 
Reported-by: Milos Jakubicek 

diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 3cbac68..79cac70 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1366,6 +1366,8 @@ static int gfs2_show_options(struct seq_file *s, struct 
dentry *root)
val = sdp->sd_tune.gt_statfs_quantum;
if (val != 30)
seq_printf(s, ",statfs_quantum=%d", val);
+   else if (sdp->sd_tune.gt_statfs_slow)
+   seq_puts(s, ",statfs_quantum=0");
val = sdp->sd_tune.gt_quota_quantum;
if (val != 60)
seq_printf(s, ",quota_quantum=%d", val);
-- 
1.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 16/27] GFS2: Use rbm for gfs2_testbit()

2012-09-26 Thread Steven Whitehouse

Change the arguments to gfs2_testbit() so that it now just takes an
rbm specifying the position of the two bit entry to return.

Signed-off-by: Steven Whitehouse 

diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 47d2346..3a288ce 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -117,30 +117,21 @@ static inline void gfs2_setbit(struct gfs2_rgrpd *rgd, 
unsigned char *buf2,
 
 /**
  * gfs2_testbit - test a bit in the bitmaps
- * @rgd: the resource group descriptor
- * @buffer: the buffer that holds the bitmaps
- * @buflen: the length (in bytes) of the buffer
- * @block: the block to read
+ * @rbm: The bit to test
  *
+ * Returns: The two bit block state of the requested bit
  */
 
-static inline unsigned char gfs2_testbit(struct gfs2_rgrpd *rgd,
-const unsigned char *buffer,
-unsigned int buflen, u32 block)
+static inline u8 gfs2_testbit(const struct gfs2_rbm *rbm)
 {
-   const unsigned char *byte, *end;
-   unsigned char cur_state;
+   const u8 *buffer = rbm->bi->bi_bh->b_data + rbm->bi->bi_offset;
+   const u8 *byte;
unsigned int bit;
 
-   byte = buffer + (block / GFS2_NBBY);
-   bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE;
-   end = buffer + buflen;
-
-   gfs2_assert(rgd->rd_sbd, byte < end);
-
-   cur_state = (*byte >> bit) & GFS2_BIT_MASK;
+   byte = buffer + (rbm->offset / GFS2_NBBY);
+   bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE;
 
-   return cur_state;
+   return (*byte >> bit) & GFS2_BIT_MASK;
 }
 
 /**
@@ -1837,8 +1828,7 @@ static unsigned char gfs2_get_block_type(struct 
gfs2_rgrpd *rgd, u64 block)
ret = gfs2_rbm_from_block(&rbm, block);
WARN_ON_ONCE(ret != 0);
 
-   return gfs2_testbit(rgd, rbm.bi->bi_bh->b_data + rbm.bi->bi_offset,
-   rbm.bi->bi_len, rbm.offset);
+   return gfs2_testbit(&rbm);
 }
 
 
@@ -1846,42 +1836,35 @@ static unsigned char gfs2_get_block_type(struct 
gfs2_rgrpd *rgd, u64 block)
  * gfs2_alloc_extent - allocate an extent from a given bitmap
  * @rbm: the resource group information
  * @dinode: TRUE if the first block we allocate is for a dinode
- * @n: The extent length
+ * @n: The extent length (value/result)
  *
- * Add the found bitmap buffer to the transaction.
+ * Add the bitmap buffer to the transaction.
  * Set the found bits to @new_state to change block's allocation state.
- * Returns: starting block number of the extent (fs scope)
  */
-static u64 gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode,
+static void gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode,
 unsigned int *n)
 {
-   struct gfs2_rgrpd *rgd = rbm->rgd;
-   struct gfs2_bitmap *bi = rbm->bi;
-   u32 blk = rbm->offset;
+   struct gfs2_rbm pos = { .rgd = rbm->rgd, };
const unsigned int elen = *n;
-   u32 goal;
-   const u8 *buffer = NULL;
+   u64 block;
+   int ret;
 
-   *n = 0;
-   buffer = bi->bi_bh->b_data + bi->bi_offset;
-   gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
-   gfs2_setbit(rgd, bi->bi_clone, bi, blk,
+   *n = 1;
+   block = gfs2_rbm_to_block(rbm);
+   gfs2_trans_add_bh(rbm->rgd->rd_gl, rbm->bi->bi_bh, 1);
+   gfs2_setbit(rbm->rgd, rbm->bi->bi_clone, rbm->bi, rbm->offset,
dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
-   (*n)++;
-   goal = blk;
+   block++;
while (*n < elen) {
-   goal++;
-   if (goal >= (bi->bi_len * GFS2_NBBY))
-   break;
-   if (gfs2_testbit(rgd, buffer, bi->bi_len, goal) !=
-   GFS2_BLKST_FREE)
+   ret = gfs2_rbm_from_block(&pos, block);
+   WARN_ON(ret);
+   if (gfs2_testbit(&pos) != GFS2_BLKST_FREE)
break;
-   gfs2_setbit(rgd, bi->bi_clone, bi, goal, GFS2_BLKST_USED);
+   gfs2_trans_add_bh(pos.rgd->rd_gl, pos.bi->bi_bh, 1);
+   gfs2_setbit(pos.rgd, pos.bi->bi_clone, pos.bi, pos.offset, 
GFS2_BLKST_USED);
(*n)++;
+   block++;
}
-   blk = gfs2_bi2rgd_blk(bi, blk);
-   rgd->rd_last_alloc = blk + *n - 1;
-   return rgd->rd_data0 + blk;
 }
 
 /**
@@ -2042,7 +2025,8 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, 
unsigned int *nblocks,
goto rgrp_error;
}
 
-   block = gfs2_alloc_extent(&rbm, dinode, nblocks);
+   gfs2_alloc_extent(&rbm, dinode, nblocks);
+   block = gfs2_rbm_to_block(&rbm);
if (gfs2_rs_active(ip->i_res))
gfs2_adjust_reservation(ip, &rbm, *nblocks);
ndata = *nblocks;
-- 
1.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org

[PATCH 14/27] GFS2: Eliminate redundant calls to may_grant

2012-09-26 Thread Steven Whitehouse

From: Bob Peterson 

Function add_to_queue was checking may_grant for the passed-in
holder for every iteration of its gh2 loop. Now it only checks it
once at the beginning to see if a try lock is futile.

Signed-off-by: Bob Peterson 
Signed-off-by: Steven Whitehouse 

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index fca6a87..e6c2fd5 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -954,7 +954,7 @@ __acquires(&gl->gl_spin)
struct gfs2_sbd *sdp = gl->gl_sbd;
struct list_head *insert_pt = NULL;
struct gfs2_holder *gh2;
-   int try_lock = 0;
+   int try_futile = 0;
 
BUG_ON(gh->gh_owner_pid == NULL);
if (test_and_set_bit(HIF_WAIT, &gh->gh_iflags))
@@ -962,7 +962,7 @@ __acquires(&gl->gl_spin)
 
if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) {
if (test_bit(GLF_LOCK, &gl->gl_flags))
-   try_lock = 1;
+   try_futile = !may_grant(gl, gh);
if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags))
goto fail;
}
@@ -971,9 +971,8 @@ __acquires(&gl->gl_spin)
if (unlikely(gh2->gh_owner_pid == gh->gh_owner_pid &&
(gh->gh_gl->gl_ops->go_type != LM_TYPE_FLOCK)))
goto trap_recursive;
-   if (try_lock &&
-   !(gh2->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) &&
-   !may_grant(gl, gh)) {
+   if (try_futile &&
+   !(gh2->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) {
 fail:
gh->gh_error = GLR_TRYFAILED;
gfs2_holder_wake(gh);
-- 
1.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 13/27] GFS2: Combine functions gfs2_glock_dq_wait and wait_on_demote

2012-09-26 Thread Steven Whitehouse

From: Bob Peterson 

Function gfs2_glock_dq_wait called two-line function wait_on_demote,
so they were combined.

Signed-off-by: Bob Peterson 
Signed-off-by: Steven Whitehouse 

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 5c87909..fca6a87 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -890,12 +890,6 @@ int gfs2_glock_wait(struct gfs2_holder *gh)
return gh->gh_error;
 }
 
-static void wait_on_demote(struct gfs2_glock *gl)
-{
-   might_sleep();
-   wait_on_bit(&gl->gl_flags, GLF_DEMOTE, gfs2_glock_demote_wait, 
TASK_UNINTERRUPTIBLE);
-}
-
 /**
  * handle_callback - process a demote request
  * @gl: the glock
@@ -1123,7 +1117,8 @@ void gfs2_glock_dq_wait(struct gfs2_holder *gh)
 {
struct gfs2_glock *gl = gh->gh_gl;
gfs2_glock_dq(gh);
-   wait_on_demote(gl);
+   might_sleep();
+   wait_on_bit(&gl->gl_flags, GLF_DEMOTE, gfs2_glock_demote_wait, 
TASK_UNINTERRUPTIBLE);
 }
 
 /**
-- 
1.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 11/27] GFS2: inline __gfs2_glock_schedule_for_reclaim

2012-09-26 Thread Steven Whitehouse

From: Bob Peterson 

Since function __gfs2_glock_schedule_for_reclaim is only two
significant lines, we can eliminate it, simplifying the code
and making it more readable.

Signed-off-by: Bob Peterson 
Signed-off-by: Steven Whitehouse 

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 1ed81f4..67f3e42 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -186,20 +186,6 @@ static void gfs2_glock_remove_from_lru(struct gfs2_glock 
*gl)
 }
 
 /**
- * __gfs2_glock_schedule_for_reclaim - Add a glock to the reclaim list
- * @gl: the glock
- *
- * If the glock is demotable, then we add it (or move it) to the end
- * of the glock LRU list.
- */
-
-static void __gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl)
-{
-   if (demote_ok(gl))
-   gfs2_glock_add_to_lru(gl);
-}
-
-/**
  * gfs2_glock_put_nolock() - Decrement reference count on glock
  * @gl: The glock to put
  *
@@ -1121,8 +1107,9 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
!test_bit(GLF_DEMOTE, &gl->gl_flags))
fast_path = 1;
}
-   if (!test_bit(GLF_LFLUSH, &gl->gl_flags))
-   __gfs2_glock_schedule_for_reclaim(gl);
+   if (!test_bit(GLF_LFLUSH, &gl->gl_flags) && demote_ok(gl))
+   gfs2_glock_add_to_lru(gl);
+
trace_gfs2_glock_queue(gh, 0);
spin_unlock(&gl->gl_spin);
if (likely(fast_path))
-- 
1.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 09/27] GFS2: rbm code cleanup

2012-09-26 Thread Steven Whitehouse

From: Bob Peterson 

This patch fixes a few small rbm-related things. First, it fixes
a corner case where the rbm needs to switch bitmaps and wasn't
adjusting its buffer pointer. Second, it fixes a whitespace issue.
Third, the logic in function gfs2_rbm_from_block is optimized
a bit. Lastly, a check for goal block overflows is added to function
gfs2_alloc_blocks.

Signed-off-by: Bob Peterson 
Signed-off-by: Steven Whitehouse 

diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index c17029a..c267118 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -467,7 +467,7 @@ int gfs2_rs_alloc(struct gfs2_inode *ip)
 static void dump_rs(struct seq_file *seq, struct gfs2_blkreserv *rs)
 {
gfs2_print_dbg(seq, "  r: %llu s:%llu b:%u f:%u\n",
-  rs->rs_rbm.rgd->rd_addr, gfs2_rbm_to_block(&rs->rs_rbm), 
+  rs->rs_rbm.rgd->rd_addr, gfs2_rbm_to_block(&rs->rs_rbm),
   rs->rs_rbm.offset, rs->rs_free);
 }
 
@@ -1493,16 +1493,18 @@ static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, 
u64 block)
 
if (WARN_ON_ONCE(rblock > UINT_MAX))
return -EINVAL;
+   if (block >= rbm->rgd->rd_data0 + rbm->rgd->rd_data)
+   return -E2BIG;
 
for (x = 0; x < rbm->rgd->rd_length; x++) {
rbm->bi = rbm->rgd->rd_bits + x;
if (goal < (rbm->bi->bi_start + rbm->bi->bi_len) * GFS2_NBBY) {
rbm->offset = goal - (rbm->bi->bi_start * GFS2_NBBY);
-   return 0;
+   break;
}
}
 
-   return -E2BIG;
+   return 0;
 }
 
 /**
@@ -1579,7 +1581,6 @@ static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state,
WARN_ON(!buffer_uptodate(bh));
if (state != GFS2_BLKST_UNLINKED && rbm->bi->bi_clone)
buffer = rbm->bi->bi_clone + rbm->bi->bi_offset;
-find_next:
initial_offset = rbm->offset;
offset = gfs2_bitfit(buffer, rbm->bi->bi_len, rbm->offset, 
state);
if (offset == BFITNOENT)
@@ -1594,7 +1595,7 @@ find_next:
return 0;
if (ret > 0) {
n += (rbm->bi - initial_bi);
-   goto find_next;
+   goto next_iter;
}
if (ret == -E2BIG) {
index = 0;
@@ -1619,6 +1620,7 @@ res_covered_end_of_rgrp:
if ((index == 0) && nowrap)
break;
n++;
+next_iter:
if (n >= iters)
break;
}
@@ -2028,6 +2030,10 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, 
unsigned int *nblocks,
else
goal = rbm.rgd->rd_last_alloc + rbm.rgd->rd_data0;
 
+   if ((goal < rbm.rgd->rd_data0) ||
+   (goal >= rbm.rgd->rd_data0 + rbm.rgd->rd_data))
+   rbm.rgd = gfs2_blk2rgrpd(sdp, goal, 1);
+
gfs2_rbm_from_block(&rbm, goal);
error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, ip, false);
 
-- 
1.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 08/27] GFS2: Fix case where reservation finished at end of rgrp

2012-09-26 Thread Steven Whitehouse

One corner case which the original patch failed to take into
account was when there is a reservation which ended such that
the following block was one beyond the end of the rgrp in
question. This extra test fixes that case.

Signed-off-by: Steven Whitehouse 
Reported-by: Bob Peterson 
Tested-by: Bob Peterson 

diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 7ce22d8..c17029a 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1596,6 +1596,12 @@ find_next:
n += (rbm->bi - initial_bi);
goto find_next;
}
+   if (ret == -E2BIG) {
+   index = 0;
+   rbm->offset = 0;
+   n += (rbm->bi - initial_bi);
+   goto res_covered_end_of_rgrp;
+   }
return ret;
 
 bitmap_full:   /* Mark bitmap as full and fall through */
@@ -1608,6 +1614,7 @@ next_bitmap:  /* Find next bitmap in the rgrp */
index++;
if (index == rbm->rgd->rd_length)
index = 0;
+res_covered_end_of_rgrp:
rbm->bi = &rbm->rgd->rd_bits[index];
if ((index == 0) && nowrap)
break;
-- 
1.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 05/27] GFS2: Update gfs2_get_block_type() to use rbm

2012-09-26 Thread Steven Whitehouse

Use the new gfs2_rbm_from_block() function to replace an open
coded version of the same code.

Signed-off-by: Steven Whitehouse 

diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index bd3b926..0c1be38 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1824,27 +1824,14 @@ void gfs2_inplace_release(struct gfs2_inode *ip)
 
 static unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block)
 {
-   struct gfs2_bitmap *bi = NULL;
-   u32 length, rgrp_block, buf_block;
-   unsigned int buf;
-   unsigned char type;
-
-   length = rgd->rd_length;
-   rgrp_block = block - rgd->rd_data0;
-
-   for (buf = 0; buf < length; buf++) {
-   bi = rgd->rd_bits + buf;
-   if (rgrp_block < (bi->bi_start + bi->bi_len) * GFS2_NBBY)
-   break;
-   }
-
-   gfs2_assert(rgd->rd_sbd, buf < length);
-   buf_block = rgrp_block - bi->bi_start * GFS2_NBBY;
+   struct gfs2_rbm rbm = { .rgd = rgd, };
+   int ret;
 
-   type = gfs2_testbit(rgd, bi->bi_bh->b_data + bi->bi_offset,
-  bi->bi_len, buf_block);
+   ret = gfs2_rbm_from_block(&rbm, block);
+   WARN_ON_ONCE(ret != 0);
 
-   return type;
+   return gfs2_testbit(rgd, rbm.bi->bi_bh->b_data + rbm.bi->bi_offset,
+   rbm.bi->bi_len, rbm.offset);
 }
 
 
-- 
1.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 07/27] GFS2: Use RB_CLEAR_NODE() rather than rb_init_node()

2012-09-26 Thread Steven Whitehouse

From: Michel Lespinasse 

gfs2 calls RB_EMPTY_NODE() to check if nodes are not on an rbtree.
The corresponding initialization function is RB_CLEAR_NODE().
rb_init_node() was never clearly defined and is going away.

Signed-off-by: Michel Lespinasse 
Signed-off-by: Steven Whitehouse 

diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 06476b3..7ce22d8 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -453,7 +453,7 @@ int gfs2_rs_alloc(struct gfs2_inode *ip)
if (!res)
error = -ENOMEM;
 
-   rb_init_node(&res->rs_node);
+   RB_CLEAR_NODE(&res->rs_node);
 
down_write(&ip->i_rw_mutex);
if (ip->i_res)
@@ -486,7 +486,7 @@ static void __rs_deltree(struct gfs2_inode *ip, struct 
gfs2_blkreserv *rs)
rgd = rs->rs_rbm.rgd;
trace_gfs2_rs(ip, rs, TRACE_RS_TREEDEL);
rb_erase(&rs->rs_node, &rgd->rd_rstree);
-   rb_init_node(&rs->rs_node);
+   RB_CLEAR_NODE(&rs->rs_node);
BUG_ON(!rgd->rd_rs_cnt);
rgd->rd_rs_cnt--;
 
-- 
1.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 06/27] GFS2: Update rgblk_free() to use rbm

2012-09-26 Thread Steven Whitehouse

Replace open coded version with a call to gfs2_rbm_from_block()

Signed-off-by: Steven Whitehouse 

diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 0c1be38..06476b3 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1890,46 +1890,30 @@ static u64 gfs2_alloc_extent(const struct gfs2_rbm 
*rbm, bool dinode,
 static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart,
 u32 blen, unsigned char new_state)
 {
-   struct gfs2_rgrpd *rgd;
-   struct gfs2_bitmap *bi = NULL;
-   u32 length, rgrp_blk, buf_blk;
-   unsigned int buf;
+   struct gfs2_rbm rbm;
 
-   rgd = gfs2_blk2rgrpd(sdp, bstart, 1);
-   if (!rgd) {
+   rbm.rgd = gfs2_blk2rgrpd(sdp, bstart, 1);
+   if (!rbm.rgd) {
if (gfs2_consist(sdp))
fs_err(sdp, "block = %llu\n", (unsigned long 
long)bstart);
return NULL;
}
 
-   length = rgd->rd_length;
-
-   rgrp_blk = bstart - rgd->rd_data0;
-
while (blen--) {
-   for (buf = 0; buf < length; buf++) {
-   bi = rgd->rd_bits + buf;
-   if (rgrp_blk < (bi->bi_start + bi->bi_len) * GFS2_NBBY)
-   break;
+   gfs2_rbm_from_block(&rbm, bstart);
+   bstart++;
+   if (!rbm.bi->bi_clone) {
+   rbm.bi->bi_clone = kmalloc(rbm.bi->bi_bh->b_size,
+  GFP_NOFS | __GFP_NOFAIL);
+   memcpy(rbm.bi->bi_clone + rbm.bi->bi_offset,
+  rbm.bi->bi_bh->b_data + rbm.bi->bi_offset,
+  rbm.bi->bi_len);
}
-
-   gfs2_assert(rgd->rd_sbd, buf < length);
-
-   buf_blk = rgrp_blk - bi->bi_start * GFS2_NBBY;
-   rgrp_blk++;
-
-   if (!bi->bi_clone) {
-   bi->bi_clone = kmalloc(bi->bi_bh->b_size,
-  GFP_NOFS | __GFP_NOFAIL);
-   memcpy(bi->bi_clone + bi->bi_offset,
-  bi->bi_bh->b_data + bi->bi_offset,
-  bi->bi_len);
-   }
-   gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
-   gfs2_setbit(rgd, NULL, bi, buf_blk, new_state);
+   gfs2_trans_add_bh(rbm.rgd->rd_gl, rbm.bi->bi_bh, 1);
+   gfs2_setbit(rbm.rgd, NULL, rbm.bi, rbm.offset, new_state);
}
 
-   return rgd;
+   return rbm.rgd;
 }
 
 /**
-- 
1.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 03/27] GFS2: Add structure to contain rgrp, bitmap, offset tuple

2012-09-26 Thread Steven Whitehouse

This patch introduces a new structure, gfs2_rbm, which is a
tuple of a resource group, a bitmap within the resource group
and an offset within that bitmap. This is designed to make
manipulating these sets of variables easier. There is also a
new helper function which converts this representation back
to a disk block address.

In addition, the rbtree nodes which are used for the reservations
were not being correctly initialised, which is now fixed. Also,
the tracing was not passing through the inode where it should
have been. That is mostly fixed aside from one corner case. This
needs to be revisited since there can also be a NULL rgrp in
some cases which results in the device being incorrect in the
trace.

This is intended to be the first step towards cleaning up some
of the allocation code, and some further bug fixes.

Signed-off-by: Steven Whitehouse 

diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 49cd7dd..1fd3ae2 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -786,7 +786,7 @@ static int do_strip(struct gfs2_inode *ip, struct 
buffer_head *dibh,
goto out_rlist;
 
if (gfs2_rs_active(ip->i_res)) /* needs to be done with the rgrp glock 
held */
-   gfs2_rs_deltree(ip->i_res);
+   gfs2_rs_deltree(ip, ip->i_res);
 
error = gfs2_trans_begin(sdp, rg_blocks + RES_DINODE +
 RES_INDIRECT + RES_STATFS + RES_QUOTA,
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 52078a1..d5e2546 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -102,6 +102,17 @@ struct gfs2_rgrpd {
u32 rd_rs_cnt;  /* count of current reservations */
 };
 
+struct gfs2_rbm {
+   struct gfs2_rgrpd *rgd;
+   struct gfs2_bitmap *bi; /* Bitmap must belong to the rgd */
+   u32 offset; /* The offset is bitmap relative */
+};
+
+static inline u64 gfs2_rbm_to_block(const struct gfs2_rbm *rbm)
+{
+   return rbm->rgd->rd_data0 + (rbm->bi->bi_start * GFS2_NBBY) + 
rbm->offset;
+}
+
 enum gfs2_state_bits {
BH_Pinned = BH_PrivateStart,
BH_Escaped = BH_PrivateStart + 1,
@@ -251,13 +262,11 @@ struct gfs2_blkreserv {
atomic_t rs_sizehint; /* hint of the write size */
 
/* components used during get_local_rgrp (step 3): */
-   struct gfs2_rgrpd *rs_rgd;/* pointer to the gfs2_rgrpd */
+   struct gfs2_rbm rs_rbm;
struct gfs2_holder rs_rgd_gh; /* Filled in by get_local_rgrp */
struct rb_node rs_node;   /* link to other block reservations */
 
/* components used during block searches and assignments (step 4): */
-   struct gfs2_bitmap *rs_bi;/* bitmap for the current allocation */
-   u32 rs_biblk; /* start block relative to the bi */
u32 rs_free;  /* how many blocks are still free */
 
/* ancillary quota stuff */
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index a2b43bb..eaa4188 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -192,7 +192,7 @@ static inline u64 gfs2_bit_search(const __le64 *ptr, u64 
mask, u8 state)
  */
 static inline int rs_cmp(u64 blk, u32 len, struct gfs2_blkreserv *rs)
 {
-   u64 startblk = gfs2_rs_startblk(rs);
+   u64 startblk = gfs2_rbm_to_block(&rs->rs_rbm);
 
if (blk >= startblk + rs->rs_free)
return 1;
@@ -487,6 +487,8 @@ int gfs2_rs_alloc(struct gfs2_inode *ip)
if (!res)
error = -ENOMEM;
 
+   rb_init_node(&res->rs_node);
+
down_write(&ip->i_rw_mutex);
if (ip->i_res)
kmem_cache_free(gfs2_rsrv_cachep, res);
@@ -499,8 +501,8 @@ int gfs2_rs_alloc(struct gfs2_inode *ip)
 static void dump_rs(struct seq_file *seq, struct gfs2_blkreserv *rs)
 {
gfs2_print_dbg(seq, "  r: %llu s:%llu b:%u f:%u\n",
-  rs->rs_rgd->rd_addr, gfs2_rs_startblk(rs), rs->rs_biblk,
-  rs->rs_free);
+  rs->rs_rbm.rgd->rd_addr, gfs2_rbm_to_block(&rs->rs_rbm), 
+  rs->rs_rbm.offset, rs->rs_free);
 }
 
 /**
@@ -508,40 +510,28 @@ static void dump_rs(struct seq_file *seq, struct 
gfs2_blkreserv *rs)
  * @rs: The reservation to remove
  *
  */
-static void __rs_deltree(struct gfs2_blkreserv *rs)
+static void __rs_deltree(struct gfs2_inode *ip, struct gfs2_blkreserv *rs)
 {
struct gfs2_rgrpd *rgd;
 
if (!gfs2_rs_active(rs))
return;
 
-   rgd = rs->rs_rgd;
-   /* We can't do this: The reason is that when the rgrp is invalidated,
-  it's in the "middle" of acquiring the glock, but the HOLDER bit
-  isn't set yet:
-  BUG_ON(!gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl));*/
-   trace_gfs2_rs(NULL, rs, TRACE_RS_TREEDEL);
-
-   if (!RB_EMPTY_ROOT(&rgd->rd_rstree))
-   rb_erase(&rs-&g

[PATCH 02/27] GFS2: Remove rs_requested field from reservations

2012-09-26 Thread Steven Whitehouse

The rs_requested field is left over from the original allocation
code, however this should have been a parameter passed to the
various functions from gfs2_inplace_reserve() and not a member of the
reservation structure as the value is not required after the
initial allocation.

This also helps simplify the code since we no longer need to set
rs_requested to zero. The gfs2_inplace_release() function can be
simplified as well, since the reservation structure will always be
defined when it is called, and the only remaining task is to unlock
the rgrp if required. It can now be called unconditionally, resulting
in a further simplification.

Signed-off-by: Steven Whitehouse 

diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index d652634..00eaa83 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -612,6 +612,7 @@ static int gfs2_write_begin(struct file *file, struct 
address_space *mapping,
struct gfs2_sbd *sdp = GFS2_SB(mapping->host);
struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
+   unsigned requested = 0;
int alloc_required;
int error = 0;
pgoff_t index = pos >> PAGE_CACHE_SHIFT;
@@ -641,7 +642,8 @@ static int gfs2_write_begin(struct file *file, struct 
address_space *mapping,
if (error)
goto out_unlock;
 
-   error = gfs2_inplace_reserve(ip, data_blocks + ind_blocks);
+   requested = data_blocks + ind_blocks;
+   error = gfs2_inplace_reserve(ip, requested);
if (error)
goto out_qunlock;
}
@@ -654,7 +656,7 @@ static int gfs2_write_begin(struct file *file, struct 
address_space *mapping,
if (&ip->i_inode == sdp->sd_rindex)
rblocks += 2 * RES_STATFS;
if (alloc_required)
-   rblocks += gfs2_rg_blocks(ip);
+   rblocks += gfs2_rg_blocks(ip, requested);
 
error = gfs2_trans_begin(sdp, rblocks,
 PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize);
@@ -868,8 +870,7 @@ static int gfs2_write_end(struct file *file, struct 
address_space *mapping,
brelse(dibh);
 failed:
gfs2_trans_end(sdp);
-   if (gfs2_mb_reserved(ip))
-   gfs2_inplace_release(ip);
+   gfs2_inplace_release(ip);
if (ip->i_res->rs_qa_qd_num)
gfs2_quota_unlock(ip);
if (inode == sdp->sd_rindex) {
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 382000f..30e2199 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -441,7 +441,7 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, 
struct vm_fault *vmf)
rblocks += data_blocks ? data_blocks : 1;
if (ind_blocks || data_blocks) {
rblocks += RES_STATFS + RES_QUOTA;
-   rblocks += gfs2_rg_blocks(ip);
+   rblocks += gfs2_rg_blocks(ip, data_blocks + ind_blocks);
}
ret = gfs2_trans_begin(sdp, rblocks, 0);
if (ret)
@@ -845,7 +845,7 @@ retry:
&max_bytes, &data_blocks, &ind_blocks);
 
rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA +
- RES_RG_HDR + gfs2_rg_blocks(ip);
+ RES_RG_HDR + gfs2_rg_blocks(ip, data_blocks + 
ind_blocks);
if (gfs2_is_jdata(ip))
rblocks += data_blocks ? data_blocks : 1;
 
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index aaecc80..52078a1 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -250,9 +250,6 @@ struct gfs2_blkreserv {
/* components used during write (step 1): */
atomic_t rs_sizehint; /* hint of the write size */
 
-   /* components used during inplace_reserve (step 2): */
-   u32 rs_requested; /* Filled in by caller of gfs2_inplace_reserve() */
-
/* components used during get_local_rgrp (step 3): */
struct gfs2_rgrpd *rs_rgd;/* pointer to the gfs2_rgrpd */
struct gfs2_holder rs_rgd_gh; /* Filled in by get_local_rgrp */
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 753af3d..f2709ea 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -737,10 +737,7 @@ static int gfs2_create_inode(struct inode *dir, struct 
dentry *dentry,
brelse(bh);
 
gfs2_trans_end(sdp);
-   /* Check if we reserved space in the rgrp. Function link_dinode may
-  not, depending on whether alloc is required. */
-   if (gfs2_mb_reserved(dip))
-   gfs2_inplace_release(dip);
+   gfs2_inplace_release(dip);
gfs2_quota_unlock(dip);
mark_inode_dirty(inode);
gfs2_glock_dq_uninit_m(2, ghs);
@@ -897,7 +894,7 @@ static int gfs2_link(struct dentry *old_dentry, struct 
inode *dir,
goto out_gunlock_q;
 
error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
-

Re: [PATCH 07/11] gfs2: pass correct dentry to finish_open() in __gfs2_lookup()

2013-09-16 Thread Steven Whitehouse

Hi,

On Mon, 2013-09-16 at 14:52 +0200, Miklos Szeredi wrote:
> From: Miklos Szeredi 
> 
> AFAICS if d_splice_alias() returned non-NULL, this code would Oops
> (finish_open expects an instantiated dentry).
> 
> Signed-off-by: Miklos Szeredi 
> Cc: Steven Whitehouse 
> Cc: sta...@vger.kernel.org
> ---
>  fs/gfs2/inode.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
> index 6d7f976..abe7dae 100644
> --- a/fs/gfs2/inode.c
> +++ b/fs/gfs2/inode.c
> @@ -774,7 +774,7 @@ static struct dentry *__gfs2_lookup(struct inode *dir, 
> struct dentry *dentry,
>  
>   d = d_splice_alias(inode, dentry);
>   if (file && S_ISREG(inode->i_mode))
> - error = finish_open(file, dentry, gfs2_open_common, opened);
> + error = finish_open(file, d ? d : dentry, gfs2_open_common, 
> opened);
>  
>   gfs2_glock_dq_uninit(&gh);
>   if (error)

Not sure I understand why this is required... when the inode is a
regular file, d can only be an error (if the inode is an error) or it
will be NULL. Since the __gfs2_lookup would terminate further up if the
inode were an error, then d must always be NULL in the regular file
case, so I'm not sure that this is a bug,

Steve.


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 06/11] gfs2: d_splice_alias() cant return error

2013-09-16 Thread Steven Whitehouse

Hi,

On Mon, 2013-09-16 at 14:52 +0200, Miklos Szeredi wrote:
> From: Miklos Szeredi 
> 
> unless it was given an IS_ERR(inode), which isn't the case here.  So clean
> up the unnecessary error handling in gfs2_create_inode().
> 
> This paves the way for real fixes (hence the stable Cc).
> 
That makes sense to me:

Acked-by: Steven Whitehouse 

I can put this in the gfs2 tree if that makes sense to do,

Steve.

> Signed-off-by: Miklos Szeredi 
> Cc: Steven Whitehouse 
> Cc: sta...@vger.kernel.org
> ---
>  fs/gfs2/inode.c | 4 +---
>  1 file changed, 1 insertion(+), 3 deletions(-)
> 
> diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
> index 64915ee..6d7f976 100644
> --- a/fs/gfs2/inode.c
> +++ b/fs/gfs2/inode.c
> @@ -584,7 +584,7 @@ static int gfs2_create_inode(struct inode *dir, struct 
> dentry *dentry,
>   if (!IS_ERR(inode)) {
>   d = d_splice_alias(inode, dentry);
>   error = 0;
> - if (file && !IS_ERR(d)) {
> + if (file) {
>   if (d == NULL)
>   d = dentry;
>   if (S_ISREG(inode->i_mode))
> @@ -593,8 +593,6 @@ static int gfs2_create_inode(struct inode *dir, struct 
> dentry *dentry,
>   error = finish_no_open(file, d);
>   }
>   gfs2_glock_dq_uninit(ghs);
> - if (IS_ERR(d))
> - return PTR_ERR(d);
>   return error;
>   } else if (error != -ENOENT) {
>   goto fail_gunlock;


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 09/11] gfs2: set FILE_CREATED

2013-09-16 Thread Steven Whitehouse

Hi,

On Mon, 2013-09-16 at 14:52 +0200, Miklos Szeredi wrote:
> From: Miklos Szeredi 
> 
> In gfs2_create_inode() set FILE_CREATED in *opened.
> 
Acked-by: Steven Whitehouse 

Thanks for spotting this issue,

Steve.


> Signed-off-by: Miklos Szeredi 
> Cc: Steven Whitehouse 
> ---
>  fs/gfs2/inode.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
> index 9a1be62..ef411a3 100644
> --- a/fs/gfs2/inode.c
> +++ b/fs/gfs2/inode.c
> @@ -694,8 +694,10 @@ static int gfs2_create_inode(struct inode *dir, struct 
> dentry *dentry,
>  
>   mark_inode_dirty(inode);
>   d_instantiate(dentry, inode);
> - if (file)
> + if (file) {
> + *opened |= FILE_CREATED;
>   error = finish_open(file, dentry, gfs2_open_common, opened);
> + }
>   gfs2_glock_dq_uninit(ghs);
>   gfs2_glock_dq_uninit(ghs + 1);
>   return error;


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 07/11] gfs2: pass correct dentry to finish_open() in __gfs2_lookup()

2013-09-16 Thread Steven Whitehouse

Hi,

On Mon, 2013-09-16 at 15:34 +0200, Miklos Szeredi wrote:
> On Mon, Sep 16, 2013 at 02:13:14PM +0100, Steven Whitehouse wrote:
> > Hi,
> > 
> > On Mon, 2013-09-16 at 14:52 +0200, Miklos Szeredi wrote:
> > > From: Miklos Szeredi 
> > > 
> > > AFAICS if d_splice_alias() returned non-NULL, this code would Oops
> > > (finish_open expects an instantiated dentry).
> > > 
> > > Signed-off-by: Miklos Szeredi 
> > > Cc: Steven Whitehouse 
> > > Cc: sta...@vger.kernel.org
> > > ---
> > >  fs/gfs2/inode.c | 2 +-
> > >  1 file changed, 1 insertion(+), 1 deletion(-)
> > > 
> > > diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
> > > index 6d7f976..abe7dae 100644
> > > --- a/fs/gfs2/inode.c
> > > +++ b/fs/gfs2/inode.c
> > > @@ -774,7 +774,7 @@ static struct dentry *__gfs2_lookup(struct inode 
> > > *dir, struct dentry *dentry,
> > >  
> > >   d = d_splice_alias(inode, dentry);
> > >   if (file && S_ISREG(inode->i_mode))
> > > - error = finish_open(file, dentry, gfs2_open_common, opened);
> > > + error = finish_open(file, d ? d : dentry, gfs2_open_common, 
> > > opened);
> > >  
> > >   gfs2_glock_dq_uninit(&gh);
> > >   if (error)
> > 
> > Not sure I understand why this is required... when the inode is a
> > regular file, d can only be an error (if the inode is an error) or it
> > will be NULL.
> 
> Okay, you're right.  Still, something like the following should make this 
> clear
> and ensure things don't break in the future.
> 
or better still, just add a comment to explain the situation as the
reader may still be wondering why that BUG_ON() will never trigger,

Steve.

> Thanks,
> Miklos
> 
> ---
>  fs/gfs2/inode.c |4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
> 
> --- a/fs/gfs2/inode.c
> +++ b/fs/gfs2/inode.c
> @@ -775,8 +775,10 @@ static struct dentry *__gfs2_lookup(stru
>   }
>  
>   d = d_splice_alias(inode, dentry);
> - if (file && S_ISREG(inode->i_mode))
> + if (file && S_ISREG(inode->i_mode)) {
> + BUG_ON(d);
>   error = finish_open(file, dentry, gfs2_open_common, opened);
> + }
>  
>   gfs2_glock_dq_uninit(&gh);
>   if (error)


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 06/11] gfs2: d_splice_alias() cant return error

2013-09-16 Thread Steven Whitehouse

Hi,

On Mon, 2013-09-16 at 15:35 +0200, Miklos Szeredi wrote:
> On Mon, Sep 16, 2013 at 02:17:49PM +0100, Steven Whitehouse wrote:
> > Hi,
> > 
> > On Mon, 2013-09-16 at 14:52 +0200, Miklos Szeredi wrote:
> > > From: Miklos Szeredi 
> > > 
> > > unless it was given an IS_ERR(inode), which isn't the case here.  So clean
> > > up the unnecessary error handling in gfs2_create_inode().
> > > 
> > > This paves the way for real fixes (hence the stable Cc).
> > > 
> > > That makes sense to me:
> > 
> > Acked-by: Steven Whitehouse 
> > 
> > I can put this in the gfs2 tree if that makes sense to do,
> 
> Sure, please do.
> 
> Thanks,
> Miklos
> 
Ok. I'll add the patches shortly. I need to try and wrap my brain around
patch 8 too,

Steve.

> 
> > Steve.
> > 
> > > Signed-off-by: Miklos Szeredi 
> > > Cc: Steven Whitehouse 
> > > Cc: sta...@vger.kernel.org
> > > ---
> > >  fs/gfs2/inode.c | 4 +---
> > >  1 file changed, 1 insertion(+), 3 deletions(-)
> > > 
> > > diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
> > > index 64915ee..6d7f976 100644
> > > --- a/fs/gfs2/inode.c
> > > +++ b/fs/gfs2/inode.c
> > > @@ -584,7 +584,7 @@ static int gfs2_create_inode(struct inode *dir, 
> > > struct dentry *dentry,
> > >   if (!IS_ERR(inode)) {
> > >   d = d_splice_alias(inode, dentry);
> > >   error = 0;
> > > - if (file && !IS_ERR(d)) {
> > > + if (file) {
> > >   if (d == NULL)
> > >   d = dentry;
> > >   if (S_ISREG(inode->i_mode))
> > > @@ -593,8 +593,6 @@ static int gfs2_create_inode(struct inode *dir, 
> > > struct dentry *dentry,
> > >   error = finish_no_open(file, d);
> > >   }
> > >   gfs2_glock_dq_uninit(ghs);
> > > - if (IS_ERR(d))
> > > - return PTR_ERR(d);
> > >   return error;
> > >   } else if (error != -ENOENT) {
> > >   goto fail_gunlock;
> > 
> > 


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 3/6] GFS2: Merge ordered and writeback writepage

2013-09-05 Thread Steven Whitehouse

The writepages function was recently merged between writeback
and ordered mode. This completes the change by doing the same
with writepage. The remaining differences in writepage were
left over from some earlier time and not actually doing anything
useful.

Signed-off-by: Steven Whitehouse 

diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index ee48ad3..a9ea6f0 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -122,14 +122,13 @@ out:
 }
 
 /**
- * gfs2_writeback_writepage - Write page for writeback mappings
+ * gfs2_writepage - Write page for writeback mappings
  * @page: The page
  * @wbc: The writeback control
  *
  */
 
-static int gfs2_writeback_writepage(struct page *page,
-   struct writeback_control *wbc)
+static int gfs2_writepage(struct page *page, struct writeback_control *wbc)
 {
int ret;
 
@@ -141,32 +140,6 @@ static int gfs2_writeback_writepage(struct page *page,
 }
 
 /**
- * gfs2_ordered_writepage - Write page for ordered data files
- * @page: The page to write
- * @wbc: The writeback control
- *
- */
-
-static int gfs2_ordered_writepage(struct page *page,
- struct writeback_control *wbc)
-{
-   struct inode *inode = page->mapping->host;
-   struct gfs2_inode *ip = GFS2_I(inode);
-   int ret;
-
-   ret = gfs2_writepage_common(page, wbc);
-   if (ret <= 0)
-   return ret;
-
-   if (!page_has_buffers(page)) {
-   create_empty_buffers(page, inode->i_sb->s_blocksize,
-(1 << BH_Dirty)|(1 << BH_Uptodate));
-   }
-   gfs2_page_add_databufs(ip, page, 0, inode->i_sb->s_blocksize-1);
-   return block_write_full_page(page, gfs2_get_block_noalloc, wbc);
-}
-
-/**
  * __gfs2_jdata_writepage - The core of jdata writepage
  * @page: The page to write
  * @wbc: The writeback control
@@ -1107,7 +1080,7 @@ cannot_release:
 }
 
 static const struct address_space_operations gfs2_writeback_aops = {
-   .writepage = gfs2_writeback_writepage,
+   .writepage = gfs2_writepage,
.writepages = gfs2_writepages,
.readpage = gfs2_readpage,
.readpages = gfs2_readpages,
@@ -1123,7 +1096,7 @@ static const struct address_space_operations 
gfs2_writeback_aops = {
 };
 
 static const struct address_space_operations gfs2_ordered_aops = {
-   .writepage = gfs2_ordered_writepage,
+   .writepage = gfs2_writepage,
.writepages = gfs2_writepages,
.readpage = gfs2_readpage,
.readpages = gfs2_readpages,
-- 
1.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

GFS2 Pre-pull patch posting (merge window)

2013-09-05 Thread Steven Whitehouse

Hi,

This is the smallest merge window patch set for GFS2 for quite
some time. Only one of the patches (moving gfs2_sync_meta) is
a non-bug fix patch, although the merge ordered and writeback
writepage patch is also a nice clean up.

A couple of the patches are quite recently added, due to my only
having recently returned from holiday, so I'll give them a couple
of extra days in -next before sending the pull request.

Steve.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 6/6] GFS2: dirty inode correctly in gfs2_write_end

2013-09-05 Thread Steven Whitehouse

From: Benjamin Marzinski 

GFS2 was only setting I_DIRTY_DATASYNC on files that it wrote to, when
it actually increased the file size.  If gfs2_fsync was called without
I_DIRTY_DATASYNC set, it didn't flush the incore data to the log before
returning, so any metadata or journaled data changes were not getting
fsynced. This meant that writes to the middle of files were not always
getting fsynced properly.

This patch makes gfs2 set I_DIRTY_DATASYNC whenever metadata has been
updated during a write. It also makes gfs2_fsync flush the incore log
if I_DIRTY_PAGES is set, and the file is using data journalling. This
will make sure that all incore logged data gets written to disk before
returning from a fsync.

Signed-off-by: Benjamin Marzinski 
Signed-off-by: Steven Whitehouse 

diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index a9ea6f0..1f7d805 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -815,6 +815,8 @@ static int gfs2_write_end(struct file *file, struct 
address_space *mapping,
unsigned int from = pos & (PAGE_CACHE_SIZE - 1);
unsigned int to = from + len;
int ret;
+   struct gfs2_trans *tr = current->journal_info;
+   BUG_ON(!tr);
 
BUG_ON(gfs2_glock_is_locked_by_me(ip->i_gl) == NULL);
 
@@ -825,8 +827,6 @@ static int gfs2_write_end(struct file *file, struct 
address_space *mapping,
goto failed;
}
 
-   gfs2_trans_add_meta(ip->i_gl, dibh);
-
if (gfs2_is_stuffed(ip))
return gfs2_stuffed_write_end(inode, dibh, pos, len, copied, 
page);
 
@@ -834,6 +834,11 @@ static int gfs2_write_end(struct file *file, struct 
address_space *mapping,
gfs2_page_add_databufs(ip, page, from, to);
 
ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
+   if (tr->tr_num_buf_new)
+   __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
+   else
+   gfs2_trans_add_meta(ip->i_gl, dibh);
+
 
if (inode == sdp->sd_rindex) {
adjust_fs_space(inode);
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 72c3866..0621b46 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -650,7 +650,7 @@ static int gfs2_fsync(struct file *file, loff_t start, 
loff_t end,
 {
struct address_space *mapping = file->f_mapping;
struct inode *inode = mapping->host;
-   int sync_state = inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC);
+   int sync_state = inode->i_state & I_DIRTY;
struct gfs2_inode *ip = GFS2_I(inode);
int ret = 0, ret1 = 0;
 
@@ -660,6 +660,8 @@ static int gfs2_fsync(struct file *file, loff_t start, 
loff_t end,
return ret1;
}
 
+   if (!gfs2_is_jdata(ip))
+   sync_state &= ~I_DIRTY_PAGES;
if (datasync)
sync_state &= ~I_DIRTY_SYNC;
 
-- 
1.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 4/6] GFS2: Remove unnecessary memory barrier

2013-09-05 Thread Steven Whitehouse

From: Bob Peterson 

Function test_and_clear_bit implies a memory barrier, so subsequent
memory barriers are unnecessary.

Signed-off-by: Bob Peterson 
Signed-off-by: Steven Whitehouse 

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index ce7078d..722329c 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1411,7 +1411,6 @@ __acquires(&lru_lock)
if (demote_ok(gl))
handle_callback(gl, LM_ST_UNLOCKED, 0, false);
WARN_ON(!test_and_clear_bit(GLF_LOCK, &gl->gl_flags));
-   smp_mb__after_clear_bit();
if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
gfs2_glock_put_nolock(gl);
spin_unlock(&gl->gl_spin);
-- 
1.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 2/6] GFS2: Take glock reference in examine_bucket()

2013-09-05 Thread Steven Whitehouse

We need to check the glock ref counter in a race free way
in order to ensure that the gfs2_glock_hold() call will
succeed. The easiest way to do that is to simply take the
reference count early in the common code of examine_bucket,
skipping any glocks with zero ref count.

That means that the examiner functions all need to put their
reference on the glock once they've performed their function.

Signed-off-by: Steven Whitehouse 
Reported-by: David Teigland 
Tested-by: David Teigland 

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 544a809..ce7078d 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1488,7 +1488,7 @@ static void examine_bucket(glock_examiner examiner, const 
struct gfs2_sbd *sdp,
 
rcu_read_lock();
hlist_bl_for_each_entry_rcu(gl, pos, head, gl_list) {
-   if ((gl->gl_sbd == sdp) && atomic_read(&gl->gl_ref))
+   if ((gl->gl_sbd == sdp) && atomic_inc_not_zero(&gl->gl_ref))
examiner(gl);
}
rcu_read_unlock();
@@ -1508,18 +1508,17 @@ static void glock_hash_walk(glock_examiner examiner, 
const struct gfs2_sbd *sdp)
  * thaw_glock - thaw out a glock which has an unprocessed reply waiting
  * @gl: The glock to thaw
  *
- * N.B. When we freeze a glock, we leave a ref to the glock outstanding,
- * so this has to result in the ref count being dropped by one.
  */
 
 static void thaw_glock(struct gfs2_glock *gl)
 {
if (!test_and_clear_bit(GLF_FROZEN, &gl->gl_flags))
-   return;
+   goto out;
set_bit(GLF_REPLY_PENDING, &gl->gl_flags);
-   gfs2_glock_hold(gl);
-   if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
+   if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) {
+out:
gfs2_glock_put(gl);
+   }
 }
 
 /**
@@ -1536,7 +1535,6 @@ static void clear_glock(struct gfs2_glock *gl)
if (gl->gl_state != LM_ST_UNLOCKED)
handle_callback(gl, LM_ST_UNLOCKED, 0, false);
spin_unlock(&gl->gl_spin);
-   gfs2_glock_hold(gl);
if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
gfs2_glock_put(gl);
 }
-- 
1.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 5/6] GFS2: Don't flag consistency error if first mounter is a spectator

2013-09-05 Thread Steven Whitehouse

From: Bob Peterson 

This patch checks for the first mounter being a spectator. If so, it
makes sure all the journals are clean. If there's a dirty journal,
the mount fails.

Testing results:

# insmod gfs2.ko
# mount -tgfs2 -o spectator /dev/sasdrives/scratch /mnt/gfs2
mount: permission denied
# dmesg | tail -2
[ 3390.655996] GFS2: fsid=MUSKETEER:home: Now mounting FS...
[ 3390.841336] GFS2: fsid=MUSKETEER:home.s: jid=0: Journal is dirty, so the 
first mounter must not be a spectator.
# mount -tgfs2 /dev/sasdrives/scratch /mnt/gfs2
# umount /mnt/gfs2
# mount -tgfs2 -o spectator /dev/sasdrives/scratch /mnt/gfs2
# ls /mnt/gfs2|wc -l
352
# umount /mnt/gfs2

Signed-off-by: Bob Peterson 
Signed-off-by: Steven Whitehouse 

diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 0262c19..19ff5e8 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -646,6 +646,48 @@ static int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct 
gfs2_holder *ji_gh)
return error;
 }
 
+/**
+ * check_journal_clean - Make sure a journal is clean for a spectator mount
+ * @sdp: The GFS2 superblock
+ * @jd: The journal descriptor
+ *
+ * Returns: 0 if the journal is clean or locked, else an error
+ */
+static int check_journal_clean(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd)
+{
+   int error;
+   struct gfs2_holder j_gh;
+   struct gfs2_log_header_host head;
+   struct gfs2_inode *ip;
+
+   ip = GFS2_I(jd->jd_inode);
+   error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_NOEXP |
+  GL_EXACT | GL_NOCACHE, &j_gh);
+   if (error) {
+   fs_err(sdp, "Error locking journal for spectator mount.\n");
+   return -EPERM;
+   }
+   error = gfs2_jdesc_check(jd);
+   if (error) {
+   fs_err(sdp, "Error checking journal for spectator mount.\n");
+   goto out_unlock;
+   }
+   error = gfs2_find_jhead(jd, &head);
+   if (error) {
+   fs_err(sdp, "Error parsing journal for spectator mount.\n");
+   goto out_unlock;
+   }
+   if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
+   error = -EPERM;
+   fs_err(sdp, "jid=%u: Journal is dirty, so the first mounter "
+  "must not be a spectator.\n", jd->jd_jid);
+   }
+
+out_unlock:
+   gfs2_glock_dq_uninit(&j_gh);
+   return error;
+}
+
 static int init_journal(struct gfs2_sbd *sdp, int undo)
 {
struct inode *master = sdp->sd_master_dir->d_inode;
@@ -732,8 +774,15 @@ static int init_journal(struct gfs2_sbd *sdp, int undo)
if (sdp->sd_lockstruct.ls_first) {
unsigned int x;
for (x = 0; x < sdp->sd_journals; x++) {
-   error = gfs2_recover_journal(gfs2_jdesc_find(sdp, x),
-true);
+   struct gfs2_jdesc *jd = gfs2_jdesc_find(sdp, x);
+
+   if (sdp->sd_args.ar_spectator) {
+   error = check_journal_clean(sdp, jd);
+   if (error)
+   goto fail_jinode_gh;
+   continue;
+   }
+   error = gfs2_recover_journal(jd, true);
if (error) {
fs_err(sdp, "error recovering journal %u: %d\n",
   x, error);
-- 
1.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 1/6] GFS2: Move gfs2_sync_meta to lops.c

2013-09-05 Thread Steven Whitehouse

Since gfs2_meta_sync() is only called from a single file, let's move
it to lops.c where it is used, and mark it static. At the same
time, we can clean up the meta_io.h header too.

Signed-off-by: Steven Whitehouse 

diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 17c5b5d..010b9fb 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -579,6 +579,24 @@ static int buf_lo_scan_elements(struct gfs2_jdesc *jd, 
unsigned int start,
return error;
 }
 
+/**
+ * gfs2_meta_sync - Sync all buffers associated with a glock
+ * @gl: The glock
+ *
+ */
+
+static void gfs2_meta_sync(struct gfs2_glock *gl)
+{
+   struct address_space *mapping = gfs2_glock2aspace(gl);
+   int error;
+
+   filemap_fdatawrite(mapping);
+   error = filemap_fdatawait(mapping);
+
+   if (error)
+   gfs2_io_error(gl->gl_sbd);
+}
+
 static void buf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
 {
struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 0da3906..9324150 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -98,24 +98,6 @@ const struct address_space_operations gfs2_meta_aops = {
 };
 
 /**
- * gfs2_meta_sync - Sync all buffers associated with a glock
- * @gl: The glock
- *
- */
-
-void gfs2_meta_sync(struct gfs2_glock *gl)
-{
-   struct address_space *mapping = gfs2_glock2aspace(gl);
-   int error;
-
-   filemap_fdatawrite(mapping);
-   error = filemap_fdatawait(mapping);
-
-   if (error)
-   gfs2_io_error(gl->gl_sbd);
-}
-
-/**
  * gfs2_getbuf - Get a buffer with a given address space
  * @gl: the glock
  * @blkno: the block number (filesystem scope)
diff --git a/fs/gfs2/meta_io.h b/fs/gfs2/meta_io.h
index 0d4c843..4823b93 100644
--- a/fs/gfs2/meta_io.h
+++ b/fs/gfs2/meta_io.h
@@ -48,21 +48,17 @@ static inline struct gfs2_sbd *gfs2_mapping2sbd(struct 
address_space *mapping)
return inode->i_sb->s_fs_info;
 }
 
-void gfs2_meta_sync(struct gfs2_glock *gl);
-
-struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno);
-int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno,
-  int flags, struct buffer_head **bhp);
-int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh);
-struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, int create);
-
-void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr,
- int meta);
-
-void gfs2_meta_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen);
-
-int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, u64 num,
- struct buffer_head **bhp);
+extern struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno);
+extern int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags,
+ struct buffer_head **bhp);
+extern int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh);
+extern struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno,
+  int create);
+extern void gfs2_remove_from_journal(struct buffer_head *bh,
+struct gfs2_trans *tr, int meta);
+extern void gfs2_meta_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen);
+extern int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, u64 
num,
+struct buffer_head **bhp);
 
 static inline int gfs2_meta_inode_buffer(struct gfs2_inode *ip,
 struct buffer_head **bhp)
-- 
1.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

1 2 3 4 5 6 7 >

1 - 100 of 685 matches

Mail list logo