CVSROOT:        /cvs/cluster
Module name:    cluster
Changes by:     [EMAIL PROTECTED]       2007-10-19 15:06:56

Modified files:
        gfs2/fsck      : fs_recovery.c fs_recovery.h initialize.c 
        gfs2/libgfs2   : Makefile libgfs2.h 
Added files:
        gfs2/libgfs2   : recovery.c 

Log message:
        Resolves: bz 291551: gfs2_fsck clears journals without asking.

Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs2/fsck/fs_recovery.c.diff?cvsroot=cluster&r1=1.5&r2=1.6
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs2/fsck/fs_recovery.h.diff?cvsroot=cluster&r1=1.2&r2=1.3
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs2/fsck/initialize.c.diff?cvsroot=cluster&r1=1.9&r2=1.10
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs2/libgfs2/recovery.c.diff?cvsroot=cluster&r1=NONE&r2=1.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs2/libgfs2/Makefile.diff?cvsroot=cluster&r1=1.11&r2=1.12
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs2/libgfs2/libgfs2.h.diff?cvsroot=cluster&r1=1.20&r2=1.21

--- cluster/gfs2/fsck/fs_recovery.c     2007/02/12 19:28:49     1.5
+++ cluster/gfs2/fsck/fs_recovery.c     2007/10/19 15:06:56     1.6
@@ -11,6 +11,7 @@
 *******************************************************************************
 ******************************************************************************/
 
+#include <errno.h>
 #include <inttypes.h>
 #include <linux_endian.h>
 #include <stdlib.h>
@@ -18,36 +19,421 @@
 #include <time.h>
 #include <unistd.h>
 
+#include "fsck.h"
+#include "fs_recovery.h"
 #include "libgfs2.h"
 #include "util.h"
-#include "fs_recovery.h"
 
 #define RANDOM(values) ((values) * (random() / (RAND_MAX + 1.0)))
 
+unsigned int sd_found_jblocks = 0, sd_replayed_jblocks = 0; /* journaled-data blocks seen / copied back during replay */
+unsigned int sd_found_metablocks = 0, sd_replayed_metablocks = 0; /* metadata blocks seen / copied back during replay */
+unsigned int sd_found_revokes = 0; /* revoke entries newly added to sd_revoke_list */
+osi_list_t sd_revoke_list; /* list of struct gfs2_revoke_replay; initialised in gfs2_recover_journal() */
+unsigned int sd_replay_tail; /* lh_tail of the journal being replayed; read by gfs2_revoke_check() */
+
+struct gfs2_revoke_replay { /* one revoked block recorded while scanning a journal */
+       osi_list_t rr_list;     /* linkage on sd_revoke_list */
+       uint64_t rr_blkno;      /* block number the revoke applies to */
+       unsigned int rr_where;  /* "where" value given to gfs2_revoke_add(); the caller in this file passes the journal block being scanned */
+};
+
+/*
+ * gfs2_revoke_add - remember that a block was revoked
+ * @sdp: the filesystem (not used by this implementation)
+ * @blkno: the revoked block number
+ * @where: the journal position at which the revoke was seen
+ *
+ * Returns: 0 if @blkno was already on the list (its position is refreshed),
+ *          1 if a new entry was added, -ENOMEM if allocation failed
+ */
+int gfs2_revoke_add(struct gfs2_sbd *sdp, uint64_t blkno, unsigned int where)
+{
+       osi_list_t *pos;
+       struct gfs2_revoke_replay *entry;
+
+       /* An existing entry only needs its position refreshed. */
+       osi_list_foreach(pos, &sd_revoke_list) {
+               entry = osi_list_entry(pos, struct gfs2_revoke_replay, rr_list);
+               if (entry->rr_blkno == blkno) {
+                       entry->rr_where = where;
+                       return 0;
+               }
+       }
+
+       entry = malloc(sizeof(struct gfs2_revoke_replay));
+       if (entry == NULL)
+               return -ENOMEM;
+
+       entry->rr_blkno = blkno;
+       entry->rr_where = where;
+       osi_list_add(&entry->rr_list, &sd_revoke_list);
+       return 1;
+}
+
+/*
+ * gfs2_revoke_check - decide whether a logged block has been revoked
+ * @sdp: the filesystem (not used by this implementation)
+ * @blkno: the block number found in the log
+ * @where: the journal position of the logged copy
+ *
+ * Returns: 0 if @blkno has no revoke entry; otherwise non-zero when @where
+ *          lies strictly between sd_replay_tail and the recorded revoke
+ *          position, taking journal wrap-around into account
+ */
+int gfs2_revoke_check(struct gfs2_sbd *sdp, uint64_t blkno, unsigned int where)
+{
+       osi_list_t *pos;
+       struct gfs2_revoke_replay *entry;
+       struct gfs2_revoke_replay *match = NULL;
+       int after_tail, before_revoke;
+
+       osi_list_foreach(pos, &sd_revoke_list) {
+               entry = osi_list_entry(pos, struct gfs2_revoke_replay, rr_list);
+               if (entry->rr_blkno == blkno) {
+                       match = entry;
+                       break;
+               }
+       }
+
+       if (match == NULL)
+               return 0;
+
+       after_tail = (sd_replay_tail < where);
+       before_revoke = (where < match->rr_where);
+
+       /* The revoke sits "before" the tail only when the log has wrapped. */
+       if (match->rr_where < sd_replay_tail)
+               return (after_tail || before_revoke);
+       return (after_tail && before_revoke);
+}
+
+/*
+ * gfs2_revoke_clean - free every entry on the revoke list
+ * @sdp: the filesystem (not used by this implementation)
+ */
+void gfs2_revoke_clean(struct gfs2_sbd *sdp)
+{
+       struct gfs2_revoke_replay *entry;
+       osi_list_t *first;
+
+       /* Pop from the front until the list is empty. */
+       while (!osi_list_empty(&sd_revoke_list)) {
+               first = sd_revoke_list.next;
+               entry = osi_list_entry(first, struct gfs2_revoke_replay, rr_list);
+               osi_list_del(first);
+               free(entry);
+       }
+}
+
+/*
+ * buf_lo_scan_elements - replay the blocks of a metadata log descriptor
+ * @ip: the journal incore inode
+ * @start: journal block holding the descriptor
+ * @ld: the log descriptor
+ * @ptr: array of big-endian block numbers following the descriptor
+ * @pass: replay pass; only pass 1 does any work
+ *
+ * For each listed block that is not revoked, the logged copy is copied over
+ * the in-place buffer.  A copy without GFS2_MAGIC aborts with -EIO.
+ *
+ * Returns: 0 on success (or nothing to do), negative errno on failure
+ */
+static int buf_lo_scan_elements(struct gfs2_inode *ip, unsigned int start,
+                               struct gfs2_log_descriptor *ld, __be64 *ptr,
+                               int pass)
+{
+       struct gfs2_sbd *sdp = ip->i_sbd;
+       unsigned int remaining = be32_to_cpu(ld->ld_data1);
+       struct gfs2_buffer_head *log_bh, *dest_bh;
+       uint64_t blkno;
+       uint32_t magic;
+       int error = 0;
+
+       if (pass != 1)
+               return 0;
+       if (be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_METADATA)
+               return 0;
+
+       /* The logged copies start in the block after the descriptor. */
+       gfs2_replay_incr_blk(ip, &start);
+
+       while (remaining) {
+               sd_found_metablocks++;
+
+               blkno = be64_to_cpu(*ptr);
+               ptr++;
+
+               if (!gfs2_revoke_check(sdp, blkno, start)) {
+                       error = gfs2_replay_read_block(ip, start, &log_bh);
+                       if (error)
+                               return error;
+
+                       dest_bh = bget(sdp, blkno);
+                       memcpy(dest_bh->b_data, log_bh->b_data, log_bh->b_size);
+
+                       magic = ((struct gfs2_meta_header *)
+                                (dest_bh->b_data))->mh_magic;
+                       magic = be32_to_cpu(magic);
+
+                       brelse(log_bh, not_updated);
+                       if (magic != GFS2_MAGIC) {
+                               brelse(dest_bh, not_updated);
+                               return -EIO;
+                       }
+                       brelse(dest_bh, updated);
+
+                       sd_replayed_metablocks++;
+               }
+
+               gfs2_replay_incr_blk(ip, &start);
+               remaining--;
+       }
+       return error;
+}
+
+/*
+ * revoke_lo_scan_elements - collect the revokes of a revoke log descriptor
+ * @ip: the journal incore inode
+ * @start: journal block holding the descriptor
+ * @ld: the log descriptor (ld_length blocks carrying ld_data1 revokes)
+ * @ptr: unused here; the entries are read from the journal blocks directly
+ * @pass: replay pass; only pass 0 does any work
+ *
+ * In the first block the entries follow the log descriptor; in every later
+ * block they follow the metadata header.  Each entry is handed to
+ * gfs2_revoke_add().
+ *
+ * Returns: 0 on success (or nothing to do), negative errno on failure
+ */
+static int revoke_lo_scan_elements(struct gfs2_inode *ip, unsigned int start,
+                                  struct gfs2_log_descriptor *ld, __be64 *ptr,
+                                  int pass)
+{
+       struct gfs2_sbd *sdp = ip->i_sbd;
+       unsigned int blks = be32_to_cpu(ld->ld_length);
+       unsigned int revokes = be32_to_cpu(ld->ld_data1);
+       struct gfs2_buffer_head *bh;
+       unsigned int offset;
+       uint64_t blkno;
+       int first = 1;
+       int error;
+
+       if (pass != 0 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_REVOKE)
+               return 0;
+
+       offset = sizeof(struct gfs2_log_descriptor);
+
+       for (; blks; gfs2_replay_incr_blk(ip, &start), blks--) {
+               error = gfs2_replay_read_block(ip, start, &bh);
+               if (error)
+                       return error;
+
+               if (!first && gfs2_check_meta(bh, GFS2_METATYPE_LB)) {
+                       /* Skip the block, but do not leak its buffer. */
+                       brelse(bh, not_updated);
+                       continue;
+               }
+
+               /*
+                * Stop as soon as the descriptor's revoke count is used up;
+                * testing it up front keeps a zero (or exhausted) count from
+                * wrapping around when more blocks follow.
+                */
+               while (revokes &&
+                      offset + sizeof(uint64_t) <= sdp->sd_sb.sb_bsize) {
+                       blkno = be64_to_cpu(*(__be64 *)(bh->b_data + offset));
+                       error = gfs2_revoke_add(sdp, blkno, start);
+                       if (error < 0) {
+                               /* The block was only read; release it. */
+                               brelse(bh, not_updated);
+                               return error;
+                       }
+                       if (error)
+                               sd_found_revokes++;
+
+                       revokes--;
+                       offset += sizeof(uint64_t);
+               }
+
+               /*
+                * NOTE(review): the buffer is not modified on this path
+                * either; "updated" is kept from the original code - confirm
+                * whether not_updated was intended.
+                */
+               brelse(bh, updated);
+               offset = sizeof(struct gfs2_meta_header);
+               first = 0;
+       }
+       return 0;
+}
+
+/*
+ * databuf_lo_scan_elements - replay the blocks of a journaled-data descriptor
+ * @ip: the journal incore inode
+ * @start: journal block holding the descriptor
+ * @ld: the log descriptor
+ * @ptr: array of big-endian (block number, escape flag) pairs
+ * @pass: replay pass; only pass 1 does any work
+ *
+ * For each listed block that is not revoked, the logged copy is copied over
+ * the in-place buffer.  When the escape flag is set, the first 32 bits of
+ * the copy are rewritten to GFS2_MAGIC.
+ *
+ * Returns: 0 on success (or nothing to do), negative errno on failure
+ */
+static int databuf_lo_scan_elements(struct gfs2_inode *ip, unsigned int start,
+                                   struct gfs2_log_descriptor *ld,
+                                   __be64 *ptr, int pass)
+{
+       struct gfs2_sbd *sdp = ip->i_sbd;
+       unsigned int remaining = be32_to_cpu(ld->ld_data1);
+       struct gfs2_buffer_head *log_bh, *dest_bh;
+       uint64_t blkno;
+       uint64_t escaped;
+       int error = 0;
+
+       if (pass != 1)
+               return 0;
+       if (be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_JDATA)
+               return 0;
+
+       /* The logged copies start in the block after the descriptor. */
+       gfs2_replay_incr_blk(ip, &start);
+
+       while (remaining) {
+               blkno = be64_to_cpu(ptr[0]);
+               escaped = be64_to_cpu(ptr[1]);
+               ptr += 2;
+
+               sd_found_jblocks++;
+
+               if (!gfs2_revoke_check(sdp, blkno, start)) {
+                       error = gfs2_replay_read_block(ip, start, &log_bh);
+                       if (error)
+                               return error;
+
+                       dest_bh = bget(sdp, blkno);
+                       memcpy(dest_bh->b_data, log_bh->b_data, log_bh->b_size);
+
+                       /* Unescape */
+                       if (escaped)
+                               *(__be32 *)dest_bh->b_data =
+                                       cpu_to_be32(GFS2_MAGIC);
+
+                       brelse(log_bh, not_updated);
+                       brelse(dest_bh, updated);
+
+                       sd_replayed_jblocks++;
+               }
+
+               gfs2_replay_incr_blk(ip, &start);
+               remaining--;
+       }
+       return error;
+}
+
+/**
+ * foreach_descriptor - go through the active part of the log
+ * @ip: the journal incore inode
+ * @start: the first log header in the active region
+ * @end: the last log header (the contents of this entry are not processed)
+ * @pass: the replay pass, handed unchanged to every scan handler
+ *
+ * Walk the journal from @start to @end.  Log headers are validated and
+ * stepped over; every log descriptor is offered to the journaled-data,
+ * metadata and revoke scan handlers, after which the walk skips ld_length
+ * blocks.
+ *
+ * Returns: 0 on success, negative errno on failure
+ */
+
+int foreach_descriptor(struct gfs2_inode *ip, unsigned int start,
+                      unsigned int end, int pass)
+{
+       struct gfs2_buffer_head *bh;
+       struct gfs2_log_descriptor *ld;
+       uint32_t length, magic;
+       __be64 *ptr;
+       unsigned int offset;
+       int error = 0;
+
+       /* Element arrays start at the first 8-byte boundary after the ld. */
+       offset = sizeof(struct gfs2_log_descriptor);
+       offset += sizeof(__be64) - 1;
+       offset &= ~(sizeof(__be64) - 1);
+
+       while (start != end) {
+               error = gfs2_replay_read_block(ip, start, &bh);
+               if (error)
+                       return error;
+
+               magic = ((struct gfs2_meta_header *)(bh->b_data))->mh_magic;
+               magic = be32_to_cpu(magic);
+               if (magic != GFS2_MAGIC) {
+                       error = -EIO;
+                       goto fail;
+               }
+
+               ld = (struct gfs2_log_descriptor *)bh->b_data;
+               length = be32_to_cpu(ld->ld_length);
+
+               if (be32_to_cpu(ld->ld_header.mh_type) == GFS2_METATYPE_LH) {
+                       struct gfs2_log_header lh;
+
+                       error = get_log_header(ip, start, &lh);
+                       if (error) {
+                               /* 1 means "invalid header": report as I/O error */
+                               if (error == 1)
+                                       error = -EIO;
+                               goto fail;
+                       }
+                       gfs2_replay_incr_blk(ip, &start);
+                       brelse(bh, updated);
+                       continue;
+               }
+
+               if (gfs2_check_meta(bh, GFS2_METATYPE_LD)) {
+                       error = -EIO;
+                       goto fail;
+               }
+
+               ptr = (__be64 *)(bh->b_data + offset);
+
+               error = databuf_lo_scan_elements(ip, start, ld, ptr, pass);
+               if (!error)
+                       error = buf_lo_scan_elements(ip, start, ld, ptr, pass);
+               if (!error)
+                       error = revoke_lo_scan_elements(ip, start, ld, ptr,
+                                                       pass);
+               if (error)
+                       goto fail;
+
+               while (length--)
+                       gfs2_replay_incr_blk(ip, &start);
+
+               brelse(bh, updated);
+       }
+
+       return 0;
+
+fail:
+       brelse(bh, updated);
+       return error;
+}
+
+/**
+ * gfs2_recover_journal - recover a given journal
+ * @ip: the journal incore inode
+ * @j: the journal index, used in messages and to find sdp->md.journal[j]
+ *
+ * Find the journal head and return early if the journal is clean.  For a
+ * dirty journal, ask whether to replay it; if the answer is no, ask whether
+ * to clear it instead.  A replay runs both passes over the active region of
+ * the log and then marks the journal clean.
+ *
+ * Returns: 0 on success (including a declined replay), errno on failure
+ */
+
+int gfs2_recover_journal(struct gfs2_inode *ip, int j)
+{
+       struct gfs2_sbd *sdp = ip->i_sbd;
+       struct gfs2_log_header head;
+       unsigned int pass;
+       int error;
+
+       log_info("jid=%u: Looking at journal...\n", j);
+
+       osi_list_init(&sd_revoke_list);
+       error = gfs2_find_jhead(ip, &head);
+       if (error)
+               goto out;
+
+       if (head.lh_flags & GFS2_LOG_HEAD_UNMOUNT) {
+               log_info("jid=%u: Journal is clean.\n", j);
+               return 0;
+       }
+       if (query(&opts, "\nJournal #%d (\"journal%d\") is dirty.  Okay to replay it? (y/n)",
+                   j+1, j)) {
+               log_info("jid=%u: Replaying journal...\n", j);
+
+               sd_found_jblocks = sd_replayed_jblocks = 0;
+               sd_found_metablocks = sd_replayed_metablocks = 0;
+               sd_found_revokes = 0;
+               sd_replay_tail = head.lh_tail;
+               for (pass = 0; pass < 2; pass++) {
+                       error = foreach_descriptor(ip, head.lh_tail,
+                                                  head.lh_blkno, pass);
+                       if (error)
+                               break;
+               }
+               if (error) {
+                       /* Revokes collected before the failure must not leak. */
+                       gfs2_revoke_clean(sdp);
+                       goto out;
+               }
+               log_info("jid=%u: Found %u revoke tags\n", j,
+                        sd_found_revokes);
+               gfs2_revoke_clean(sdp);
+               error = clean_journal(ip, &head);
+               if (error)
+                       goto out;
+               log_err("jid=%u: Replayed %u of %u journaled data blocks\n",
+                       j, sd_replayed_jblocks, sd_found_jblocks);
+               log_err("jid=%u: Replayed %u of %u metadata blocks\n",
+                       j, sd_replayed_metablocks, sd_found_metablocks);
+       } else {
+               if (query(&opts, "Do you want to clear the dirty journal instead? (y/n)")) {
+                       /* Rewrite the whole journal: size in blocks. */
+                       write_journal(sdp, sdp->md.journal[j], j,
+                                     sdp->md.journal[j]->i_di.di_size /
+                                     sdp->sd_sb.sb_bsize);
+               } else
+                       log_err("jid=%u: Dirty journal not replayed or cleared.\n", j);
+       }
+
+out:
+       log_info("jid=%u: %s\n", j, (error) ? "Failed" : "Done");
+       return error;
+}
+
 /*
- * reconstruct_journals - write fresh journals
+ * replay_journals - replay the journals
  * sdp: the super block
  *
- * FIXME: it would be nice to get this to actually replay the journals
- * - there should be a flag to the fsck to enable/disable this
- * feature, and the fsck should probably fall back to clearing the
- * journal if an inconsitancy is found, but only for the bad journal
+ * There should be a flag to the fsck to enable/disable this
+ * feature.  The fsck falls back to clearing the journal if an 
+ * inconsistency is found, but only for the bad journal.
  *
  * Returns: 0 on success, -1 on failure
  */
-int reconstruct_journals(struct gfs2_sbd *sdp){
+int replay_journals(struct gfs2_sbd *sdp){
        int i;
 
-       log_notice("Clearing journals (this may take a while)");
-       for(i=0; i < sdp->md.journals; i++) {
-               /* Journal replay seems to have slowed down quite a bit in
-                * the gfs2_fsck */
+       log_notice("Recovering journals (this may take a while)");
+
+       /* Get master dinode */
+       sdp->master_dir = gfs2_load_inode(sdp,
+                                         sdp->sd_sb.sb_master_dir.no_addr);
+       gfs2_lookupi(sdp->master_dir, "jindex", 6, &sdp->md.jiinode); /* NOTE(review): the lookup result is not checked */
+
+       /* read in the journal index data */
+       if (ji_update(sdp)){
+               log_err("Unable to read in jindex inode.\n");
+               return -1; /* NOTE(review): master_dir and jiinode are not released with inode_put() on this path */
+       }
+
+       for(i = 0; i < sdp->md.journals; i++) {
                if((i % 2) == 0)
                        log_at_notice(".");
-               write_journal(sdp, sdp->md.journal[i], i,
-                                         sdp->md.journal[i]->i_di.di_size / 
sdp->sd_sb.sb_bsize);
-               /* Can't use d_di.di_blocks because that also includes 
metadata. */
+               gfs2_recover_journal(sdp->md.journal[i], i); /* return value ignored: a failed journal does not stop the loop */
+               inode_put(sdp->md.journal[i],
+                         (opts.no ? not_updated : updated));
        }
-       log_notice("\nJournals cleared.\n");
+       log_notice("\nJournal recovery complete.\n");
+       inode_put(sdp->master_dir, not_updated);
+       inode_put(sdp->md.jiinode, not_updated);
+       /* Sync the buffers to disk so we get a fresh start. */
+       bsync(sdp);
        return 0;
 }
--- cluster/gfs2/fsck/fs_recovery.h     2006/06/06 14:49:31     1.2
+++ cluster/gfs2/fsck/fs_recovery.h     2007/10/19 15:06:56     1.3
@@ -16,7 +16,7 @@
 
 #include "libgfs2.h"
 
-int reconstruct_journals(struct gfs2_sbd *sdp);
+int replay_journals(struct gfs2_sbd *sdp);
 
 #endif /* __FS_RECOVERY_H__ */
 
--- cluster/gfs2/fsck/initialize.c      2007/06/26 01:43:17     1.9
+++ cluster/gfs2/fsck/initialize.c      2007/10/19 15:06:56     1.10
@@ -42,8 +42,7 @@
 int init_journals(struct gfs2_sbd *sbp)
 {
        if(!opts.no) {
-               /* ATTENTION -- Journal replay is not supported */
-               if(reconstruct_journals(sbp))
+               if(replay_journals(sbp))
                        return 1;
        }
        return 0;
@@ -192,51 +191,18 @@
 }
 
 /**
- * fill_super_block
- * @sdp:
+ * init_system_inodes
  *
  * Returns: 0 on success, -1 on failure
  */
-static int fill_super_block(struct gfs2_sbd *sdp)
+static int init_system_inodes(struct gfs2_sbd *sdp)
 {
-       uint32_t i;
-       char *buf;
        uint64_t inumbuf;
+       char *buf;
        struct gfs2_statfs_change sc;
        int rgcount;
-       uint64_t addl_mem_needed;
        enum rgindex_trust_level trust_lvl;
-
-       sync();
-
-       /********************************************************************
-        ***************** First, initialize all lists **********************
-        ********************************************************************/
-       log_info("Initializing lists...\n");
-       osi_list_init(&sdp->rglist);
-       osi_list_init(&sdp->buf_list);
-       for(i = 0; i < BUF_HASH_SIZE; i++) {
-               osi_list_init(&dir_hash[i]);
-               osi_list_init(&inode_hash[i]);
-               osi_list_init(&sdp->buf_hash[i]);
-       }
-
-       /********************************************************************
-        ************  next, read in on-disk SB and set constants  **********
-        ********************************************************************/
-       sdp->sd_sb.sb_bsize = GFS2_DEFAULT_BSIZE;
-       sdp->bsize = sdp->sd_sb.sb_bsize;
-
-       if(sizeof(struct gfs2_sb) > sdp->sd_sb.sb_bsize){
-               log_crit("GFS superblock is larger than the blocksize!\n");
-               log_debug("sizeof(struct gfs2_sb) > sdp->sd_sb.sb_bsize\n");
-               return -1;
-       }
-
-       compute_constants(sdp);
-       if(read_sb(sdp) < 0){
-               return -1;
-       }
+       uint64_t addl_mem_needed;
 
        /*******************************************************************
         ******************  Initialize important inodes  ******************
@@ -298,9 +264,10 @@
        }
        if (trust_lvl > distrust) {
                log_err("RG recovery impossible; I can't fix this file 
system.\n");
-               goto fail;
+               return -1;
        }
        log_info("%u resource groups found.\n", rgcount);
+
        /*******************************************************************
         *******  Now, set boundary fields in the super block  *************
         *******************************************************************/
@@ -318,7 +285,6 @@
                goto fail;
        }
        return 0;
-
  fail:
        empty_super_block(sdp);
 
@@ -326,6 +292,50 @@
 }
 
 /**
+ * fill_super_block
+ * @sdp:
+ *
+ * Returns: 0 on success, -1 on failure
+ */
+static int fill_super_block(struct gfs2_sbd *sdp)
+{
+       uint32_t bucket;
+
+       sync();
+
+       /* Step 1: start from empty lists and hash tables. */
+       log_info("Initializing lists...\n");
+       osi_list_init(&sdp->rglist);
+       osi_list_init(&sdp->buf_list);
+       for (bucket = 0; bucket < BUF_HASH_SIZE; bucket++) {
+               osi_list_init(&dir_hash[bucket]);
+               osi_list_init(&inode_hash[bucket]);
+               osi_list_init(&sdp->buf_hash[bucket]);
+       }
+
+       /*
+        * Step 2: use the default block size for computing constants, then
+        * read the on-disk superblock.
+        */
+       sdp->sd_sb.sb_bsize = GFS2_DEFAULT_BSIZE;
+       sdp->bsize = sdp->sd_sb.sb_bsize;
+
+       if (sdp->sd_sb.sb_bsize < sizeof(struct gfs2_sb)) {
+               log_crit("GFS superblock is larger than the blocksize!\n");
+               log_debug("sizeof(struct gfs2_sb) > sdp->sd_sb.sb_bsize\n");
+               return -1;
+       }
+
+       compute_constants(sdp);
+
+       return (read_sb(sdp) < 0) ? -1 : 0;
+}
+
+/**
  * init_sbp - initialize superblock pointer
  *
  */
@@ -365,6 +375,9 @@
                return -1;
        }
 
+       if (init_system_inodes(sbp))
+               return -1;
+
        return 0;
 }
 
/cvs/cluster/cluster/gfs2/libgfs2/recovery.c,v  -->  standard output
revision 1.1
--- cluster/gfs2/libgfs2/recovery.c
+++ -   2007-10-19 15:06:57.244036000 +0000
@@ -0,0 +1,253 @@
+/*
+ * Copyright (C) 2007 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License version 2.
+ *
+ */
+
+/*
+ * NOTE:
+ *
+ * This code was pilfered from the gfs2 kernel and adapted to userland.
+ * If you change this part, you should evaluate whether the upstream kernel
+ * version of recovery.c should be changed as well.  Likewise, if the
+ * upstream version changes, this part should be kept in sync.
+ * 
+ */
+
+#include <errno.h>
+#include <string.h>
+#include "libgfs2.h"
+
+/* Advance *blk by one journal block, wrapping to 0 at the end of the journal. */
+void gfs2_replay_incr_blk(struct gfs2_inode *ip, unsigned int *blk)
+{
+       /* Journal length in filesystem blocks. */
+       uint32_t jd_blocks = ip->i_di.di_size / ip->i_sbd->sd_sb.sb_bsize;
+       unsigned int next = *blk + 1;
+
+       *blk = (next == jd_blocks) ? 0 : next;
+}
+
+/*
+ * Map journal-relative block @blk to its disk block and read it into *@bh.
+ * Returns 0 on success, -EIO when the block maps to disk block 0.
+ */
+int gfs2_replay_read_block(struct gfs2_inode *ip, unsigned int blk,
+                          struct gfs2_buffer_head **bh)
+{
+       uint64_t disk_blk;
+       uint32_t extent;
+       int is_new = 0;
+
+       block_map(ip, blk, &is_new, &disk_blk, &extent, FALSE, not_updated);
+       if (disk_blk == 0)
+               return -EIO;
+
+       *bh = bread(ip->i_sbd, disk_blk);
+       return 0;
+}
+
+/**
+ * get_log_header - read the log header for a given segment
+ * @ip: the journal incore inode
+ * @blk: the block to look at
+ * @head: the log header to return
+ *
+ * Read the log header for a given segment in a given journal.  The header
+ * is accepted only if its recorded block number equals @blk and its stored
+ * hash matches the hash computed with the hash field zeroed.
+ *
+ * Returns: 0 on success,
+ *          1 if the header was invalid or incomplete,
+ *          errno on error
+ */
+
+int get_log_header(struct gfs2_inode *ip, unsigned int blk,
+                  struct gfs2_log_header *head)
+{
+       struct gfs2_buffer_head *bh;
+       struct gfs2_log_header parsed, *raw;
+       uint32_t computed, stored;
+       int error;
+
+       error = gfs2_replay_read_block(ip, blk, &bh);
+       if (error)
+               return error;
+
+       /* Hash the header with lh_hash zeroed, then put the field back. */
+       raw = (struct gfs2_log_header *)bh->b_data;
+       stored = raw->lh_hash;
+       raw->lh_hash = 0;
+       computed = gfs2_disk_hash(bh->b_data, sizeof(struct gfs2_log_header));
+       raw->lh_hash = stored;
+
+       gfs2_log_header_in(&parsed, bh->b_data);
+       brelse(bh, not_updated);
+
+       if (parsed.lh_blkno != blk)
+               return 1;
+       if (parsed.lh_hash != computed)
+               return 1;
+
+       *head = parsed;
+
+       return 0;
+}
+
+/**
+ * find_good_lh - find a good log header
+ * @ip: the journal incore inode
+ * @blk: the segment to start searching from; updated to the block found
+ * @head: the log header to fill in
+ *
+ * Call get_log_header() to get a log header for a segment, but if the
+ * segment is bad, scan forward (wrapping at the end of the journal) until
+ * a good one is found or the scan is back at the starting block.
+ *
+ * Returns: 0 on success, -EIO if no valid header exists, errno on error
+ */
+
+int find_good_lh(struct gfs2_inode *ip, unsigned int *blk,
+                struct gfs2_log_header *head)
+{
+       uint32_t jd_blocks = ip->i_di.di_size / ip->i_sbd->sd_sb.sb_bsize;
+       unsigned int first_blk = *blk;
+       int error;
+
+       do {
+               error = get_log_header(ip, *blk, head);
+               if (error <= 0)
+                       return error;
+
+               /* Invalid header: step to the next block, wrapping. */
+               *blk += 1;
+               if (*blk == jd_blocks)
+                       *blk = 0;
+       } while (*blk != first_blk);
+
+       return -EIO;
+}
+
+/**
+ * jhead_scan - make sure we've found the head of the log
+ * @ip: the journal incore inode
+ * @head: on entry a candidate head; on return the log header of the head
+ *
+ * At this point, @head should be either the head of the log or just
+ * before.  Scan forward, skipping invalid headers, and keep the newest
+ * header seen until a header with a lower sequence number is reached.
+ *
+ * Returns: 0 on success, -EIO if a header repeats the current sequence
+ *          number, errno on read failure
+ */
+
+int jhead_scan(struct gfs2_inode *ip, struct gfs2_log_header *head)
+{
+       uint32_t jd_blocks = ip->i_di.di_size / ip->i_sbd->sd_sb.sb_bsize;
+       unsigned int blk = head->lh_blkno;
+       struct gfs2_log_header lh;
+       int error;
+
+       for (;;) {
+               blk++;
+               if (blk == jd_blocks)
+                       blk = 0;
+
+               error = get_log_header(ip, blk, &lh);
+               if (error < 0)
+                       return error;
+
+               if (error != 1) {
+                       if (lh.lh_sequence == head->lh_sequence)
+                               return -EIO;
+                       if (lh.lh_sequence < head->lh_sequence)
+                               return 0;
+
+                       /* Newer header: it becomes the candidate head. */
+                       *head = lh;
+               }
+       }
+}
+
+/**
+ * gfs2_find_jhead - find the head of a log
+ * @ip: the journal incore inode
+ * @head: the log descriptor for the head of the log is returned here
+ *
+ * Do a binary search of a journal and find the valid log entry with the
+ * highest sequence number.  (i.e. the log head)
+ *
+ * The search narrows [blk_1, blk_2] by comparing the sequence numbers of
+ * the valid headers found at (or after) blk_1 and the midpoint, then
+ * jhead_scan() walks forward from the result to settle on the head.
+ *
+ * Returns: errno
+ */
+
+int gfs2_find_jhead(struct gfs2_inode *ip, struct gfs2_log_header *head)
+{
+       struct gfs2_log_header lh_1, lh_m;
+       uint32_t blk_1, blk_2, blk_m;
+       uint32_t jd_blocks = ip->i_di.di_size / ip->i_sbd->sd_sb.sb_bsize; /* journal length in blocks */
+       int error;
+
+       blk_1 = 0;
+       blk_2 = jd_blocks - 1; /* NOTE(review): wraps if jd_blocks is 0 - confirm callers never pass an empty journal */
+
+       for (;;) {
+               blk_m = (blk_1 + blk_2) / 2;
+
+               error = find_good_lh(ip, &blk_1, &lh_1); /* may advance blk_1 past invalid headers */
+               if (error)
+                       return error;
+
+               error = find_good_lh(ip, &blk_m, &lh_m); /* may advance blk_m past invalid headers */
+               if (error)
+                       return error;
+
+               if (blk_1 == blk_m || blk_m == blk_2) /* interval cannot shrink further */
+                       break;
+
+               if (lh_1.lh_sequence <= lh_m.lh_sequence)
+                       blk_1 = blk_m; /* sequence still rising: continue in the upper half */
+               else
+                       blk_2 = blk_m; /* sequence dropped: continue in the lower half */
+       }
+
+       error = jhead_scan(ip, &lh_1);
+       if (error)
+               return error;
+
+       *head = lh_1;
+
+       return error; /* error is 0 here */
+}
+
+/**
+ * clean_journal - mark a dirty journal as being clean
+ * @ip: the journal incore inode
+ * @head: the current head log header of the journal
+ *
+ * Write a fresh log header flagged GFS2_LOG_HEAD_UNMOUNT into the journal
+ * block that follows @head, using sequence number @head->lh_sequence + 1.
+ *
+ * Returns: 0 on success, -EIO if the journal block maps to disk block 0
+ */
+
+int clean_journal(struct gfs2_inode *ip, struct gfs2_log_header *head)
+{
+       struct gfs2_buffer_head *bh;
+       struct gfs2_log_header *lh;
+       unsigned int lblock = head->lh_blkno;
+       uint64_t dblock;
+       uint32_t extlen, hash;
+       int new = 0;
+
+       gfs2_replay_incr_blk(ip, &lblock);
+       block_map(ip, lblock, &new, &dblock, &extlen, 0, not_updated);
+       if (!dblock)
+               return -EIO;
+
+       bh = bread(ip->i_sbd, dblock);
+
+       /* Zeroing the whole block also clears the header area, lh_hash included. */
+       memset(bh->b_data, 0, bh->b_size);
+
+       lh = (struct gfs2_log_header *)bh->b_data;
+       lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
+       lh->lh_header.mh_type = cpu_to_be32(GFS2_METATYPE_LH);
+       lh->lh_header.mh_format = cpu_to_be32(GFS2_FORMAT_LH);
+       lh->lh_sequence = cpu_to_be64(head->lh_sequence + 1);
+       lh->lh_flags = cpu_to_be32(GFS2_LOG_HEAD_UNMOUNT);
+       lh->lh_blkno = cpu_to_be32(lblock);
+
+       /* The hash is computed while lh_hash is still zero. */
+       hash = gfs2_disk_hash((const char *)lh, sizeof(struct gfs2_log_header));
+       lh->lh_hash = cpu_to_be32(hash);
+
+       brelse(bh, updated);
+
+       return 0;
+}
+
--- cluster/gfs2/libgfs2/Makefile       2007/08/28 04:35:43     1.11
+++ cluster/gfs2/libgfs2/Makefile       2007/10/19 15:06:56     1.12
@@ -31,6 +31,7 @@
        gfs2_log.o \
        misc.o \
        ondisk.o \
+       recovery.o \
        size.o \
        structures.o \
        super.o \
--- cluster/gfs2/libgfs2/libgfs2.h      2007/10/17 20:16:35     1.20
+++ cluster/gfs2/libgfs2/libgfs2.h      2007/10/19 15:06:56     1.21
@@ -515,6 +515,22 @@
 void lock_for_admin(struct gfs2_sbd *sdp);
 void cleanup_metafs(struct gfs2_sbd *sdp);
 
+/* recovery.c */
+void gfs2_replay_incr_blk(struct gfs2_inode *ip, unsigned int *blk);
+int gfs2_replay_read_block(struct gfs2_inode *ip, unsigned int blk,
+                          struct gfs2_buffer_head **bh);
+int gfs2_revoke_add(struct gfs2_sbd *sdp, uint64_t blkno, unsigned int where);
+int gfs2_revoke_check(struct gfs2_sbd *sdp, uint64_t blkno,
+                     unsigned int where);
+void gfs2_revoke_clean(struct gfs2_sbd *sdp);
+int get_log_header(struct gfs2_inode *ip, unsigned int blk,
+                  struct gfs2_log_header *head);
+int find_good_lh(struct gfs2_inode *ip, unsigned int *blk,
+                struct gfs2_log_header *head);
+int jhead_scan(struct gfs2_inode *ip, struct gfs2_log_header *head);
+int gfs2_find_jhead(struct gfs2_inode *ip, struct gfs2_log_header *head);
+int clean_journal(struct gfs2_inode *ip, struct gfs2_log_header *head);
+
 /* rgrp.c */
 int gfs2_compute_bitstructs(struct gfs2_sbd *sdp, struct rgrp_list *rgd);
 struct rgrp_list *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, uint64_t blk);

Reply via email to