Hi!

BEWARE: Don't use the following patch in production, it might eat your RAID
set for breakfast.

Attached is a patch which does 4 things:
- tries to cleanly solve the RAID superblock issues on non-x86 architectures
(where sizeof(md_super_t) was bigger than 4096, usually 4104) by introducing
a RAID 0.91.x on-disk format which is binary compatible with 0.90 on
i386 (and at the same time changes it from native-endian to little-endian).
- introduces a reserved-bytes setting in raidtab, for which the default is
auto-probed by mkraid if not specified. If non-zero, the RAID array will
make sure the first reserved_bytes bytes on the disk are never touched (resynced
or whatever). This makes it possible e.g. to place a RAID partition at cylinder 0
on a disk with a Sun partition table.
- fixes raid1_kmalloc in raid1.c, which allocated a wrong size (it ignored its size argument)
The patch is against 2.2.14 with the 2.2.14-B1 RAID patch, because 2.3.99-pre2
is missing the raid1/5 bits. I can try to port the changes to the remaining
files to 2.3.99-pre2 though.
- raidtab.5 man page fix

I'm looking for testers both on x86 and non-x86.

Cheers,
    Jakub
___________________________________________________________________
Jakub Jelinek | [EMAIL PROTECTED] | http://sunsite.mff.cuni.cz/~jj
Linux version 2.3.99-pre2 on a sparc64 machine (1343.49 BogoMips)
___________________________________________________________________
--- linux/arch/sparc64/kernel/ioctl32.c.jj      Mon Jan 24 11:36:41 2000
+++ linux/arch/sparc64/kernel/ioctl32.c Fri Mar 10 17:32:37 2000
@@ -2022,12 +2022,14 @@ asmlinkage int sys32_ioctl(unsigned int 
        
        /* 0x09 */
        case /* RAID_VERSION */         _IOR (MD_MAJOR, 0x10, char[12]):
-       case /* GET_ARRAY_INFO */       _IOR (MD_MAJOR, 0x11, char[72]):
+       case /* GET_ARRAY_INFO */       _IOR (MD_MAJOR, 0x11, char[128]):
+       case /* OLD_GET_ARRAY_INFO */   _IOR (MD_MAJOR, 0x11, char[72]):
        case /* GET_DISK_INFO */        _IOR (MD_MAJOR, 0x12, char[20]):
        case /* CLEAR_ARRAY */          _IO (MD_MAJOR, 0x20):
        case /* ADD_NEW_DISK */         _IOW (MD_MAJOR, 0x21, char[20]):
        case /* HOT_REMOVE_DISK */      _IO (MD_MAJOR, 0x22):
-       case /* SET_ARRAY_INFO */       _IOW (MD_MAJOR, 0x23, char[72]):
+       case /* SET_ARRAY_INFO */       _IOW (MD_MAJOR, 0x23, char[128]):
+       case /* OLD_SET_ARRAY_INFO */   _IOW (MD_MAJOR, 0x23, char[72]):
        case /* SET_DISK_INFO */        _IO (MD_MAJOR, 0x24):
        case /* WRITE_RAID_INFO */      _IO (MD_MAJOR, 0x25):
        case /* UNPROTECT_ARRAY */      _IO (MD_MAJOR, 0x26):
--- linux/drivers/block/md.c.jj Mon Jan 24 11:36:42 2000
+++ linux/drivers/block/md.c    Thu Mar 16 14:29:46 2000
@@ -11,6 +11,8 @@
    - kerneld support by Boris Tobotras <[EMAIL PROTECTED]>
    - kmod support by: Cyrus Durgin
    - RAID0 bugfixes: Mark Anthony Lisher <[EMAIL PROTECTED]>
+   - superblock layout on non-x86 fixes and reserved_bytes support by
+     Jakub Jelinek <[EMAIL PROTECTED]>
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -299,9 +301,18 @@ static unsigned int calc_dev_sboffset (k
        return size;
 }
 
+static inline unsigned int calc_dev_reserved (mddev_t *mddev)
+{
+       /* KB to skip at the start of each disk: reserved_bytes rounded up to a
+        * whole chunk (assumed power of 2), or to 1KB if no chunk_size is set. */
+       unsigned int align = mddev->sb->chunk_size ? mddev->sb->chunk_size : 1024;
+       unsigned int reserved = mddev->sb->reserved_bytes + align - 1;
+       return (reserved & ~(align - 1)) / 1024;
+}
+
 static unsigned int calc_dev_size (kdev_t dev, mddev_t *mddev, int persistent)
 {
-       unsigned int size;
+       unsigned int size, reserved;
 
        size = calc_dev_sboffset(dev, mddev, persistent);
        if (!mddev->sb) {
@@ -310,9 +321,25 @@ static unsigned int calc_dev_size (kdev_
        }
        if (mddev->sb->chunk_size)
                size &= ~(mddev->sb->chunk_size/1024 - 1);
+       reserved = calc_dev_reserved (mddev);
+       if (reserved > size)
+               size = 0;
+       else
+               size -= reserved;
        return size;
 }
 
+__u64 __inline__ md_read_events (mdp_super_t *sb)
+{
+       return (((__u64)sb->eventshi) << 32) | sb->eventslo;
+}
+
+void __inline__ md_write_events (__u64 events, mdp_super_t *sb)
+{
+       sb->eventshi = events >> 32;
+       sb->eventslo = events;
+}
+
 /*
  * We check wether all devices are numbered from 0 to nb_dev-1. The
  * order is guaranteed even after device name changes.
@@ -376,28 +403,13 @@ abort:
        return 1;
 }
 
-static unsigned int zoned_raid_size (mddev_t *mddev)
+static inline unsigned int zoned_raid_size (mddev_t *mddev)
 {
-       unsigned int mask;
        mdk_rdev_t * rdev;
        struct md_list_head *tmp;
 
-       if (!mddev->sb) {
-               MD_BUG();
-               return -EINVAL;
-       }
-       /*
-        * do size and offset calculations.
-        */
-       mask = ~(mddev->sb->chunk_size/1024 - 1);
-printk("mask %08x\n", mask);
-
        ITERATE_RDEV(mddev,rdev,tmp) {
-printk(" rdev->size: %d\n", rdev->size);
-               rdev->size &= mask;
-printk(" masked rdev->size: %d\n", rdev->size);
                md_size[mdidx(mddev)] += rdev->size;
-printk("  new md_size: %d\n", md_size[mdidx(mddev)]);
        }
        return 0;
 }
@@ -492,6 +504,17 @@ static void mark_rdev_faulty (mdk_rdev_t
        restore_flags(flags);
 }
 
+static unsigned int calc_sb_csum (mdp_super_t * sb)
+{
+       unsigned int disk_csum, csum;
+
+       disk_csum = sb->sb_csum;
+       sb->sb_csum = 0;
+       csum = csum_partial((void *)sb, MD_SB_BYTES, 0);
+       sb->sb_csum = disk_csum;
+       return csum;
+}
+
 static int read_disk_sb (mdk_rdev_t * rdev)
 {
        int ret = -EINVAL;
@@ -518,13 +541,33 @@ static int read_disk_sb (mdk_rdev_t * rd
        bh = bread (dev, sb_offset / MD_SB_BLOCKS, MD_SB_BYTES);
 
        if (bh) {
-               sb = (mdp_super_t *) bh->b_data;
-               memcpy (rdev->sb, sb, MD_SB_BYTES);
+               sb = rdev->sb;
+               memcpy (sb, (mdp_super_t *) bh->b_data, MD_SB_BYTES);
+               if (sb->md_magic != MD_SB_MAGIC &&
+                   sb->md_magic == cpu_to_le32(MD_SB_MAGIC) &&
+                   (le32_to_cpu(sb->major_version) > 0 ||
+                    le32_to_cpu(sb->minor_version) > 90)) {
+                       int i;
+                       u32 *sbp = (u32 *) sb;
+
+                       for (i = 0; i < MD_SB_WORDS; i++, sbp++)
+                               le32_to_cpus(sbp);
+               }
+
+               rdev->csum_valid = calc_sb_csum(sb) == sb->sb_csum;
+
+               if (sb->major_version == 0 && sb->minor_version <= 90 &&
+                   sizeof(mdp_old_super_t) >= sizeof(mdp_super_t)) {
+                       /* Uh oh, 64bit events member moved half of the superblock */
+                       md_write_events(get_unaligned(&((mdp_old_super_t *)sb)->events), sb);
+                       memmove(sb->gstate_sreserved, ((mdp_old_super_t *)sb)->gstate_sreserved,
+                               (long)sb + MD_SB_BYTES - (long)(((mdp_old_super_t *)sb)->gstate_sreserved));
+               }
        } else {
                printk (NO_SB,partition_name(rdev->dev));
                goto abort;
        }
-       printk(" [events: %08lx]\n", (unsigned long)get_unaligned(&rdev->sb->events));
+       printk(" [events: %08lx]\n", (unsigned long)md_read_events(rdev->sb));
        ret = 0;
 abort:
        if (bh)
@@ -532,17 +575,6 @@ abort:
        return ret;
 }
 
-static unsigned int calc_sb_csum (mdp_super_t * sb)
-{
-       unsigned int disk_csum, csum;
-
-       disk_csum = sb->sb_csum;
-       sb->sb_csum = 0;
-       csum = csum_partial((void *)sb, MD_SB_BYTES, 0);
-       sb->sb_csum = disk_csum;
-       return csum;
-}
-
 /*
  * Check one RAID superblock for generic plausibility
  */
@@ -569,7 +601,7 @@ static int check_disk_sb (mdk_rdev_t * r
                goto abort;
        }
 
-       if (calc_sb_csum(sb) != sb->sb_csum)
+       if (!rdev->csum_valid)
                printk(BAD_CSUM, partition_name(rdev->dev));
        ret = 0;
 abort:
@@ -767,7 +799,7 @@ static void print_sb(mdp_super_t *sb)
        printk("     UT:%08x ST:%d AD:%d WD:%d FD:%d SD:%d CSUM:%08x E:%08lx\n",
                sb->utime, sb->state, sb->active_disks, sb->working_disks,
                sb->failed_disks, sb->spare_disks,
-               sb->sb_csum, (unsigned long)get_unaligned(&sb->events));
+               sb->sb_csum, (unsigned long)md_read_events(sb));
 
        for (i = 0; i < MD_SB_DISKS; i++) {
                mdp_disk_t *desc;
@@ -827,16 +859,16 @@ static int sb_equal ( mdp_super_t *sb1, 
        int ret;
        mdp_super_t *tmp1, *tmp2;
 
-       tmp1 = kmalloc(sizeof(*tmp1),GFP_KERNEL);
-       tmp2 = kmalloc(sizeof(*tmp2),GFP_KERNEL);
+       tmp1 = kmalloc(MD_SB_GENERIC_CONSTANT_WORDS * 4,GFP_KERNEL);
+       tmp2 = kmalloc(MD_SB_GENERIC_CONSTANT_WORDS * 4,GFP_KERNEL);
 
        if (!tmp1 || !tmp2) {
                ret = 0;
                goto abort;
        }
 
-       *tmp1 = *sb1;
-       *tmp2 = *sb2;
+       memcpy(tmp1, sb1, MD_SB_GENERIC_CONSTANT_WORDS * 4);
+       memcpy(tmp2, sb2, MD_SB_GENERIC_CONSTANT_WORDS * 4);
 
        /*
         * nr_disks is not constant
@@ -935,7 +967,25 @@ static int write_disk_sb(mdk_rdev_t * rd
        }
        memset(bh->b_data,0,bh->b_size);
        sb = (mdp_super_t *) bh->b_data;
-       memcpy(sb, rdev->sb, MD_SB_BYTES);
+       if (rdev->sb->major_version == 0 &&
+           rdev->sb->minor_version <= 90 &&
+           sizeof(mdp_old_super_t) >= sizeof(mdp_super_t)) {
+               /* Uh oh, 64bit events member moved half of the superblock */
+               memcpy(sb, rdev->sb, (long)&sb->eventslo - (long)sb);
+               put_unaligned(md_read_events(rdev->sb), &((mdp_old_super_t *)sb)->events);
+               memcpy(((mdp_old_super_t *)sb)->gstate_sreserved, rdev->sb->gstate_sreserved,
+                      (long)sb + MD_SB_BYTES - (long)(((mdp_old_super_t *)sb)->gstate_sreserved));
+       } else
+               memcpy(sb, rdev->sb, MD_SB_BYTES);
+       sb->sb_csum = calc_sb_csum(sb);
+       if (rdev->sb->major_version > 0 ||
+           rdev->sb->minor_version > 90) {
+               int i;
+               u32 *sbp = (u32 *) sb;
+
+               for (i = 0; i < MD_SB_WORDS; i++, sbp++)
+                       cpu_to_le32s(sbp);
+       }
 
        mark_buffer_uptodate(bh, 1);
        mark_buffer_dirty(bh, 1);
@@ -985,9 +1035,9 @@ static int sync_sbs(mddev_t * mddev)
                if (rdev->faulty)
                        continue;
                sb = rdev->sb;
-               *sb = *mddev->sb;
+               memcpy(sb, mddev->sb, MD_SB_BYTES);
                set_this_disk(mddev, rdev);
-               sb->sb_csum = calc_sb_csum(sb);
+               rdev->csum_valid = 1;
        }
        return 0;
 }
@@ -1001,9 +1051,9 @@ int md_update_sb(mddev_t * mddev)
 
 repeat:
        mddev->sb->utime = CURRENT_TIME;
-       ev = get_unaligned(&mddev->sb->events);
+       ev = md_read_events(mddev->sb);
        ++ev;
-       put_unaligned(ev,&mddev->sb->events);
+       md_write_events(ev,mddev->sb);
        if (ev == (__u64)0) {
                /*
                 * oops, this 64-bit counter should never wrap.
@@ -1012,7 +1062,7 @@ repeat:
                 */
                MD_BUG();
                --ev;
-               put_unaligned(ev,&mddev->sb->events);
+               md_write_events(ev,mddev->sb);
        }
        sync_sbs(mddev);
 
@@ -1038,7 +1088,7 @@ repeat:
                printk("%s ", partition_name(rdev->dev));
                if (!rdev->faulty) {
                        printk("[events: %08lx]",
-                              (unsigned long)get_unaligned(&rdev->sb->events));
+                              (unsigned long)md_read_events(rdev->sb));
                        err += write_disk_sb(rdev);
                } else
                        printk(")\n");
@@ -1124,7 +1174,8 @@ static int md_import_device (kdev_t newd
                rdev->old_dev = MKDEV(rdev->sb->this_disk.major,
                                        rdev->sb->this_disk.minor);
                rdev->desc_nr = rdev->sb->this_disk.number;
-       }
+       } else
+               rdev->csum_valid = 1;
        md_list_add(&rdev->all, &all_raid_disks);
        MD_INIT_LIST_HEAD(&rdev->pending);
 
@@ -1220,16 +1271,16 @@ static int analyze_sbs (mddev_t * mddev)
                 * only as a last resort. (decrease it's age by
                 * one event)
                 */
-               if (calc_sb_csum(rdev->sb) != rdev->sb->sb_csum) {
-                       __u64 ev = get_unaligned(&rdev->sb->events);
+               if (!rdev->csum_valid) {
+                       __u64 ev = md_read_events(rdev->sb);
                        if (ev != (__u64)0) {
                                --ev;
-                               put_unaligned(ev,&rdev->sb->events);
+                               md_write_events(ev,rdev->sb);
                        }
                }
 
                printk("%s's event counter: %08lx\n", partition_name(rdev->dev),
-                      (unsigned long)get_unaligned(&rdev->sb->events));
+                      (unsigned long)md_read_events(rdev->sb));
                if (!freshest) {
                        freshest = rdev;
                        continue;
@@ -1237,8 +1288,8 @@ static int analyze_sbs (mddev_t * mddev)
                /*
                 * Find the newest superblock version
                 */
-               ev1 = get_unaligned(&rdev->sb->events);
-               ev2 = get_unaligned(&freshest->sb->events);
+               ev1 = md_read_events(rdev->sb);
+               ev2 = md_read_events(freshest->sb);
                if (ev1 != ev2) {
                        out_of_date = 1;
                        if (ev1 > ev2)
@@ -1249,7 +1300,7 @@ static int analyze_sbs (mddev_t * mddev)
                printk(OUT_OF_DATE);
                printk("freshest: %s\n", partition_name(freshest->dev));
        }
-       memcpy (sb, freshest->sb, sizeof(*sb));
+       memcpy (sb, freshest->sb, MD_SB_BYTES);
 
        /*
         * at this point we have picked the 'best' superblock
@@ -1262,8 +1313,8 @@ static int analyze_sbs (mddev_t * mddev)
                 * Kick all non-fresh devices faulty
                 */
                __u64 ev1, ev2;
-               ev1 = get_unaligned(&rdev->sb->events);
-               ev2 = get_unaligned(&sb->events);
+               ev1 = md_read_events(rdev->sb);
+               ev2 = md_read_events(sb);
                ++ev1;
                if (ev1 < ev2) {
                        printk("md: kicking non-fresh %s from array!\n",
@@ -1283,8 +1334,8 @@ static int analyze_sbs (mddev_t * mddev)
                        MD_BUG();
                        goto abort;
                }
-               ev1 = get_unaligned(&rdev->sb->events);
-               ev2 = get_unaligned(&sb->events);
+               ev1 = md_read_events(rdev->sb);
+               ev2 = md_read_events(sb);
                ev3 = ev2;
                --ev3;
                if ((rdev->dev != rdev->old_dev) &&
@@ -1451,7 +1502,7 @@ abort:
 
 static int device_size_calculation (mddev_t * mddev)
 {
-       int data_disks = 0, persistent;
+       int data_disks, persistent;
        unsigned int readahead;
        mdp_super_t *sb = mddev->sb;
        struct md_list_head *tmp;
@@ -1463,6 +1514,7 @@ static int device_size_calculation (mdde
         * because device size has to be modulo chunk_size)
         */ 
        persistent = !mddev->sb->not_persistent;
+       mddev->reserved = calc_dev_reserved(mddev);
        ITERATE_RDEV(mddev,rdev,tmp) {
                if (rdev->faulty)
                        continue;
@@ -1480,23 +1532,18 @@ static int device_size_calculation (mdde
                }
        }
 
+       data_disks = 1;
        switch (sb->level) {
                case -3:
-                       data_disks = 1;
-                       break;
                case -2:
-                       data_disks = 1;
-                       break;
-               case -1:
-                       zoned_raid_size(mddev);
-                       data_disks = 1;
                        break;
                case 0:
-                       zoned_raid_size(mddev);
                        data_disks = sb->raid_disks;
+                       /* Fall through */
+               case -1:
+                       zoned_raid_size(mddev);
                        break;
                case 1:
-                       data_disks = 1;
                        break;
                case 4:
                case 5:
@@ -2086,7 +2133,7 @@ static int get_version (void * arg)
 }
 
 #define SET_FROM_SB(x) info.x = mddev->sb->x
-static int get_array_info (mddev_t * mddev, void * arg)
+static int get_array_info (mddev_t * mddev, unsigned int cmd, void * arg)
 {
        mdu_array_info_t info;
 
@@ -2103,6 +2150,7 @@ static int get_array_info (mddev_t * mdd
        SET_FROM_SB(raid_disks);
        SET_FROM_SB(md_minor);
        SET_FROM_SB(not_persistent);
+       SET_FROM_SB(reserved_bytes);
 
        SET_FROM_SB(utime);
        SET_FROM_SB(state);
@@ -2114,7 +2162,10 @@ static int get_array_info (mddev_t * mdd
        SET_FROM_SB(layout);
        SET_FROM_SB(chunk_size);
 
-       if (md_copy_to_user(arg, &info, sizeof(info)))
+       if (cmd == OLD_GET_ARRAY_INFO) {
+               if (md_copy_to_user(arg, &info, sizeof(mdu_old_array_info_t)))
+                       return -EFAULT;
+       } else if (md_copy_to_user(arg, &info, sizeof(info)))
                return -EFAULT;
 
        return 0;
@@ -2403,7 +2454,7 @@ abort_export:
 }
 
 #define SET_SB(x) mddev->sb->x = info.x
-static int set_array_info (mddev_t * mddev, void * arg)
+static int set_array_info (mddev_t * mddev, unsigned int cmd, void * arg)
 {
        mdu_array_info_t info;
 
@@ -2413,14 +2464,30 @@ static int set_array_info (mddev_t * mdd
                return -EBUSY;
        }
 
-       if (md_copy_from_user(&info, arg, sizeof(info)))
+       if (cmd == OLD_SET_ARRAY_INFO) {
+               memset(&info, 0, sizeof(info));
+               if (md_copy_from_user(&info, arg,
+                                       sizeof(mdu_old_array_info_t)))
+                       return -EFAULT;
+       } else if (md_copy_from_user(&info, arg, sizeof(info)))
                return -EFAULT;
 
        if (alloc_array_sb(mddev))
                return -ENOMEM;
 
        mddev->sb->major_version = MD_MAJOR_VERSION;
-       mddev->sb->minor_version = MD_MINOR_VERSION;
+       if (MD_MAJOR_VERSION == 0 && MD_MINOR_VERSION == 91 &&
+           sizeof(mdp_old_super_t) == sizeof(mdp_super_t) &&
+           !info.reserved_bytes)
+               /* Change between 0.90 and 0.91 is relevant only
+                * to architectures where 0.90 superblock was longer
+                * than MD_SB_BYTES (unless non-zero reserved_bytes is
+                * used), so lets do users of ia32 a favor
+                * and stay up and down compatible.
+                */
+               mddev->sb->minor_version = 90;
+       else
+               mddev->sb->minor_version = MD_MINOR_VERSION;
        mddev->sb->patch_version = MD_PATCHLEVEL_VERSION;
        mddev->sb->ctime = CURRENT_TIME;
 
@@ -2440,6 +2507,9 @@ static int set_array_info (mddev_t * mdd
        SET_SB(layout);
        SET_SB(chunk_size);
 
+       if (info.major_version > 0 || info.minor_version >= 91)
+               SET_SB(reserved_bytes);
+
        mddev->sb->md_magic = MD_SB_MAGIC;
 
        /*
@@ -2567,6 +2637,7 @@ static int md_ioctl (struct inode *inode
        switch (cmd)
        {
                case SET_ARRAY_INFO:
+               case OLD_SET_ARRAY_INFO:
                case START_ARRAY:
                        if (mddev) {
                                printk("array md%d already exists!\n",
@@ -2580,6 +2651,7 @@ static int md_ioctl (struct inode *inode
        switch (cmd)
        {
                case SET_ARRAY_INFO:
+               case OLD_SET_ARRAY_INFO:
                        mddev = alloc_mddev(dev);
                        if (!mddev) {
                                err = -ENOMEM;
@@ -2593,7 +2665,7 @@ static int md_ioctl (struct inode *inode
                                printk("ioctl, reason %d, cmd %d\n", err, cmd);
                                goto abort;
                        }
-                       err = set_array_info(mddev, (void *)arg);
+                       err = set_array_info(mddev, cmd, (void *)arg);
                        if (err) {
                                printk("couldnt set array info. %d\n", err);
                                goto abort;
@@ -2635,7 +2707,8 @@ static int md_ioctl (struct inode *inode
        switch (cmd)
        {
                case GET_ARRAY_INFO:
-                       err = get_array_info(mddev, (void *)arg);
+               case OLD_GET_ARRAY_INFO:
+                       err = get_array_info(mddev, cmd, (void *)arg);
                        goto done_unlock;
 
                case GET_DISK_INFO:
@@ -3895,7 +3968,7 @@ md__initfunc(void do_md_setup(char *str,
                chunk_size = ints[i++]; /* Chunksize  */
                fault = ints[i++]; /* Faultlevel */
    
-               pers = pers | chunk_size | (fault << FAULT_SHIFT);   
+               pers = pers | chunk_size | (fault << FAULT_SHIFT);
    
                while( str && (dev = name_to_kdev_t(str))) {
                        do_md_add (minor, dev);
@@ -4018,7 +4091,8 @@ static void md_geninit (struct gendisk *
                md_gendisk.part[i].nr_sects = 0;
        }
 
-       printk("md.c: sizeof(mdp_super_t) = %d\n", (int)sizeof(mdp_super_t));
+       if (sizeof(mdp_super_t) != MD_SB_BYTES)
+               panic("md: sizeof(mdp_super_t) = %d\n", (int)sizeof(mdp_super_t));
 
        blksize_size[MD_MAJOR] = md_blocksizes;
        md_set_global_readahead(md_maxreadahead);
@@ -4027,4 +4101,3 @@ static void md_geninit (struct gendisk *
        proc_register(&proc_root, &proc_md);
 #endif
 }
-
--- linux/drivers/block/linear.c.jj     Mon Oct  4 14:21:20 1999
+++ linux/drivers/block/linear.c        Thu Mar  9 15:09:46 2000
@@ -151,7 +151,7 @@ static int linear_map (mddev_t *mddev, k
                block, kdevname(tmp_dev->dev), tmp_dev->size, tmp_dev->offset);
   
        *rdev = tmp_dev->dev;
-       *rsector = (block - tmp_dev->offset) << 1;
+       *rsector = (block - tmp_dev->offset + mddev->reserved) << 1;
 
        return 0;
 }
--- linux/drivers/block/raid0.c.jj      Mon Jan 24 11:36:42 2000
+++ linux/drivers/block/raid0.c Fri Mar 10 14:58:01 2000
@@ -143,8 +143,8 @@ static int raid0_run (mddev_t *mddev)
        printk("raid0 : nb_zone is %d.\n", nb_zone);
        conf->nr_zones = nb_zone;
 
-       printk("raid0 : Allocating %d bytes for hash.\n",
-                               sizeof(struct raid0_hash)*nb_zone);
+       printk("raid0 : Allocating %ld bytes for hash.\n",
+                               (long)sizeof(struct raid0_hash)*nb_zone);
 
        conf->hash_table = vmalloc (sizeof (struct raid0_hash)*nb_zone);
        if (!conf->hash_table)
@@ -229,7 +229,7 @@ static int raid0_map (mddev_t *mddev, kd
        struct strip_zone *zone;
        mdk_rdev_t *tmp_dev;
        int blk_in_chunk, chunksize_bits, chunk, chunk_size;
-       long block, rblock;
+       unsigned long block, rblock;
 
        chunk_size = mddev->param.chunk_size >> 10;
        chunksize_bits = ffz(~chunk_size);
@@ -237,7 +237,7 @@ static int raid0_map (mddev_t *mddev, kd
        hash = conf->hash_table + block / conf->smallest->size;
 
        if (hash - conf->hash_table > conf->nr_zones) {
-               printk(KERN_DEBUG "raid0_map: invalid block %ul\n", block);
+               printk(KERN_DEBUG "raid0_map: invalid block %lu\n", block);
                return -1;
        }
 
@@ -261,7 +261,7 @@ static int raid0_map (mddev_t *mddev, kd
        blk_in_chunk = block & (chunk_size -1);
        chunk = (block - zone->zone_offset) / (zone->nb_dev << chunksize_bits);
        tmp_dev = zone->dev[(block >> chunksize_bits) % zone->nb_dev];
-       rblock = (chunk << chunksize_bits) + blk_in_chunk + zone->dev_offset;
+       rblock = (chunk << chunksize_bits) + blk_in_chunk + zone->dev_offset + mddev->reserved;
   
        *rdev = tmp_dev->dev;
        *rsector = rblock << 1;
--- linux/drivers/block/raid5.c.jj      Mon Oct  4 14:22:12 1999
+++ linux/drivers/block/raid5.c Thu Mar  9 17:24:46 2000
@@ -586,7 +586,7 @@ static void raid5_build_block (struct st
        mddev_t *mddev = conf->mddev;
        char *b_data;
        kdev_t dev = mddev_to_kdev(mddev);
-       int block = sh->sector / (sh->size >> 9);
+       int block = sh->sector / (sh->size >> 9) + (mddev->reserved << 1);
 
        b_data = ((volatile struct buffer_head *) bh)->b_data;
        memset (bh, 0, sizeof (struct buffer_head));
@@ -1462,7 +1462,7 @@ static int __check_consistency (mddev_t 
 
 static int check_consistency (mddev_t *mddev)
 {
-       if (__check_consistency(mddev, 0))
+       if (__check_consistency(mddev, mddev->reserved))
 /*
  * We are not checking this currently, as it's legitimate to have
  * an inconsistent array, at creation time.
--- linux/drivers/block/raid1.c.jj      Mon Oct  4 14:22:09 1999
+++ linux/drivers/block/raid1.c Thu Mar 16 18:15:08 2000
@@ -40,7 +40,7 @@ static void * raid1_kmalloc (int size)
         * simply can not afford to fail an allocation because
         * there is no failure return path (eg. make_request())
         */
-       while (!(ptr = kmalloc (sizeof (raid1_conf_t), GFP_KERNEL)))
+       while (!(ptr = kmalloc (size, GFP_KERNEL)))
                printk ("raid1: out of memory, retrying...\n");
 
        memset(ptr, 0, size);
@@ -266,6 +266,7 @@ static int raid1_make_request (mddev_t *
                memcpy(bh_req, bh, sizeof(*bh));
                bh_req->b_end_io = raid1_end_request;
                bh_req->b_dev_id = r1_bh;
+               bh_req->b_rsector += (mddev->reserved << 1);
                map_and_make_request (rw, bh_req);
                return 0;
        }
@@ -311,7 +312,7 @@ static int raid1_make_request (mddev_t *
                mirror_bh[i]->b_blocknr    = bh->b_blocknr;
                mirror_bh[i]->b_dev        = bh->b_dev;
                mirror_bh[i]->b_rdev       = conf->mirrors[i].dev;
-               mirror_bh[i]->b_rsector    = bh->b_rsector;
+               mirror_bh[i]->b_rsector    = bh->b_rsector + (mddev->reserved << 1);
                mirror_bh[i]->b_state      = (1<<BH_Req) | (1<<BH_Dirty);
                if (lowprio)
                        mirror_bh[i]->b_state |= (1<<BH_LowPrio);
@@ -866,7 +867,7 @@ static int __check_consistency (mddev_t 
 
 static int check_consistency (mddev_t *mddev)
 {
-       if (__check_consistency(mddev, 0))
+       if (__check_consistency(mddev, mddev->reserved))
 /*
  * we do not do this currently, as it's perfectly possible to
  * have an inconsistent array when it's freshly created. Only
--- linux/include/linux/raid/md_p.h.jj  Mon Oct  4 15:41:29 1999
+++ linux/include/linux/raid/md_p.h     Fri Mar 10 09:55:05 2000
@@ -115,8 +115,9 @@ typedef struct mdp_superblock_s {
        __u32 not_persistent;   /* 12 does it have a persistent superblock    */
        __u32 set_uuid1;        /* 13 Raid set identifier #2                  */
        __u32 set_uuid2;        /* 14 Raid set identifier #3                  */
-       __u32 set_uuid3;        /* 14 Raid set identifier #4                  */
-       __u32 gstate_creserved[MD_SB_GENERIC_CONSTANT_WORDS - 16];
+       __u32 set_uuid3;        /* 15 Raid set identifier #4                  */
+       __u32 reserved_bytes;   /* 16 # of reserv. bytes at start of disks    */
+       __u32 gstate_creserved[MD_SB_GENERIC_CONSTANT_WORDS - 17];
 
        /*
         * Generic state information
@@ -128,7 +129,8 @@ typedef struct mdp_superblock_s {
        __u32 failed_disks;     /*  4 Number of failed disks                  */
        __u32 spare_disks;      /*  5 Number of spare disks                   */
        __u32 sb_csum;          /*  6 checksum of the whole superblock        */
-       __u64 events;           /*  7 number of superblock updates (64-bit!)  */
+       __u32 eventslo;         /*  7 number of superblock updates (low bits) */
+       __u32 eventshi;         /*  8 number of superblock updates (high bits)*/
        __u32 gstate_sreserved[MD_SB_GENERIC_STATE_WORDS - 9];
 
        /*
@@ -156,6 +158,47 @@ typedef struct mdp_superblock_s {
        mdp_disk_t this_disk;
 
 } mdp_super_t;
+
+typedef struct mdp_old_superblock_s {
+       /*
+        * Constant generic information
+        */
+       __u32 gstate_c[MD_SB_GENERIC_CONSTANT_WORDS];
+
+       /*
+        * Generic state information
+        */
+       __u32 utime;            /*  0 Superblock update time                  */
+       __u32 state;            /*  1 State bits (clean, ...)                 */
+       __u32 active_disks;     /*  2 Number of currently active disks        */
+       __u32 working_disks;    /*  3 Number of working disks                 */
+       __u32 failed_disks;     /*  4 Number of failed disks                  */
+       __u32 spare_disks;      /*  5 Number of spare disks                   */
+       __u32 sb_csum;          /*  6 checksum of the whole superblock        */
+       __u64 events;           /*  7(8) number of superblock updates (64bit) */
+       __u32 gstate_sreserved[MD_SB_GENERIC_STATE_WORDS - 9];
+
+       /*
+        * Personality information
+        */
+       __u32 pstate_c[MD_SB_PERSONALITY_WORDS];
+
+       /*
+        * Disks information
+        */
+       mdp_disk_t disks[MD_SB_DISKS];
+
+       /*
+        * Reserved
+        */
+       __u32 reserved[MD_SB_RESERVED_WORDS];
+
+       /*
+        * Active descriptor
+        */
+       mdp_disk_t this_disk;
+
+} mdp_old_super_t;
 
 #endif _MD_P_H
 
--- linux/include/linux/raid/md.h.jj    Wed Feb 16 08:06:11 2000
+++ linux/include/linux/raid/md.h       Fri Mar 10 14:30:47 2000
@@ -57,7 +57,7 @@
  * Different patchlevel versions are downward and upward compatible.
  */
 #define MD_MAJOR_VERSION                0
-#define MD_MINOR_VERSION                90
+#define MD_MINOR_VERSION                91
 #define MD_PATCHLEVEL_VERSION           0
 
 extern int md_size[MAX_MD_DEVS];
--- linux/include/linux/raid/md_k.h.jj  Mon Oct  4 15:41:29 1999
+++ linux/include/linux/raid/md_k.h     Thu Mar  9 14:15:22 2000
@@ -169,6 +169,7 @@ struct mdk_rdev_s
 
        mdp_super_t *sb;
        int sb_offset;
+       int csum_valid;
 
        int faulty;                     /* if faulty do not issue IO requests */
        int desc_nr;                    /* descriptor index in the superblock */
@@ -197,6 +198,7 @@ struct mddev_s
        int                             sb_dirty;
        mdu_param_t                     param;
        int                             ro;
+       int                             reserved;
        unsigned int                    curr_resync;
        unsigned long                   resync_start;
        char                            *name;
--- linux/include/linux/raid/md_u.h.jj  Mon Oct  4 15:41:29 1999
+++ linux/include/linux/raid/md_u.h     Thu Mar  9 14:50:12 2000
@@ -20,6 +20,7 @@
 /* status */
 #define RAID_VERSION           _IOR (MD_MAJOR, 0x10, mdu_version_t)
 #define GET_ARRAY_INFO         _IOR (MD_MAJOR, 0x11, mdu_array_info_t)
+#define OLD_GET_ARRAY_INFO     _IOR (MD_MAJOR, 0x11, mdu_old_array_info_t)
 #define GET_DISK_INFO          _IOR (MD_MAJOR, 0x12, mdu_disk_info_t)
 #define PRINT_RAID_DEBUG       _IO (MD_MAJOR, 0x13)
 
@@ -28,6 +29,7 @@
 #define ADD_NEW_DISK           _IOW (MD_MAJOR, 0x21, mdu_disk_info_t)
 #define HOT_REMOVE_DISK                _IO (MD_MAJOR, 0x22)
 #define SET_ARRAY_INFO         _IOW (MD_MAJOR, 0x23, mdu_array_info_t)
+#define OLD_SET_ARRAY_INFO     _IOW (MD_MAJOR, 0x23, mdu_old_array_info_t)
 #define SET_DISK_INFO          _IO (MD_MAJOR, 0x24)
 #define WRITE_RAID_INFO                _IO (MD_MAJOR, 0x25)
 #define UNPROTECT_ARRAY                _IO (MD_MAJOR, 0x26)
@@ -77,9 +79,53 @@ typedef struct mdu_array_info_s {
         * Personality information
         */
        int layout;             /*  0 the array's physical layout             */
-       int chunk_size; /*  1 chunk size in bytes                     */
+       int chunk_size;         /*  1 chunk size in bytes                     */
+
+       int reserved_bytes;     /*  Number of reserved bytes at the beginning */
+
+       /*
+        * The meaning of these fields can be specified later on
+        * and will be dependent on major/minor version specified
+        * in this structure.
+        * This is so that the ioctl number does not have to change with
+        * every field addition.
+        */
+       int reserved[32 - 19];
 
 } mdu_array_info_t;
+
+typedef struct mdu_old_array_info_s {
+       /*
+        * Generic constant information
+        */
+       int major_version;
+       int minor_version;
+       int patch_version;
+       int ctime;
+       int level;
+       int size;
+       int nr_disks;
+       int raid_disks;
+       int md_minor;
+       int not_persistent;
+
+       /*
+        * Generic state information
+        */
+       int utime;              /*  0 Superblock update time                  */
+       int state;              /*  1 State bits (clean, ...)                 */
+       int active_disks;       /*  2 Number of currently active disks        */
+       int working_disks;      /*  3 Number of working disks                 */
+       int failed_disks;       /*  4 Number of failed disks                  */
+       int spare_disks;        /*  5 Number of spare disks                   */
+
+       /*
+        * Personality information
+        */
+       int layout;             /*  0 the array's physical layout             */
+       int chunk_size;         /*  1 chunk size in bytes                     */
+
+} mdu_old_array_info_t;
 
 typedef struct mdu_disk_info_s {
        /*
--- linux/include/linux/raid/raid1.h.jj Fri Mar 10 14:32:31 2000
+++ linux/include/linux/raid/raid1.h    Thu Mar 16 14:29:13 2000
@@ -29,8 +29,8 @@ struct raid1_private_data {
        int                     last_used;
        unsigned long           next_sect;
        int                     sect_count;
-       mdk_thread_t            *thread, *resync_thread;
        int                     resync_mirrors;
+       mdk_thread_t            *thread, *resync_thread;
        struct mirror_info      *spare;
 };
 
--- raidtools-0.90/md-int.h.jj  Tue Aug  3 10:05:53 1999
+++ raidtools-0.90/md-int.h     Fri Mar 10 14:10:31 2000
@@ -20,6 +20,7 @@
 /* don't include the kernel RAID header! */
 #define _MD_H
 
+typedef unsigned long long md_u64;
 typedef unsigned int md_u32;
 typedef unsigned short md_u16;
 typedef unsigned char md_u8;
@@ -52,6 +53,7 @@ struct md_version {
 /* status */
 #define RAID_VERSION            _IOR (MD_MAJOR, 0x10, struct md_version)
 #define GET_ARRAY_INFO          _IOR (MD_MAJOR, 0x11, md_array_info_t)
+#define OLD_GET_ARRAY_INFO     _IOR (MD_MAJOR, 0x11, md_old_array_info_t)
 #define GET_DISK_INFO           _IOR (MD_MAJOR, 0x12, md_disk_info_t)
 #define PRINT_RAID_DEBUG        _IO (MD_MAJOR, 0x13)
 
@@ -60,6 +62,8 @@ struct md_version {
 #define ADD_NEW_DISK            _IOW (MD_MAJOR, 0x21, md_disk_info_t)
 #define HOT_REMOVE_DISK         _IO (MD_MAJOR, 0x22)
 #define SET_ARRAY_INFO          _IOW (MD_MAJOR, 0x23, md_array_info_t)
+#define OLD_SET_ARRAY_INFO     _IOW (MD_MAJOR, 0x23, md_old_array_info_t)
+
 #define SET_DISK_INFO           _IO (MD_MAJOR, 0x24)
 #define WRITE_RAID_INFO         _IO (MD_MAJOR, 0x25)
 #define UNPROTECT_ARRAY         _IO (MD_MAJOR, 0x26)
@@ -176,14 +180,19 @@ typedef struct md_superblock_s {
        md_u32 minor_version;   /*  2 minor version ...                       */
        md_u32 patch_version;   /*  3 patchlevel version ...                  */
        md_u32 gvalid_words;    /*  4 Number of used words in this section    */
-       md_u32 set_magic;       /*  5 Raid set identifier                     */
+       md_u32 set_uuid0;       /*  5 Raid set identifier                     */
        md_u32 ctime;           /*  6 Creation time                           */
        md_u32 level;           /*  7 Raid personality                        */
        md_u32 size;            /*  8 Apparent size of each individual disk   */
        md_u32 nr_disks;        /*  9 total disks in the raid set             */
        md_u32 raid_disks;      /* 10 disks in a fully functional raid set    */
        md_u32 md_minor;        /* 11 preferred MD minor device number        */
-       md_u32 gstate_creserved[MD_SB_GENERIC_CONSTANT_WORDS - 12];
+       md_u32 not_persistent;  /* 12 does it have a persistent superblock    */
+       md_u32 set_uuid1;       /* 13 Raid set identifier #2                  */
+       md_u32 set_uuid2;       /* 14 Raid set identifier #3                  */
+       md_u32 set_uuid3;       /* 15 Raid set identifier #4                  */
+       md_u32 reserved_bytes;  /* 16 # of reserv. bytes at start of disks    */
+       md_u32 gstate_creserved[MD_SB_GENERIC_CONSTANT_WORDS - 17];
 
        /*
         * Generic state information
@@ -194,14 +203,19 @@ typedef struct md_superblock_s {
        md_u32 working_disks;   /*  3 Number of working disks                 */
        md_u32 failed_disks;    /*  4 Number of failed disks                  */
        md_u32 spare_disks;     /*  5 Number of spare disks                   */
-       md_u32 gstate_sreserved[MD_SB_GENERIC_STATE_WORDS - 6];
+       md_u32 sb_csum;         /*  6 checksum of the whole superblock        */
+       md_u32 eventslo;        /*  7 number of superblock updates (low bits) */
+       md_u32 eventshi;        /*  8 number of superblock updates (high bits)*/
+       md_u32 gstate_sreserved[MD_SB_GENERIC_STATE_WORDS - 9];
 
        /*
         * Personality information
         */
        md_u32 layout;          /*  0 the array's physical layout             */
        md_u32 chunk_size;      /*  1 chunk size in bytes                     */
-       md_u32 pstate_reserved[MD_SB_PERSONALITY_WORDS - 2];
+       md_u32 root_pv;         /*  2 LV root PV                              */
+       md_u32 root_block;      /*  3 LV root block                           */
+       md_u32 pstate_reserved[MD_SB_PERSONALITY_WORDS - 4];
 
        /*
         * Disks information
@@ -220,6 +234,49 @@ typedef struct md_superblock_s {
 
 } md_superblock_t;
 
+typedef struct md_old_superblock_s {
+       /*
+        * Constant generic information
+        */
+       md_u32 gstate_c[MD_SB_GENERIC_CONSTANT_WORDS];
+
+       /*
+        * Generic state information
+        */
+       md_u32 utime;           /*  0 Superblock update time                  */
+       md_u32 state;           /*  1 State bits (clean, ...)                 */
+       md_u32 active_disks;    /*  2 Number of currently active disks        */
+       md_u32 working_disks;   /*  3 Number of working disks                 */
+       md_u32 failed_disks;    /*  4 Number of failed disks                  */
+       md_u32 spare_disks;     /*  5 Number of spare disks                   */
+       md_u32 sb_csum;         /*  6 checksum of the whole superblock        */
+       md_u64 events;          /*  7(8) number of superblock updates (64bit) */
+       md_u32 gstate_sreserved[MD_SB_GENERIC_STATE_WORDS - 9];
+
+       /*
+        * Personality information
+        */
+       md_u32 pstate_c[MD_SB_PERSONALITY_WORDS];
+
+       /*
+        * Disks information
+        */
+       md_descriptor_t disks[MD_SB_DISKS];
+
+       /*
+        * Reserved
+        */
+       md_u32 reserved[MD_SB_RESERVED_WORDS];
+
+       /*
+        * Active descriptor
+        */
+       md_descriptor_t this_disk;
+
+} md_old_superblock_t;
+
+
+
 /*
  * options passed in raidstart:
  */
@@ -264,7 +321,51 @@ typedef struct md_array_info_s {
        md_u32 layout;          /*  0 the array's physical layout             */
        md_u32 chunk_size;      /*  1 chunk size in bytes                     */
 
+       md_u32 reserved_bytes;  /*  Number of reserved bytes at the beginning */
+
+       /*
+        * The meaning of these fields can be specified later on
+        * and will be dependent on major/minor version specified
+        * in this structure.
+        * This is so that the ioctl number does not have to change with
+        * every field addition.
+        */
+       md_u32 reserved[32 - 19];
+  
 } md_array_info_t;
+
+typedef struct md_old_array_info_s {
+       /*
+        * Generic constant information
+        */
+       md_u32 major_version;
+       md_u32 minor_version;
+       md_u32 patch_version;
+       md_u32 ctime;
+       md_u32 level;
+       md_u32 size;
+       md_u32 nr_disks;
+       md_u32 raid_disks;
+       md_u32 md_minor;
+       md_u32 not_persistent;
+
+       /*
+        * Generic state information
+        */
+       md_u32 utime;           /*  0 Superblock update time                  */
+       md_u32 state;           /*  1 State bits (clean, ...)                 */
+       md_u32 active_disks;    /*  2 Number of currently active disks        */
+       md_u32 working_disks;   /*  3 Number of working disks                 */
+       md_u32 failed_disks;    /*  4 Number of failed disks                  */
+       md_u32 spare_disks;     /*  5 Number of spare disks                   */
+
+       /*
+        * Personality information
+        */
+       md_u32 layout;          /*  0 the array's physical layout             */
+       md_u32 chunk_size;      /*  1 chunk size in bytes                     */
+
+} md_old_array_info_t;
 
 typedef struct md_disk_info_s {
        /*
--- raidtools-0.90/common.h.jj  Tue Aug  3 10:05:53 1999
+++ raidtools-0.90/common.h     Thu Feb 24 12:16:29 2000
@@ -39,7 +39,7 @@ typedef int kdev_t;
 #define RAID_CONFIG                    "/etc/raidtab"
 
 #define MKRAID_MAJOR_VERSION            (0)
-#define MKRAID_MINOR_VERSION            (90)
+#define MKRAID_MINOR_VERSION            (91)
 #define MKRAID_PATCHLEVEL_VERSION       (0)
 
 extern int do_quiet_flag;
--- raidtools-0.90/raid_io.c.jj Tue Aug  3 10:05:53 1999
+++ raidtools-0.90/raid_io.c    Fri Mar 10 17:56:31 2000
@@ -16,6 +16,7 @@
 #include <linux/fs.h>          /* for BLKGETSIZE */
 #endif
 #include <sys/sysmacros.h>
+#include <endian.h>
 
 #ifndef BLOCK_SIZE
 #define BLOCK_SIZE      1024
@@ -34,6 +35,35 @@
 md_cfg_entry_t *p;
 md_superblock_t *sb;
 
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+#define raid_le16(x) (md_u16)(x)
+#define raid_le32(x) (md_u32)(x)
+#define raid_be16(x) \
+       (md_u16)( \
+                 (((md_u16)(x) & (md_u16)0xff00) >> 8) | \
+                 (((md_u16)(x) & (md_u16)0x00ff) << 8))
+#define raid_be32(x) \
+       ((md_u32)( \
+                 (((md_u32)(x) & (md_u32)0x000000ffUL) << 24) | \
+                 (((md_u32)(x) & (md_u32)0x0000ff00UL) <<  8) | \
+                 (((md_u32)(x) & (md_u32)0x00ff0000UL) >>  8) | \
+                 (((md_u32)(x) & (md_u32)0xff000000UL) >> 24) ))
+#elif __BYTE_ORDER == __BIG_ENDIAN
+#define raid_le16(x) \
+       (md_u16)( \
+                 (((md_u16)(x) & (md_u16)0xff00) >> 8) | \
+                 (((md_u16)(x) & (md_u16)0x00ff) << 8))
+#define raid_le32(x) \
+       ((md_u32)( \
+                 (((md_u32)(x) & (md_u32)0x000000ffUL) << 24) | \
+                 (((md_u32)(x) & (md_u32)0x0000ff00UL) <<  8) | \
+                 (((md_u32)(x) & (md_u32)0x00ff0000UL) >>  8) | \
+                 (((md_u32)(x) & (md_u32)0xff000000UL) >> 24) ))
+#define raid_be16(x) (md_u16)(x)
+#define raid_be32(x) (md_u32)(x)
+#else
+#error Unknown byte order
+#endif
 
 #define TIME long long
 
@@ -81,7 +111,7 @@ void progress (unsigned long blocks, uns
 }
 #undef F
 
-#if !(defined(__alpha__) || defined(__sparc_v9__))
+#if !(defined(__alpha__) || (defined(__sparc__) && defined(__arch64__)))
 # ifndef __NR__llseek
 #  ifdef __sparc__
 #   define __NR__llseek                236
@@ -105,7 +135,7 @@ long long raidseek (unsigned int fd, uns
        long long result;
        int retval;
 
-#if defined(__alpha__) || defined(__sparc_v9__)
+#if defined(__alpha__) || (defined(__sparc__) && defined(__arch64__))
        return lseek(fd, offset, SEEK_SET);
 #else
        retval = _llseek (fd, ((unsigned long long) offset) >> 32,
@@ -115,11 +145,39 @@ long long raidseek (unsigned int fd, uns
 #endif
 }
 
-int upgrade_sb (int fd, md_superblock_t *sb, md_cfg_entry_t * cfg, int verbose)
+/* Check if this RAID device might contain some embedded partition table.
+ * In that case we try to reserve bytes at the beginning of the RAID array,
+ * so that it does not get smashed.
+ */
+int check_partition_table (int fd, char **name)
+{
+       md_u16 sun_disk_label[256];
+
+       if (raidseek(fd, 0) == -1)
+               return -1;
+       if (read(fd, sun_disk_label, sizeof(sun_disk_label))
+                                       != sizeof(sun_disk_label))
+               return -1;
+       if (raid_be16(sun_disk_label[254]) == 0xDABE) {
+               md_u16 csum, *p;
+
+               for (csum = 0, p = sun_disk_label;
+                    p < sun_disk_label + 256; p++)
+                       csum ^= *p;
+               if (!csum) {
+                       *name = "Sun disk label";
+                       return 1024;
+               }
+       }
+       return 0;
+}
+
+static int upgrade_sb (int fd, md_superblock_t *sb, md_cfg_entry_t * cfg, int verbose)
 {
        struct stat stat_buf;
        md_descriptor_t *disk;
        int i;
+       int mkraid_minor_version = MKRAID_MINOR_VERSION;
 
        if (
                 (sb->major_version == MKRAID_MAJOR_VERSION) &&
@@ -130,6 +188,22 @@ int upgrade_sb (int fd, md_superblock_t 
        }
 
        if (
+                (MKRAID_MAJOR_VERSION == 0) &&
+                (MKRAID_MINOR_VERSION == 91) &&
+                (sb->major_version == MKRAID_MAJOR_VERSION) &&
+                (sizeof(md_superblock_t) == sizeof(md_old_superblock_t))) {
+               if (sb->minor_version == 90) {
+                       /* No need to upgrade from 0.90 to 0.91 on ia32 and
+                        * other archs where nothing moves between those
+                        * two versions. */
+                       fprintf(stderr, "array needs no upgrade\n");
+                       return 1;
+               }
+               if (sb->minor_version < 90)
+                       mkraid_minor_version = 90;
+       }
+
+       if (
                 (sb->major_version > MKRAID_MAJOR_VERSION) ||
                 ((sb->major_version == MKRAID_MAJOR_VERSION) &&
                 (sb->minor_version > MKRAID_MINOR_VERSION)) ||
@@ -143,18 +217,18 @@ int upgrade_sb (int fd, md_superblock_t 
        if (verbose) {
                printf("MD ID:                   %x\n", sb->md_magic);
                printf("Changing MD version from %d.%d.%d to %d.%d.%d.\n",
-                        sb->major_version, sb->minor_version,sb->patch_version,
-                       MKRAID_MAJOR_VERSION, MKRAID_MINOR_VERSION,
+                        sb->major_version, sb->minor_version, sb->patch_version,
+                       MKRAID_MAJOR_VERSION, mkraid_minor_version,
                        MKRAID_PATCHLEVEL_VERSION);
        }
 
        sb->major_version = MKRAID_MAJOR_VERSION;
-       sb->minor_version = MKRAID_MINOR_VERSION;
+       sb->minor_version = mkraid_minor_version;
        sb->patch_version = MKRAID_PATCHLEVEL_VERSION;
 
        if (verbose)
                if ((sb->major_version > 0) || (sb->minor_version >= 50))
-               printf("preferred minor %d (md%d)\n", sb->md_minor, sb->md_minor);
+                       printf("preferred minor %d (md%d)\n", sb->md_minor, sb->md_minor);
         if (stat(cfg->md_name,&stat_buf)) {
                 fprintf(stderr, "%s: file doesn't exist!\n", cfg->md_name);
                 return 1;
@@ -224,7 +298,7 @@ void print_sb (md_superblock_t *sb)
                                                 sb->md_minor);
 
        printf("gvalid_words:            %d\n", sb->gvalid_words);
-       printf("Raid set ID:             %x\n", sb->set_magic);
+       printf("Raid set ID:             %x %x %x %x\n", sb->set_uuid0, sb->set_uuid1, sb->set_uuid2, sb->set_uuid3);
        t = (time_t) sb->ctime;
        printf("Creation time:           %s", ctime(&t));
        t = (time_t) sb->utime;
@@ -245,6 +319,7 @@ void print_sb (md_superblock_t *sb)
        printf("Number of working disks: %d\n", sb->working_disks);
        printf("Number of failed disks:  %d\n", sb->failed_disks);
        printf("Number of spare disks:   %d\n", sb->spare_disks);
+       printf("Reserved bytes:          %d\n", sb->reserved_bytes);
        printf("\n");
 
        for (i = 0; i < sb->nr_disks; i++) {
@@ -262,13 +337,16 @@ void print_sb (md_superblock_t *sb)
        }
 }
 
-static int sanity_checks (char *name, int fd, int sb_offset,
-        int forceSanity, int upgradeArray, md_cfg_entry_t * cfg, int dowrite)
+static int sanity_checks (struct md_version * ver, char *name, int fd,
+                         int sb_offset, int forceSanity, int upgradeArray,
+                         md_cfg_entry_t * cfg, int dowrite)
 {
        FILE *fp;
        unsigned char tmp[MAX_LINE_LENGTH];
        unsigned char buffer[MD_SB_BYTES];
        md_superblock_t *phys_sb;
+       char *part_name;
+       int reserve;
 
        /*
         * Check if the device is mounted
@@ -288,14 +366,41 @@ static int sanity_checks (char *name, in
        fclose(fp);
 
        if (!upgradeArray) {
-               if (forceSanity)
-                       return 0;
                if (cfg->array.param.not_persistent)
                        /*
                         * We have no business analyzing the contents
                         * of a superblock-less array.
                         */
                        return 0;
+
+               reserve = check_partition_table(fd, &part_name);
+               if (reserve < 0) {
+                       fprintf(stderr, "%s: couldn't read from the start of the disk\n", name);
+                       return 1;
+               }
+
+               if (cfg->array.param.reserved_bytes & 1) {
+                       /* reserved-bytes was not mentioned in the config file */
+                       if (reserve && !ver->major && ver->minor <= 90) {
+                               if (forceSanity)
+                                       return 0;
+                               fprintf(stderr, "%s appears to contain an embedded %s partition table.\n"
+                                               "Use -f to override.\n", name, part_name);
+                               return 1;
+                       } else if (reserve > (cfg->array.param.reserved_bytes & ~1)) {
+                               printf("%s appears to contain an embedded %s partition table.\n"
+                                      "Assuming %d reserved-bytes.\n", name, part_name, reserve);
+                               cfg->array.param.reserved_bytes = reserve | 1;
+                       }
+               } else if (reserve > cfg->array.param.reserved_bytes && !forceSanity) {
+                       fprintf(stderr, "%s appears to contain an embedded %s partition table which needs\n"
+                                       "%d reserved bytes, while only %d reserved-bytes was requested.\n",
+                               name, part_name, reserve, cfg->array.param.reserved_bytes);
+                       return 1;
+               }
+               
+               if (forceSanity)
+                       return 0;
                /*
                 * Check if the device contains an ext2 filesystem
                 */
@@ -312,7 +417,10 @@ static int sanity_checks (char *name, in
                if ((read(fd, buffer, MD_SB_BYTES)) != MD_SB_BYTES)
                        return 1;
                phys_sb = (md_superblock_t *) buffer;
-               if (phys_sb->md_magic == MD_SB_MAGIC) {
+               if (phys_sb->md_magic == MD_SB_MAGIC ||
+                   (raid_le32(phys_sb->md_magic) == MD_SB_MAGIC &&
+                    (raid_le32(phys_sb->major_version) > 0 ||
+                     raid_le32(phys_sb->minor_version) > 90))) {
                        fprintf(stderr, "%s appears to be already part of a raid array -- use -f to\nforce the destruction of the old superblock\n", name);
                        return 1;
                }
@@ -328,7 +436,29 @@ static int sanity_checks (char *name, in
        if ((read(fd, buffer, MD_SB_BYTES)) != MD_SB_BYTES)
                return 1;
        phys_sb = (md_superblock_t *) buffer;
+       if (phys_sb->md_magic != MD_SB_MAGIC &&
+           raid_le32(phys_sb->md_magic) == MD_SB_MAGIC &&
+           (raid_le32(phys_sb->major_version) > 0 ||
+            raid_le32(phys_sb->minor_version) > 90)) {
+               md_u32 *p;
+
+               for (p = (md_u32 *)phys_sb; p < (md_u32 *)(phys_sb + 1); p++)
+                       *p = raid_le32(*p);
+       }
        if (phys_sb->md_magic == MD_SB_MAGIC) {
+               if (phys_sb->major_version == 0 && phys_sb->minor_version == 90 &&
+                   sizeof(md_superblock_t) != sizeof(md_old_superblock_t)) {
+                       /* Duh, backwards compatibility. */
+                       md_u64 events;
+
+                       memmove(&events, &((md_old_superblock_t *)phys_sb)->events, sizeof(md_u64));
+                       phys_sb->eventslo = events;
+                       phys_sb->eventshi = events >> 32;
+                       memmove(phys_sb->gstate_sreserved,
+                               ((md_old_superblock_t *)phys_sb)->gstate_sreserved,
+                               (long)phys_sb + MD_SB_BYTES - (long)(((md_old_superblock_t *)phys_sb)->gstate_sreserved));
+               }
+
                if (dowrite) {
                        fprintf(stderr, "upgrading superblock on %s ...\n",
                                         name);
@@ -338,15 +468,24 @@ static int sanity_checks (char *name, in
                if (upgrade_sb(fd, phys_sb, cfg, dowrite))
                        return 1;
                if (dowrite) {
+                       int minor_version = phys_sb->minor_version;
                        fprintf(stderr, "new superblock:\n");
                        print_sb(phys_sb);
                        if (raidseek(fd, sb_offset) == -1)
                                return 1;
+                       if ((phys_sb->major_version ||
+                            phys_sb->minor_version > 90) &&
+                           MD_SB_MAGIC != raid_le32(MD_SB_MAGIC)) {
+                               md_u32 *p;
+
+                               for (p = (md_u32 *)phys_sb; p < (md_u32 *)(phys_sb + 1); p++)
+                                       *p = raid_le32(*p);
+                       }
                        if ((write(fd, buffer, MD_SB_BYTES)) != MD_SB_BYTES) {
                                fprintf(stderr, "could not write new superblock!\n");
                                return 1;
                        }
-                       printf("sb->minor after write: %d\n", phys_sb->minor_version);
+                       printf("sb->minor after write: %d\n", minor_version);
                        fsync(fd);
                }
                return 0;
@@ -459,11 +598,10 @@ int analyze_sb (struct md_version * ver,
                                close(fd);
                                return 1;
                        }
-
                        cfg->sb_block_offset[i] = MD_NEW_SIZE_BLOCKS(nr_blocks);
                        if (!cfg->array.param.not_persistent) {
                                printf("disk %d: %s, %ukB, raid superblock at %dkB\n", i, cfg->device_name[i], nr_blocks, cfg->sb_block_offset[i]);
-                               if (sanity_checks(cfg->device_name[i], fd,
+                               if (sanity_checks(ver, cfg->device_name[i], fd,
                                                cfg->sb_block_offset[i], forceSanity,
                                                upgradeArray, cfg, 0)) {
                                        close(fd);
@@ -475,6 +613,15 @@ int analyze_sb (struct md_version * ver,
                        printf("disk %d: %s, failed\n", i, cfg->device_name[i]);
                }
        }
+
+       if (array->param.reserved_bytes & 1)
+               array->param.reserved_bytes &= ~1;
+       if (array->param.reserved_bytes && !ver->major && ver->minor <= 90) {
+               fprintf(stderr, "Non-zero reserved-bytes (%d) is only supported by kernel RAID driver %d.%d.%d\n",
+                       array->param.reserved_bytes, ver->major, ver->minor, ver->patchlevel);
+               return 1;
+       }
+
        /*      
         * second pass, write stuff out ...
         */
@@ -502,7 +649,7 @@ int analyze_sb (struct md_version * ver,
                        }
 
                        cfg->sb_block_offset[i] = MD_NEW_SIZE_BLOCKS(nr_blocks);
-                       if (sanity_checks(cfg->device_name[i], fd,
+                       if (sanity_checks(ver, cfg->device_name[i], fd,
                                        cfg->sb_block_offset[i], forceSanity,
                                        upgradeArray, cfg, 1)) {
                                close(fd);
--- raidtools-0.90/mkraid.c.jj  Tue Aug  3 10:05:53 1999
+++ raidtools-0.90/mkraid.c     Fri Mar 10 14:02:21 2000
@@ -19,7 +19,7 @@
 #include "popt.h"
 #include "version.h"
 
-void printcfg (md_cfg_entry_t * cfg);
+static void printcfg (md_cfg_entry_t * cfg);
 
 void usage (void) {
     printf("usage: mkraid [--configfile] [--version] [--force] [--upgrade]\n");
@@ -43,7 +43,10 @@ int i, ret, file;
 #endif
 
     file = open(cfg->md_name,O_RDONLY);
-    ret = ioctl(file, SET_ARRAY_INFO, (unsigned long)&cfg->array.param);
+    if (ver->major || ver->minor > 90)
+       ret = ioctl(file, SET_ARRAY_INFO, (unsigned long)&cfg->array.param);
+    else
+       ret = ioctl(file, OLD_SET_ARRAY_INFO, (unsigned long)&cfg->array.param);
     if (ret)
         return 1;
 
@@ -275,7 +278,7 @@ abort:
 #define P(x) printf("%18s: \t %d\n",#x,cfg->array.param.x)
 #define DP(x) printf("%18s: \t %d\n",#x,cfg->array.disks[i].x)
 
-void printcfg (md_cfg_entry_t * cfg)
+static void printcfg (md_cfg_entry_t * cfg)
 {
        int i;
 
@@ -298,6 +301,7 @@ void printcfg (md_cfg_entry_t * cfg)
 
         P(layout);
         P(chunk_size);
+        P(reserved_bytes);
 
        for (i = 0; i < cfg->array.param.nr_disks; i++) {
                printf("\n");
--- raidtools-0.90/parser.c.jj  Tue Aug  3 10:05:53 1999
+++ raidtools-0.90/parser.c     Fri Mar 10 11:36:33 2000
@@ -68,6 +68,7 @@ static int process_entry (char *par, cha
                }
                strcpy(cfg->md_name, val_s);
                cfg->array.param.nr_disks = 0;
+               cfg->array.param.reserved_bytes = 1; /* Autodetect */
 
                last = cfg_head;
                while (last && last->next) last = last->next;
@@ -143,6 +144,13 @@ static int process_entry (char *par, cha
                        return 1;
                }
                array->param.chunk_size = val * MD_BLK_SIZ;
+               return 0;
+       } else if (strcmp(par, "reserved-bytes") == 0) {
+               if (val & 511) {
+                       fprintf(stderr, "reserved-bytes %d must be a power of 512\n", val);
+                       return 1;
+               }
+               array->param.reserved_bytes = val;
                return 0;
        } else if (strcmp(par, "device") == 0) {
                if (array->param.nr_disks == MD_SB_DISKS) {
--- raidtools-0.90/raid5.conf.sample.jj Tue Aug  3 10:05:53 1999
+++ raidtools-0.90/raid5.conf.sample    Fri Mar 10 10:06:30 2000
@@ -3,6 +3,7 @@ raiddev                 /dev/md0
 raid-level             5
 nr-raid-disks          3
 chunk-size             4
+#reserved-bytes                1024
 
 # Parity placement algorithm
 
--- raidtools-0.90/raid1.conf.sample.jj Tue Aug  3 10:05:53 1999
+++ raidtools-0.90/raid1.conf.sample    Fri Mar 10 10:06:46 2000
@@ -4,6 +4,7 @@ raid-level              1
 nr-raid-disks          2
 nr-spare-disks         0
 chunk-size             4
+#reserved-bytes                1024
 
 device                 /dev/hda1
 raid-disk              0
--- raidtools-0.90/raidtab.5.jj Tue Aug  3 10:05:53 1999
+++ raidtools-0.90/raidtab.5    Thu Mar 16 16:20:36 2000
@@ -106,11 +106,20 @@ performance on typical disks with rotati
 
 .TP
 \fBchunk-size \fIsize\fR
-Sets the stripe size to \fIsize\fR bytes. Has to be a power of 2 and
+Sets the stripe size to \fIsize\fR kilobytes. Has to be a power of 2 and
 has a compilation-time maximum of 4M. (MAX_CHUNK_SIZE in the kernel
 driver) typical values are anything from 4k to 128k, the best value
 should be determined by experimenting on a given array, alot depends
 on the SCSI and disk configuration.
+
+.TP
+\fBreserved-bytes \fIsize\fR
+Reserves at least the first \fIsize\fR bytes on each disk, so that
+it can contain e.g. embedded partition tables or bootblocks.
+Without this, the RAID array can happily resync them with something else.
+If reserved-bytes is not specified, then mkraid checks if it finds some
+known partition table or bootblock magic and sets the default accordingly.
+\fIsize\fR must be a multiple of 512.
 
 .TP
 \fBdevice \fIdevpath\fR
--- raidtools-0.90/raidtab.sample.jj    Tue Aug  3 10:05:53 1999
+++ raidtools-0.90/raidtab.sample       Thu Mar 16 16:16:16 2000
@@ -2,7 +2,8 @@
 # sample raiddev configuration file
 
 #
-# 'persistent' RAID5 setup, with no spare disks:
+# 'persistent' RAID5 setup, with no spare disks
+# and 4KB chunk size
 #
 raiddev /dev/md0
     raid-level                5
@@ -51,3 +52,23 @@ raiddev /dev/md2
     device                    /dev/sdc1
     spare-disk                0
 
+
+#
+# 'persistent' RAID5 setup, with no spare disks
+# with 1024 bytes at the start of each disk reserved
+# for bootblocks or other things
+#
+raiddev /dev/md3
+    raid-level                5
+    nr-raid-disks             3
+    nr-spare-disks            0
+    persistent-superblock     1
+    chunk-size                4
+    reserved-bytes           1024
+
+    device                    /dev/sdb1
+    raid-disk                 0
+    device                    /dev/sda1
+    raid-disk                 1
+    device                    /dev/sdc1
+    raid-disk                 2

Reply via email to