Re: grub RAID heuristics (how can we avoid "superfluous RAID member (2 found)")

2012-02-26 Thread Daniel Kahn Gillmor
On Sat, 25 Feb 2012 16:49:00 -0500, Daniel Kahn Gillmor 
 wrote:
> On Thu, 23 Feb 2012 06:43:23 +0100, Vladimir 'φ-coder/phcoder' Serbinenko 
>  wrote:
> > [dkg wrote:]
> > > Select the smallest known block device that can completely enclose the
> > > RAID member.  The larger block device(s) should not be considered to be
> > > exporting that RAID.

I just built the latest (r4008) bzr head, which i believe includes the
proposed changes, and installed it on a machine with two disks in RAID1:

0 root@trash:/home/dkg/src/grub/grub# cat /proc/partitions 
major minor  #blocks  name

   80   19531250 sda
   816266913 sda1
   82   13256178 sda2
   8   16   13260240 sdb
   8   17   13258192 sdb1
   90   13256064 md0
 25303145728 dm-0
0 root@trash:/home/dkg/src/grub/grub# cat /proc/mdstat 
Personalities : [raid1] 
md0 : active raid1 sdb1[0] sda2[1]
  13256064 blocks [2/2] [UU]
  
unused devices: 
0 root@trash:/home/dkg/src/grub/grub# pvs
  PV VGFmt  Attr PSize  PFree
  /dev/md0   vg_trash0 lvm2 a--  12.64g 9.64g
0 root@trash:/home/dkg/src/grub/grub# vgs
  VG#PV #LV #SN Attr   VSize  VFree
  vg_trash0   1   1   0 wz--n- 12.64g 9.64g
0 root@trash:/home/dkg/src/grub/grub# lvs
  LV   VGAttr   LSize Origin Snap%  Move Log Copy%  Convert
  root vg_trash0 -wi-ao 3.00g  
0 root@trash:/home/dkg/src/grub/grub# 


it boots correctly on this setup, yay!

booting to the grub command line, however, i do see some weirdness:

grub> ls
(lvm/vg_trash0-root) (hd0) (hd0,msdos2) (hd0,msdos1) (hd1) (hd1,msdos1)
(fd0) (lvm/vg_trash0-root)

grub> insmod mdraid09
grub> ls
(lvm/vg_trash0-root) (hd0) (hd0,msdos2) (hd0,msdos1) (hd1) (hd1,msdos1)
(fd0) (md/md0) (md/md0,msdos2) (md/md0,msdos1) (lvm/vg_trash0-root)

grub> rmmod mdraid09
grub> ls
(md/md0) (lvm/vg_trash0-root) (hd0) (hd0,msdos2) (hd0,msdos1) (hd1)
(hd1,msdos1) (fd0) (md/md0)

grub> insmod mdraid09
grub> ls
(md/md0) (lvm/vg_trash0-root) (hd0) (hd0,msdos2) (hd0,msdos1) (hd1)
(hd1,msdos1) (fd0) (md/md0)


 --dkg


pgp7TMzghOykF.pgp
Description: PGP signature
___
Grub-devel mailing list
Grub-devel@gnu.org
https://lists.gnu.org/mailman/listinfo/grub-devel


Re: grub RAID heuristics (how can we avoid "superfluous RAID member (2 found)")

2012-02-25 Thread Daniel Kahn Gillmor
On Thu, 23 Feb 2012 06:43:23 +0100, Vladimir 'φ-coder/phcoder' Serbinenko 
 wrote:
> [dkg wrote:]
> > Select the smallest known block device that can completely enclose the
> > RAID member.  The larger block device(s) should not be considered to be
> > exporting that RAID.

phcoder pointed me to the attached patch for improved heuristics for
this case, which applies cleanly to the current head of bzr.  Applied,
it removes the spurious error messages from grub-probe, as seen here
compared against debian's grub-probe 1.99-14:

0 root@trash:/home/dkg/src/grub/grub# grub-probe --target=device /
error: found two disks with the index 1 for RAID md0.
error: superfluous RAID member (2 found).
/dev/mapper/vg_trash0-root
0 root@trash:/home/dkg/src/grub/grub# ./grub-probe --target=device /
/dev/mapper/vg_trash0-root
0 root@trash:/home/dkg/src/grub/grub# 

I think it's worth applying.

Thanks for your work on this, phcoder!

   --dkg

=== modified file 'grub-core/disk/diskfilter.c'
--- grub-core/disk/diskfilter.c	2012-02-12 14:25:25 +
+++ grub-core/disk/diskfilter.c	2012-02-24 21:06:10 +
@@ -974,33 +974,33 @@
 	struct grub_diskfilter_lv *lv;
 	/* FIXME: Check whether the update time of the superblocks are
 	   the same.  */
+	if (pv->disk && grub_disk_get_size (disk) >= pv->part_size)
+	  return GRUB_ERR_NONE;
+	pv->disk = grub_disk_open (disk->name);
+	if (!pv->disk)
+	  return grub_errno;
 	/* This could happen to LVM on RAID, pv->disk points to the
 	   raid device, we shouldn't change it.  */
-	if (! pv->disk)
-	  {
-	pv->disk = grub_disk_open (disk->name);
-	if (! pv->disk)
-	  return grub_errno;
-	pv->part_start = grub_partition_get_start (disk->partition);
-	pv->part_size = grub_disk_get_size (disk);
+	pv->start_sector -= pv->part_start;
+	pv->part_start = grub_partition_get_start (disk->partition);
+	pv->part_size = grub_disk_get_size (disk);
 
 #ifdef GRUB_UTIL
-	{
-	  grub_size_t s = 1;
-	  grub_partition_t p;
-	  for (p = disk->partition; p; p = p->parent)
-		s++;
-	  pv->partmaps = xmalloc (s * sizeof (pv->partmaps[0]));
-	  s = 0;
-	  for (p = disk->partition; p; p = p->parent)
-		pv->partmaps[s++] = xstrdup (p->partmap->name);
-	  pv->partmaps[s++] = 0;
-	}
+	{
+	  grub_size_t s = 1;
+	  grub_partition_t p;
+	  for (p = disk->partition; p; p = p->parent)
+	s++;
+	  pv->partmaps = xmalloc (s * sizeof (pv->partmaps[0]));
+	  s = 0;
+	  for (p = disk->partition; p; p = p->parent)
+	pv->partmaps[s++] = xstrdup (p->partmap->name);
+	  pv->partmaps[s++] = 0;
+	}
 #endif
-	if (start_sector != (grub_uint64_t)-1)
-	  pv->start_sector = start_sector;
-	pv->start_sector += pv->part_start;
-	  }
+	if (start_sector != (grub_uint64_t)-1)
+	  pv->start_sector = start_sector;
+	pv->start_sector += pv->part_start;
 	/* Add the device to the array. */
 	for (lv = array->lvs; lv; lv = lv->next)
 	  if (!lv->became_readable_at && lv->fullname && is_lv_readable (lv))

=== modified file 'grub-core/disk/mdraid_linux.c'
--- grub-core/disk/mdraid_linux.c	2012-01-29 13:28:01 +
+++ grub-core/disk/mdraid_linux.c	2012-02-24 21:11:29 +
@@ -201,6 +201,11 @@
   return NULL;
 }
 
+  /* No need for explicit check that sb.size is 0 (unspecified) since
+ 0 >= non-0 is false.  */
+  if (((grub_disk_addr_t) grub_le_to_cpu32 (sb.size)) * 2 >= size)
+return NULL;
+
   /* FIXME: Check the checksum.  */
 
   level = grub_le_to_cpu32 (sb.level);
@@ -241,7 +246,8 @@
   grub_snprintf (buf, sizeof (buf), "md%d", grub_le_to_cpu32 (sb.md_minor));
   return grub_diskfilter_make_raid (16, (char *) uuid,
 grub_le_to_cpu32 (sb.raid_disks), buf,
-(sb.size) ? grub_le_to_cpu32 (sb.size) * 2 
+(sb.size) ? ((grub_disk_addr_t)
+		 grub_le_to_cpu32 (sb.size)) * 2
 : sector,
 grub_le_to_cpu32 (sb.chunk_size) >> 9,
 grub_le_to_cpu32 (sb.layout),



pgp5yVvBiCXiu.pgp
Description: PGP signature
___
Grub-devel mailing list
Grub-devel@gnu.org
https://lists.gnu.org/mailman/listinfo/grub-devel


Re: grub RAID heuristics (how can we avoid "superfluous RAID member (2 found)")

2012-02-23 Thread Daniel Kahn Gillmor
On Thu, 23 Feb 2012 06:43:23 +0100, Vladimir 'φ-coder/phcoder' Serbinenko 
 wrote:
> Try attached patch

hrm, i get a segmentation fault from "grub-probe /" even without this
patch when building from bzr trunk (r3964, i think).

The test machine i've set up looks like this uses a common layering
scheme:
 disk → partition → mdadm (0.90 superblock) → lvm → filesystem

Here are the details and a backtrace of me trying:

0 root@trash:~# parted /dev/sda unit s print
Model: ATA WDC WD200BB-75DE (scsi)
Disk /dev/sda: 39062500s
Sector size (logical/physical): 512B/512B
Partition Table: msdos

Number  Start  EndSize   Type File system  Flags
 1  8192s  12542018s  12533827s  primary
 2  12550144s  39062499s  26512356s  primary

0 root@trash:~# parted /dev/sdb unit s print
Model: ATA QUANTUM FIREBALL (scsi)
Disk /dev/sdb: 26520480s
Sector size (logical/physical): 512B/512B
Partition Table: msdos

Number  Start  EndSize   Type File system  Flags
 1  4096s  26520479s  26516384s  primary

0 root@trash:~# cat /proc/mdstat 
Personalities : [raid1] 
md0 : active raid1 sdb1[0] sda2[1]
  13256064 blocks [2/2] [UU]
  
unused devices: 
0 root@trash:~# pvs
  PV VGFmt  Attr PSize  PFree
  /dev/md0   vg_trash0 lvm2 a--  12.64g 9.64g
0 root@trash:~# vgs
  VG#PV #LV #SN Attr   VSize  VFree
  vg_trash0   1   1   0 wz--n- 12.64g 9.64g
0 root@trash:~# lvs
  LV   VGAttr   LSize Origin Snap%  Move Log Copy%  Convert
  root vg_trash0 -wi-ao 3.00g  
0 root@trash:~# grep ' / ' /proc/mounts 
rootfs / rootfs rw 0 0
/dev/mapper/vg_trash0-root / ext3 
rw,relatime,errors=remount-ro,user_xattr,acl,barrier=1,data=ordered 0 0
0 root@trash:~# cd - 
/home/dkg/src/grub/grub
0 root@trash:/home/dkg/src/grub/grub# gdb ./grub-probe
GNU gdb (GDB) 7.4-debian
Copyright (C) 2012 Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later 
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.  Type "show copying"
and "show warranty" for details.
This GDB was configured as "i486-linux-gnu".
For bug reporting instructions, please see:
...
Reading symbols from /home/dkg/src/grub/grub/grub-probe...(no debugging symbols 
found)...done.
(gdb) run -v /
Starting program: /home/dkg/src/grub/grub/grub-probe -v /
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib/i386-linux-gnu/libthread_db.so.1".
/home/dkg/src/grub/grub/grub-probe: info: changing current directory to 
/dev/mapper.
/home/dkg/src/grub/grub/grub-probe: info: changing current directory to /dev.
/home/dkg/src/grub/grub/grub-probe: info: changing current directory to md.
/home/dkg/src/grub/grub/grub-probe: info: changing current directory to dri.
/home/dkg/src/grub/grub/grub-probe: info: changing current directory to snd.
/home/dkg/src/grub/grub/grub-probe: info: changing current directory to by-path.
/home/dkg/src/grub/grub/grub-probe: info: changing current directory to cpu.
/home/dkg/src/grub/grub/grub-probe: info: changing current directory to 
vg_trash0.
/home/dkg/src/grub/grub/grub-probe: info: changing current directory to /dev.
/home/dkg/src/grub/grub/grub-probe: info: changing current directory to md.
/home/dkg/src/grub/grub/grub-probe: info: changing current directory to dri.
/home/dkg/src/grub/grub/grub-probe: info: changing current directory to snd.
/home/dkg/src/grub/grub/grub-probe: info: changing current directory to by-path.
/home/dkg/src/grub/grub/grub-probe: info: changing current directory to cpu.
/home/dkg/src/grub/grub/grub-probe: info: changing current directory to 
vg_trash0.
/home/dkg/src/grub/grub/grub-probe: info: changing current directory to disk.
/home/dkg/src/grub/grub/grub-probe: info: changing current directory to by-uuid.
/home/dkg/src/grub/grub/grub-probe: info: changing current directory to by-path.
/home/dkg/src/grub/grub/grub-probe: info: changing current directory to by-id.
/home/dkg/src/grub/grub/grub-probe: info: changing current directory to block.
/home/dkg/src/grub/grub/grub-probe: info: changing current directory to /dev.
/home/dkg/src/grub/grub/grub-probe: info: changing current directory to md.
/home/dkg/src/grub/grub/grub-probe: info: changing current directory to dri.
/home/dkg/src/grub/grub/grub-probe: info: changing current directory to snd.
/home/dkg/src/grub/grub/grub-probe: info: changing current directory to by-path.
/home/dkg/src/grub/grub/grub-probe: info: changing current directory to cpu.
/home/dkg/src/grub/grub/grub-probe: info: changing current directory to 
vg_trash0.
/home/dkg/src/grub/grub/grub-probe: info: changing current directory to disk.
/home/dkg/src/grub/grub/grub-probe: info: changing current directory to by-uuid.
/home/dkg/src/grub/grub/grub-probe: info: changing current directory to by-path.
/home/dkg/src/grub/grub/grub-probe: 

Re: grub RAID heuristics (how can we avoid "superfluous RAID member (2 found)")

2012-02-22 Thread Vladimir 'φ-coder/phcoder' Serbinenko

On 02.02.2012 01:55, Daniel Kahn Gillmor wrote:

hi folks--

i was speaking with phcoder today on #grub, about getting messages like
this from grub when a partition that is part of a Linux SW RAID set
(with metadata 0.9x) is close to the end of its containing block device:

   superfluous RAID member (2 found)

Try attached patch

Here's phcoder's explanation of the problem:


16:08<  phcoder>  if you have<  64KiB between end of disk and end of partition
  the metadata is exactly in the same place for either if the
  whole disks are raided or only partitions. And no field which
  allows to distinguish those

  [...]

16:09<  phcoder>  thing is that mdraid format rounds it down to a 64K aligned
  boundary
16:10<  dkg0>  64KiB aligned to the parent disk, or to the partition itself?
16:10<  phcoder>  to whatever the host of the raid is. For your error to occur
  it has to be both
16:10<  dkg0>  hm, i suppose if the partition itself starts evenly aligned with
   a 64KiB boundary, it'd be the same thing.


It sounds like there might be some circumstances (e.g. a RAID0 set of
sda and sdb, creating md0, and a partition table on top of md0) where it
is legitimately difficult to decide the correct interpretation.

However, i think there might be a reasonable heuristic that can be used
in the case of RAID1 sets that would avoid the ambiguity (and the scary
messages/warnings to the user:

Select the smallest known block device that can completely enclose the
RAID member.  The larger block device(s) should not be considered to be
exporting that RAID.

these heuristics would mean that RAID1 sets with 0.9x metadata on any
sort of disklabel would only have their member components show up once
during a scan, rather than treating them as a duplicate.

phcoder mentioned that the RAID code was undergoing something of an
overhaul.  perhaps these heuristics could play into that update?

I'm not sure how to address it with RAID0 or RAID10, but if we could fix
the RAID1 case, that would be a big win for a lot of users.

Regards,

--dkg

___
Grub-devel mailing list
Grub-devel@gnu.org
https://lists.gnu.org/mailman/listinfo/grub-devel




--
Regards
Vladimir 'φ-coder/phcoder' Serbinenko

=== modified file 'grub-core/disk/diskfilter.c'
--- grub-core/disk/diskfilter.c	2012-02-12 14:25:25 +
+++ grub-core/disk/diskfilter.c	2012-02-23 05:42:14 +
@@ -972,35 +972,40 @@
 	: (pv->id.id == id->id))
   {
 	struct grub_diskfilter_lv *lv;
+	grub_disk_t disk;
 	/* FIXME: Check whether the update time of the superblocks are
 	   the same.  */
+	disk = grub_disk_open (disk->name);
+	if (!disk)
+	  return grub_errno;
+	if (disk && pv->disk && grub_disk_get_size (disk) >= pv->part_size)
+	  {
+	grub_disk_close (disk);
+	return GRUB_ERR_NONE;
+	  }
+	pv->disk = disk;
 	/* This could happen to LVM on RAID, pv->disk points to the
 	   raid device, we shouldn't change it.  */
-	if (! pv->disk)
-	  {
-	pv->disk = grub_disk_open (disk->name);
-	if (! pv->disk)
-	  return grub_errno;
-	pv->part_start = grub_partition_get_start (disk->partition);
-	pv->part_size = grub_disk_get_size (disk);
+	pv->start_sector -= pv->part_start;
+	pv->part_start = grub_partition_get_start (disk->partition);
+	pv->part_size = grub_disk_get_size (disk);
 
 #ifdef GRUB_UTIL
-	{
-	  grub_size_t s = 1;
-	  grub_partition_t p;
-	  for (p = disk->partition; p; p = p->parent)
-		s++;
-	  pv->partmaps = xmalloc (s * sizeof (pv->partmaps[0]));
-	  s = 0;
-	  for (p = disk->partition; p; p = p->parent)
-		pv->partmaps[s++] = xstrdup (p->partmap->name);
-	  pv->partmaps[s++] = 0;
-	}
+	{
+	  grub_size_t s = 1;
+	  grub_partition_t p;
+	  for (p = disk->partition; p; p = p->parent)
+	s++;
+	  pv->partmaps = xmalloc (s * sizeof (pv->partmaps[0]));
+	  s = 0;
+	  for (p = disk->partition; p; p = p->parent)
+	pv->partmaps[s++] = xstrdup (p->partmap->name);
+	  pv->partmaps[s++] = 0;
+	}
 #endif
-	if (start_sector != (grub_uint64_t)-1)
-	  pv->start_sector = start_sector;
-	pv->start_sector += pv->part_start;
-	  }
+	if (start_sector != (grub_uint64_t)-1)
+	  pv->start_sector = start_sector;
+	pv->start_sector += pv->part_start;
 	/* Add the device to the array. */
 	for (lv = array->lvs; lv; lv = lv->next)
 	  if (!lv->became_readable_at && lv->fullname && is_lv_readable (lv))

___
Grub-devel mailing list
Grub-devel@gnu.org
https://lists.gnu.org/mailman/listinfo/grub-devel