Package: multipath-tools
Version: 0.4.7-1.1etch1
Severity: important
*** Please type your report below this line ***
This also affects 0.4.8-13 as far as we can see.
It would be Really Good to fix this for lenny.
The symptom we observed is that a filesystem we expect to be 10 TB is
only about 1.6 TB, both according to 'df' and when doing a test that
fills up the filesystem.
This appears to be the fault of kpartx.
It appears to be caused by using 'long' integers, not 'long long'.
It may not affect 64-bit machines.
How we got to this conclusion:
We attach a storage device with multipath.
The device has a single 10Tb partition.
The kernel is fine with this, as is 'multipath'
scsi4 : ioc1: LSIFC929X, FwRev=01010b00h, Ports=1, MaxQ=1023, IRQ=217
Vendor: Promise Model: VTrak E610f Rev: 1005
Type: Direct-Access ANSI SCSI revision: 05
Vendor: Promise Model: VTrak E610f Rev: 1005
Type: Direct-Access ANSI SCSI revision: 05
Vendor: Promise Model: VTrak E610f Rev: 1005
Type: Direct-Access ANSI SCSI revision: 05
Vendor: Promise Model: VTrak E610f Rev: 1005
Type: Direct-Access ANSI SCSI revision: 05
...
sdd : very big device. try to use READ CAPACITY(16).
SCSI device sdd: 20507805696 512-byte hdwr sectors (10499997 MB)
sdd: Write Protect is off
sdd: Mode Sense: 97 00 10 08
SCSI device sdd: drive cache: write back w/ FUA
sdd : very big device. try to use READ CAPACITY(16).
SCSI device sdd: 20507805696 512-byte hdwr sectors (10499997 MB)
sdd: Write Protect is off
sdd: Mode Sense: 97 00 10 08
SCSI device sdd: drive cache: write back w/ FUA
sdd: sdd1
sd 2:0:2:0: Attached scsi disk sdd
sde : very big device. try to use READ CAPACITY(16).
SCSI device sde: 20507805696 512-byte hdwr sectors (10499997 MB)
sde: Write Protect is off
sde: Mode Sense: 97 00 10 08
SCSI device sde: drive cache: write back w/ FUA
sde : very big device. try to use READ CAPACITY(16).
SCSI device sde: 20507805696 512-byte hdwr sectors (10499997 MB)
sde: Write Protect is off
sde: Mode Sense: 97 00 10 08
SCSI device sde: drive cache: write back w/ FUA
sde: sde1
sd 2:0:3:0: Attached scsi disk sde
sdf : very big device. try to use READ CAPACITY(16).
SCSI device sdf: 20507805696 512-byte hdwr sectors (10499997 MB)
sdf: Write Protect is off
sdf: Mode Sense: 97 00 10 08
SCSI device sdf: drive cache: write back w/ FUA
sdf : very big device. try to use READ CAPACITY(16).
SCSI device sdf: 20507805696 512-byte hdwr sectors (10499997 MB)
sdf: Write Protect is off
sdf: Mode Sense: 97 00 10 08
SCSI device sdf: drive cache: write back w/ FUA
sdf: sdf1
sd 2:0:4:0: Attached scsi disk sdf
sdg : very big device. try to use READ CAPACITY(16).
SCSI device sdg: 20507805696 512-byte hdwr sectors (10499997 MB)
sdg: Write Protect is off
sdg: Mode Sense: 97 00 10 08
SCSI device sdg: drive cache: write back w/ FUA
sdg : very big device. try to use READ CAPACITY(16).
SCSI device sdg: 20507805696 512-byte hdwr sectors (10499997 MB)
sdg: Write Protect is off
sdg: Mode Sense: 97 00 10 08
SCSI device sdg: drive cache: write back w/ FUA
sdg: sdg1
sd 2:0:5:0: Attached scsi disk sdg
# multipath -l
mpath0 (222a60001559596f8) dm-7 Promise,VTrak E610f
[size=9.5T][features=1 queue_if_no_path][hwhandler=0]
\_ round-robin 0 [prio=0][active]
\_ 2:0:2:0 sdd 8:48 [active][undef]
\_ 2:0:3:0 sde 8:64 [active][undef]
\_ 2:0:4:0 sdf 8:80 [active][undef]
\_ 2:0:5:0 sdg 8:96 [active][undef]
We put a GPT partition label on the device, using the entire device.
# parted /dev/sdd print
Disk /dev/sdd: 10.5TB
Sector size (logical/physical): 512B/512B
Partition Table: gpt
Number Start End Size File system Name Flags
1 17.4kB 10.5TB 10.5TB xfs primary
Information: Don't forget to update /etc/fstab, if necessary.
At about this point we ran kpartx.
It seems to create a partition but whines and segfaults.
# kpartx -a /dev/mapper/mpath0
GPT:Primary header thinks Alt. header is not at the end of the disk.
GPT:Alternate GPT header not at the end of the disk.
GPT: Use GNU Parted to correct GPT errors.
Segmentation fault
# ls -l /dev/mapper/mp*
brw-rw---- 1 root disk 254, 7 Jan 21 19:25 mpath0
brw-rw---- 1 root disk 254, 8 Jan 21 19:25 mpath0p1
We made an XFS filesystem on the first partition.
# mkfs.xfs -f -d agcount=16 /dev/mapper/mpath0p1
meta-data=/dev/mapper/mpath0p1 isize=256 agcount=16, agsize=25999504 blks
= sectsz=512 attr=0
data = bsize=4096 blocks=415992055, imaxpct=25
= sunit=0 swidth=0 blks, unwritten=1
naming =version 2 bsize=4096
log =internal log bsize=4096 blocks=32768, version=1
= sectsz=512 sunit=0 blks
realtime =none extsz=65536 blocks=0, rtextents=0
(Note the size here: 415992055*4096/1024/1024/1024 = 1586.884 GiB (about 1.55 TiB),
or 1.6639683E+09 kbytes. We didn't spot this at first.)
However 'df' reports a much smaller filesystem than expected.
# df |grep mpath
Filesystem 1K-blocks Used Available Use% Mounted on
/dev/mapper/mpath0p1 1663837148 544 1663836604 1% /mnt
As a test to see if 'df' is misreporting the size, we tried to fill up
the filesystem by copying known amounts of data to it. It fails at the
1.6 Tbyte mark. We then also noticed the size given by mkfs.xfs...
We found this report in Gentoo linux regarding a 5Tbyte filesystem:
http://bugs.gentoo.org/show_bug.cgi?id=245615
We applied their patch to the pristine upstream sources downloaded
from http://christophe.varoqui.free.fr and built 'kpartx'.
It built cleanly after we had installed all the build-depends of
the multipath-tools package.
We then cleared the multipath setup and redid it by hand,
running the new version of kpartx. Everything else used the programs
in multipath-tools_0.4.7-1. This created a partition of the correct size.
# /etc/init.d/multipath-tools stop
# multipath -F
# ls -l /dev/mapper/mp*
ls: /dev/mapper/mp*: No such file or directory
# multipath -v2 -l
# /etc/init.d/multipath-tools start
# multipath -list
mpath0 (222a60001559596f8) dm-7 Promise,VTrak E610f
[size=9.5T][features=1 queue_if_no_path][hwhandler=0]
\_ round-robin 0 [prio=0][active]
\_ 2:0:2:0 sdd 8:48 [active][undef]
\_ 2:0:3:0 sde 8:64 [active][undef]
\_ 2:0:4:0 sdf 8:80 [active][undef]
\_ 2:0:5:0 sdg 8:96 [active][undef]
# ls -l /dev/mapper/mp*
brw-rw---- 1 root disk 254, 7 2009-01-22 10:36 /dev/mapper/mpath0
# strace -o /tmp/kpartx.strace -f -s 2048 ./kpartx -a /dev/mapper/mpath0
# ls -l /dev/mapper/mp*
brw-rw---- 1 root disk 254, 7 2009-01-22 10:36 /dev/mapper/mpath0
brw-rw---- 1 root disk 254, 8 2009-01-22 10:40 /dev/mapper/mpath0p1
# mkfs.xfs -f -d agcount=16 /dev/mapper/mpath0p1
meta-data=/dev/mapper/mpath0p1 isize=256 agcount=16, agsize=160217232 blks
= sectsz=512 attr=0
data = bsize=4096 blocks=2563475703, imaxpct=25
= sunit=0 swidth=0 blks, unwritten=1
naming =version 2 bsize=4096
log =internal log bsize=4096 blocks=32768, version=1
= sectsz=512 sunit=0 blks
realtime =none extsz=65536 blocks=0, rtextents=0
# mount /dev/mapper/mpath0p1 /mnt
# df -k|grep mpath
/dev/mapper/mpath0p1 10253771740 544 10253771196 1% /mnt
We are now running a data copying test again.
A patch against debian's 0.4.8-13 is attached. We haven't tested it yet;
we would like some advice on what is likely to explode if we just build
the patched 0.4.8 on an 'etch' system and install it.
We could install a lenny machine and test on that.
--System Information:
Debian Release: 4.0
APT prefers stable
APT policy: (500, 'stable')
Architecture: i386 (i686)
Shell: /bin/sh linked to /bin/bash
Kernel: Linux 2.6.18-6-686
Locale: LANG=en_AU.UTF-8, LC_CTYPE=en_AU.iso88591 (charmap=ISO-8859-1)
Versions of packages multipath-tools depends on:
ii dmsetup 2:1.02.08-1 The Linux Kernel Device Mapper use
ii initscripts 2.86.ds1-38+etchnhalf.1 Scripts for initializing and shutt
ii libc6 2.3.6.ds1-13etch8 GNU C Library: Shared libraries
ii libdevmapper1.02 2:1.02.08-1 The Linux Kernel Device Mapper use
ii libncurses5 5.5-5 Shared libraries for terminal hand
ii libreadline5 5.2-2 GNU readline and history libraries
ii libsysfs2 2.1.0-1 interface library to sysfs
ii udev 0.105-4 /dev/ and hotplug management daemo
multipath-tools recommends no packages.
-- no debconf information
--- kpartx.debian/devmapper.c 2007-08-03 07:05:37.000000000 +1000
+++ kpartx/devmapper.c 2009-01-21 23:01:23.523414000 +1100
@@ -72,7 +72,7 @@ dm_simplecmd (int task, const char *name
extern int
dm_addmap (int task, const char *name, const char *target,
- const char *params, unsigned long size, const char *uuid, int part) {
+ const char *params, unsigned long long size, const char *uuid, int part) {
int r = 0;
struct dm_task *dmt;
char *prefixed_uuid;
--- kpartx.debian/devmapper.h 2007-08-03 07:05:37.000000000 +1000
+++ kpartx/devmapper.h 2009-01-21 23:01:56.962881000 +1100
@@ -1,6 +1,6 @@
int dm_prereq (char *, int, int, int);
int dm_simplecmd (int, const char *);
-int dm_addmap (int, const char *, const char *, const char *, unsigned long,
+int dm_addmap (int, const char *, const char *, const char *, unsigned long long,
char *, int);
int dm_map_present (char *);
char * dm_mapname(int major, int minor);
--- kpartx.debian/gpt.c 2007-08-03 07:05:37.000000000 +1000
+++ kpartx/gpt.c 2009-01-21 23:02:16.354778000 +1100
@@ -53,7 +53,9 @@
#define BLKGETLASTSECT _IO(0x12,108) /* get last sector of block device */
#define BLKGETSIZE _IO(0x12,96) /* return device size */
#define BLKSSZGET _IO(0x12,104) /* get block device sector size */
-#define BLKGETSIZE64 _IOR(0x12,114,sizeof(uint64_t)) /* return device size in bytes (u64 *arg) */
+
+// #define BLKGETSIZE64 _IOR(0x12,114,sizeof(uint64_t)) /* return device size in bytes (u64 *arg) */
+#define BLKGETSIZE64 _IOR(0x12,114,size_t)
struct blkdev_ioctl_param {
unsigned int block;
@@ -145,13 +147,13 @@ _get_num_sectors(int filedes)
{
unsigned long sectors=0;
int rc;
-#if 0
+
uint64_t bytes=0;
rc = ioctl(filedes, BLKGETSIZE64, &bytes);
if (!rc)
return bytes / get_sector_size(filedes);
-#endif
+
rc = ioctl(filedes, BLKGETSIZE, &sectors);
if (rc)
return 0;
--- kpartx.debian/kpartx.c 2009-01-22 10:05:35.000000000 +1100
+++ kpartx/kpartx.c 2009-01-21 23:02:31.920161000 +1100
@@ -82,7 +82,7 @@ initpts(void)
addpts("sun", read_sun_pt);
}
-static char short_opts[] = "ladgvp:t:";
+static char short_opts[] = "ladgvnp:t:";
/* Used in gpt.c */
int force_gpt=0;
@@ -94,7 +94,6 @@ usage(void) {
printf("\t-d del partition devmappings\n");
printf("\t-l list partitions devmappings that would be added by -a\n");
printf("\t-p set device name-partition number delimiter\n");
- printf("\t-g force GUID partition table (GPT)\n");
printf("\t-v verbose\n");
return 1;
}
@@ -188,7 +187,8 @@ main(int argc, char **argv){
struct slice all;
struct pt *ptp;
enum action what = LIST;
- char *type, *diskdevice, *device, *progname;
+ char *p, *type, *diskdevice, *device, *progname;
+ int lower, upper;
int verbose = 0;
char partname[PARTNAME_SIZE], params[PARTNAME_SIZE + 16];
char * loopdev = NULL;
@@ -202,6 +202,7 @@ main(int argc, char **argv){
initpts();
init_crc32();
+ lower = upper = 0;
type = device = diskdevice = NULL;
memset(&all, 0, sizeof(all));
memset(&partname, 0, sizeof(partname));
@@ -238,6 +239,14 @@ main(int argc, char **argv){
case 'v':
verbose = 1;
break;
+ case 'n':
+ p = optarg;
+ lower = atoi(p);
+ if ((p[1] == '-') && p[2])
+ upper = atoi(p+2);
+ else
+ upper = lower;
+ break;
case 'p':
delim = optarg;
break;
@@ -256,7 +265,7 @@ main(int argc, char **argv){
}
if (dm_prereq(DM_TARGET, 0, 0, 0) && (what == ADD || what == DELETE)) {
- fprintf(stderr, "device mapper prerequisites not met\n");
+ fprintf(stderr, "device mapper prerequisites not met\n");
exit(1);
}
@@ -316,12 +325,14 @@ main(int argc, char **argv){
if (!mapname)
mapname = device + off;
- fd = open(device, O_RDONLY);
+ fd = open(device, O_RDONLY|O_LARGEFILE);
if (fd == -1) {
perror(device);
exit(1);
}
+ if (!lower)
+ lower = 1;
/* add/remove partitions to the kernel devmapper tables */
for (i = 0; i < ptct; i++) {
@@ -355,10 +366,10 @@ main(int argc, char **argv){
slices[j].minor = m++;
- printf("%s%s%d : 0 %lu %s %lu\n",
+ printf("%s%s%d : 0 %llu %s %lu\n",
mapname, delim, j+1,
- (unsigned long) slices[j].size, device,
- (unsigned long) slices[j].start);
+ slices[j].size, device,
+ slices[j].start);
}
/* Loop to resolve contained slices */
d = c;
@@ -376,10 +387,10 @@ main(int argc, char **argv){
slices[j].minor = m++;
start = slices[j].start - slices[k].start;
- printf("%s%s%d : 0 %lu %s%s%d %lu\n",
+ printf("%s%s%d : 0 %llu /dev/dm-%d %lu\n",
mapname, delim, j+1,
- (unsigned long) slices[j].size,
- mapname, delim, k, start);
+ slices[j].size,
+ slices[k].minor, start);
c--;
}
/* Terminate loop if nothing more to resolve */
@@ -390,7 +401,7 @@ main(int argc, char **argv){
break;
case DELETE:
- for (j = n-1; j >= 0; j--) {
+ for (j = 0; j < n; j++) {
if (safe_sprintf(partname, "%s%s%d",
mapname, delim, j+1)) {
fprintf(stderr, "partname too small\n");
@@ -420,7 +431,7 @@ main(int argc, char **argv){
break;
case ADD:
- for (j = 0, c = 0; j < n; j++) {
+ for (j=0, c = 0; j<n; j++) {
if (slices[j].size == 0)
continue;
@@ -457,7 +468,7 @@ main(int argc, char **argv){
&slices[j].minor);
if (verbose)
- printf("add map %s (%d:%d): 0 %lu %s %s\n",
+ printf("add map %s (%d:%d): 0 %llu %s %s\n",
partname, slices[j].major,
slices[j].minor, slices[j].size,
DM_TARGET, params);
@@ -466,7 +477,6 @@ main(int argc, char **argv){
d = c;
while (c) {
for (j = 0; j < n; j++) {
- unsigned long start;
int k = slices[j].container - 1;
if (slices[j].size == 0)
@@ -477,7 +487,7 @@ main(int argc, char **argv){
continue;
/* Skip all simple slices */
- if (slices[j].container == 0)
+ if (k < 0)
continue;
/* Check container slice */
@@ -492,11 +502,10 @@ main(int argc, char **argv){
}
strip_slash(partname);
- start = slices[j].start - slices[k].start;
if (safe_sprintf(params, "%d:%d %lu",
slices[k].major,
slices[k].minor,
- start)) {
+ (unsigned long)slices[j].start)) {
fprintf(stderr, "params too small\n");
exit(1);
}
@@ -515,12 +524,9 @@ main(int argc, char **argv){
&slices[j].minor);
if (verbose)
- printf("add map %s (%d:%d): 0 %lu %s\n",
- partname,
- slices[j].major,
- slices[j].minor,
- slices[j].size,
- params);
+ printf("add map %s : 0 %llu %s %s\n",
+ partname, slices[j].size,
+ DM_TARGET, params);
c--;
}
/* Terminate loop */
--- kpartx.debian/kpartx.h 2007-08-03 07:05:37.000000000 +1000
+++ kpartx/kpartx.h 2009-01-21 23:02:40.656432000 +1100
@@ -21,7 +21,7 @@
*/
struct slice {
unsigned long start;
- unsigned long size;
+ unsigned long long size;
int container;
int major;
int minor;