Hi,

 I've a weird problem, sometimes a random file "stucks" after 1-2 weeks
uptime on a xfs partition within a vs. the xfs is laying on lvm2 &
dm-crypto, userspace is 32bit Debian, kernel is AMD64. I using vs since
2.6.14.4-2.1.x but I changed to 2.6.15.2-2.1.0.5.1 when this problem
appeared and I hoped that it will solve it - but unfortunately not..
today I got another "stucked" file, the process goes to state 'D' if I
try to stat or open it. another files in this directory are working
well.
 It's a highly loaded file server and I never had same problem outside
the vs, so I think it isn't a generic xfs problem. I've noticed that
too, this lockup happens on mass rm'ing of files, but of course not on
every mass rm.. as I said, it's a really big file server with millions
of files, please think twice before you say "xfs? vs? it's works for me
at home!" :)


the details..
dmesg is empty on the host, no panic or same.

this kernel process is stucked on the host:
 root     15062  0.4  0.0     0    0 ?        D    Mar06   6:15 [pdflush]


vs:/# uname -a 
Linux vs 2.6.15.2-vs2.1.0.5.1 #4 SMP Tue Feb 14 18:15:09 CET 2006 x86_64 
GNU/Linux
vs:/# uptime
 18:26:21 up 9 days,  8:36,  2 users,  load average: 30.5, 30.5, 30.2

 - it's not "real" load, I've kicked out the users, no network traffic

vs:/# cat /proc/mounts 
rootfs / rootfs rw 0 0
/dev/root / ext3 rw,data=ordered 0 0
/dev/mapper/blv /mirror/pub xfs rw,nosuid,nodev,noexec 0 0
/dev/mapper/bvlv /mirror/pub/fsn xfs rw,nosuid,nodev,noexec 0 0
none /proc proc rw,nodiratime,nodev 0 0
none /tmp tmpfs rw,nodev 0 0
none /dev/pts devpts rw 0 0
/dev2/root2 /bin ext3 ro,nodev,data=ordered 0 0
/dev2/root2 /sbin ext3 ro,nodev,data=ordered 0 0
/dev2/root2 /lib ext3 ro,nodev,data=ordered 0 0
/dev2/root2 /lib/modules ext3 ro,nodev,data=ordered 0 0
/dev2/root2 /usr ext3 ro,nodev,data=ordered 0 0
/dev2/root2 /usr/local ext3 ro,nodev,data=ordered 0 0
/dev2/root2 /etc/terminfo ext3 ro,nodev,data=ordered 0 0
/dev2/root2 /etc/alternatives ext3 ro,nodev,data=ordered 0 0


vs:/# ps axu    (too long to paste here, cutted)

USER       PID %CPU %MEM   VSZ  RSS TTY      STAT START   TIME COMMAND
root         1  0.0  0.0  1516  464 ?        S    Feb26   0:01 init [3]         
root     11733  0.0  0.0  1848  540 ?        SN   Feb26   0:01 
/usr/sbin/inetutils-inetd -R 120
root     11757  0.0  0.0  3368  516 ?        SNs  Feb26   0:00 /usr/sbin/sshd
root     11799  0.0  0.0  1764  588 ?        SNs  Feb26   0:00 /usr/sbin/cron
#497     11763  0.0  0.0  4128  960 ?        DN   09:20   0:31 ftpd
#492     19892  0.0  0.0  4140  960 ?        DN   09:31   0:06 ftpd
#497     26474  0.0  0.0  4008  928 ?        DN   09:39   0:00 ftpd
#492     28038  0.0  0.0  4128  956 ?        DN   09:40   0:00 ftpd
#497      2061  0.0  0.0  4008  928 ?        DN   09:48   0:00 ftpd
#497      2405  0.0  0.0  4008  928 ?        DN   09:49   0:00 ftpd
#497      3524  0.0  0.0  4128  956 ?        DN   09:50   0:00 ftpd
root     16748  0.0  0.0  1912  508 ?        DN   11:30   0:00 
/opt/bin/myglinsert 3 bla bla
root     25866  0.0  0.0  1916  508 ?        DN   13:30   0:00 
/opt/bin/myglinsert 3 bla bla
...

vs:/# ls -l /proc/19892/fd/
lrwx------  1 root root 64 Mar  7 18:32 0 -> socket:[33836916]
lrwx------  1 root root 64 Mar  7 18:32 1 -> socket:[33836916]
l-wx------  1 root root 64 Mar  7 18:32 2 -> /dev/null
lrwx------  1 root root 64 Mar  7 18:32 6 -> /mirror/pub/lacee/site
lrwx------  1 root root 64 Mar  7 18:32 7 -> socket:[33850456]

vs:/# ls -l /proc/2405/fd/
lrwx------  1 root root 64 Mar  7 18:34 0 -> socket:[33884876]
lrwx------  1 root root 64 Mar  7 18:34 1 -> socket:[33884876]
l-wx------  1 root root 64 Mar  7 18:34 2 -> /dev/null
lr-x------  1 root root 64 Mar  7 18:34 6 -> /mirror/pub/lacee/site

..the rest is same..


vs:/# stat /mirror/pub/lacee/site
  File: `/mirror/pub/lacee/site'
  Size: 4096            Blocks: 16         IO Block: 4096   directory
Device: fd14h/64788d    Inode: 1613995234  Links: 3
Access: (0777/drwxrwxrwx)  Uid: (  497/ UNKNOWN)   Gid: ( 9500/ UNKNOWN)
Access: 2006-03-07 18:30:07.769923805 +0100
Modify: 2006-03-07 09:36:30.683606795 +0100
Change: 2006-03-07 09:36:30.683606795 +0100

vs:/# cd /mirror/pub/lacee/site
vs:site# ls | wc -l
84
vs:site# for i in *; do echo "$i"; stat "$i"; done
...
bc-me2bo.r49
[oops, 'stat' locked on this file!]


SysRQ+t - very long, copied only the stucked processes, but not all of
them is here, cause the 'dmesg' buffer is too small and I haven't a
serial console :(

kernel: myglinsert    D ffff81003438c000     0 16748  16740
kernel: ffff810013347ce8 0000000000000086 0000000000000000 0000000000000292
kernel:        0000000000000292 0000000000000000 0000000000000008 
ffff81007b708260
kernel:        0000000000000292 ffffffff880e3596
kernel: Call Trace:<ffffffff880e3596>{:xfs:xfs_iunlock+102} 
<ffffffff8019fa8f>{__d_lookup+159}
kernel:        <ffffffff803deea1>{__down_read+129} 
<ffffffff880fc211>{:xfs:xfs_getattr+65}
kernel:        <ffffffff8810d23b>{:xfs:vn_revalidate+59} 
<ffffffff8019525f>{link_path_walk+415}
kernel:        <ffffffff88109d14>{:xfs:linvfs_getattr+36} 
<ffffffff8018f587>{vfs_getattr+39}
kernel:        <ffffffff8018f620>{vfs_stat+64} 
<ffffffff80232915>{_atomic_dec_and_lock+53}
kernel:        <ffffffff801201ff>{sys32_stat64+31} 
<ffffffff8011fd01>{ia32_sysret+0}

kernel: myglinsert    D ffff81003438c000     0  9169   9166
kernel: ffff810006be9ce8 0000000000000082 0000000000000000 0000000000000292
kernel:        0000000000000292 0000000000000001 0000000000000008 
ffff81007b708260
kernel:        0000000000000292 ffffffff880e3596
kernel: Call Trace:<ffffffff880e3596>{:xfs:xfs_iunlock+102} 
<ffffffff8019fa8f>{__d_lookup+159}
kernel:        <ffffffff803deea1>{__down_read+129} 
<ffffffff880fc211>{:xfs:xfs_getattr+65}
kernel:        <ffffffff8810d23b>{:xfs:vn_revalidate+59} 
<ffffffff8019525f>{link_path_walk+415}
kernel:        <ffffffff88109d14>{:xfs:linvfs_getattr+36} 
<ffffffff8018f587>{vfs_getattr+39}
kernel:        <ffffffff8018f620>{vfs_stat+64} 
<ffffffff801201ff>{sys32_stat64+31}
kernel:        <ffffffff8011fd01>{ia32_sysret+0}

kernel: ftpd        D ffff81003438c000     0  2386  11733
kernel: ffff81003da51ce8 0000000000000082 ffff81002db6f978 0000000000000000
kernel:        ffff81003da51c38 0000000000000000 ffff810026f181c0 
ffff81007489ce00
kernel:        ffff81006d851980 ffff81003da51d28
kernel: Call Trace:<ffffffff8019fdf0>{d_rehash+112} 
<ffffffff8019fa8f>{__d_lookup+159}
kernel:        <ffffffff803deea1>{__down_read+129} 
<ffffffff880fc211>{:xfs:xfs_getattr+65}
kernel:        <ffffffff8810d23b>{:xfs:vn_revalidate+59} 
<ffffffff8019525f>{link_path_walk+415}
kernel:        <ffffffff88109d14>{:xfs:linvfs_getattr+36} 
<ffffffff8018f587>{vfs_getattr+39}
kernel:        <ffffffff8018f620>{vfs_stat+64} 
<ffffffff801201ff>{sys32_stat64+31}
kernel:        <ffffffff8011fd01>{ia32_sysret+0}

kernel: glftpd        D ffff81003438c000     0  2525  11733         27183  2386 
(NOTLB)
kernel: ffff810005f13ce8 0000000000000086 ffff810005f13c40 0000000000000000
kernel:        0000000000000286 0000000000000000 00000001ffffffff 
0000000000000000
kernel:        ffff810005f13d48 ffff810005f13cc8
kernel: Call Trace:<ffffffff8019fa8f>{__d_lookup+159} 
<ffffffff803deea1>{__down_read+129}
kernel:        <ffffffff880fc211>{:xfs:xfs_getattr+65} 
<ffffffff8810d23b>{:xfs:vn_revalidate+59}
kernel:        <ffffffff8019525f>{link_path_walk+415} 
<ffffffff88109d14>{:xfs:linvfs_getattr+36}
kernel:        <ffffffff8018f587>{vfs_getattr+39} 
<ffffffff8018f620>{vfs_stat+64}
kernel:        <ffffffff801201ff>{sys32_stat64+31} 
<ffffffff8011fd01>{ia32_sysret+0}

kernel: cclean.php    D ffff81003438c000     0 19166      1         13109  6766 
(NOTLB)
kernel: ffff81002688bce8 0000000000000082 ffff81002688bc40 0000000000000000
kernel:        0000000000000292 0000000000000001 00000001ffffffff 
0000000000000000
kernel:        0000000055960e80 ffff81002688a000
kernel: Call Trace:<ffffffff8019fa8f>{__d_lookup+159} 
<ffffffff803deea1>{__down_read+129}
kernel:        <ffffffff880fc211>{:xfs:xfs_getattr+65} 
<ffffffff8810d23b>{:xfs:vn_revalidate+59}
kernel:        <ffffffff8019525f>{link_path_walk+415} 
<ffffffff88109d14>{:xfs:linvfs_getattr+36}
kernel:        <ffffffff8018f587>{vfs_getattr+39} 
<ffffffff8018f620>{vfs_stat+64}
kernel:        <ffffffff801201ff>{sys32_stat64+31} 
<ffffffff8011fd01>{ia32_sysret+0}

kernel: glftpd        D ffff81003438c000     0 27183  11733         30995  2525 
(NOTLB)
kernel: ffff81005c3afa28 0000000000000082 0000000000000000 ffffffff803ddec3
kernel:        ffff81005c3afa48 0000000000000001 ffff81003787f0e0 
0000000000000092
kernel:        ffff81003c1dfb40 0000000000000000
kernel: Call Trace:<ffffffff803ddec3>{thread_return+0} 
<ffffffff803df1d8>{__down+152}
kernel:        <ffffffff8012dd50>{default_wake_function+0} 
<ffffffff803defca>{__down_failed+53}
kernel:        <ffffffff88106951>{:xfs:.text.lock.xfs_buf+25} 
<ffffffff88104d94>{:xfs:_pagebuf_find+372}
kernel:        <ffffffff88104e82>{:xfs:xfs_buf_get_flags+82} 
<ffffffff88104f8a>{:xfs:xfs_buf_read_flags+26}
kernel:        <ffffffff880f7feb>{:xfs:xfs_trans_read_buf+427} 
<ffffffff880b401c>{:xfs:xfs_alloc_read_agf+108}
kernel:        <ffffffff880b3a93>{:xfs:xfs_alloc_fix_freelist+291}
kernel:        <ffffffff80169b54>{cache_alloc_refill+100} 
<ffffffff803dee32>{__down_read+18}
kernel:        <ffffffff880b45b8>{:xfs:xfs_free_extent+152} 
<ffffffff880dd434>{:xfs:xfs_efd_init+68}
kernel:        <ffffffff880f865b>{:xfs:xfs_trans_get_efd+43} 
<ffffffff880c32d8>{:xfs:xfs_bmap_finish+232}
kernel:        <ffffffff880e53f4>{:xfs:xfs_itruncate_finish+420} 
<ffffffff880eb3a1>{:xfs:xfs_log_reserve+193}
kernel:        <ffffffff880fe15e>{:xfs:xfs_inactive+558} 
<ffffffff8810bfb1>{:xfs:linvfs_clear_inode+161}
kernel:        <ffffffff801a0b10>{clear_inode+224} 
<ffffffff801a1bfd>{generic_delete_inode+205}
kernel:        <ffffffff801a1e6b>{iput+123} <ffffffff801972c3>{sys_unlink+259}
kernel:        <ffffffff8011fd01>{ia32_sysret+0}

kernel: glftpd        D ffff81003438c000     0 30995  11733         32325 27183 
(NOTLB)
kernel: ffff81002bb1da28 0000000000000082 0000000000000292 ffffffff8035631a
kernel:        0000000000011200 0000000000000000 ffff810021f6d6c0 
ffffffff80227596
kernel:        ffff810002faab80 ffff810001e099e0
kernel: Call Trace:<ffffffff8035631a>{dm_request+122} 
<ffffffff80227596>{generic_make_request+262}
kernel:        <ffffffff8012c07b>{try_to_wake_up+1179} 
<ffffffff803df1d8>{__down+152}
kernel:        <ffffffff8012dd50>{default_wake_function+0} 
<ffffffff803defca>{__down_failed+53}
kernel:        <ffffffff88106951>{:xfs:.text.lock.xfs_buf+25} 
<ffffffff88104d94>{:xfs:_pagebuf_find+372}
kernel:        <ffffffff88104e82>{:xfs:xfs_buf_get_flags+82} 
<ffffffff88104f8a>{:xfs:xfs_buf_read_flags+26}
kernel:        <ffffffff880f7feb>{:xfs:xfs_trans_read_buf+427} 
<ffffffff880b401c>{:xfs:xfs_alloc_read_agf+108}
kernel:        <ffffffff880f7075>{:xfs:_xfs_trans_commit+757} 
<ffffffff880b3a93>{:xfs:xfs_alloc_fix_freelist+291}
kernel:        <ffffffff880f8917>{:xfs:xfs_trans_log_inode+39} 
<ffffffff803dee32>{__down_read+18}
kernel:        <ffffffff880b45b8>{:xfs:xfs_free_extent+152} 
<ffffffff880dd434>{:xfs:xfs_efd_init+68}
kernel:        <ffffffff880f865b>{:xfs:xfs_trans_get_efd+43} 
<ffffffff880c32d8>{:xfs:xfs_bmap_finish+232}
kernel:        <ffffffff880e53f4>{:xfs:xfs_itruncate_finish+420} 
<ffffffff880eb3a1>{:xfs:xfs_log_reserve+193}
kernel:        <ffffffff880fe15e>{:xfs:xfs_inactive+558} 
<ffffffff8810bfb1>{:xfs:linvfs_clear_inode+161}
kernel:        <ffffffff801a0b10>{clear_inode+224} 
<ffffffff801a1bfd>{generic_delete_inode+205}
kernel:        <ffffffff801a1e6b>{iput+123} <ffffffff801972c3>{sys_unlink+259}
kernel:        <ffffffff8011fd01>{ia32_sysret+0}

kernel: ls            D ffff81003438c000     0 11984      1          6766 31054 
(NOTLB)
kernel: ffff81001e1d1ce8 0000000000000082 0000000000002002 0000000000000000
kernel:        0000000000002000 0000000000000001 0000000000000000 
ffffffff880b0b5b
kernel:        0000000000000292 ffff8100218a15a8
kernel: Call Trace:<ffffffff880b0b5b>{:xfs:xfs_acl_get_attr+91} 
<ffffffff8019fa8f>{__d_lookup+159}
kernel:        <ffffffff803deea1>{__down_read+129} 
<ffffffff880fc211>{:xfs:xfs_getattr+65}
kernel:        <ffffffff8810d23b>{:xfs:vn_revalidate+59} 
<ffffffff8019525f>{link_path_walk+415}
kernel:        <ffffffff88109d14>{:xfs:linvfs_getattr+36} 
<ffffffff8018f587>{vfs_getattr+39}
kernel:        <ffffffff8018f68d>{vfs_lstat+61} 
<ffffffff80232915>{_atomic_dec_and_lock+53}
kernel:        <ffffffff8012024f>{sys32_lstat64+31} 
<ffffffff801a3d14>{mntput_no_expire+36}
kernel:        <ffffffff8018502e>{filp_close+110} 
<ffffffff801850fe>{sys_close+174}
kernel:        <ffffffff8011fd01>{ia32_sysret+0}

kernel: glftpd        D ffff81003438c000     0 18288  11733         18297 17046 
(NOTLB)
kernel: ffff8100271adce8 0000000000000082 0000000000000000 0000000000000292
kernel:        0000000000000292 0000000000000000 0000000000000008 
ffff81007b708260
kernel:        0000000000000292 ffffffff880e3596
kernel: Call Trace:<ffffffff880e3596>{:xfs:xfs_iunlock+102} 
<ffffffff8019fa8f>{__d_lookup+159}
kernel:        <ffffffff803deea1>{__down_read+129} 
<ffffffff880fc211>{:xfs:xfs_getattr+65}
kernel:        <ffffffff8810d23b>{:xfs:vn_revalidate+59} 
<ffffffff8019525f>{link_path_walk+415}
kernel:        <ffffffff88109d14>{:xfs:linvfs_getattr+36} 
<ffffffff8018f587>{vfs_getattr+39}
kernel:        <ffffffff8018f620>{vfs_stat+64} 
<ffffffff801a1ee4>{update_atime+68}
kernel:        <ffffffff801201ff>{sys32_stat64+31} 
<ffffffff8011fd01>{ia32_sysret+0}


interesting parts of the kernel .config:
...
CONFIG_XFS_FS=m
CONFIG_XFS_EXPORT=y
# CONFIG_XFS_QUOTA is not set
# CONFIG_XFS_SECURITY is not set
CONFIG_XFS_POSIX_ACL=y
# CONFIG_XFS_RT is not set
...
CONFIG_VSERVER=y
CONFIG_VSERVER_SECURITY=y
CONFIG_VSERVER_LEGACYNET=y
CONFIG_VSERVER_LEGACY=y
# CONFIG_VSERVER_LEGACY_VERSION is not set
CONFIG_VSERVER_DYNAMIC_IDS=y
# CONFIG_VSERVER_NGNET is not set
CONFIG_VSERVER_COWBL=y                (hm, this one is shady for me..!)
CONFIG_VSERVER_PROC_SECURE=y
CONFIG_VSERVER_HARDCPU=y
# CONFIG_VSERVER_IDLETIME is not set
# CONFIG_VSERVER_IDLELIMIT is not set
# CONFIG_INOXID_NONE is not set
# CONFIG_INOXID_UID16 is not set
# CONFIG_INOXID_GID16 is not set
CONFIG_INOXID_UGID24=y
# CONFIG_INOXID_INTERN is not set
# CONFIG_INOXID_RUNTIME is not set
# CONFIG_XID_TAG_NFSD is not set
# CONFIG_XID_PROPAGATE is not set
# CONFIG_VSERVER_DEBUG is not set
...




thanks for your attention


-- 
 d

_______________________________________________
Vserver mailing list
Vserver@list.linux-vserver.org
http://list.linux-vserver.org/mailman/listinfo/vserver

Reply via email to