Hi Thomas,
Here are the requested outputs. The first set (A) was taken while rbh-find
was still running, and the second (B) after it got stuck or, to be more
precise, stopped generating any output.
Command executed: nohup rbh-find -f /etc/robinhood.d/tmpfs/lustre.conf
-type f -atime +90d -ls > purge_list.txt &
A)
[root@hpc1 ~]# gstack 441
#0 0x0000003df340e4ed in read () from /lib64/libpthread.so.0
#1 0x00007f007e3208d9 in vio_read_buff () from
/opt/rocks/lib/mysql/libmysqlclient_r.so.16
#2 0x00007f007e3219ef in my_real_read () from
/opt/rocks/lib/mysql/libmysqlclient_r.so.16
#3 0x00007f007e321de5 in my_net_read () from
/opt/rocks/lib/mysql/libmysqlclient_r.so.16
#4 0x00007f007e31c2da in cli_safe_read () from
/opt/rocks/lib/mysql/libmysqlclient_r.so.16
#5 0x00007f007e31caa9 in cli_read_query_result () from
/opt/rocks/lib/mysql/libmysqlclient_r.so.16
#6 0x00007f007e31aefc in mysql_real_query () from
/opt/rocks/lib/mysql/libmysqlclient_r.so.16
#7 0x000000000044844d in _db_exec_sql ()
#8 0x000000000043c864 in listmgr_get_by_pk ()
#9 0x000000000044246f in ListMgr_GetNext ()
#10 0x000000000040aa19 in list_bulk ()
#11 0x000000000040c1fa in main ()
mysql> show processlist;
+------+-----------+------------------+------------------+---------+--------+--------------+------------------------------------------------------------------------------------------------------+
| Id | User | Host | db | Command | Time
| State | Info
|
+------+-----------+------------------+------------------+---------+--------+--------------+------------------------------------------------------------------------------------------------------+
| 1116 | slurm | hpc1.local:59434 | slurm_acct_db | Sleep | 2006
| | NULL
|
| 1119 | slurm | hpc1.local:33625 | slurm_acct_db | Sleep | 220
| | NULL
|
| 1732 | robinhood | localhost | robinhood_lustre | Query | 171175
| NULL | SELECT CONCAT( n, '/', p) INTO p
|
| 1778 | robinhood | localhost | robinhood_lustre | Query | 87611
| NULL | SELECT parent_id, name INTO pid, n from NAMES where id=
NAME_CONST('pid',_latin1'DAB1E06F:2218003E' |
| 1781 | robinhood | localhost | robinhood_lustre | Query | 86300
| NULL | SELECT parent_id, name INTO pid, n from NAMES where id=
NAME_CONST('pid',_latin1'DAB1E06F:22180001' |
| 1799 | robinhood | localhost | robinhood_lustre | Query | 85521
| Sending data | SELECT parent_id, name INTO pid, n from NAMES where id=
NAME_CONST('pid',_latin1'DAB1E06F:22180001' |
| 1802 | robinhood | localhost | robinhood_lustre | Query | 85250
| NULL | SELECT CONCAT( n, '/', p) INTO p
|
| 1844 | robinhood | localhost | robinhood_lustre | Query | 0
| init | SELECT
owner,gr_name,size,last_access,last_mod,type,mode,nlink,link,this_path(parent_id,name)
FROM E |
| 1849 | root | localhost | NULL | Query | 0
| NULL | show processlist
|
+------+-----------+------------------+------------------+---------+--------+--------------+------------------------------------------------------------------------------------------------------+
9 rows in set (0.01 sec)
B)
[root@hpc1 ~]# gstack 441
#0 0x0000003df340e4ed in read () from /lib64/libpthread.so.0
#1 0x00007f007e3208d9 in vio_read_buff () from
/opt/rocks/lib/mysql/libmysqlclient_r.so.16
#2 0x00007f007e3219ef in my_real_read () from
/opt/rocks/lib/mysql/libmysqlclient_r.so.16
#3 0x00007f007e321de5 in my_net_read () from
/opt/rocks/lib/mysql/libmysqlclient_r.so.16
#4 0x00007f007e31c2da in cli_safe_read () from
/opt/rocks/lib/mysql/libmysqlclient_r.so.16
#5 0x00007f007e31caa9 in cli_read_query_result () from
/opt/rocks/lib/mysql/libmysqlclient_r.so.16
#6 0x00007f007e31aefc in mysql_real_query () from
/opt/rocks/lib/mysql/libmysqlclient_r.so.16
#7 0x000000000044844d in _db_exec_sql ()
#8 0x000000000043c864 in listmgr_get_by_pk ()
#9 0x000000000044246f in ListMgr_GetNext ()
#10 0x000000000040aa19 in list_bulk ()
#11 0x000000000040c1fa in main ()
mysql> show processlist;
+------+-----------+------------------+------------------+---------+--------+--------------+------------------------------------------------------------------------------------------------------+
| Id | User | Host | db | Command | Time
| State | Info
|
+------+-----------+------------------+------------------+---------+--------+--------------+------------------------------------------------------------------------------------------------------+
| 1116 | slurm | hpc1.local:59434 | slurm_acct_db | Sleep | 2169
| | NULL
|
| 1119 | slurm | hpc1.local:33625 | slurm_acct_db | Sleep | 82
| | NULL
|
| 1732 | robinhood | localhost | robinhood_lustre | Query | 171338
| statistics | SELECT parent_id, name INTO pid, n from NAMES where id=
NAME_CONST('pid',_latin1'DAB1E06F:22180001' |
| 1778 | robinhood | localhost | robinhood_lustre | Query | 87774
| statistics | SELECT parent_id, name INTO pid, n from NAMES where id=
NAME_CONST('pid',_latin1'DAB1E06F:2218003E' |
| 1781 | robinhood | localhost | robinhood_lustre | Query | 86463
| Sending data | SELECT parent_id, name INTO pid, n from NAMES where id=
NAME_CONST('pid',_latin1'DAB1E06F:2218003E' |
| 1799 | robinhood | localhost | robinhood_lustre | Query | 85684
| Sending data | SELECT parent_id, name INTO pid, n from NAMES where id=
NAME_CONST('pid',_latin1'DAB1E06F:2218003E' |
| 1802 | robinhood | localhost | robinhood_lustre | Query | 85413
| Sending data | SELECT parent_id, name INTO pid, n from NAMES where id=
NAME_CONST('pid',_latin1'DAB1E06F:22180001' |
| 1844 | robinhood | localhost | robinhood_lustre | Query | 140
| Sending data | SELECT parent_id, name INTO pid, n from NAMES where id=
NAME_CONST('pid',_latin1'DAB1E06F:2218003E' |
| 1851 | root | localhost | NULL | Query | 0
| NULL | show processlist
|
+------+-----------+------------------+------------------+---------+--------+--------------+------------------------------------------------------------------------------------------------------+
9 rows in set (0.00 sec)
mysql> show processlist;
+------+-----------+------------------+------------------+---------+--------+------------+------------------------------------------------------------------------------------------------------+
| Id | User | Host | db | Command | Time
| State | Info
|
+------+-----------+------------------+------------------+---------+--------+------------+------------------------------------------------------------------------------------------------------+
| 1116 | slurm | hpc1.local:59434 | slurm_acct_db | Sleep | 2537
| | NULL
|
| 1119 | slurm | hpc1.local:33625 | slurm_acct_db | Sleep | 151
| | NULL
|
| 1732 | robinhood | localhost | robinhood_lustre | Query | 171706
| NULL | SELECT CONCAT( n, '/', p) INTO p
|
| 1778 | robinhood | localhost | robinhood_lustre | Query | 88142
| init | SELECT parent_id, name INTO pid, n from NAMES where id=
NAME_CONST('pid',_latin1'DAB1E06F:2218003E' |
| 1781 | robinhood | localhost | robinhood_lustre | Query | 86831
| NULL | SELECT CONCAT( n, '/', p) INTO p
|
| 1799 | robinhood | localhost | robinhood_lustre | Query | 86052
| NULL | SELECT parent_id, name INTO pid, n from NAMES where id=
NAME_CONST('pid',_latin1'DAB1E06F:22180001' |
| 1802 | robinhood | localhost | robinhood_lustre | Query | 85781
| NULL | SELECT CONCAT( n, '/', p) INTO p
|
| 1844 | robinhood | localhost | robinhood_lustre | Query | 508
| statistics | SELECT parent_id, name INTO pid, n from NAMES where id=
NAME_CONST('pid',_latin1'DAB1E06F:22180001' |
| 1854 | root | localhost | NULL | Query | 0
| NULL | show processlist
|
+------+-----------+------------------+------------------+---------+--------+------------+------------------------------------------------------------------------------------------------------+
9 rows in set (0.00 sec)
Cheers,
Wissam
On Wed, Jul 8, 2015 at 5:54 PM, LEIBOVICI Thomas <[email protected]>
wrote:
> Hi Wissam,
>
> To understand what is going on, can you run the following commands:
>
> 1) gstack the rbh-find process (gstack is part of "gdb" RPM)
> # gstack <pid>
>
> 2) list current DB requests:
> # mysql <rbh_db>
>
> mysql> SHOW PROCESSLIST;
>
> Regards,
>
>
>
> On 07/08/15 09:50, Wissam Ali wrote:
>
> Hi Support,
> I have recently finished a scan of scratch. We are currently using version
> 2.5.1-1 on a lustre 1.8 filesystem. I am running an rbh-find to get a list
> of files older than 90 days. After 15 minutes rbh-find gets stuck and does
> not produce any output. I am not sure why it gets stuck. I don't see any
> errors in the robinhood log files. This is the first time it has happened to me.
> Below are the commands executed for rbh-find
>
> 1. nohup rbh-find -f /etc/robinhood.d/tmpfs/lustre.conf -nobulk -type f
> -atime +90d -ls > purge_list.txt &
> 2. nohup rbh-find -f /etc/robinhood.d/tmpfs/lustre.conf -type f -atime
> +90d -ls > purge_list.txt &
> 3. rbh-find -f /etc/robinhood.d/tmpfs/lustre.conf -type f -atime +90d -ls
>
> Command used to scan the Lustre filesystem:
> nohup robinhood -S -O -f /etc/robinhood.d/tmpfs/lustre.conf &
>
> I have also attached my lustre.conf file.
>
> Regards,
>
> --
>
> *Wissam Ali* HPC System Administrator
> Office Tel (UAE): +971-2-628-4751
> Mobile Tel (UAE): +971-50-106-9689
> Time Zone: GMT+4hrs
> Weekends: Friday + Saturday
> For HPC support please create a ticket at https://nyu.service-now.com
>
> NYU Abu Dhabi, Saadiyat Campus
> Computational Research Building A2
> P.O. Box 129188
> Abu Dhabi, United Arab Emirates
>
>
> ------------------------------------------------------------------------------
> Don't Limit Your Business. Reach for the Cloud.
> GigeNET's Cloud Solutions provide you with the tools and support that
> you need to offload your IT needs and focus on growing your business.
> Configured For All Businesses. Start Your Cloud
> Today. https://www.gigenetcloud.com/
>
>
>
> _______________________________________________
> robinhood-support mailing list
> [email protected]
> https://lists.sourceforge.net/lists/listinfo/robinhood-support
>
>
>
--
*Wissam Ali* HPC System Administrator
Office Tel (UAE): +971-2-628-4751
Mobile Tel (UAE): +971-50-106-9689
Time Zone: GMT+4hrs
Weekends: Friday + Saturday
For HPC support please create a ticket at https://nyu.service-now.com
NYU Abu Dhabi, Saadiyat Campus
Computational Research Building A2
P.O. Box 129188
Abu Dhabi, United Arab Emirates
------------------------------------------------------------------------------
Don't Limit Your Business. Reach for the Cloud.
GigeNET's Cloud Solutions provide you with the tools and support that
you need to offload your IT needs and focus on growing your business.
Configured For All Businesses. Start Your Cloud Today.
https://www.gigenetcloud.com/
_______________________________________________
robinhood-support mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/robinhood-support