Hi,

Am 05.05.2011 um 10:50 schrieb Stefano Bridi:

> Hi all
> I'm trying to configure openmpi with tight integration in an old SGE
> installation (6.1u3).
> In the past I had configured all the rsh/rlogin/qlogin "daemon" and
> "command" to "/usr/sbin/sshd -i" and "/usr/bin/ssh".
> Now I'm trying to go back to the builtin configuration but I get the
> strange error in the subject:

AFAIK the "builtin" method was only introduced in the 6.2 series, so it is not
available in your 6.1u3 installation. You either have to upgrade or stay with
ssh/rsh.

-- Reuti


> error: 1: rsh_daemon "builtin" is not an absolute path
> 
> If I test with an interactive job
> "qrsh -pe orte 4"
> I get almost the same error
> error: 1: rlogin_daemon "builtin" is not an absolute path
> 
> plus a queue in error state.
> 
> Can anyone tell me what is going on?
> 
> thanks
> stef
> 
> Here are some details of the setup:
> 
> There are two queues for this test: "n0000.q" and "n0001.q" configured
> in the same way.
> There is one dedicated parallel environment "orte"
> 
> 
> # qconf -sp orte
> pe_name           orte
> slots             8
> user_lists        NONE
> xuser_lists       NONE
> start_proc_args   /bin/true
> stop_proc_args    /bin/true
> allocation_rule   $round_robin
> control_slaves    TRUE
> job_is_first_task FALSE
> urgency_slots     min
> 
> 
> # qconf -sconf
> global:
> execd_spool_dir              /sge/default/spool
> mailer                       /bin/mail
> xterm                        /usr/bin/X11/xterm
> load_sensor                  none
> prolog                       none
> epilog                       none
> shell_start_mode             posix_compliant
> login_shells                 sh,ksh,csh,tcsh
> min_uid                      0
> min_gid                      0
> user_lists                   none
> xuser_lists                  none
> projects                     none
> xprojects                    none
> enforce_project              false
> enforce_user                 auto
> load_report_time             00:00:40
> max_unheard                  00:05:00
> reschedule_unknown           00:00:00
> loglevel                     log_warning
> administrator_mail           [email protected]
> set_token_cmd                none
> pag_cmd                      none
> token_extend_time            none
> shepherd_cmd                 none
> qmaster_params               none
> execd_params                 enable_windomacc=true
> reporting_params             accounting=true reporting=false \
>                             flush_time=00:00:15 joblog=false sharelog=00:00:00
> finished_jobs                100000
> gid_range                    20000-20100
> qlogin_command               builtin
> qlogin_daemon                builtin
> rlogin_daemon                builtin
> rlogin_command               builtin
> max_aj_instances             2000
> max_aj_tasks                 75000
> max_u_jobs                   0
> max_jobs                     0
> auto_user_oticket            0
> auto_user_fshare             0
> auto_user_default_project    none
> auto_user_delete_time        86400
> delegated_file_staging       false
> reprioritize                 false
> rsh_daemon                   builtin
> rsh_command                  builtin
> 
> # qconf -sconf n0000
> configuration n0000 not defined
> 
> # qconf -sconf n0001
> configuration n0001 not defined
> 
> # qconf -sq n0000.q
> qname                 n0000.q
> hostlist              n0000
> seq_no                0
> load_thresholds       np_load_avg=1.75
> suspend_thresholds    NONE
> nsuspend              1
> suspend_interval      00:05:00
> priority              0
> min_cpu_interval      00:05:00
> processors            UNDEFINED
> qtype                 BATCH INTERACTIVE
> ckpt_list             NONE
> pe_list               test2-smp test2-smp test3 fds test4 test1-smp test1-mmp \
>                      orte
> rerun                 FALSE
> slots                 4
> tmpdir                /tmp
> shell                 /bin/bash
> prolog                NONE
> epilog                NONE
> shell_start_mode      posix_compliant
> starter_method        NONE
> suspend_method        NONE
> resume_method         NONE
> terminate_method      NONE
> notify                00:00:60
> owner_list            NONE
> user_lists            NONE
> xuser_lists           NONE
> subordinate_list      NONE
> complex_values        NONE
> projects              NONE
> xprojects             NONE
> calendar              NONE
> initial_state         default
> s_rt                  INFINITY
> h_rt                  INFINITY
> s_cpu                 INFINITY
> h_cpu                 INFINITY
> s_fsize               INFINITY
> h_fsize               INFINITY
> s_data                INFINITY
> h_data                INFINITY
> s_stack               INFINITY
> h_stack               INFINITY
> s_core                INFINITY
> h_core                INFINITY
> s_rss                 INFINITY
> h_rss                 INFINITY
> s_vmem                INFINITY
> h_vmem                INFINITY
> #
> 
> The test job I'm using is
> ------------------8<------------------8<----------------8<-------------------
> #!/bin/sh
> 
> #$ -N prova
> #$ -pe orte 4
> #$ -cwd
> 
> /sw/openmpi/141/bin/mpirun -v -np 4 -mca btl openib,self -mca ras gridengine mppexe-openmpi-141
> 
> ------------------8<------------------8<----------------8<-------------------
> 
> the stderr of this job is:
> 
> ------------------8<------------------8<----------------8<-------------------
> error: 1: rsh_daemon "builtin" is not an absolute path
> 
> --------------------------------------------------------------------------
> A daemon (pid 8206) died unexpectedly with status 1 while attempting
> to launch so we are aborting.
> 
> There may be more information reported by the environment (see above).
> 
> This may be because the daemon was unable to find all the needed shared
> libraries on the remote node. You may set your LD_LIBRARY_PATH to have the
> location of the shared libraries on the remote nodes and this will
> automatically be forwarded to the remote nodes.
> --------------------------------------------------------------------------
> --------------------------------------------------------------------------
> mpirun noticed that the job aborted, but has no info as to the process
> that caused that situation.
> --------------------------------------------------------------------------
> mpirun: clean termination accomplished
> ------------------8<------------------8<----------------8<-------------------
> _______________________________________________
> users mailing list
> [email protected]
> https://gridengine.org/mailman/listinfo/users


_______________________________________________
users mailing list
[email protected]
https://gridengine.org/mailman/listinfo/users

Reply via email to