> Begin forwarded message:
> 
> From: "Gary E. Gorbet" <gegor...@gmail.com>
> Subject: cluster status function in Airavata
> Date: October 7, 2015 at 5:30:22 PM CDT
> To: SciGaP Dev <d...@scigap.org>
> 
> The two text files attached illustrate a Gateway function that is currently 
> performed by direct ssh commands. I would like to see that functionality 
> moved to Airavata for two main reasons:
> 
> (1) Any refinements for changes on clusters or for new clusters would be 
> centralized; and
> 
> (2) Gateway servers on multiple hosts would reference common APIs.
> 
> The first attached file - cluster_status.php-local_status.txt - is the code 
> portion from a local_status() function within the UltraScan gateway script 
> cluster_status.php. This code portion shows the ssh commands issued for each 
> of a list of clusters. The parsed command output reveals the count of running 
> jobs and queued jobs for each cluster. That information is used on the 
> gateway submit page to hint at likely wait-in-queue-status time. My proposal 
> is that a Thrift client API would return this information, using code on the 
> Thrift server similar to that in the sample PHP script.
> 
> The second attached file shows a bash shell script executed remotely on the 
> Jureca cluster at the PRACE Juelich center in Germany. This special script 
> was found to be necessary because none of the normal queue status commands ( 
> sinfo, qstat, ... ) were found to return reliable information. The script 
> shown basically just issues squeue commands and counts the lines returned as 
> a way of counting PENDING and RUNNING jobs in the “batch” queue on Jureca.
> 
> The attached image file shows how the job count information is used at the 
> submit stage of the UltraScan gateway.
> 
> - Gary
> 
> 
> 
> 
> 
> 
...  
// Get local cluster status

// Return $parts[ $idx ], or $default when that element is missing or empty.
function local_status_field( $parts, $idx, $default )
{
   if ( isset( $parts[ $idx ] )  &&  $parts[ $idx ] !== '' )
      return $parts[ $idx ];

   return $default;
}

// Run $command on $host through ssh and return its output split on
// whitespace.  The output is not trimmed first, so field positions are
// those of the raw remote output.
function local_status_query( $host, $command )
{
   $output = `ssh $host '$command'`;

   return preg_split( '/\s+/', (string)$output );
}

// Query every known cluster over ssh and append one entry per cluster to
// the global $data array.  Each entry is an array with the keys
//   'cluster'  cluster name
//   'queued'   count parsed from the scheduler output ("0" when down)
//   'running'  count parsed from the scheduler output ("0" when down)
//   'status'   "up" or "down" (comet passes through what sinfo reports)
// alamo and jacinto are additionally listed a second time under the name
// "<cluster>-local" with the same counts.
// A cluster whose command yields no usable output is reported as "down"
// with zero counts.
function local_status()
{
   global $self;
   global $data;

   $clusters = array( "alamo", "lonestar", "stampede", "comet", "gordon",
                      "jureca", "jacinto" );

   // Torque clusters: host, qstat path, index of the status column in the
   // last line of "qstat -B".
   $torque   = array(
      'alamo'   => array( "u...@alamo.uthscsa.edu",   '/usr/bin/qstat',        10 ),
      'jacinto' => array( "u...@jacinto.uthscsa.edu", '/opt/torque/bin/qstat',  9 ),
      'gordon'  => array( "u...@gordon.sdsc.edu",     '/opt/torque/bin/qstat', 10 ),
   );

   // showq clusters: host, showq command.
   $showq    = array(
      'stampede' => array( "u...@stampede.tacc.utexas.edu", '/usr/local/bin/showq' ),
      'lonestar' => array( "u...@lonestar.tacc.utexas.edu", 'showq' ),
   );

   foreach ( $clusters as $clname )
   {
      $a      = Array();

      // Start every cluster from a known state so nothing leaks over from
      // the previous iteration.
      $que    = "0";
      $run    = "0";
      $sta    = "down";

      switch( $clname )
      {
         case 'alamo':
         case 'jacinto':
         case 'gordon':
         {
            list( $host, $cmd, $staix ) = $torque[ $clname ];
            $sparts = local_status_query( $host, "$cmd -B 2>&1|tail -1" );
            $que    = local_status_field( $sparts, 3, "0" );
            $run    = local_status_field( $sparts, 4, "0" );
            $state  = local_status_field( $sparts, $staix, "" );
            $sta    = ( $state == "Active" ) ? "up" : "down";
            break;
         }
         case 'stampede':
         case 'lonestar':
         {
            list( $host, $cmd ) = $showq[ $clname ];
            $sparts = local_status_query( $host, "$cmd 2>&1|tail -1" );
            $tot    = local_status_field( $sparts, 2, "" );

            // A missing or zero total in the showq summary line means the
            // scheduler is not serving jobs.
            if ( $tot == ''  ||  $tot == '0' )
            {
               $sta    = "down";
            }
            else
            {
               $sta    = "up";
               $run    = local_status_field( $sparts, 5, "0" );
               $que    = local_status_field( $sparts, 8, "0" );
            }
            break;
         }
         case 'comet':
         {
            // The command must stay on one line: a newline between "tail"
            // and "-1" would make the remote shell run them separately.
            $sparts = local_status_query( "u...@comet.sdsc.edu",
               '/usr/bin/sinfo -s -p compute -o "%a %F" |tail -1' );
            $sta    = local_status_field( $sparts, 0, "down" );

            // Second field is a "/"-separated list of counts; the first
            // two entries are used as running and queued.
            $knts   = preg_split( '/\//', local_status_field( $sparts, 1, "" ) );
            $run    = local_status_field( $knts, 0, "0" );
            $que    = local_status_field( $knts, 1, "0" );
            break;
         }
         case 'jureca':
         {
            $sparts = local_status_query( "sw...@jureca.fz-juelich.de",
               '~swus1/scripts/qstat-jureca 2>&1' );
            $sta    = local_status_field( $sparts, 0, "down" );
            $run    = local_status_field( $sparts, 1, "0" );
            $que    = local_status_field( $sparts, 2, "0" );

            // The remote script prints only "up" or "down"; anything else
            // is error text captured through 2>&1.
            if ( $sta != "up" )
               $sta    = "down";
            break;
         }
      }

      if ( $sta == "down" )
      {
         $que    = "0";
         $run    = "0";
      }

      $a[ 'cluster' ] = $clname;
      $a[ 'queued'  ] = $que;
      $a[ 'running' ] = $run;
      $a[ 'status'  ] = $sta;

      $data[] = $a;

      if ( $clname == 'alamo'  ||  $clname == 'jacinto' )
      {
         $a[ 'cluster' ] = $clname . "-local";
         $data[] = $a;
      }
   }
}

...
#!/bin/bash
#       qstat-jureca    - count queued/running jobs on Jureca
#
# Prints a single line "<status> <running> <queued> <total>" for the
# "batch" partition, where <status> is "up" when at least one job is
# running or pending and "down" otherwise.

# -h suppresses the squeue header line, so wc -l counts jobs only and the
# result can never go negative when squeue fails or prints nothing.
JRUN=$(squeue -h -t RUNNING -p batch | wc -l)
JQUE=$(squeue -h -t PENDING -p batch | wc -l)

# Arithmetic expansion also strips any padding wc may add to its output.
JRUN=$(( JRUN + 0 ))
JQUE=$(( JQUE + 0 ))
JOBS=$(( JQUE + JRUN ))

QSTA="up"
if [ "$JOBS" -lt 1 ]; then
  QSTA="down"
fi

echo "$QSTA $JRUN $JQUE $JOBS"

> 
> 
> 

Reply via email to