#!/usr/bin/perl

###############################################################################
#
# 			     auto-blast-human                                       #
#									      #
#it takes a file having qurey sequences and splits them to individiual sequences#then it does the blasting aginst the target database....
#then it parses the output according to the specification of the user and writes#those that satisfy the condition in to a new file.
#									      #
# output file name is uniquehits               				      #
#									      #
#	this modifed script (18th sep) compares the plsmodium seq with        #
#       all human swiss prot entries by blast and writes the seq name of      #
#       the pfa which is satisfying the given conditions...                   #
#                                                                             #
###############################################################################




print " enter the file which contains the fasta sequences to be blasted :\n";
$fn = <>;
chomp ($fn);

@fullarray = fastaspliter($fn); #sub to make indi fasta seq in to array element

 $fileno ="0";   
$count = '0';
foreach $seq (@fullarray)
{
    
   ++$fileno;
    $outfile = "sequence_";
    $outfilename = $outfile.$fileno;
 
    unless (open(WRITE, ">$outfilename"))
    { print "could not open the file to write \n";}
    print WRITE ">$seq";
    close (WRITE);   
    
    $blastoutname = 'blast_out_'.$fileno ;
    system ("blastall -p blastp -d humanprot -o $blastoutname -i $outfilename");
   sleep 3;
    blastparsing($blastoutname);         #sub parses the stuff
    
    $count = ++$count;
    print "$count th sequence is over\n";
   

    system ("rm $blastoutname");         # removes unwanted things
    system ("rm $outfilename" );
}
exit;




#-------------------------------------------------------------------------------
#-------------------------------------------------------------------------------

# it is to split no of fasta sequences from  a file given its name.......
#BUG:the resultent array 'll consist of each sequence as an element of the array
# but the very first elment of the array will NOT have anything in it.....

sub fastaspliter{
my($fn) = @_;
use strict;
use warnings;

my(@array)=[];
my($fasta)="";
my(@fasta)=[];

chomp ($fn);

unless (open (FILE, $fn))
{ print " could not open the fasta file $fn \n "; }
@array = <FILE>;
$fasta = join ('', @array);
@fasta = split ('>',$fasta);

shift (@fasta);   # removes first empty element............

return @fasta;
}
#-------------------------------------------------------------------------------


###############################################################################
#*******  blast-parser-sub        sub routine
#
#to extract information from the blast out put.....
#
#it parses out the blast outputs which are having a minimum level of bitscore /#   e-value, ientity, positives...... 
#
# u can limit the size of hit compared to the original query or to any length..
#
# to know the identities, e values...scores... gaps....etc..
#
###############################################################################



sub blastparsing {

use strict;
use warnings;

my ($fn_in) = @_;


my $file_line = "";      #declaring the variables....
my $fullhit ="";
my @name = [];
my $fn_out = "";
my @fullhits = [];
my $singlehit = "";
my $eachline = "";
my @alllines = [];
my @arraye= [];
my $evalue ="";
my @arrayscore =[];
my $score = "";
my $bitscore = "";
my @array2 = [];
my $identities = "";
my $positive = "";
my $gaps = "";
my $idntity_up = "";
my $total_length = "";
my $perc_identities = "";
my $perc_positives =   "";
my $perc_gaps = "";
my $half_query_length = "";
my $querylength = "";
my $tail ="";
my $control ="";
my $filename_out = "";
my $set = "";
chomp ($fn_in);                 # opens the file

unless (open(READ, $fn_in))
{ print " could not open the $fn_in file to read........\n";}

while ($file_line=<READ> )
{
    $fullhit .= $file_line;
    if ($file_line =~ m/Query=/ )        #finds the query name...
    {	
	@name = split ('\s+',$file_line);
	$tail = '_out';
	$fn_out = $name[1].$tail;
    }
    
    elsif ($file_line =~ m/letters\)/)
    {
	$file_line =~ s/letters\)//;
	$file_line =~ s/\s*\(//;
	$querylength = $file_line;
	chomp ($querylength);
	$querylength =~ s/\s//g;
    }

}
close (READ);
@fullhits = split ('>ref', $fullhit);     #splits each hit in to inid'l file

shift (@fullhits);
$control = '';
foreach $singlehit (@fullhits)       # extracts information for single hit.....
{
    @alllines = split ('\n', $singlehit);
    
    
    foreach $eachline (@alllines) {
	if ($eachline =~ m/Score =/)
	{ 
	    @arraye= split ('Expect = ', $eachline);
	    $evalue = $arraye[1];
	    @arrayscore = split ('\s+', $eachline); 
	    $score = $arrayscore[3];
	    $bitscore = $arrayscore[5];
	    chop $bitscore; chop $bitscore; #removes the paranthesis.....
	    $bitscore = reverse $bitscore;
	    chop $bitscore;
	    $bitscore = reverse $bitscore;
	    next;
	}
	
	elsif ($eachline =~ m/Identities =/)
	{
	    @array2 = split ('\s+', $eachline);
	    $identities =  $array2[3];
	    $positive = $array2[7];
	    $gaps = $array2[11];
	    $idntity_up = $identities;
	    $idntity_up =~ s/\/\d+//g;
	    $total_length = $identities;    
	    $total_length =~ s/\d+\///g; 
	    $positive =~ s/\/\d+//g;
	   # $gaps =~ s/\/\d+//g;
	     next;	
	}
	else 
	{
	    next;
	}
    }
    $perc_identities = ($idntity_up/$total_length)*100; # calculates the data..
    $perc_positives = ($positive/$total_length)*100;
    #   $perc_gaps = ($gaps/$total_length)*100;
    $half_query_length = $querylength/2;

#
#sets the condition for parsing out...........
#
#


    if ($perc_identities > 50 && $total_length > $half_query_length )

    {   
	$control = 'Y';
	#print "$control";
	$set .= $control;
	#$filename_out = 'uniquehits';
	#unless ( open(RITE, ">>$fn_out"))
	#{print "could not open the file to write......"; }
	#print RITE ">$singlehit\n";
	#close (RITE);     
    }
    else
    {
	$control = 'N';
	#print "$control";
	$set .= $control;
	next;
    }
    
}
print "$set\n";
if (!($set =~ m/Y/))
{
    print "got one more\n";
    $filename_out = 'uniquehits';
    unless ( open(WRITE, ">>$filename_out"))
    {print "could not open the file to write......"; }
    print WRITE ">$name[1]\n";
    close (WRITE);
}
 
}
