I cleaned up the code a little. So, here it is for anyone interested:

#!usr/bin/perl
# By Michael S. Robeson II with the help from the folks at lernperl.org and bioperl.org
# 10/16/2004
# Last updated: 10/17/2004
# This script was made for the purpose of searching for indels (gaps) in aligned
# DNA or protein sequences that are in FASTA format. It tallys up all of the different
# size gaps within each sequence string. While it does this it counts the number of
# times each gap of a given size is represented in each sequence and at the same time
# reports all of the positions that that particular "gap-size" or indel appears.
# contact: [EMAIL PROTECTED] if you have questions or comments


use warnings;
use strict;

#########################
# Introduction
#########################

print "\n\t**Welcome to Mike Robeson's Gap-Counting Script!**\n
A - Just be sure that your sequence alignment file
is in FASTA format!
B - Make sure there are no duplicate names within an individual file!
C - Output file will be based on the name of the input file. It is named
by appending \'indel_list_\' to the name of your input file.\n\n";


###############################
# Open Sequence Data & OUTFILE
###############################

print "Enter in the name of the DNA sequence file:\n";
chomp (my $dna_seq = <STDIN>);

open(DNA_SEQ, $dna_seq)
        or die "Can't open file: $!\n";

open(OUTFILE, ">indel_list_"."$dna_seq")
        or die "Can't open outfile: $!\n";

#################################
# Read sequence data into a hash
#################################

my %sequences;

$/ = '>';    

print "\n***Discovered the following DNA sequences:***\n";

while ( <DNA_SEQ> ) {
        chomp;
        next unless s/^\s*(.+)//;
        my $name = $1;                          
        s/\s//g;                                        
        $sequences{$name} = $_;         
        print "$name found!\n";               
                                                                
        print"\n";
}
close DNA_SEQ;

######################################
# Iterate over gaps and write to file
######################################

foreach (keys %sequences) {                     
        print "\t\t\>\>\>\>\>\> $_ \<\<\<\<\<\<\n";               
        print OUTFILE "\>\>\>\>\>\> $_ \<\<\<\<\<\<\n";   
        
        my $dna = $sequences{$_};               
        my %gap_data;                                   
        my %position;                                   

        while ($dna =~ /(\-+)/g) {
                my $gap_length = length $1;
                my $gap_pos = pos ($dna) - $gap_length + 1;
                $gap_data{$gap_length}++;
                $position{$gap_length} .= "$gap_pos"." \n";
        }
        
        my @indels = keys (%gap_data);                  
        my @keys = sort { $a <=> $b} @indels;     

foreach my $key (@keys) {
print "Indel size:\t$key\tTimes found:\t$gap_data{$key}\n";
print OUTFILE "Indel size:\t$key\tTimes found:\t$gap_data{$key}\n";
print "Positions:\n";
print OUTFILE "Positions:\n";
print "$position{$key}";
print OUTFILE "$position{$key}";
print "\n";
print OUTFILE "\n";
}
}



-- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED] <http://learn.perl.org/> <http://learn.perl.org/first-response>




Reply via email to