I cleaned up the code a little. So, here it is for anyone interested:
#!usr/bin/perl
# By Michael S. Robeson II with the help from the folks at lernperl.org and bioperl.org
# 10/16/2004
# Last updated: 10/17/2004
# This script was made for the purpose of searching for indels (gaps) in aligned
# DNA or protein sequences that are in FASTA format. It tallys up all of the different
# size gaps within each sequence string. While it does this it counts the number of
# times each gap of a given size is represented in each sequence and at the same time
# reports all of the positions that that particular "gap-size" or indel appears.
# contact: [EMAIL PROTECTED] if you have questions or comments
use warnings; use strict;
######################### # Introduction #########################
print "\n\t**Welcome to Mike Robeson's Gap-Counting Script!**\n
A - Just be sure that your sequence alignment file
is in FASTA format!
B - Make sure there are no duplicate names within an individual file!
C - Output file will be based on the name of the input file. It is named
by appending \'indel_list_\' to the name of your input file.\n\n";
############################### # Open Sequence Data & OUTFILE ###############################
print "Enter in the name of the DNA sequence file:\n"; chomp (my $dna_seq = <STDIN>);
open(DNA_SEQ, $dna_seq) or die "Can't open file: $!\n";
open(OUTFILE, ">indel_list_"."$dna_seq") or die "Can't open outfile: $!\n";
################################# # Read sequence data into a hash #################################
my %sequences;
$/ = '>';
print "\n***Discovered the following DNA sequences:***\n";
while ( <DNA_SEQ> ) { chomp; next unless s/^\s*(.+)//; my $name = $1; s/\s//g; $sequences{$name} = $_; print "$name found!\n"; print"\n"; } close DNA_SEQ;
###################################### # Iterate over gaps and write to file ######################################
foreach (keys %sequences) { print "\t\t\>\>\>\>\>\> $_ \<\<\<\<\<\<\n"; print OUTFILE "\>\>\>\>\>\> $_ \<\<\<\<\<\<\n"; my $dna = $sequences{$_}; my %gap_data; my %position;
while ($dna =~ /(\-+)/g) { my $gap_length = length $1; my $gap_pos = pos ($dna) - $gap_length + 1; $gap_data{$gap_length}++; $position{$gap_length} .= "$gap_pos"." \n"; } my @indels = keys (%gap_data); my @keys = sort { $a <=> $b} @indels;
foreach my $key (@keys) {
print "Indel size:\t$key\tTimes found:\t$gap_data{$key}\n";
print OUTFILE "Indel size:\t$key\tTimes found:\t$gap_data{$key}\n";
print "Positions:\n";
print OUTFILE "Positions:\n";
print "$position{$key}";
print OUTFILE "$position{$key}";
print "\n";
print OUTFILE "\n";
}
}
-- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED] <http://learn.perl.org/> <http://learn.perl.org/first-response>