I am trying to merge two file based on a SNP_A-###### list in each file. For
some reason, the regular expression in the if loop does not work and I can
not match the key values generated from hash to the string from the input
file. Could anybody help me detect the problem?
Thanks,
AG
#!usr/local/bin/perl
use strict;
use warnings;
open (DATA, "C:/perl/work/A172cn.txt") or die "Can not open file $!\n";
open (DATA2, "C:/perl/work/a127_gdas.txt") or die "Can not open file $! \n";
while(<DATA>)
{
my $mykey;
my $myvalue;
my %Hash;
my %mainhash = ();
next unless /^SNP/;
%Hash=getkeyvalue($mykey,$myvalue);
foreach $mykey (keys(%Hash))
{
my $inline ;
while($inline = <DATA2>)
{
next unless /SNP/;
#print "mykey $mykey my value: $Hash{$mykey} \n";
if($inline =~ m/($mykey)/)
{
print "$mykey $Hash{$mykey} $inline \n";
}
}
}
}
sub getkeyvalue
{
my @line = ();
my $value;
my $col;
@line = split('\t', $_);
$col = $line[0];
chomp $col;
$value =join("\t", $line[1], $line[2]);
return ($col, $value);
}
#__DATA__
#SNP_A-1509443 3 3776202
#SNP_A_1518557 3 3776202
#SNP_A_1514538 5 5350951
#SNP_A_1516403 1 5483872
#BFFX-BioB-M_at P P P P P A P
#[snip]
#__DATA2__
#SNP ID dbSNP RS ID Chromosome Physical Position TSC ID
A172_Call A172_Call Zone
#7085 SNP_A-1509443 rs1393064 1 2882121 TSC0565952 AA
0.02861
#4900 SNP_A-1518557 rs966321 1 3985402 TSC0273278 AA
0.152388
#8258 SNP_A-1517286 rs1599169 1 4804829 TSC0694296 BB
0.538696
#10947 SNP_A-1516024 rs580309 1 4982250 TSC1478148 AA
0.569713
#7794 SNP_A-1514538 rs1414379 1 5468765 TSC0609730 AA
0.299872
#9130 SNP_A-1516403 rs1890191 1 5596686 TSC0913001 AA
0.221319
#7214 SNP_A-1518687 rs1396904 1 6605831 TSC0574502 BB
0.040226
#526 SNP_A-1509959 rs950493 1 6654350 TSC0042354 BB
0.123611
#4345 SNP_A-1515791 rs845263 1 7133863 TSC0218512
NoCall 0.814947
#7914 SNP_A-1512212 rs1418490 1 7134783 TSC0617931 BB
0.077556
#4470 SNP_A-1513560 rs705695 1 7145191 TSC0246331 AA
0.700697
#8386 SNP_A-1519671 rs228651 1 7620645 TSC0730553 AA
0.09444
#4854 SNP_A-1515942 rs966134 1 8082754 TSC0272985 BB
0.212891
#637 SNP_A-1509129 rs205474 1 10542407 TSC0043572
BB 0.122514
#9481 SNP_A-1512107 rs1281034 1 10706737 TSC0984465
NoCall 10
#432 SNP_A-1514390 rs718206 1 11004020 TSC0041639
BB 0.66461
#10471 SNP_A-1518041 rs2206321 1 12221853 TSC1262794
AA 0.058009
#[snip]
here's another way...
use strict;
open DATA, 'data'
or die 'Open DATA failed';
my %hash;
print "Build hash\n";
while (<DATA>){
next unless /^SNP/;
chomp;
my ($snp_key,$value) = split /\t/;
$snp_key =~ s/ +$//g; # if appears you may have whitespace at the end
print "Key <$snp_key> exist more than once!\n"
if exists($hash{$snp_key});
print "<$snp_key> = $value\n"; # use for testing
$hash{$snp_key}= $value;
}
close DATA;
open DATA2, 'data2'
or die 'Open DATA2 failed';
print "Look for same key\n";
while (my $data=<DATA2>) {
chomp($data);
(undef, my $snp_key) = split /\t/,$data;
print "<$snp_key>\n"; # use for testing
next unless $snp_key =~ /^SNP/;
print join(',','found = ',$snp_key,$hash{$snp_key},$data), "\n"
if exists($hash{$snp_key});
}
close DATA2;
Hope this gives you some other ideas
jwm
--
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]
<http://learn.perl.org/> <http://learn.perl.org/first-response>