I'm hoping someone can point me in the right direction. I'm stuck on
figuring out how to handle the situation where session A initiates a TCP
client session B; however, by the time TCP session B is ready to do it's
thing, session A is way ahead and session B is now lost without the data
from session A. 
 
I proved there is a timing issue, by changing line 57 from
$_[KERNEL]->yield('_start') to $_[KERNEL]->delay('_start',5). The delay
provides sufficient time for session B to do its work; However, I could
never be sure if the delay is long enough. Although this "solved" the
problem, I'm sure it's just a bandaid. The real solution is perhaps
employing somekind of postback call, but I'm lost and would appreciate some
help.
 
By the way, the purpose of the code is to connect to Yahoo's API via the TCP
connection session and download and store the top results for a given
keyword. For example, if the keyword is "dog food", store the top ten
results in a local mysql database. 
 
[EMAIL PROTECTED]
Position Research, Inc.
Search engine results by research
tel: (760) 480-8291 fax: (760) 480-8271
www.PositionResearch.com <http://www.positionresearch.com/> 
 
 
---------------------------------------------------------------- CODE
----------------------------------------------------------------------------
-------
 
#!/usr/bin/perl
use warnings;
use strict;
$|++;
use POE;
use POE::Component::Client::TCP;
 
POE::Component::My::KeywordTopPages->spawn();
POE::Kernel->run();
exit 0;
 

1. BEGIN {
2.
#---------------------------------------------------------------------------
----------
3. package POE::Component::My::KeywordTopPages;
4.
#---------------------------------------------------------------------------
----------
5. use POE::Session;
6. use DBI;
7. 
8. my $dbh = DBI->connect('DBI:mysql:xray:localhost','xxx','yyyyy',
{RaiseError => 1});
9. 
10.
#---------------------------------------------------------------------------
----------
11. sub spawn {
12.
#---------------------------------------------------------------------------
----------
13.    my $class = shift;
14.    POE::Session->create (
15.         package_states => [$class => [qw(_start GetTopPages
StoreTopPage)]],
16.    );
17. }
18.
19.
#---------------------------------------------------------------------------
----------
20. sub _start
21.
#---------------------------------------------------------------------------
----------
22. {  # retrieve one keyword-engine pair from the db
23.    my $keyword = '';
24.    my $sql = "SELECT jep.job_id, jep.engine_id, jep.phrase_id, k.phrase
FROM job_engine_phrases jep, keywords k
25.                 WHERE tp_gather_start IS NULL AND jep.phrase_id =
k.phrase_id /* _start */";
26.
27.    ($_[HEAP]->{J_ID}, $_[HEAP]->{E_ID}, $_[HEAP]->{K_ID}, $keyword) =
$dbh->selectrow_array($sql);
28.
29.    if ($keyword)
30.    {
31.       $sql = "UPDATE job_engine_phrases SET tp_gather_start = NOW() /*
_start */
32.                 WHERE job_id = $_[HEAP]->{J_ID} AND engine_id =
$_[HEAP]->{E_ID} AND phrase_id = $_[HEAP]->{K_ID}";
33.       $dbh->do($sql);
34.
35.       $_[KERNEL]->yield('GetTopPages', $keyword);
36.    }
37.    else # delay kernel -- loop and try again
38.    {
39.       $_[KERNEL]->delay('_start', 10);
40.    }
41.}
42.
#---------------------------------------------------------------------------
----------
43. sub GetTopPages
44.
#---------------------------------------------------------------------------
----------
45. { # retrieve top pages for keyword from engine
46.    my ( $kernel, $heap, $session, $keyword ) = @_[ KERNEL, HEAP,
SESSION, ARG0 ];
47.
48.    POE::Component::Client::TCP->new (
49.      RemoteAddress     => '127.0.0.1',
50.      RemotePort        => '31000',
51.      ConnectTimeout    => 2,
52.      Connected         => sub { $_[HEAP]->{server}->put($keyword); },
53.      Filter            => 'POE::Filter::Line',
54.      ServerInput       => sub { &StoreTopPage($kernel, $heap, $session,
$_[ARG0]); },
55.    );
56.
57.    $_[KERNEL]->delay('_start',3);
57b.  ########  $_[KERNEL]->yield('_start');
58. }
59. 
60.
#---------------------------------------------------------------------------
----------
61. sub StoreTopPage
62.
#---------------------------------------------------------------------------
----------
63. { # store the url after normalizing it
64.     my ( $kernel, $heap, $session, $url ) = @_;
65. 
66.    my $sql = "SELECT page_id FROM pages WHERE url = '$url' /*
StoreTopPage */";
67.
68.     $heap->{J_ID} or die "Dying on heap J_ID: $heap->{J_ID}\n";
69. 
70.     if ( my $page_id = $dbh->selectrow_array($sql) )
71.     {
72.       print "Nothing to do here in StoreTopPage\n";
73.    }
74.    else
75.    {
76.       $sql = "INSERT INTO pages (url) VALUES ('$url') /* StoreTopPage
*/";
77.       $dbh->do("$sql");
78.    }
79. }
 
 
 
 

Reply via email to