On Mon, May 16, 2005 at 01:33:15PM +0000, Nan Jiang wrote:
> While I think <Topic/> and <ExternalPage/> are not randomly intermixed as 
> <Topic/> nodes are generated in relevant categories such as <Arts/> -> 
> <Arts/Movie> -> <Arts/Movie/Title> and then if the <Topic/> has <link/> 
> children, which means it is a final category, then <ExternalPage/> nodes 
> appear immediately below the <Topic/> in the same order as the <link/>s.
> 

The problem is that you completely misunderstood the idea of XML::Twig: you 
process the document as you parse it. Here is code that produces output 
somewhat similar to yours. Don't be surprised by the ->simplify I use to 
deconstruct twigs - I am just used to it and it is merely a matter of style. 
You can just as well use parent, first_child, att and family. And remember - 
when working with XML::Twig, Data::Dumper takes on a whole new meaning :)

#!/usr/bin/perl

use warnings;
use strict;
use XML::Twig;

# Sample document: <Topic> elements, each followed by the <ExternalPage>
# elements describing that topic's <link> children.
# NOTE: the attribute values must be followed directly by '>' - a stray ';'
# after the closing quote makes the document ill-formed and the parse dies.
my $xml = <<'END_XML';
<RDF>
<Topic r:id="Top">
<catid>1</catid>
</Topic>

<ExternalPage about="">
<topic>Top/</topic>
</ExternalPage>

<Topic r:id="Top/Arts">
<catid>2</catid>
</Topic>

<Topic r:id="Top/Arts/Movies/Titles/1/10_Rillington_Place">
<catid>205108</catid>
<link r:resource="http://www.britishhorrorfilms.co.uk/rillington.shtml"/>
<link r:resource="http://www.shoestring.org/mmi_revs/10-rillington-place.html"/>
</Topic>

<ExternalPage about="http://www.britishhorrorfilms.co.uk/rillington.shtml">
<d:Title>British Horror Films: 10 Rillington Place</d:Title>
<d:Description>Review which looks at plot especially the shocking features
of it.</d:Description>
<topic>Top/Arts/Movies/Titles/1/10_Rillington_Place</topic>
</ExternalPage>

<ExternalPage about="http://www.shoestring.org/mmi_revs/10-rillington-place.html">
<d:Title>MMI Movie Review: 10 Rillington Place</d:Title>
<d:Description>Review includes plot, real life story behind the film and
realism in the film.</d:Description>
<topic>Top/Arts/Movies/Titles/1/10_Rillington_Place</topic>
</ExternalPage>
</RDF>
END_XML

# Link URL => topic r:id for the most recently seen <Topic>.
# Written by _topic_handler, read by _links_handler.
my %want_links;

my $parser = XML::Twig->new(
    twig_handlers => {
        'Topic'        => \&_topic_handler,
        'ExternalPage' => \&_links_handler,
    },
);

$parser->parse($xml);   #parse XML data

exit 0;


# Twig handler for <Topic> elements.
#
# Arguments: the XML::Twig object and the <Topic> element being handled.
# Side effects: overwrites the file-level %want_links with a map from each
# <link> child's r:resource value to this topic's r:id, or empties it when
# the topic has no <link> children; then purges the twig.
sub _topic_handler {

    my ($twig, $child) = @_;
    my $topic = $child->simplify (forcearray => 1);

    if ($topic->{link}) {
        # generate hash 'link_name' => 'directory'
        # (the parenthesised pair keeps perl from reading the map braces as
        # an anonymous hash constructor)
        %want_links = map { ( $_->{'r:resource'} => $topic->{'r:id'} ) }
            @{ $topic->{link} };
    }
    else {
        # reset the hash since we are working on a new topic
        # (no more external links)
        %want_links = ();
    }

    $twig->purge;
}

# Twig handler for <ExternalPage> elements.
#
# Arguments: the XML::Twig object and the <ExternalPage> element being handled.
# If the page's 'about' attribute is a key of the file-level %want_links,
# prints the mapped topic id, the page's first d:Title and first
# d:Description (one per line) followed by a blank line. Always purges the
# twig afterwards.
sub _links_handler {

    my ($twig, $child) = @_;
    my $ext_page = $child->simplify (forcearray => 1);

    # An <ExternalPage> without an 'about' attribute cannot match any link;
    # testing definedness first avoids an "uninitialized value" warning.
    my $url = $ext_page->{about};

    if (defined $url && $want_links{$url}) {
        #chdir $want_links{$url}  #commented out since I don't have that dir

        # A missing title/description is printed as an empty line rather
        # than triggering a warning inside join.
        my @fields = map { defined $_ ? $_ : '' }
            ( $ext_page->{'d:Title'}[0], $ext_page->{'d:Description'}[0] );

        print join ("\n", $want_links{$url}, @fields);
        print "\n\n";
    }

    $twig->purge;
}


-- 
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]
<http://learn.perl.org/> <http://learn.perl.org/first-response>


Reply via email to