moseley 2002/06/29 22:45:16 Modified: src/search SwishSpiderConfig.pl make.pl search.tt Log: Updated the indexing to assign unique sections IDs to docs instead of just trying to limit by hits on words in the path. Revision Changes Path 1.10 +80 -26 modperl-docs/src/search/SwishSpiderConfig.pl Index: SwishSpiderConfig.pl =================================================================== RCS file: /home/cvs/modperl-docs/src/search/SwishSpiderConfig.pl,v retrieving revision 1.9 retrieving revision 1.10 diff -u -r1.9 -r1.10 --- SwishSpiderConfig.pl 19 Apr 2002 19:53:33 -0000 1.9 +++ SwishSpiderConfig.pl 30 Jun 2002 05:45:16 -0000 1.10 @@ -1,11 +1,15 @@ # this is the modified default spider config file that comes with swish-e. +# Perldoc swish.cgi for docs on the format of this file # -# a few custom callbacks are located after the @servers definition section. +# a few custom callbacks are located after the @servers definition section +# these are used to split files into sections. my $base_path = $ENV{MODPERL_SITE} || die "must set \$ENV{MODPERL_SITE}"; $base_path =~ s[/$][]; +# Used to fetch the available "sections" +my $CHECKBOX_DATA = 'checkboxes.storable'; @servers = ( @@ -63,7 +67,6 @@ # Find the <head> section for use in all split pages my $head = $tree->look_down( '_tag', 'head' ); - # Now create a new "document" for each create_page( $head->clone, $_->clone, \%params ) for $tree->look_down( '_tag', 'div', 'class', 'index-section' ); @@ -73,7 +76,8 @@ ## so don't index it. $tree->delete; return 0; - + + # old code below to index pages that don't have sections defined. # Indexed the page in sections, just return @@ -102,34 +106,35 @@ my $uri = $params->{uri}; + # Grab the first <a name="..."> tag that indicates this section. 
+ # and adjust the path - # Grab the section link, and create a new title + if ( my $name = $section->look_down( '_tag', 'a', sub { defined($_[0]->attr('name')) } ) ) { + $uri->fragment( $name->attr('name') ); + } + - my $name = $section->look_down( '_tag', 'a', sub { defined($_[0]->attr('name')) } ); - - if ( $name ) { + # Now grab the first <a href="..">description</a> tag + if ( my $link = $section->look_down( '_tag', 'a', sub { defined($_[0]->attr('href')) } ) ) { - my @a_content; - - my $section_name = $name->attr('name'); - $uri->fragment( $section_name ); + my $description = $link->as_text; - if ( ! (@a_content = $name->content_list) ) { - $section_name =~ tr/_/ /; - @a_content = ( $section_name ); - } + if ( $description ) { - # Modify or create the title + # Modify or create the title - my $title = $head->look_down('_tag', 'title'); + my $title = $head->look_down('_tag', 'title'); + + if ( $title ) { + $title->push_content( ": $description" ); - if ( $title ) { - $title->push_content( ': ', @a_content ); - } else { - my $title = HTML::Element->new('title'); - $title->push_content( @a_content ); - $head->push_content( $title ); + } else { # Create a new title + + my $title = HTML::Element->new('title'); + $title->push_content( $description ); + $head->push_content( $title ); + } } } @@ -142,9 +147,11 @@ if ( $uri =~ m!$base_path/(.+)$! 
) { my $path = $1; - $path =~ s{/?[^/]+$}{}; # remove file name, if one - my $meta = HTML::Element->new('meta', name=> 'section', content => $path); - $head->push_content( $meta ); + + if ( my $sections = map_path_to_sections( $path ) ) { + my $meta = HTML::Element->new('meta', name=> 'section', content => $sections); + $head->push_content( $meta ); + } } # Add the total document length, which is different than the section length @@ -171,8 +178,55 @@ $params->{found}++; # set flag; + $doc->delete; } + +my %section_names; + +sub map_path_to_sections { + my $path = shift; + + %section_names = fetch_sections( $CHECKBOX_DATA ) + unless %section_names; + + + my @sections; + for ( keys %section_names ) { + my $test = quotemeta( $_ ); + push @sections, $section_names{ $_ } if $path =~ /^$test/; + } + + return @sections ? join(' ', @sections ) : undef; +} + + + + + +use Storable; +sub fetch_sections { + my $file = shift; + + my $items_array = retrieve( $file ); + die unless $items_array; + + my %sections; + recurse_sections( \%sections, $items_array ); + return %sections; + +} + +sub recurse_sections { + my ( $sections, $items_array ) = @_; + + for ( @$items_array ) { + # grab the path and its associated section ID + $sections->{ $_->{path} } = $_->{section}; + recurse_sections( $sections, $_->{subs} ) if $_->{subs}; + } +} + 1; 1.3 +66 -7 modperl-docs/src/search/make.pl Index: make.pl =================================================================== RCS file: /home/cvs/modperl-docs/src/search/make.pl,v retrieving revision 1.2 retrieving revision 1.3 diff -u -r1.2 -r1.3 --- make.pl 29 May 2002 07:21:46 -0000 1.2 +++ make.pl 30 Jun 2002 05:45:16 -0000 1.3 @@ -2,10 +2,58 @@ use strict; use Storable; -# This must match up with .swishcgi.conf setting +=head1 NAME + +make.pl -- program to generate data needed for searching + +=head1 Description + +make.pl uses input contained within that defines "sections" of the site based +on path names. 
These names can then be used when searching with swish to limit +searches to just these areas of the site. + +When indexing the site with swish-e each file is tagged with meta data that indicates +which section or sections it belongs to. + +The input format is described in the source of this file. + +make.pl creates two output files: + +=over 4 + +=item search_options + +A template toolkit include file for defining an array of section names and a hash that +maps the section names to nice descriptions. This data is used to +create the select box on the side bar during site generation (by running bin/build). + +=item checkboxes.storable + +A perl data structure used in the F<search.cgi> script to generate the nested +checkboxes for the advanced search feature. This allows selecting more than one +area of the site at a time. + +This file is saved using the Storable perl module, and is read in by the +search script (F<swish.cgi>) configuration parameter file F<.swishcgi.conf> and +made available to Template-Toolkit when F<swish.cgi> is running. + +This file is also read when indexing with swish-e (see F<SwishSpiderConfig.pl>) and is used to +map path names into section names. + +=back + +Running this program is described in the F<README> file contained in +the F<src/search> directory of the mod_perl site distribution. + + +=cut + + +# This must match up with .swishcgi.conf setting and SwishSpiderConfig.pl my $CHECKBOX_DATA = 'checkboxes.storable'; # This is used for all pages -- it's the array and hash for the sidebar search +# It contains an array parsable by Template Toolkit. my $SEARCH_OPTIONS = 'search_options'; @@ -21,7 +69,7 @@ 0, download, Download, Download 0, docs, Documentation, All Docs 1, docs/1.0, mod_perl 1.0 Docs, 1.0 Docs - 2, docs/1.0/guide, Guide, + 2, docs/1.0/guide, Guide 2, docs/1.0/win32, Win32 2, docs/1.0/api, API 1, docs/2.0, mod_perl 2.0 Docs, 2.0 Docs @@ -39,11 +87,16 @@ + # Split the above items out into a hash. 
+ + my $section_id = 'SecA'; + my @items_flat = map { s/^\s+//; s/\s+$//; + $_ = $section_id++ . ", $_"; my %h; - @h{qw/indent value label short/} = split m!\s*,\s*!; + @h{qw/section indent path label short/} = split m!\s*,\s*!; $h{short} ||= ( $h{label} || 'missing description' ); @@ -51,16 +104,22 @@ } split /\n/, $items; - my $array_values = join "\n", map { ' ' x (( $_->{indent}+2 ) * 4) . qq["$_->{value}"] } @items_flat; + + # Build the data parsable by Template-Toolkit + + my $array_values = join "\n", map { ' ' x (( $_->{indent}+2 ) * 4) . qq["$_->{section}"] } @items_flat; + my $hash_values = join "\n", map { my $dots = '..' x $_->{indent}; my $spaces = ' ' x (( $_->{indent}+2 ) * 4); - qq[$spaces"$_->{value}" => "$dots$_->{short}" ] + qq[$spaces"$_->{section}" => "$dots$_->{short}" ] } @items_flat; + + my $check_box_array = build_array( \@items_flat ); -#use Data::Dumper; +#use Data::Dumper; #print Dumper $check_box_array; store( $check_box_array, $CHECKBOX_DATA ); # store for swish.cgi @@ -99,7 +158,7 @@ #============================================================================== # Subroutine that builds the data structure expected by template toolkit -# TT uses values .value, .label, and .subs. See search.tt for example +# TT uses values .section, .label, and .subs. See search.tt for example # # # 1.16 +1 -1 modperl-docs/src/search/search.tt Index: search.tt =================================================================== RCS file: /home/cvs/modperl-docs/src/search/search.tt,v retrieving revision 1.15 retrieving revision 1.16 diff -u -r1.15 -r1.16 --- search.tt 29 Apr 2002 22:30:09 -0000 1.15 +++ search.tt 30 Jun 2002 05:45:16 -0000 1.16 @@ -71,7 +71,7 @@ <ul> [%- FOREACH sec = subs -%] - <li class="search-list">[% CGI.checkbox('sbm', 0, sec.value, sec.label); %] + <li class="search-list">[% CGI.checkbox('sbm', 0, sec.section, sec.label); %] [%- IF sec.subs -%][%- PROCESS sub_items subs=sec.subs -%][%- END -%]</li> [%- END -%]
--------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]