stas 02/03/23 01:11:33
Modified: tmpl/custom/html page_body search
src/search README SwishSpiderConfig.pl swish.conf
Log:
sync with the latest swish
Submitted by: Bill Moseley <[EMAIL PROTECTED]>
Reviewed by: stas
Revision Changes Path
1.20 +7 -5 modperl-docs/tmpl/custom/html/page_body
Index: page_body
===================================================================
RCS file: /home/cvs/modperl-docs/tmpl/custom/html/page_body,v
retrieving revision 1.19
retrieving revision 1.20
diff -u -r1.19 -r1.20
--- page_body 22 Mar 2002 02:02:16 -0000 1.19
+++ page_body 23 Mar 2002 09:11:33 -0000 1.20
@@ -15,11 +15,13 @@
INCLUDE page_toc toc=doc.toc;
# render the content
- "<!-- SwishCommand index -->";
+ # index_section is used to break up the doc into sections for indexing
+
FOREACH sec = doc.body;
'<div class="index_section">';
+ '<!-- SwishCommand index -->';
sec;
- "<br><br>";
+ "<br><br>\n";
IF loop.count == loop.size;
INCLUDE navbar_local_bottom
nav=doc.nav
@@ -27,9 +29,9 @@
ELSE;
INCLUDE top_link;
END;
- "<br><br>";
+ "<br><br>\n";
+ '<!-- SwishCommand noindex -->';
"</div>\n\n";
END;
- "<!-- SwishCommand noindex -->";
%]
-<!-- end content-->
\ No newline at end of file
+<!-- end content-->
1.11 +1 -1 modperl-docs/tmpl/custom/html/search
Index: search
===================================================================
RCS file: /home/cvs/modperl-docs/tmpl/custom/html/search,v
retrieving revision 1.10
retrieving revision 1.11
diff -u -r1.10 -r1.11
--- search 22 Mar 2002 19:22:41 -0000 1.10
+++ search 23 Mar 2002 09:11:33 -0000 1.11
@@ -18,7 +18,7 @@
<td class="menu-border" width="1"><br class="smallbr"></td>
<td class="search" width="2" align="center">
<input type="submit" name="submit" value="Search"
class="submit-but">
- <input type="hidden" name="section" value=""[%
doc.dir.path_from_base %]"">
+ <input type="hidden" name="sbm" value=""[%
doc.dir.path_from_base %]"">
</td>
<td class="menu-border" width="1"><br class="smallbr"></td>
</tr>
1.7 +6 -5 modperl-docs/src/search/README
Index: README
===================================================================
RCS file: /home/cvs/modperl-docs/src/search/README,v
retrieving revision 1.6
retrieving revision 1.7
diff -u -r1.6 -r1.7
--- README 22 Mar 2002 19:22:41 -0000 1.6
+++ README 23 Mar 2002 09:11:33 -0000 1.7
@@ -123,21 +123,22 @@
=item *
Since we want to be able to search any sub-section of the site, the
-search form includes the hidden variable C<section>. For example:
+search form includes the hidden variable C<sbm> (mnemonic: 'search by
+meta'). For example:
- <input type="checkbox" name="section" value="docs/1.0/guide" />
+ <input type="checkbox" name="sbm" value="docs/1.0/guide" />
will search all the documents under I<docs/1.0/guide> directory.
-the correct value for the C<section> variable are set in the template when
+The correct value for the C<sbm> variable is set in the template when
the site is created.
The main search page I</search/swish.cgi>, has multiply checkboxes for
-the for the C<section> variable so you can limit searches to only selected
+the C<sbm> variable so you can limit searches to only selected
sections.
The C<$ENV{MODPERL_SITE}> mentioned earlier is matched against the
-C<section> variable to extract only the wanted subsets of the hits:
+C<sbm> variable to extract only the wanted subsets of the hits:
$uri =~ m!$ENV{MODPERL_SITE}{/([^/]+)/.+$!
1.5 +36 -9 modperl-docs/src/search/SwishSpiderConfig.pl
Index: SwishSpiderConfig.pl
===================================================================
RCS file: /home/cvs/modperl-docs/src/search/SwishSpiderConfig.pl,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -r1.4 -r1.5
--- SwishSpiderConfig.pl 22 Mar 2002 02:02:15 -0000 1.4
+++ SwishSpiderConfig.pl 23 Mar 2002 09:11:33 -0000 1.5
@@ -4,7 +4,8 @@
my $base_path = $ENV{MODPERL_SITE} || die "must set \$ENV{MODPERL_SITE}";
-die "Don't use trailing slash in MODPERL_SITE" if $base_path =~ m!/$!;
+$base_path =~ s[/$][];
+
@servers = (
@@ -52,18 +53,38 @@
my $tree = HTML::TreeBuilder->new;
- $tree->parse( ${$params{content}} ); # Why not allow a scalar ref?
+ $tree->store_comments(1);
+
+ $tree->parse( ${$params{content}} ); # Why not allow a scalar ref?
$tree->eof;
+
+ # Find the <head> section for use in all split pages
my $head = $tree->look_down( '_tag', 'head' );
- for my $section ( $tree->look_down( '_tag', 'div', 'class',
'index_section' ) ) {
- create_page( $head->clone, $section->clone, \%params )
- }
+
+ # Now create a new "document" for each
+ create_page( $head->clone, $_->clone, \%params )
+ for $tree->look_down( '_tag', 'div', 'class', 'index_section' );
+
+
+ # The page was indexed in sections, so just return
+ return 0 if $params{found};
+
+ # No sections found, so index the entire page (probably index.html)
+
+ # Strip base_path
+ #my $url = $params{uri}->as_string;
+ #$url =~ s/^$base_path//;
+
+ my $new_content = $tree->as_HTML(undef,"\t");
+ output_content( $params{server}, $params{content},
+ $params{uri}, $params{response} );
+
$tree->delete;
- return !$params{found}; # tell spider.pl to not index the page
+ return 0; # don't index
}
sub create_page {
@@ -95,11 +116,14 @@
# Extract out part of the path to use for limiting searches to parts of
the document tree.
- if ( $uri =~ m!$base_path/([^/]+)/.+$! ) {
- my $meta = HTML::Element->new('meta', name=> 'section', content =>
$1);
+ if ( $uri =~ m!$base_path/(.+)$! ) {
+ my $path = $1;
+ $path =~ s{[^/]$}{}; # remove file name, if one
+ my $meta = HTML::Element->new('meta', name=> 'section', content =>
$path);
$head->push_content( $meta );
}
-
+
+
my $body = HTML::Element->new('body');
my $doc = HTML::Element->new('html');
@@ -107,6 +131,9 @@
$body->push_content( $section );
$doc->push_content( $head, $body );
+ # If we want to strip the base_path
+ #my $url = $uri->as_string;
+ #$url =~ s/$base_path//;
my $new_content = $doc->as_HTML(undef,"\t");
output_content( $params->{server}, \$new_content,
1.5 +1 -0 modperl-docs/src/search/swish.conf
Index: swish.conf
===================================================================
RCS file: /home/cvs/modperl-docs/src/search/swish.conf,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -r1.4 -r1.5
--- swish.conf 3 Mar 2002 11:27:22 -0000 1.4
+++ swish.conf 23 Mar 2002 09:11:33 -0000 1.5
@@ -17,3 +17,4 @@
#BuzzWords in highlighting --
#How about counting highlighted terms individually in the highlight module
#so every term is highlighted at least once, with a total of say five.
+
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]