? patch Index: ChangeLog =================================================================== RCS file: /cvsroot/htdig/htdig/ChangeLog,v retrieving revision 1.187.2.206 diff -3 -u -p -r1.187.2.206 ChangeLog --- ChangeLog 2001/05/06 03:06:47 1.187.2.206 +++ ChangeLog 2001/05/30 10:17:02 @@ -1,3 +1,17 @@ +Wed May 30 11:30:43 CEST 2001 Gabriele Bartolini + + * htsearch/htsearch.cc: two new attributes, used by htsearch, have + been added: restrict and exclude. They can now give more control + to template customisation through configuration files, allowing + to restrict or exclude URLs from search without passing + any CGI variables (although this specification overrides the + configuration one). + * htcommon/defaults.cc: ditto + * htdoc/attrs.html: ditto (to be reviewed and lengthened -- my + english sucks!) + * htdoc/cf_byname.html: ditto + * htdoc/cf_byprog.html: ditto + Sat May 5 21:43:32 2001 Geoff Hutchison * configure.in, configure: Add tests for wait.h, sys/wait.h, Index: htcommon/defaults.cc =================================================================== RCS file: /cvsroot/htdig/htdig/htcommon/defaults.cc,v retrieving revision 1.43.2.15 diff -3 -u -p -r1.43.2.15 defaults.cc --- htcommon/defaults.cc 2000/02/15 22:08:34 1.43.2.15 +++ htcommon/defaults.cc 2001/05/30 10:17:04 @@ -57,6 +57,7 @@ ConfigDefaults defaults[] = {"endings_word2root_db", "${common_dir}/word2root.db"}, {"excerpt_length", "300"}, {"excerpt_show_top", "false"}, + {"exclude", ""}, {"exclude_urls", "/cgi-bin/ .cgi"}, {"external_parsers", ""}, {"extra_word_characters", ""}, @@ -120,6 +121,7 @@ ConfigDefaults defaults[] = {"prev_page_text", "[prev]"}, {"remove_bad_urls", "true"}, {"remove_default_doc", "index.html"}, + {"restrict", ""}, {"robotstxt_name", "htdig"}, {"script_name", ""}, {"search_algorithm", "exact:1"}, Index: htdoc/attrs.html =================================================================== RCS file: /cvsroot/htdig/htdig/htdoc/attrs.html,v retrieving revision 1.27.2.35 diff -3 
-u -p -r1.27.2.35 attrs.html --- htdoc/attrs.html 2000/02/25 16:22:22 1.27.2.35 +++ htdoc/attrs.html 2001/05/30 10:17:05 @@ -1741,6 +1741,50 @@
+ exclude +
+
+
+
+ type: +
+
+ string list +
+
+ used by: +
+
+ htsearch +
+
+ default: +
+
+ <empty> +
+
+ description: +
+
+ If a URL contains any of the space-separated patterns, + it will be discarded during the search phase. This is + used to exclude matching URLs via the configuration file, + without passing a form variable through the htsearch + CGI. +
+
+ example: +
+
+ exclude: cgi-bin +
+
+
+
+
+
+
exclude_urls
@@ -5030,6 +5074,47 @@ +
+
+ + +
+
+
+ restrict +
+
+
+
+ type: +
+
+ string list +
+
+ used by: +
+
+ htsearch +
+
+ default: +
+
+ <empty> +
+
+ description: +
+
+ This attribute is used by htsearch to restrict the search + to URLs matching the patterns given in the configuration file. +
+
+ example: +
+
+ restrict: http://www.vh1.com/
Index: htdoc/cf_byname.html =================================================================== RCS file: /cvsroot/htdig/htdig/htdoc/cf_byname.html,v retrieving revision 1.18.2.16 diff -3 -u -p -r1.18.2.16 cf_byname.html --- htdoc/cf_byname.html 2000/02/15 22:08:36 1.18.2.16 +++ htdoc/cf_byname.html 2001/05/30 10:17:05 @@ -56,6 +56,7 @@ * endings_word2root_db
* excerpt_length
* excerpt_show_top
+ * exclude
* exclude_urls
* external_parsers
* extra_word_characters
Index: htdoc/cf_byprog.html =================================================================== RCS file: /cvsroot/htdig/htdig/htdoc/cf_byprog.html,v retrieving revision 1.17.2.16 diff -3 -u -p -r1.17.2.16 cf_byprog.html --- htdoc/cf_byprog.html 2000/02/15 22:08:36 1.17.2.16 +++ htdoc/cf_byprog.html 2001/05/30 10:17:05 @@ -132,6 +132,7 @@ * endings_word2root_db
* excerpt_length
* excerpt_show_top
+ * exclude
* extra_word_characters
* iso_8601
* logging
@@ -158,6 +159,7 @@ * page_number_text
* prefix_match_character
* prev_page_text
+ * restrict
* script_name
* search_algorithm
* search_results_footer
Index: htsearch/htsearch.cc =================================================================== RCS file: /cvsroot/htdig/htdig/htsearch/htsearch.cc,v retrieving revision 1.24.2.9 diff -3 -u -p -r1.24.2.9 htsearch.cc --- htsearch/htsearch.cc 2000/02/15 22:20:02 1.24.2.9 +++ htsearch/htsearch.cc 2001/05/30 10:17:06 @@ -104,24 +104,6 @@ main(int ac, char **av) cgi input(optind < ac ? av[optind] : none); // - // Compile the URL limit pattern. - // - if (input.exists("restrict")) - { - char *sep = input["restrict"]; - while ((sep = strchr(sep, '\001')) != NULL) - *sep++ = '|'; - limit_to.Pattern(input["restrict"]); - } - if (input.exists("exclude")) - { - char *sep = input["exclude"]; - while ((sep = strchr(sep, '\001')) != NULL) - *sep++ = '|'; - exclude_these.Pattern(input["exclude"]); - } - - // // Setup the configuration database. First we read the compiled defaults. // Then we override those with defaults read in from the configuration // file, and finally we override some attributes with information we @@ -189,6 +171,24 @@ main(int ac, char **av) config.Add(form_vars[i], input[form_vars[i]]); } + // + // Compile the URL limit pattern. + // + if (strlen(config["restrict"])) + { + char *sep = config["restrict"]; + while ((sep = strchr(sep, '\001')) != NULL) + *sep++ = '|'; + limit_to.Pattern(config["restrict"]); + } + if (strlen(config["exclude"])) + { + char *sep = config["exclude"]; + while ((sep = strchr(sep, '\001')) != NULL) + *sep++ = '|'; + exclude_these.Pattern(config["exclude"]); + } + // Ctype-like functions for what constitutes a word. HtWordType::Initialize(config);