OK, I promised a patch for the valid_punctuation problem. Here's a patch
that adds the original user input, with punctuation, to the StringMatch
used for excerpts.

However, I just noticed that excerpt highlighting seems to be broken on my
system, so I can't test it out. :-( I did put in debugging output, so I know
it's setting the StringMatch correctly.

If someone could test this, I'd appreciate it. If someone can figure out
why my excerpt highlighting isn't working, I'd be very, very happy.

-Geoff

Index: htsearch.cc
===================================================================
RCS file: /opt/htdig/cvs/htdig3/htsearch/htsearch.cc,v
retrieving revision 1.22
diff -c -3 -r1.22 htsearch.cc
*** htsearch.cc 1999/01/21 13:41:24     1.22
--- htsearch.cc 1999/01/29 05:30:40
***************
*** 106,113 ****

  ResultList *htsearch(char *, List &, Parser *);

! void setupWords(char *, List &, int, Parser *);
! void createLogicalWords(List &, String &, StringMatch &);
  void reportError(char *);
  void convertToBoolean(List &words);
  void doFuzzy(WeightWord *, List &, List &);
--- 35,42 ----

  ResultList *htsearch(char *, List &, Parser *);

! void setupWords(char *, List &, int, Parser *, String &);
! void createLogicalWords(List &, String &, String &);
  void reportError(char *);
  void convertToBoolean(List &words);
  void doFuzzy(WeightWord *, List &, List &);
***************
*** 133,138 ****
--- 62,69 ----
      StringMatch               limit_to;
      StringMatch               exclude_these;
      String            logicalWords;
+     String              origPattern;
+     String              logicalPattern;
      StringMatch               searchWordsPattern;
      StringList                requiredWords;
      int                 i;
***************
*** 266,280 ****
      originalWords.chop(" \t\r\n");
      setupWords(originalWords, searchWords,
               strcmp(config["match_method"], "boolean") == 0,
!              parser);

      //
      // Convert the list of WeightWord objects to a pattern string
      // that we can compile.
      //
!     createLogicalWords(searchWords, logicalWords, searchWordsPattern);

      //
      // If required keywords were given in the search form, we will
      // modify the current searchWords list to include the required
      // words.
--- 197,220 ----
      originalWords.chop(" \t\r\n");
      setupWords(originalWords, searchWords,
               strcmp(config["match_method"], "boolean") == 0,
!              parser, origPattern);

      //
      // Convert the list of WeightWord objects to a pattern string
      // that we can compile.
      //
!     createLogicalWords(searchWords, logicalWords, logicalPattern);

+     //
+     // Assemble the full pattern for excerpt matching and highlighting
      //
+     origPattern += logicalPattern;
+     searchWordsPattern.Pattern(origPattern);
+     searchWordsPattern.IgnoreCase();
+     if (debug)
+       cout << "Excerpt pattern: " << origPattern << "\n";
+
+     //
      // If required keywords were given in the search form, we will
      // modify the current searchWords list to include the required
      // words.
***************
*** 336,342 ****


//*****************************************************************************
void
! createLogicalWords(List &searchWords, String &logicalWords, StringMatch &wm)
  {
      String            pattern;
      int                       i;
--- 276,282 ----


//*****************************************************************************
void
! createLogicalWords(List &searchWords, String &logicalWords, String &wm)
  {
      String            pattern;
      int                       i;
***************
*** 368,375 ****
            pattern << ww->word;
        }
      }
!     wm.IgnoreCase();
!     wm.Pattern(pattern);

      if (debug)
      {
--- 308,314 ----
            pattern << ww->word;
        }
      }
!     wm = pattern;

      if (debug)
      {
***************
*** 395,404 ****


//*****************************************************************************
// void setupWords(char *allWords, List &searchWords,
! //                                    int boolean, Parser *parser)
  //
  void
! setupWords(char *allWords, List &searchWords, int boolean, Parser *parser)
  {
      List      tempWords;
      int               i;
--- 334,344 ----


//*****************************************************************************
// void setupWords(char *allWords, List &searchWords,
! //               int boolean, Parser *parser, String &originalPattern)
  //
  void
! setupWords(char *allWords, List &searchWords, int boolean, Parser *parser,
!          String &originalPattern)
  {
      List      tempWords;
      int               i;
***************
*** 456,463 ****
                    word << (char) t;
                    t = *pos++;
                }
!               word.remove(valid_punctuation);
!               pos--;
                if (boolean && mystrcasecmp(word.get(), "and") == 0)
                {
                    tempWords.Add(new WeightWord("&", -1.0));
--- 396,402 ----
                    word << (char) t;
                    t = *pos++;
                }
!
                if (boolean && mystrcasecmp(word.get(), "and") == 0)
                {
                    tempWords.Add(new WeightWord("&", -1.0));
***************
*** 472,477 ****
--- 411,419 ----
                }
                else
                {
+                   // Add word to excerpt matching list
+                   originalPattern << word << "|";
+                   word.remove(valid_punctuation);
                    WeightWord  *ww = new WeightWord(word, 1.0);
                    if (!badWords.IsValid(word) ||
                        word.length() < minimum_word_length)
***************
*** 484,489 ****
--- 426,432 ----
                        tempWords.Add(ww);
                    }
                }
+               pos--;
                break;
            }
        }


------------------------------------
To unsubscribe from the htdig3-dev mailing list, send a message to
[EMAIL PROTECTED] containing the single word "unsubscribe" in
the SUBJECT of the message.

Reply via email to