Hi - Quite a while back I offered to work on a patch that would support
multiple excerpts, which was a feature requested by a couple of users. At last,
after much procrastinating, I have a first cut at it ;)

For lack of a better idea, I used max_excerpts as the name for the
configuration attribute. If this attribute is not set, or set to one,
my additions aren't even touched, except for the attribute code itself
and a conditional. So in "theory", it *can't* break anything using a
current configuration file, regardless of anything stupid I might have
otherwise done.

The code could probably be smarter about handling cases where terms
occur multiple times within the excerpt_length. But I was concerned
about burning too much time hunting for the additional occurrences.
I think it would also sort of violate the excerpt_length attribute.

There is a running copy of the patched code at
http://www.tngenweb.org/ss/tngenwebss.html which is configured for a
maximum of three excerpts.

This is my first attempt to even touch the HtDig code, so be gentle ;)
Please let me know if you see any problems or have any suggestions.

Jim Cole

*** htdig-3.1.5/htsearch/Display.cc.orig        Thu Mar  2 18:24:06 2000
--- htdig-3.1.5/htsearch/Display.cc     Sat Mar  4 14:58:49 2000
*************** Display::excerpt(DocumentRef *ref, Strin
*** 1148,1153 ****
--- 1148,1154 ----
        }
      }
      else
+     if ( first == 0 || config.Value( "max_excerpts" ) == 1 )
      {
        int     headLength = strlen(head);
        int     length = config.Value("excerpt_length", 50);
*************** Display::excerpt(DocumentRef *ref, Strin
*** 1190,1196 ****
--- 1191,1297 ----
            *text << config["end_ellipses"];
        }
      }
+     else
+     {
+       *text = buildExcerpts( head, urlanchor, fanchor );
+     }
+ 
      return text;
+ }
+ 
+ //*****************************************************************************
+ // Handle cases where multiple document excerpts are requested.
+ // Builds up to max_excerpts excerpts of head, joined by "<br>\n".
+ const String
+ Display::buildExcerpts( char *head, String urlanchor, int fanchor )
+ {
+   if ( !config.Boolean( "add_anchors_to_excerpt" ) )
+   {
+     fanchor = 0; // presumably makes hilight() omit anchors -- confirm
+   }
+ 
+   int    headLength    = strlen( head );
+   int    excerptNum    = config.Value( "max_excerpts", 1 );
+   int    excerptLength = config.Value( "excerpt_length", 50 );
+   int    lastPos       = 0; // offset in head where the next search starts
+   int    curPos        = 0; // offset in head of the current match
+ 
+   String text;
+ 
+   for ( int i = 0; i < excerptNum; ++i )
+   {
+     int which, termLength;
+ 
+     int nextPos = allWordsPattern->FindFirstWord( head + lastPos,
+                                                   which, termLength );
+ 
+     if ( nextPos < 0 )
+     {
+       // Ran out of matching terms
+       break;
+     }
+     else
+     {
+       // Determine offset from beginning of head
+       curPos = lastPos + nextPos;
+     }
+ 
+     // Slip a break in since there is another excerpt coming
+     if ( i != 0 )
+     {
+       text << "<br>\n";
+     }
+ 
+     // Determine where excerpt starts: half an excerpt before the match
+     char *start = &head[curPos] - excerptLength / 2;
+ 
+     if ( start < head )
+     {
+       start = head; // clamp to beginning of head; no start ellipses
+     }
+     else
+     {
+       text << config["start_ellipses"];
+ 
+       while ( *start && HtIsStrictWordChar( *start ) ) // skip partial word
+       {
+         start++;
+       }
+     }
+ 
+     // Determine where excerpt ends
+     char *end = start + excerptLength;
+ 
+     if ( end > head + headLength )
+     {
+       end = head + headLength;
+ 
+       text << hilight( start, urlanchor, fanchor ); // no end ellipses here
+     }
+     else
+     {
+       while ( *end && HtIsStrictWordChar( *end ) ) // extend past word chars
+       {
+         end++;
+       }
+ 
+       // Save end char so that it can be restored
+       char endChar = *end;
+ 
+       *end = '\0'; // temporarily terminate head in place at end
+ 
+       text << hilight(start, urlanchor, fanchor);
+       text << config["end_ellipses"];
+ 
+       *end = endChar;
+     }
+ 
+     // No more words left to examine in head
+     if ( (lastPos = curPos + termLength) > headLength )
+      break;
+   }
+ 
+   return text;
  }
  
  //*****************************************************************************
*** htdig-3.1.5/htsearch/Display.h.orig Thu Mar  2 18:24:13 2000
--- htdig-3.1.5/htsearch/Display.h      Sat Mar  4 10:00:39 2000
*************** protected:
*** 159,164 ****
--- 159,165 ----
      void              expandVariables(char *);
      void              outputVariable(char *);
      String            *excerpt(DocumentRef *ref, String urlanchor, int fanchor, int &first);
+     const String        buildExcerpts( char *head, String urlanchor, int fanchor );
      char              *hilight(char *str, String urlanchor, int fanchor);
      void              setupTemplates();
      void              setupImages();
*** htdig-3.1.5/htcommon/defaults.cc.orig       Sat Mar  4 10:10:22 2000
--- htdig-3.1.5/htcommon/defaults.cc    Sat Mar  4 10:10:14 2000
*************** ConfigDefaults  defaults[] =
*** 87,92 ****
--- 87,93 ----
      {"max_description_length",                "60"},
      {"max_descriptions",                "5"},
      {"max_doc_size",                  "100000"},
+     {"max_excerpts",                    "1" },
      {"max_head_length",                       "512"},
      {"max_hop_count",                 "999999"},
      {"max_keywords",                  "-1"},
------------------------------------
To unsubscribe from the htdig3-dev mailing list, send a message to
[EMAIL PROTECTED] 
You will receive a message to confirm this. 

Reply via email to