Hi - Quite a while back I offered to work on a patch that would support
multiple excerpts, which was a feature requested by a couple users. Alas,
after much procrastinating, I have a first cut at it ;)
For lack of a better idea, I used max_excerpts as the name for the
configuration attribute. If this attribute is not set, or set to one,
my additions aren't even touched, except for the attribute code itself
and a conditional. So in "theory", it *can't* break anything using a
current configuration file, regardless of anything stupid I might have
otherwise done.
The code could probably be smarter about handling cases where terms
occur multiple times within the excerpt_length. But I was concerned
about burning too much time hunting for the additional occurrences.
I think it would also sort of violate the excerpt_length attribute.
There is a running copy of the patched code at
http://www.tngenweb.org/ss/tngenwebss.html which is configured for a
maximum of three excerpts.
This is my first attempt to even touch the HtDig code, so be gentle ;)
Please let me know if you see any problems or have any suggestions.
Jim Cole
*** htdig-3.1.5/htsearch/Display.cc.orig Thu Mar 2 18:24:06 2000
--- htdig-3.1.5/htsearch/Display.cc Sat Mar 4 14:58:49 2000
*************** Display::excerpt(DocumentRef *ref, Strin
*** 1148,1153 ****
--- 1148,1154 ----
}
}
else
+ if ( first == 0 || config.Value( "max_excerpts" ) == 1 )
{
int headLength = strlen(head);
int length = config.Value("excerpt_length", 50);
*************** Display::excerpt(DocumentRef *ref, Strin
*** 1190,1196 ****
--- 1191,1297 ----
*text << config["end_ellipses"];
}
}
+ else
+ {
+ *text = buildExcerpts( head, urlanchor, fanchor );
+ }
+
return text;
+ }
+
+ //*****************************************************************************
+ // Build up to max_excerpts excerpts from head, one per matching term found.
+ // Returns them concatenated; each excerpt after the first follows a "<br>\n".
+ const String
+ Display::buildExcerpts( char *head, String urlanchor, int fanchor )
+ {
+ if ( !config.Boolean( "add_anchors_to_excerpt" ) )
+ {
+ fanchor = 0;
+ }
+
+ int headLength = strlen( head );
+ int excerptNum = config.Value( "max_excerpts", 1 ); // upper bound on excerpts emitted
+ int excerptLength = config.Value( "excerpt_length", 50 );
+ int lastPos = 0; // offset in head where the next search begins
+ int curPos = 0; // offset in head of the current match
+
+ String text;
+
+ for ( int i = 0; i < excerptNum; ++i )
+ {
+ int which, termLength;
+
+ int nextPos = allWordsPattern->FindFirstWord( head + lastPos,
+ which, termLength );
+
+ if ( nextPos < 0 )
+ {
+ // No further match in the remainder of head; stop early
+ break;
+ }
+ else
+ {
+ // nextPos is treated as relative to head + lastPos; rebase it onto head
+ curPos = lastPos + nextPos;
+ }
+
+ // Separate this excerpt from the one already written to text
+ if ( i != 0 )
+ {
+ text << "<br>\n";
+ }
+
+ // Start the excerpt excerptLength / 2 characters before the match
+ char *start = &head[curPos] - excerptLength / 2;
+
+ if ( start < head )
+ {
+ start = head;
+ }
+ else
+ {
+ text << config["start_ellipses"];
+
+ while ( *start && HtIsStrictWordChar( *start ) )
+ {
+ start++;
+ }
+ }
+
+ // End the excerpt excerptLength characters after its (adjusted) start
+ char *end = start + excerptLength;
+
+ if ( end > head + headLength )
+ {
+ end = head + headLength;
+
+ text << hilight( start, urlanchor, fanchor );
+ }
+ else
+ {
+ while ( *end && HtIsStrictWordChar( *end ) )
+ {
+ end++;
+ }
+
+ // Temporarily terminate head at end for the hilight() call; restored below
+ char endChar = *end;
+
+ *end = '\0';
+
+ text << hilight(start, urlanchor, fanchor);
+ text << config["end_ellipses"];
+
+ *end = endChar;
+ }
+
+ // Resume right after this match (so excerpts may overlap); stop past end of head
+ if ( (lastPos = curPos + termLength) > headLength )
+ break;
+ }
+
+ return text;
}
//*****************************************************************************
*** htdig-3.1.5/htsearch/Display.h.orig Thu Mar 2 18:24:13 2000
--- htdig-3.1.5/htsearch/Display.h Sat Mar 4 10:00:39 2000
*************** protected:
*** 159,164 ****
--- 159,165 ----
void expandVariables(char *);
void outputVariable(char *);
String *excerpt(DocumentRef *ref, String urlanchor, int fanchor, int
&first);
+ const String buildExcerpts( char *head, String urlanchor, int fanchor );
char *hilight(char *str, String urlanchor, int fanchor);
void setupTemplates();
void setupImages();
*** htdig-3.1.5/htcommon/defaults.cc.orig Sat Mar 4 10:10:22 2000
--- htdig-3.1.5/htcommon/defaults.cc Sat Mar 4 10:10:14 2000
*************** ConfigDefaults defaults[] =
*** 87,92 ****
--- 87,93 ----
{"max_description_length", "60"},
{"max_descriptions", "5"},
{"max_doc_size", "100000"},
+ {"max_excerpts", "1" },
{"max_head_length", "512"},
{"max_hop_count", "999999"},
{"max_keywords", "-1"},
------------------------------------
To unsubscribe from the htdig3-dev mailing list, send a message to
[EMAIL PROTECTED]
You will receive a message to confirm this.