Greetings Andy,
Try the attached patch. It is very rough-and-ready (since I'm in the
process of also trying to add other functionality) but it should
get you going.
Let me know how you get on.
Cheers,
Lachlan
On Friday 31 January 2003 07:49, [EMAIL PROTECTED] wrote:
> ...it would not find on double colons...
diff -cr -x CVS ../cvs/htdig/htcommon/HtWordReference.h ./htcommon/HtWordReference.h
*** ../cvs/htdig/htcommon/HtWordReference.h Sat Feb 2 09:49:28 2002
--- ./htcommon/HtWordReference.h Wed Feb 5 23:30:56 2003
***************
*** 20,25 ****
--- 20,26 ----
//
// Flags
+ // (If extra flags added, also update htsearch.cc:setupWords.)
//
#define FLAG_TEXT 0
#define FLAG_CAPITAL 1
***************
*** 30,35 ****
--- 31,45 ----
#define FLAG_AUTHOR 32
#define FLAG_LINK_TEXT 64
#define FLAG_URL 128
+
+ // For field-restricted search, at least one of these flags must be set
+ // in document. (255 = OR of the above...)
+ #define FLAGS_MUSTMATCH 255
+
+ // The following are not stored in the database, but are used by WeightWord
+ #define FLAG_EXACT 8192
+ #define FLAG_HIDDEN 16384
+ #define FLAG_IGNORE 32768
// The remainder are undefined
class HtWordReference : public WordReference
diff -cr -x CVS ../cvs/htdig/htsearch/WeightWord.cc ./htsearch/WeightWord.cc
*** ../cvs/htdig/htsearch/WeightWord.cc Sat Feb 2 09:49:35 2002
--- ./htsearch/WeightWord.cc Wed Feb 5 22:54:30 2003
***************
*** 63,68 ****
--- 63,84 ----
this->weight = weight;
}
+ //***************************************************************************
+ // WeightWord::WeightWord(char *word, double weight, unsigned int flags)
+ // Construct from a word, a weight, and FLAG_EXACT/FLAG_HIDDEN/FLAG_IGNORE bits.
+ WeightWord::WeightWord(char *word, double weight, unsigned int flags)
+ {
+ records = 0;
+
+ // ideally, these flags should all just be stored in a uint...
+ isExact = ((flags & FLAG_EXACT) != 0);
+ isHidden = ((flags & FLAG_HIDDEN) != 0);
+ isIgnore = ((flags & FLAG_IGNORE) != 0);
+
+ set(word);
+ this->weight = weight;
+ }
+
//***************************************************************************
// WeightWord::~WeightWord()
diff -cr -x CVS ../cvs/htdig/htsearch/WeightWord.h ./htsearch/WeightWord.h
*** ../cvs/htdig/htsearch/WeightWord.h Sat Feb 2 09:49:35 2002
--- ./htsearch/WeightWord.h Wed Feb 5 22:55:46 2003
***************
*** 19,24 ****
--- 19,25 ----
#include "htString.h"
#include "WordRecord.h"
+ #include "HtWordReference.h" // for FLAG_...
class WeightWord : public Object
{
***************
*** 28,33 ****
--- 29,35 ----
//
WeightWord();
WeightWord(char *word, double weight);
+ WeightWord(char *word, double weight, unsigned int flags);
WeightWord(WeightWord *);
virtual ~WeightWord();
diff -cr -x CVS ../cvs/htdig/htsearch/htsearch.cc ./htsearch/htsearch.cc
*** ../cvs/htdig/htsearch/htsearch.cc Wed Feb 5 22:05:58 2003
--- ./htsearch/htsearch.cc Wed Feb 5 23:41:46 2003
***************
*** 512,517 ****
--- 512,519 ----
unsigned char t;
String word;
const String prefix_suffix = config->Find("prefix_match_character");
+
+ static Dictionary *colonPrefixes = NULL;
while (*pos)
{
while (1)
***************
*** 534,539 ****
--- 536,542 ----
tempWords.Add(new WeightWord(s, -1.0));
break;
}
+ #if 0
else if (HtIsWordChar(t) || t == ':' ||
(strchr(prefix_suffix, t) != NULL) || (t >= 161 && t <= 255))
{
***************
*** 544,549 ****
--- 547,600 ----
word << (char) t;
t = *pos++;
}
+ #else
+ else if (HtIsWordChar(t) ||
+ (strchr(prefix_suffix, t) != NULL) || (t >= 161 && t <= 255))
+ {
+ unsigned int fieldFlag = 0;
+ word = 0;
+ do // while recognised prefix, followed by ':'
+ {
+ while (t && (HtIsWordChar(t) ||
+ (strchr(prefix_suffix, t) != NULL) || (t >= 161 && t <= 255)))
+ {
+ word << (char) t;
+ t = *pos++;
+ }
+ if (t == ':') // e.g. "author:word" to search
+ { // only in author field
+ if (colonPrefixes == NULL)
+ {
+ String val;
+ colonPrefixes = new Dictionary ();
+ val=0;val<<FLAG_TEXT; colonPrefixes->Add("text", new String (val));
+ val=0;val<<FLAG_CAPITAL; colonPrefixes->Add("capital", new String (val));
+ val=0;val<<FLAG_TITLE; colonPrefixes->Add("title", new String (val));
+ val=0;val<<FLAG_HEADING; colonPrefixes->Add("heading", new String (val));
+ val=0;val<<FLAG_KEYWORDS; colonPrefixes->Add("keyword", new String (val));
+ val=0;val<<FLAG_DESCRIPTION;colonPrefixes->Add("descr", new String (val));
+ val=0;val<<FLAG_AUTHOR; colonPrefixes->Add("author", new String (val));
+ val=0;val<<FLAG_LINK_TEXT; colonPrefixes->Add("link", new String (val));
+ val=0;val<<FLAG_URL; colonPrefixes->Add("url", new String (val));
+
+ val=0;val<<FLAG_EXACT; colonPrefixes->Add("exact", new String (val));
+ val=0;val<<FLAG_HIDDEN; colonPrefixes->Add("hidden", new String (val));
+ }
+ word.lowercase();
+ t = *pos++;
+ Object *flag;
+ // if valid word straight after colon, and know prefix
+ if (t && (HtIsWordChar (t) ||
+ (strchr(prefix_suffix, t) != NULL) ||
+ (t >= 161 && t <= 255))
+ && (flag = colonPrefixes->Find (word.get())))
+ {
+ fieldFlag |= ((String*)flag)->as_integer();
+ word = 0;
+ }
+ }
+ } while (!word);
+ #endif
pos--;
if (boolean && (mystrcasecmp(word.get(), "+") == 0
***************
*** 565,571 ****
{
// Add word to excerpt matching list
originalPattern << word << "|";
! WeightWord *ww = new WeightWord(word, 1.0);
if(HtWordNormalize(word) & WORD_NORMALIZE_NOTOK)
ww->isIgnore = 1;
tempWords.Add(ww);
--- 616,622 ----
{
// Add word to excerpt matching list
originalPattern << word << "|";
! WeightWord *ww = new WeightWord(word, 1.0, fieldFlag);
if(HtWordNormalize(word) & WORD_NORMALIZE_NOTOK)
ww->isIgnore = 1;
tempWords.Add(ww);