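This patch should speed up the HtWordType class's inline functions by
replacing their per-call isalnum() + strchr() tests with a 256-entry
lookup table that is filled in once by HtWordType::Initialize(). (That
should help offset the performance penalty of the compound word handling
patch I hope to work on next.)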
--- ../htdig-3.1.2.bak/htlib/HtWordType.h Wed Apr 21 21:47:58 1999
+++ ../htdig-3.1.2/htlib/HtWordType.h Tue Aug 24 13:28:31 1999
@@ -15,8 +15,8 @@
// Inline friend-functions are used together with an all-statics
// class (name that pattern!) to spare the user from having
// to manage the valid_punctuation and extra_word_characters
-// attributes, while in theory still having the runtime
-// performance of strchr() + isalnum().
+// attributes, while in theory still having better runtime
+// performance than strchr() + isalnum().
//
class HtWordType
@@ -40,6 +40,7 @@ private:
char *extra_word_characters; // Likewise.
char *other_chars_in_word; // Attribute "valid_punctuation" plus
// "extra_word_characters".
+ char chrtypes[256]; // quick lookup table for types
} statics;
// These methods are not supposed to be implemented (or accessed).
@@ -48,19 +49,25 @@ private:
void operator=(const HtWordType &);
};
+// Bits to set in chrtypes[]:
+#define HtWt_Alpha 0x01
+#define HtWt_Digit 0x02
+#define HtWt_Extra 0x04
+#define HtWt_ValidPunct 0x08
+
// One for characters that when put together are a word
// (including punctuation).
inline int
HtIsWordChar(int c)
{
- return isalnum(c) || (c && strchr(HtWordType::statics.other_chars_in_word, c));
+ return (HtWordType::statics.chrtypes[(unsigned char)c] &
+(HtWt_Alpha|HtWt_Digit|HtWt_Extra|HtWt_ValidPunct)) != 0;
}
// Similar, but no punctuation characters.
inline int
HtIsStrictWordChar(int c)
{
- return isalnum(c) || (c && strchr(HtWordType::statics.extra_word_characters, c));
+ return (HtWordType::statics.chrtypes[(unsigned char)c] &
+(HtWt_Alpha|HtWt_Digit|HtWt_Extra)) != 0;
}
// Let caller get rid of getting and holding a configuration parameter.
--- ../htdig-3.1.2.bak/htlib/HtWordType.cc Wed Apr 21 21:47:58 1999
+++ ../htdig-3.1.2/htlib/HtWordType.cc Tue Aug 24 13:28:52 1999
@@ -23,4 +23,17 @@ HtWordType::Initialize(Configuration &co
HtWordType::statics.extra_word_characters = extra_word_chars;
HtWordType::statics.valid_punctuation = valid_punct;
HtWordType::statics.other_chars_in_word = punct_and_extra;
+ HtWordType::statics.chrtypes[0] = 0;
+ for (int i = 1; i < 256; i++)
+ {
+ HtWordType::statics.chrtypes[i] = 0;
+ if (isalpha(i))
+ HtWordType::statics.chrtypes[i] |= HtWt_Alpha;
+ if (isdigit(i))
+ HtWordType::statics.chrtypes[i] |= HtWt_Digit;
+ if (strchr(extra_word_chars, i))
+ HtWordType::statics.chrtypes[i] |= HtWt_Extra;
+ if (strchr(valid_punct, i))
+ HtWordType::statics.chrtypes[i] |= HtWt_ValidPunct;
+ }
}
--
Gilles R. Detillieux E-mail: <[EMAIL PROTECTED]>
Spinal Cord Research Centre WWW: http://www.scrc.umanitoba.ca/~grdetil
Dept. Physiology, U. of Manitoba Phone: (204)789-3766
Winnipeg, MB R3E 3J7 (Canada) Fax: (204)789-3930
------------------------------------
To unsubscribe from the htdig mailing list, send a message to
[EMAIL PROTECTED] containing the single word unsubscribe in
the SUBJECT of the message.