This patch should speed up the HtWordType class's inline functions by
replacing the isalnum() + strchr() tests with a single lookup in a
precomputed 256-entry character-type table.  (That should help offset the
performance penalty of the compound word handling patch I hope to work on
next.)

--- ../htdig-3.1.2.bak/htlib/HtWordType.h       Wed Apr 21 21:47:58 1999
+++ ../htdig-3.1.2/htlib/HtWordType.h   Tue Aug 24 13:28:31 1999
@@ -15,8 +15,8 @@
 // Inline friend-functions are used together with an all-statics
 // class (name that pattern!) to spare the user from having
 // to manage the valid_punctuation and extra_word_characters
-// attributes, while in theory still having the runtime
-// performance of strchr() + isalnum().
+// attributes, while in theory still having better runtime
+// performance than strchr() + isalnum().
 //
 
 class HtWordType
@@ -40,6 +40,7 @@ private:
     char *extra_word_characters; // Likewise.
     char *other_chars_in_word;   // Attribute "valid_punctuation" plus
                                  // "extra_word_characters".
+    char chrtypes[256];          // quick lookup table for types
   } statics;
 
   // These methods are not supposed to be implemented (or accessed).
@@ -48,19 +49,25 @@ private:
   void operator=(const HtWordType &);
 };
 
+// Bits to set in chrtypes[]:
+#define HtWt_Alpha     0x01
+#define HtWt_Digit     0x02
+#define HtWt_Extra     0x04
+#define HtWt_ValidPunct        0x08
+
 // One for characters that when put together are a word
 // (including punctuation).
 inline int
 HtIsWordChar(int c)
 {
-  return isalnum(c) || (c && strchr(HtWordType::statics.other_chars_in_word, c));
+  return (HtWordType::statics.chrtypes[(unsigned char)c] & 
+(HtWt_Alpha|HtWt_Digit|HtWt_Extra|HtWt_ValidPunct)) != 0;
 }
 
 // Similar, but no punctuation characters.
 inline int
 HtIsStrictWordChar(int c)
 {
-  return isalnum(c) || (c && strchr(HtWordType::statics.extra_word_characters, c));
+  return (HtWordType::statics.chrtypes[(unsigned char)c] & 
+(HtWt_Alpha|HtWt_Digit|HtWt_Extra)) != 0;
 }
 
 // Let caller get rid of getting and holding a configuration parameter.
--- ../htdig-3.1.2.bak/htlib/HtWordType.cc      Wed Apr 21 21:47:58 1999
+++ ../htdig-3.1.2/htlib/HtWordType.cc  Tue Aug 24 13:28:52 1999
@@ -23,4 +23,17 @@ HtWordType::Initialize(Configuration &co
   HtWordType::statics.extra_word_characters = extra_word_chars;
   HtWordType::statics.valid_punctuation = valid_punct;
   HtWordType::statics.other_chars_in_word = punct_and_extra;
+  HtWordType::statics.chrtypes[0] = 0;
+  for (int i = 1; i < 256; i++)
+  {
+    HtWordType::statics.chrtypes[i] = 0;
+    if (isalpha(i))
+       HtWordType::statics.chrtypes[i] |= HtWt_Alpha;
+    if (isdigit(i))
+       HtWordType::statics.chrtypes[i] |= HtWt_Digit;
+    if (strchr(extra_word_chars, i))
+       HtWordType::statics.chrtypes[i] |= HtWt_Extra;
+    if (strchr(valid_punct, i))
+       HtWordType::statics.chrtypes[i] |= HtWt_ValidPunct;
+  }
 }


-- 
Gilles R. Detillieux              E-mail: <[EMAIL PROTECTED]>
Spinal Cord Research Centre       WWW:    http://www.scrc.umanitoba.ca/~grdetil
Dept. Physiology, U. of Manitoba  Phone:  (204)789-3766
Winnipeg, MB  R3E 3J7  (Canada)   Fax:    (204)789-3930

------------------------------------
To unsubscribe from the htdig mailing list, send a message to
[EMAIL PROTECTED] containing the single word unsubscribe in
the SUBJECT of the message.

Reply via email to