Thanks.

I think a better way would be to have a conversion table for each
language, which would allow:
        
        return conversion_table[atoi(entity + 1)] // for the first patch

        and:

        *position = conversion_table[(unsigned)*position] // for the second one
  
instead of all the if statements. This way the slowdown should be lower.

The problem is that the locales SHOULD include this sort of conversion
table, and functions to deal with it, but AFAIK that's not the case, so
we have to make them ourselves.

What I don't know is what table size we really need and what to put in it.
I think for ISO_8859_1 the first 256 characters suffice, but I don't know
about the others.

(and the search must be case insensitive too).

Jerome ALET - [EMAIL PROTECTED] - http://cortex.unice.fr/~jerome
Faculte de Medecine de Nice - http://noe.unice.fr - Tel: 04 93 37 76 30 
28 Avenue de Valombrose - 06107 NICE Cedex 2 - FRANCE

On Tue, 14 Dec 1999, Iosif Fettich wrote:
> --------------
> The patches are for version 3.1.3, in directory htdig:
> 
> *********************
> SGMLEntities.cc:
> *********************
> 
> 164,183c164
> < //PATCH to make romanian ISO_8859_2 chars fit into plain ASCII//
> <         unsigned char x;
> <         x = atoi (entity + 1);
> <         if (x == 227 || x == 226 || x == 225 ) return 'a';
> <         if (x == 195 || x == 194 || x == 193 ) return 'A';
> <         if (x == 233) return 'e';
> <         if (x == 201) return 'E';
> <         if (x == 238 || x == 237) return 'i';
> <         if (x == 206 || x == 205) return 'I';
> <         if (x == 243 || x == 245 || x == 246) return 'o';
> <         if (x == 211 || x == 213 || x == 214) return 'O';
> <         if (x == 186) return 's';
> <         if (x == 170) return 'S';
> <         if (x == 254) return 't';
> <         if (x == 222) return 'T';
> <         if (x == 250 || x == 251 || x == 252) return 'u';
> <         if (x == 218 || x == 219 || x == 220) return 'U';
> <         return x;
> < //END OF PATCH
> < //  return atoi(entity + 1);
> ---
> >     return atoi(entity + 1);
> 
> 
> ****************
> HTML.cc
> ****************
> 
> 162,184d161
> < 
> < //PATCH to make romanian ISO_8859_2 chars fit into plain ASCII//
> <     start = position;
> <     while (*position)
> <     {
> <         if (*position == 227 || *position == 226 || *position == 225 ) *position = 
>'a';
> <         else if (*position == 195 || *position == 194 || *position == 193 ) 
>*position = 'A';
> <         else if (*position == 233) *position = 'e';
> <         else if (*position == 201) *position = 'E';
> <         else if (*position == 238 || *position == 237) *position = 'i';
> <         else if (*position == 206 || *position == 205) *position = 'I';
> <         else if (*position == 243 || *position == 245 || *position == 246) 
>*position = 'o';
> <         else if (*position == 211 || *position == 213 || *position == 214) 
>*position = 'O';
> <         else if (*position == 186) *position = 's';
> <         else if (*position == 170) *position = 'S';
> <         else if (*position == 254) *position = 't';
> <         else if (*position == 222) *position = 'T';
> <         else if (*position == 250 || *position == 251 || *position == 252) 
>*position = 'u';
> <         else if (*position == 218 || *position == 219 || *position == 220) 
>*position = 'U';
> <         ++position;
> <     }
> <     position = start;
> < //END OF PATCH
> 


------------------------------------
To unsubscribe from the htdig3-dev mailing list, send a message to
[EMAIL PROTECTED] 
You will receive a message to confirm this. 

Reply via email to